Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop

This commit is contained in:
Blaise Tine 2024-02-04 20:18:21 -08:00
commit e06e6646a9
21 changed files with 169 additions and 193 deletions

View file

@ -54,9 +54,9 @@ More detailed build instructions can be found [here](docs/install_vortex.md).
$ git clone --recursive https://github.com/vortexgpgpu/vortex.git
$ cd vortex
### Install prebuilt toolchain
By default, the toolchain will install to /opt folder.
You can install the toolchain to a different directory by overriding TOOLDIR (e.g. export TOOLDIR=$HOME/tools).
By default, the toolchain is installed to the /opt folder, which requires sudo access.
You can install the toolchain to a different location by setting TOOLDIR (e.g. export TOOLDIR=$HOME/tools).
$ export TOOLDIR=/opt
$ ./ci/toolchain_install.sh --all
$ source ./ci/toolchain_env.sh
### Build Vortex sources

View file

@ -136,6 +136,18 @@
`endif
`endif
`ifdef L2_ENABLE
`define L2_LINE_SIZE `MEM_BLOCK_SIZE
`else
`define L2_LINE_SIZE `L1_LINE_SIZE
`endif
`ifdef L3_ENABLE
`define L3_LINE_SIZE `MEM_BLOCK_SIZE
`else
`define L3_LINE_SIZE `L2_LINE_SIZE
`endif
`ifdef XLEN_64
`ifndef STARTUP_ADDR

View file

@ -298,18 +298,6 @@
`define L1_ENABLE
`endif
`ifdef L2_ENABLE
`define L2_LINE_SIZE `MEM_BLOCK_SIZE
`else
`define L2_LINE_SIZE `L1_LINE_SIZE
`endif
`ifdef L3_ENABLE
`define L3_LINE_SIZE `MEM_BLOCK_SIZE
`else
`define L3_LINE_SIZE `L2_LINE_SIZE
`endif
`define VX_MEM_BYTEEN_WIDTH `L3_LINE_SIZE
`define VX_MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH - `CLOG2(`L3_LINE_SIZE))
`define VX_MEM_DATA_WIDTH (`L3_LINE_SIZE * 8)

View file

@ -130,20 +130,20 @@ module VX_cache_bypass #(
assign core_req_valid_in_nc = core_req_valid_in & core_req_nc_idxs;
wire core_req_in_fire = | (core_req_valid_in & core_req_ready_in);
wire core_req_nc_ready = ~mem_req_valid_in && mem_req_ready_out;
VX_generic_arbiter #(
.NUM_REQS (NUM_REQS),
.TYPE (PASSTHRU ? "R" : "P"),
.LOCK_ENABLE (1)
) req_arb (
) core_req_nc_arb (
.clk (clk),
.reset (reset),
.unlock (core_req_in_fire),
.reset (reset),
.requests (core_req_valid_in_nc),
.grant_index (core_req_nc_idx),
.grant_onehot (core_req_nc_sel),
.grant_valid (core_req_nc_valid)
.grant_valid (core_req_nc_valid),
.grant_unlock (core_req_nc_ready)
);
assign core_req_valid_out = core_req_valid_in & ~core_req_nc_idxs;
@ -164,7 +164,7 @@ module VX_cache_bypass #(
end
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_ready_in[i] = core_req_valid_in_nc[i] ? (~mem_req_valid_in && mem_req_ready_out && core_req_nc_sel[i])
assign core_req_ready_in[i] = core_req_valid_in_nc[i] ? (core_req_nc_ready && core_req_nc_sel[i])
: core_req_ready_out[i];
end

View file

@ -47,8 +47,6 @@ module VX_operands import VX_gpu_pkg::*; #(
reg [`NUM_THREADS-1:0] cache_tmask_n [ISSUE_RATIO-1:0];
reg [ISSUE_RATIO-1:0] cache_eop, cache_eop_n;
reg valid_out_r;
reg [DATAW-1:0] data_out_r;
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data, rs1_data_n;
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data, rs2_data_n;
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data, rs3_data_n;
@ -60,7 +58,7 @@ module VX_operands import VX_gpu_pkg::*; #(
reg rs3_ready, rs3_ready_n;
reg data_ready, data_ready_n;
wire ready_out = operands_if[i].ready;
wire stg_valid_in, stg_ready_in;
wire is_rs1_zero = (scoreboard_if[i].data.rs1 == 0);
wire is_rs2_zero = (scoreboard_if[i].data.rs2 == 0);
@ -85,7 +83,7 @@ module VX_operands import VX_gpu_pkg::*; #(
case (state)
STATE_IDLE: begin
if (valid_out_r && ready_out) begin
if (operands_if[i].valid && operands_if[i].ready) begin
data_ready_n = 0;
end
if (scoreboard_if[i].valid && data_ready_n == 0) begin
@ -173,37 +171,15 @@ module VX_operands import VX_gpu_pkg::*; #(
end
always @(posedge clk) begin
if (reset) begin
if (reset) begin
state <= STATE_IDLE;
cache_eop <= {ISSUE_RATIO{1'b1}};
data_ready <= 0;
valid_out_r <= 0;
end else begin
state <= state_n;
cache_eop <= cache_eop_n;
data_ready <= data_ready_n;
if (~valid_out_r) begin
valid_out_r <= scoreboard_if[i].valid && data_ready;
end else if (ready_out) begin
valid_out_r <= 0;
end
data_ready <= data_ready_n;
end
if (~valid_out_r) begin
data_out_r <= {scoreboard_if[i].data.uuid,
scoreboard_if[i].data.wis,
scoreboard_if[i].data.tmask,
scoreboard_if[i].data.PC,
scoreboard_if[i].data.wb,
scoreboard_if[i].data.ex_type,
scoreboard_if[i].data.op_type,
scoreboard_if[i].data.op_mod,
scoreboard_if[i].data.use_PC,
scoreboard_if[i].data.use_imm,
scoreboard_if[i].data.imm,
scoreboard_if[i].data.rd};
end
gpr_rd_rid <= gpr_rd_rid_n;
gpr_rd_wis <= gpr_rd_wis_n;
rs2_ready <= rs2_ready_n;
@ -216,10 +192,35 @@ module VX_operands import VX_gpu_pkg::*; #(
cache_data <= cache_data_n;
cache_reg <= cache_reg_n;
cache_tmask <= cache_tmask_n;
end
end
assign operands_if[i].valid = valid_out_r;
assign {operands_if[i].data.uuid,
assign stg_valid_in = scoreboard_if[i].valid && data_ready;
assign scoreboard_if[i].ready = stg_ready_in && data_ready;
VX_toggle_buffer #(
.DATAW (DATAW)
) staging_buffer (
.clk (clk),
.reset (reset),
.valid_in (stg_valid_in),
.data_in ({
scoreboard_if[i].data.uuid,
scoreboard_if[i].data.wis,
scoreboard_if[i].data.tmask,
scoreboard_if[i].data.PC,
scoreboard_if[i].data.wb,
scoreboard_if[i].data.ex_type,
scoreboard_if[i].data.op_type,
scoreboard_if[i].data.op_mod,
scoreboard_if[i].data.use_PC,
scoreboard_if[i].data.use_imm,
scoreboard_if[i].data.imm,
scoreboard_if[i].data.rd
}),
.ready_in (stg_ready_in),
.valid_out (operands_if[i].valid),
.data_out ({
operands_if[i].data.uuid,
operands_if[i].data.wis,
operands_if[i].data.tmask,
operands_if[i].data.PC,
@ -230,13 +231,15 @@ module VX_operands import VX_gpu_pkg::*; #(
operands_if[i].data.use_PC,
operands_if[i].data.use_imm,
operands_if[i].data.imm,
operands_if[i].data.rd} = data_out_r;
operands_if[i].data.rd
}),
.ready_out (operands_if[i].ready)
);
assign operands_if[i].data.rs1_data = rs1_data;
assign operands_if[i].data.rs2_data = rs2_data;
assign operands_if[i].data.rs3_data = rs3_data;
assign scoreboard_if[i].ready = ~valid_out_r && data_ready;
// GPR banks
reg [RAM_ADDRW-1:0] gpr_rd_addr;

View file

@ -111,7 +111,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
reg [`SFU_WIDTH-1:0] sfu_type;
always @(*) begin
case (scoreboard_if[i].data.op_type)
case (ibuffer_if[i].data.op_type)
`INST_SFU_CSRRW,
`INST_SFU_CSRRS,
`INST_SFU_CSRRC: sfu_type = `SFU_CSRS;
@ -152,51 +152,47 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
assign perf_issue_stalls_per_cycle[i] = ibuffer_if[i].valid && ~ibuffer_if[i].ready;
`endif
reg [DATAW-1:0] data_out_r;
reg valid_out_r;
wire ready_out;
wire [3:0] operands_busy = {inuse_rd, inuse_rs1, inuse_rs2, inuse_rs3};
wire operands_ready = ~(| operands_busy);
wire stg_valid_in, stg_ready_in;
assign stg_valid_in = ibuffer_if[i].valid && operands_ready;
assign ibuffer_if[i].ready = stg_ready_in && operands_ready;
wire [3:0] ready_masks = ~{inuse_rd, inuse_rs1, inuse_rs2, inuse_rs3};
wire deps_ready = (& ready_masks);
wire valid_in = ibuffer_if[i].valid && deps_ready;
wire ready_in = ~valid_out_r && deps_ready;
wire [DATAW-1:0] data_in = ibuffer_if[i].data;
assign ready_out = scoreboard_if[i].ready;
VX_stream_buffer #(
.DATAW (DATAW)
) staging_buffer (
.clk (clk),
.reset (reset),
.valid_in (stg_valid_in),
.data_in (ibuffer_if[i].data),
.ready_in (stg_ready_in),
.valid_out (scoreboard_if[i].valid),
.data_out (scoreboard_if[i].data),
.ready_out (scoreboard_if[i].ready)
);
always @(posedge clk) begin
if (reset) begin
valid_out_r <= 0;
inuse_regs <= '0;
end else begin
if (writeback_fire) begin
inuse_regs[writeback_if[i].data.wis][writeback_if[i].data.rd] <= 0;
end
if (~valid_out_r) begin
valid_out_r <= valid_in;
end else if (ready_out) begin
if (scoreboard_if[i].data.wb) begin
inuse_regs[scoreboard_if[i].data.wis][scoreboard_if[i].data.rd] <= 1;
`ifdef PERF_ENABLE
inuse_units[scoreboard_if[i].data.wis][scoreboard_if[i].data.rd] <= scoreboard_if[i].data.ex_type;
if (scoreboard_if[i].data.ex_type == `EX_SFU) begin
inuse_sfu[scoreboard_if[i].data.wis][scoreboard_if[i].data.rd] <= sfu_type;
end
`endif
end
valid_out_r <= 0;
if (ibuffer_if[i].valid && ibuffer_if[i].ready && ibuffer_if[i].data.wb) begin
inuse_regs[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] <= 1;
end
end
if (~valid_out_r) begin
data_out_r <= data_in;
`ifdef PERF_ENABLE
if (ibuffer_if[i].valid && ibuffer_if[i].ready && ibuffer_if[i].data.wb) begin
inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] <= ibuffer_if[i].data.ex_type;
if (ibuffer_if[i].data.ex_type == `EX_SFU) begin
inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] <= sfu_type;
end
end
`endif
end
assign ibuffer_if[i].ready = ready_in;
assign scoreboard_if[i].valid = valid_out_r;
assign scoreboard_if[i].data = data_out_r;
`ifdef SIMULATION
reg [31:0] timeout_ctr;
@ -208,7 +204,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
`ifdef DBG_TRACE_CORE_PIPELINE
`TRACE(3, ("%d: *** core%0d-scoreboard-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n",
$time, CORE_ID, wis_to_wid(ibuffer_if[i].data.wis, i), ibuffer_if[i].data.PC, ibuffer_if[i].data.tmask, timeout_ctr,
~ready_masks, ibuffer_if[i].data.uuid));
operands_busy, ibuffer_if[i].data.uuid));
`endif
timeout_ctr <= timeout_ctr + 1;
end else if (ibuffer_if[i].valid && ibuffer_if[i].ready) begin
@ -220,7 +216,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
`RUNTIME_ASSERT((timeout_ctr < `STALL_TIMEOUT),
("%t: *** core%0d-scoreboard-timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)",
$time, CORE_ID, wis_to_wid(ibuffer_if[i].data.wis, i), ibuffer_if[i].data.PC, ibuffer_if[i].data.tmask, timeout_ctr,
~ready_masks, ibuffer_if[i].data.uuid));
operands_busy, ibuffer_if[i].data.uuid));
`RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if[i].data.wis][writeback_if[i].data.rd] != 0,
("%t: *** core%0d: invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)",

View file

@ -21,15 +21,12 @@ module VX_cyclic_arbiter #(
) (
input wire clk,
input wire reset,
input wire [NUM_REQS-1:0] requests,
input wire unlock,
input wire [NUM_REQS-1:0] requests,
output wire [LOG_NUM_REQS-1:0] grant_index,
output wire [NUM_REQS-1:0] grant_onehot,
output wire grant_valid
output wire grant_valid,
input wire grant_unlock
);
`UNUSED_PARAM (LOCK_ENABLE)
`UNUSED_VAR (unlock)
if (NUM_REQS == 1) begin
`UNUSED_VAR (clk)
@ -51,7 +48,7 @@ module VX_cyclic_arbiter #(
end else begin
if (!IS_POW2 && grant_index_r == LOG_NUM_REQS'(NUM_REQS-1)) begin
grant_index_r <= '0;
end else begin
end else if (!LOCK_ENABLE || ~grant_valid || grant_unlock) begin
grant_index_r <= grant_index_r + LOG_NUM_REQS'(1);
end
end

View file

@ -21,17 +21,17 @@ module VX_fair_arbiter #(
) (
input wire clk,
input wire reset,
input wire unlock,
input wire [NUM_REQS-1:0] requests,
output wire [LOG_NUM_REQS-1:0] grant_index,
output wire [NUM_REQS-1:0] grant_onehot,
output wire grant_valid
output wire grant_valid,
input wire grant_unlock
);
if (NUM_REQS == 1) begin
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
`UNUSED_VAR (unlock)
`UNUSED_VAR (grant_unlock)
assign grant_index = '0;
assign grant_onehot = requests;
@ -48,18 +48,14 @@ module VX_fair_arbiter #(
always @(posedge clk) begin
if (reset) begin
buffer <= '0;
end else if (!LOCK_ENABLE || unlock) begin
end else if (!LOCK_ENABLE || grant_unlock) begin
buffer <= buffer_n;
end
end
VX_priority_arbiter #(
.NUM_REQS (NUM_REQS),
.LOCK_ENABLE (LOCK_ENABLE)
) priority_arbiter (
.clk (clk),
.reset (reset),
.unlock (unlock),
.requests (requests_qual),
.grant_index (grant_index),
.grant_onehot (grant_onehot),

View file

@ -21,22 +21,23 @@ module VX_generic_arbiter #(
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
) (
input wire clk,
input wire reset,
input wire unlock,
input wire reset,
input wire [NUM_REQS-1:0] requests,
output wire [LOG_NUM_REQS-1:0] grant_index,
output wire [NUM_REQS-1:0] grant_onehot,
output wire grant_valid
output wire grant_valid,
input wire grant_unlock
);
if (TYPE == "P") begin
`UNUSED_PARAM (LOCK_ENABLE)
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
`UNUSED_VAR (grant_unlock)
VX_priority_arbiter #(
.NUM_REQS (NUM_REQS),
.LOCK_ENABLE (LOCK_ENABLE)
.NUM_REQS (NUM_REQS),
) priority_arbiter (
.clk (clk),
.reset (reset),
.unlock (unlock),
.requests (requests),
.grant_valid (grant_valid),
.grant_index (grant_index),
@ -50,12 +51,12 @@ module VX_generic_arbiter #(
.LOCK_ENABLE (LOCK_ENABLE)
) rr_arbiter (
.clk (clk),
.reset (reset),
.unlock (unlock),
.reset (reset),
.requests (requests),
.grant_valid (grant_valid),
.grant_index (grant_index),
.grant_onehot (grant_onehot)
.grant_onehot (grant_onehot),
.grant_unlock (grant_unlock)
);
end else if (TYPE == "F") begin
@ -66,11 +67,11 @@ module VX_generic_arbiter #(
) fair_arbiter (
.clk (clk),
.reset (reset),
.unlock (unlock),
.requests (requests),
.grant_valid (grant_valid),
.grant_index (grant_index),
.grant_onehot (grant_onehot)
.grant_onehot (grant_onehot),
.grant_unlock (grant_unlock)
);
end else if (TYPE == "M") begin
@ -81,11 +82,11 @@ module VX_generic_arbiter #(
) matrix_arbiter (
.clk (clk),
.reset (reset),
.unlock (unlock),
.requests (requests),
.grant_valid (grant_valid),
.grant_index (grant_index),
.grant_onehot (grant_onehot)
.grant_onehot (grant_onehot),
.grant_unlock (grant_unlock)
);
end else if (TYPE == "C") begin
@ -96,11 +97,11 @@ module VX_generic_arbiter #(
) cyclic_arbiter (
.clk (clk),
.reset (reset),
.unlock (unlock),
.requests (requests),
.grant_valid (grant_valid),
.grant_index (grant_index),
.grant_onehot (grant_onehot)
.grant_onehot (grant_onehot),
.grant_unlock (grant_unlock)
);
end else begin

View file

@ -20,18 +20,18 @@ module VX_matrix_arbiter #(
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
) (
input wire clk,
input wire reset,
input wire unlock,
input wire reset,
input wire [NUM_REQS-1:0] requests,
output wire [LOG_NUM_REQS-1:0] grant_index,
output wire [NUM_REQS-1:0] grant_onehot,
output wire grant_valid
output wire grant_valid,
input wire grant_unlock
);
if (NUM_REQS == 1) begin
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
`UNUSED_VAR (unlock)
`UNUSED_VAR (grant_unlock)
assign grant_index = '0;
assign grant_onehot = requests;
@ -71,18 +71,18 @@ module VX_matrix_arbiter #(
end
if (LOCK_ENABLE == 0) begin
`UNUSED_VAR (unlock)
`UNUSED_VAR (grant_unlock)
assign grant_onehot = grant_unqual;
end else begin
reg [NUM_REQS-1:0] grant_unqual_prev;
always @(posedge clk) begin
if (reset) begin
grant_unqual_prev <= '0;
end else if (unlock) begin
end else if (grant_unlock) begin
grant_unqual_prev <= grant_unqual;
end
end
assign grant_onehot = unlock ? grant_unqual : grant_unqual_prev;
assign grant_onehot = grant_unlock ? grant_unqual : grant_unqual_prev;
end
VX_onehot_encoder #(

View file

@ -21,7 +21,7 @@ module VX_mem_rsp_sel #(
parameter TAG_SEL_BITS = 0,
parameter OUT_REG = 0
) (
input wire clk,
input wire clk,
input wire reset,
// input response
@ -46,18 +46,20 @@ input wire clk,
wire [LOG_NUM_REQS-1:0] grant_index;
wire grant_valid;
wire rsp_fire;
wire grant_ready;
VX_priority_arbiter #(
.NUM_REQS (NUM_REQS)
VX_generic_arbiter #(
.NUM_REQS (NUM_REQS),
.LOCK_ENABLE (1),
.TYPE ("P")
) arbiter (
.clk (clk),
.reset (reset),
.unlock (rsp_fire),
.requests (rsp_valid_in),
.grant_valid (grant_valid),
.grant_index (grant_index),
`UNUSED_PIN (grant_onehot)
`UNUSED_PIN (grant_onehot),
.grant_unlock(grant_ready)
);
reg [NUM_REQS-1:0] rsp_valid_sel;
@ -78,7 +80,7 @@ input wire clk,
end
end
assign rsp_fire = grant_valid && rsp_ready_unqual;
assign grant_ready = rsp_ready_unqual;
VX_elastic_buffer #(
.DATAW (NUM_REQS + TAG_WIDTH + (NUM_REQS * DATA_WIDTH)),

View file

@ -16,22 +16,13 @@
`TRACING_OFF
module VX_priority_arbiter #(
parameter NUM_REQS = 1,
parameter LOCK_ENABLE = 0,
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
) (
input wire clk,
input wire reset,
input wire [NUM_REQS-1:0] requests,
input wire unlock,
input wire [NUM_REQS-1:0] requests,
output wire [LOG_NUM_REQS-1:0] grant_index,
output wire [NUM_REQS-1:0] grant_onehot,
output wire [NUM_REQS-1:0] grant_onehot,
output wire grant_valid
);
`UNUSED_PARAM (LOCK_ENABLE)
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
`UNUSED_VAR (unlock)
if (NUM_REQS == 1) begin
assign grant_index = '0;

View file

@ -21,18 +21,18 @@ module VX_rr_arbiter #(
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
) (
input wire clk,
input wire reset,
input wire unlock,
input wire reset,
input wire [NUM_REQS-1:0] requests,
output wire [LOG_NUM_REQS-1:0] grant_index,
output wire [NUM_REQS-1:0] grant_onehot,
output wire grant_valid
output wire grant_valid,
input wire grant_unlock
);
if (NUM_REQS == 1) begin
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
`UNUSED_VAR (unlock)
`UNUSED_VAR (grant_unlock)
assign grant_index = '0;
assign grant_onehot = requests;
@ -55,7 +55,7 @@ module VX_rr_arbiter #(
always @(posedge clk) begin
if (reset) begin
state <= '0;
end else if (!LOCK_ENABLE || unlock) begin
end else if (!LOCK_ENABLE || grant_unlock) begin
state <= grant_index_r;
end
end
@ -85,7 +85,7 @@ module VX_rr_arbiter #(
always @(posedge clk) begin
if (reset) begin
state <= '0;
end else if (!LOCK_ENABLE || unlock) begin
end else if (!LOCK_ENABLE || grant_unlock) begin
state <= grant_index_r;
end
end
@ -121,7 +121,7 @@ module VX_rr_arbiter #(
always @(posedge clk) begin
if (reset) begin
state <= '0;
end else if (!LOCK_ENABLE || unlock) begin
end else if (!LOCK_ENABLE || grant_unlock) begin
state <= grant_index_r;
end
end
@ -165,7 +165,7 @@ module VX_rr_arbiter #(
always @(posedge clk) begin
if (reset) begin
state <= '0;
end else if (!LOCK_ENABLE || unlock) begin
end else if (!LOCK_ENABLE || grant_unlock) begin
state <= grant_index_r;
end
end
@ -219,7 +219,7 @@ module VX_rr_arbiter #(
always @(posedge clk) begin
if (reset) begin
state <= '0;
end else if (!LOCK_ENABLE || unlock) begin
end else if (!LOCK_ENABLE || grant_unlock) begin
state <= grant_index_r;
end
end
@ -285,7 +285,7 @@ module VX_rr_arbiter #(
always @(posedge clk) begin
if (reset) begin
state <= '0;
end else if (!LOCK_ENABLE || unlock) begin
end else if (!LOCK_ENABLE || grant_unlock) begin
state <= grant_index_r;
end
end
@ -365,7 +365,7 @@ module VX_rr_arbiter #(
always @(posedge clk) begin
if (reset) begin
state <= '0;
end else if (!LOCK_ENABLE || unlock) begin
end else if (!LOCK_ENABLE || grant_unlock) begin
state <= grant_index_r;
end
end
@ -399,7 +399,7 @@ module VX_rr_arbiter #(
always @(posedge clk) begin
if (reset) begin
pointer_reg <= {NUM_REQS{1'b1}};
end else if (!LOCK_ENABLE || unlock) begin
end else if (!LOCK_ENABLE || grant_unlock) begin
if (|req_masked) begin
pointer_reg <= mask_higher_pri_regs;
end else if (|requests) begin
@ -443,7 +443,7 @@ module VX_rr_arbiter #(
always @(posedge clk) begin
if (reset) begin
state <= '0;
end else if (!LOCK_ENABLE || unlock) begin
end else if (!LOCK_ENABLE || grant_unlock) begin
state <= grant_index_r;
end
end

View file

@ -19,7 +19,6 @@ module VX_stream_arb #(
parameter NUM_OUTPUTS = 1,
parameter DATAW = 1,
parameter `STRING ARBITER = "P",
parameter LOCK_ENABLE = 1,
parameter MAX_FANOUT = `MAX_FANOUT,
parameter OUT_REG = 0 ,
parameter NUM_REQS = (NUM_INPUTS + NUM_OUTPUTS - 1) / NUM_OUTPUTS,
@ -57,7 +56,6 @@ module VX_stream_arb #(
.NUM_OUTPUTS (1),
.DATAW (DATAW),
.ARBITER (ARBITER),
.LOCK_ENABLE (LOCK_ENABLE),
.MAX_FANOUT (MAX_FANOUT),
.OUT_REG (OUT_REG)
) arb_slice (
@ -102,7 +100,6 @@ module VX_stream_arb #(
.NUM_OUTPUTS (1),
.DATAW (DATAW),
.ARBITER (ARBITER),
.LOCK_ENABLE (LOCK_ENABLE),
.MAX_FANOUT (MAX_FANOUT),
.OUT_REG (OUT_REG)
) fanout_slice_arb (
@ -129,7 +126,6 @@ module VX_stream_arb #(
.NUM_OUTPUTS (1),
.DATAW (DATAW + LOG_NUM_REQS2),
.ARBITER (ARBITER),
.LOCK_ENABLE (LOCK_ENABLE),
.MAX_FANOUT (MAX_FANOUT),
.OUT_REG (OUT_REG)
) fanout_join_arb (
@ -158,25 +154,25 @@ module VX_stream_arb #(
wire arb_valid;
wire [NUM_REQS_W-1:0] arb_index;
wire [NUM_REQS-1:0] arb_onehot;
wire arb_unlock;
wire arb_ready;
VX_generic_arbiter #(
.NUM_REQS (NUM_REQS),
.LOCK_ENABLE (LOCK_ENABLE),
.LOCK_ENABLE (1),
.TYPE (ARBITER)
) arbiter (
.clk (clk),
.reset (reset),
.requests (valid_in),
.unlock (arb_unlock),
.grant_valid (arb_valid),
.grant_index (arb_index),
.grant_onehot (arb_onehot)
.grant_onehot (arb_onehot),
.grant_unlock (arb_ready)
);
assign valid_in_r = arb_valid;
assign data_in_r = data_in[arb_index];
assign arb_unlock = | (valid_in_r & ready_in_r);
assign arb_ready = ready_in_r;
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign ready_in[i] = ready_in_r & arb_onehot[i];
@ -217,7 +213,6 @@ module VX_stream_arb #(
.NUM_OUTPUTS (BATCH_SIZE),
.DATAW (DATAW),
.ARBITER (ARBITER),
.LOCK_ENABLE (LOCK_ENABLE),
.MAX_FANOUT (MAX_FANOUT),
.OUT_REG (OUT_REG)
) arb_slice (
@ -252,7 +247,6 @@ module VX_stream_arb #(
.NUM_OUTPUTS (NUM_BATCHES),
.DATAW (DATAW),
.ARBITER (ARBITER),
.LOCK_ENABLE (LOCK_ENABLE),
.MAX_FANOUT (MAX_FANOUT),
.OUT_REG (OUT_REG)
) fanout_fork_arb (
@ -280,7 +274,6 @@ module VX_stream_arb #(
.NUM_OUTPUTS (BATCH_SIZE),
.DATAW (DATAW),
.ARBITER (ARBITER),
.LOCK_ENABLE (LOCK_ENABLE),
.MAX_FANOUT (MAX_FANOUT),
.OUT_REG (OUT_REG)
) fanout_slice_arb (
@ -305,24 +298,24 @@ module VX_stream_arb #(
wire [NUM_OUTPUTS-1:0] arb_requests;
wire arb_valid;
wire [NUM_OUTPUTS-1:0] arb_onehot;
wire arb_unlock;
wire arb_ready;
VX_generic_arbiter #(
.NUM_REQS (NUM_OUTPUTS),
.LOCK_ENABLE (LOCK_ENABLE),
.LOCK_ENABLE (1),
.TYPE (ARBITER)
) arbiter (
.clk (clk),
.reset (reset),
.requests (arb_requests),
.unlock (arb_unlock),
.grant_valid (arb_valid),
`UNUSED_PIN (grant_index),
.grant_onehot (arb_onehot)
.grant_onehot (arb_onehot),
.grant_unlock (arb_ready)
);
assign arb_requests = ready_in_r;
assign arb_unlock = | (valid_in & ready_in);
assign arb_ready = valid_in[0];
assign ready_in = arb_valid;
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin

View file

@ -21,8 +21,7 @@ module VX_stream_xbar #(
parameter IN_WIDTH = `LOG2UP(NUM_INPUTS),
parameter OUT_WIDTH = `LOG2UP(NUM_OUTPUTS),
parameter ARBITER = "P",
parameter LOCK_ENABLE = 0,
parameter OUT_REG = 0,
parameter OUT_REG = 0,
parameter MAX_FANOUT = `MAX_FANOUT,
parameter PERF_CTR_BITS = `CLOG2(NUM_INPUTS+1)
) (
@ -66,7 +65,6 @@ module VX_stream_xbar #(
.NUM_OUTPUTS (1),
.DATAW (DATAW),
.ARBITER (ARBITER),
.LOCK_ENABLE (LOCK_ENABLE),
.MAX_FANOUT (MAX_FANOUT),
.OUT_REG (OUT_REG)
) xbar_arb (
@ -95,7 +93,6 @@ module VX_stream_xbar #(
.NUM_OUTPUTS (1),
.DATAW (DATAW),
.ARBITER (ARBITER),
.LOCK_ENABLE (LOCK_ENABLE),
.MAX_FANOUT (MAX_FANOUT),
.OUT_REG (OUT_REG)
) xbar_arb (

View file

@ -62,10 +62,10 @@ Cluster::Cluster(const SimContext& ctx,
snprintf(sname, 100, "cluster%d-l2cache", cluster_id);
l2cache_ = CacheSim::Create(sname, CacheSim::Config{
!L2_ENABLED,
log2ceil(L2_CACHE_SIZE), // C
log2ceil(MEM_BLOCK_SIZE), // L
log2ceil(L2_NUM_WAYS), // W
0, // A
log2ceil(L2_CACHE_SIZE),// C
log2ceil(MEM_BLOCK_SIZE),// L
log2ceil(L1_LINE_SIZE), // W
log2ceil(L2_NUM_WAYS), // A
log2ceil(L2_NUM_BANKS), // B
XLEN, // address bits
1, // number of ports

View file

@ -210,7 +210,7 @@ void Core::schedule() {
void Core::fetch() {
perf_stats_.ifetch_latency += pending_ifetches_;
// handle icache reponse
// handle icache response
auto& icache_rsp_port = icache_rsp_ports.at(0);
if (!icache_rsp_port.empty()){
auto& mem_rsp = icache_rsp_port.front();

View file

@ -339,7 +339,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
break;
}
case 1: {
// RV64I: SLLI
// RV32I: SLLI
rddata[t].i = rsdata[t][0].i << immsrc;
break;
}
@ -360,11 +360,11 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
}
case 5: {
if (func7) {
// RV64I: SRAI
// RV32I: SRAI
Word result = rsdata[t][0].i >> immsrc;
rddata[t].i = result;
} else {
// RV64I: SRLI
// RV32I: SRLI
Word result = rsdata[t][0].u >> immsrc;
rddata[t].i = result;
}

View file

@ -34,7 +34,7 @@ static void show_usage() {
uint32_t num_threads = NUM_THREADS;
uint32_t num_warps = NUM_WARPS;
uint32_t num_cores = NUM_CORES;
bool showStats = false;;
bool showStats = false;
bool riscv_test = false;
const char* program = nullptr;

View file

@ -33,8 +33,8 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
!L3_ENABLED,
log2ceil(L3_CACHE_SIZE), // C
log2ceil(MEM_BLOCK_SIZE), // L
log2ceil(L3_NUM_WAYS), // W
0, // A
log2ceil(L2_LINE_SIZE), // W
log2ceil(L3_NUM_WAYS), // A
log2ceil(L3_NUM_BANKS), // B
XLEN, // address bits
1, // number of ports
@ -58,7 +58,7 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
l3cache_->CoreRspPorts.at(i).bind(&clusters_.at(i)->mem_rsp_port);
}
// set up memory perf recording
// set up memory profiling
memsim_->MemReqPort.tx_callback([&](const MemReq& req, uint64_t cycle){
__unused (cycle);
perf_mem_reads_ += !req.write;

View file

@ -44,7 +44,7 @@ Socket::Socket(const SimContext& ctx,
XLEN, // address bits
1, // number of ports
1, // number of inputs
true, // write-through
false, // write-through
false, // write response
(uint8_t)arch.num_warps(), // mshr
2, // pipeline latency