mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
minor update
This commit is contained in:
parent
e53b295eea
commit
29c5a28273
3 changed files with 127 additions and 45 deletions
|
@ -44,7 +44,8 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
|
||||
localparam PER_BANK_REGS = `NUM_REGS / NUM_BANKS;
|
||||
localparam META_DATAW = ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS + `UUID_WIDTH;
|
||||
localparam DATAW = META_DATAW + 3 * `NUM_THREADS * `XLEN;
|
||||
localparam REGS_DATAW = NUM_SRC_REGS * `NUM_THREADS * `XLEN;
|
||||
localparam DATAW = META_DATAW + REGS_DATAW;
|
||||
localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * PER_ISSUE_WARPS);
|
||||
localparam PER_BANK_ADDRW = RAM_ADDRW - BANK_SEL_BITS;
|
||||
localparam XLEN_SIZE = `XLEN / 8;
|
||||
|
@ -69,10 +70,12 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
wire pipe_in_ready;
|
||||
reg pipe_out_valid;
|
||||
wire pipe_out_ready;
|
||||
reg [META_DATAW-1:0] pipe_out_data;
|
||||
reg [META_DATAW-1:0] pipe_out_data, pipe_out_data_n;
|
||||
|
||||
reg [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data, src_data_n;
|
||||
reg [NUM_SRC_REGS-1:0] data_fetched;
|
||||
wire reg [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_n2;
|
||||
|
||||
reg [NUM_SRC_REGS-1:0] data_fetched, data_fetched_n;
|
||||
reg has_collision, has_collision_n;
|
||||
|
||||
wire stg_in_valid, stg_in_ready;
|
||||
|
@ -134,6 +137,15 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
data_fetched_n = data_fetched;
|
||||
if (scoreboard_if.ready) begin
|
||||
data_fetched_n = '0;
|
||||
end else begin
|
||||
data_fetched_n = data_fetched | req_in_ready;
|
||||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
src_data_n = src_data;
|
||||
for (integer b = 0; b < NUM_BANKS; ++b) begin
|
||||
|
@ -143,6 +155,18 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
|
||||
assign pipe_out_data_n = {
|
||||
scoreboard_if.data.wis,
|
||||
scoreboard_if.data.tmask,
|
||||
scoreboard_if.data.PC,
|
||||
scoreboard_if.data.wb,
|
||||
scoreboard_if.data.ex_type,
|
||||
scoreboard_if.data.op_type,
|
||||
scoreboard_if.data.op_args,
|
||||
scoreboard_if.data.rd,
|
||||
scoreboard_if.data.uuid
|
||||
};
|
||||
|
||||
wire pipe_stall = pipe_out_valid && ~pipe_out_ready;
|
||||
assign pipe_in_ready = ~pipe_stall;
|
||||
|
||||
|
@ -150,45 +174,18 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
|
||||
wire stg_in_fire = stg_in_valid && stg_in_ready;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
pipe_out_valid <= 0;
|
||||
gpr_rd_valid <= '0;
|
||||
data_fetched <= '0;
|
||||
src_data <= '0;
|
||||
end else begin
|
||||
if (~pipe_stall) begin
|
||||
pipe_out_valid <= scoreboard_if.valid;
|
||||
gpr_rd_valid <= gpr_rd_valid_n;
|
||||
if (scoreboard_if.ready) begin
|
||||
data_fetched <= '0;
|
||||
end else begin
|
||||
data_fetched <= data_fetched | req_in_ready;
|
||||
end
|
||||
if (stg_in_fire) begin
|
||||
src_data <= '0;
|
||||
end else begin
|
||||
src_data <= src_data_n;
|
||||
end
|
||||
end
|
||||
end
|
||||
if (~pipe_stall) begin
|
||||
pipe_out_data <= {
|
||||
scoreboard_if.data.wis,
|
||||
scoreboard_if.data.tmask,
|
||||
scoreboard_if.data.PC,
|
||||
scoreboard_if.data.wb,
|
||||
scoreboard_if.data.ex_type,
|
||||
scoreboard_if.data.op_type,
|
||||
scoreboard_if.data.op_args,
|
||||
scoreboard_if.data.rd,
|
||||
scoreboard_if.data.uuid
|
||||
};
|
||||
has_collision <= has_collision_n;
|
||||
gpr_rd_addr <= gpr_rd_addr_n;
|
||||
gpr_rd_req_idx <= gpr_rd_req_idx_n;
|
||||
end
|
||||
end
|
||||
assign src_data_n2 = stg_in_fire ? '0 : src_data_n;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + NUM_BANKS + NUM_SRC_REGS + REGS_DATAW + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH)),
|
||||
.RESETW (1 + NUM_BANKS + NUM_SRC_REGS + REGS_DATAW)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~pipe_stall),
|
||||
.data_in ({scoreboard_if.valid, gpr_rd_valid_n, data_fetched_n, src_data_n2, pipe_out_data_n, has_collision_n, gpr_rd_addr_n, gpr_rd_req_idx_n}),
|
||||
.data_out ({pipe_out_valid, gpr_rd_valid, data_fetched, src_data, pipe_out_data, has_collision, gpr_rd_addr, gpr_rd_req_idx})
|
||||
);
|
||||
|
||||
assign pipe_out_ready = stg_in_ready;
|
||||
assign stg_in_valid = pipe_out_valid && ~has_collision;
|
||||
|
|
|
@ -17,7 +17,8 @@
|
|||
module VX_onehot_mux #(
|
||||
parameter DATAW = 1,
|
||||
parameter N = 1,
|
||||
parameter MODEL = 1
|
||||
parameter MODEL = 1,
|
||||
parameter LUT_OPT = 0
|
||||
) (
|
||||
input wire [N-1:0][DATAW-1:0] data_in,
|
||||
input wire [N-1:0] sel_in,
|
||||
|
@ -26,6 +27,90 @@ module VX_onehot_mux #(
|
|||
if (N == 1) begin
|
||||
`UNUSED_VAR (sel_in)
|
||||
assign data_out = data_in;
|
||||
end else if (LUT_OPT && N == 2) begin
|
||||
`UNUSED_VAR (sel_in)
|
||||
assign data_out = sel_in[0] ? data_in[0] : data_in[1];
|
||||
end else if (LUT_OPT && N == 3) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
3'b001: data_out_r = data_in[0];
|
||||
3'b010: data_out_r = data_in[1];
|
||||
3'b100: data_out_r = data_in[2];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (LUT_OPT && N == 4) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
4'b0001: data_out_r = data_in[0];
|
||||
4'b0010: data_out_r = data_in[1];
|
||||
4'b0100: data_out_r = data_in[2];
|
||||
4'b1000: data_out_r = data_in[3];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (LUT_OPT && N == 5) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
5'b00001: data_out_r = data_in[0];
|
||||
5'b00010: data_out_r = data_in[1];
|
||||
5'b00100: data_out_r = data_in[2];
|
||||
5'b01000: data_out_r = data_in[3];
|
||||
5'b10000: data_out_r = data_in[4];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (LUT_OPT && N == 6) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
6'b000001: data_out_r = data_in[0];
|
||||
6'b000010: data_out_r = data_in[1];
|
||||
6'b000100: data_out_r = data_in[2];
|
||||
6'b001000: data_out_r = data_in[3];
|
||||
6'b010000: data_out_r = data_in[4];
|
||||
6'b100000: data_out_r = data_in[5];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (LUT_OPT && N == 7) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
7'b0000001: data_out_r = data_in[0];
|
||||
7'b0000010: data_out_r = data_in[1];
|
||||
7'b0000100: data_out_r = data_in[2];
|
||||
7'b0001000: data_out_r = data_in[3];
|
||||
7'b0010000: data_out_r = data_in[4];
|
||||
7'b0100000: data_out_r = data_in[5];
|
||||
7'b1000000: data_out_r = data_in[6];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (LUT_OPT && N == 8) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
8'b00000001: data_out_r = data_in[0];
|
||||
8'b00000010: data_out_r = data_in[1];
|
||||
8'b00000100: data_out_r = data_in[2];
|
||||
8'b00001000: data_out_r = data_in[3];
|
||||
8'b00010000: data_out_r = data_in[4];
|
||||
8'b00100000: data_out_r = data_in[5];
|
||||
8'b01000000: data_out_r = data_in[6];
|
||||
8'b10000000: data_out_r = data_in[7];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (MODEL == 1) begin
|
||||
wire [N-1:0][DATAW-1:0] mask;
|
||||
for (genvar i = 0; i < N; ++i) begin
|
||||
|
|
|
@ -314,7 +314,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
|
|||
if (num_cores > 1) {
|
||||
uint64_t scrb_total = scrb_alu_per_core + scrb_fpu_per_core + scrb_lsu_per_core + scrb_csrs_per_core + scrb_wctl_per_core;
|
||||
int scrb_percent_per_core = calcAvgPercent(scrb_stalls_per_core, cycles_per_core);
|
||||
fprintf(stream, "PERF: core%d: scoreboard stalls=%ld (%d%%) (alu=%d%%, fpu=%d%%, lsu=%d%%, scrs=%d%%, wctl=%d%%)\n"
|
||||
fprintf(stream, "PERF: core%d: scoreboard stalls=%ld (%d%%) (alu=%d%%, fpu=%d%%, lsu=%d%%, csrs=%d%%, wctl=%d%%)\n"
|
||||
, core_id
|
||||
, scrb_stalls_per_core
|
||||
, scrb_percent_per_core
|
||||
|
@ -559,7 +559,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
|
|||
fprintf(stream, "PERF: scheduler idle=%ld (%d%%)\n", sched_idles, sched_idles_percent);
|
||||
fprintf(stream, "PERF: scheduler stalls=%ld (%d%%)\n", sched_stalls, sched_stalls_percent);
|
||||
fprintf(stream, "PERF: ibuffer stalls=%ld (%d%%)\n", ibuffer_stalls, ibuffer_percent);
|
||||
fprintf(stream, "PERF: scoreboard stalls=%ld (%d%%) (alu=%d%%, fpu=%d%%, lsu=%d%%, scrs=%d%%, wctl=%d%%)\n"
|
||||
fprintf(stream, "PERF: scoreboard stalls=%ld (%d%%) (alu=%d%%, fpu=%d%%, lsu=%d%%, csrs=%d%%, wctl=%d%%)\n"
|
||||
, scrb_stalls
|
||||
, scrb_percent
|
||||
, calcAvgPercent(scrb_alu, scrb_total)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue