mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop
This commit is contained in:
commit
7afc557ba8
5 changed files with 42 additions and 40 deletions
|
@ -194,10 +194,14 @@
|
|||
`ifndef FPU_FPNEW
|
||||
`ifndef FPU_DSP
|
||||
`ifndef FPU_DPI
|
||||
`ifdef SYNTHESIS
|
||||
`define FPU_DSP
|
||||
`else
|
||||
`ifndef SYNTHESIS
|
||||
`ifndef DPI_DISABLE
|
||||
`define FPU_DPI
|
||||
`else
|
||||
`define FPU_DSP
|
||||
`endif
|
||||
`else
|
||||
`define FPU_DSP
|
||||
`endif
|
||||
`endif
|
||||
`endif
|
||||
|
|
19
hw/rtl/cache/VX_cache.sv
vendored
19
hw/rtl/cache/VX_cache.sv
vendored
|
@ -530,14 +530,17 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle;
|
||||
wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle;
|
||||
|
||||
wire [NUM_REQS-1:0] perf_core_reads_per_req = core_req_valid & core_req_ready & ~core_req_rw;
|
||||
wire [NUM_REQS-1:0] perf_core_writes_per_req = core_req_valid & core_req_ready & core_req_rw;
|
||||
wire [NUM_REQS-1:0] perf_core_reads_per_req;
|
||||
wire [NUM_REQS-1:0] perf_core_writes_per_req;
|
||||
|
||||
// per cycle: read misses, write misses, msrq stalls, pipeline stalls
|
||||
wire [`CLOG2(NUM_BANKS+1)-1:0] perf_read_miss_per_cycle;
|
||||
wire [`CLOG2(NUM_BANKS+1)-1:0] perf_write_miss_per_cycle;
|
||||
wire [`CLOG2(NUM_BANKS+1)-1:0] perf_mshr_stall_per_cycle;
|
||||
wire [`CLOG2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle;
|
||||
|
||||
`BUFFER(perf_core_reads_per_req, core_req_valid & core_req_ready & ~core_req_rw);
|
||||
`BUFFER(perf_core_writes_per_req, core_req_valid & core_req_ready & core_req_rw);
|
||||
|
||||
`POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_req);
|
||||
`POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_req);
|
||||
|
@ -560,13 +563,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
reg [`PERF_CTR_BITS-1:0] perf_write_misses;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mshr_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;
|
||||
|
||||
wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle_r;
|
||||
wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle_r;
|
||||
|
||||
`BUFFER(perf_core_reads_per_cycle_r, perf_core_reads_per_cycle);
|
||||
`BUFFER(perf_core_writes_per_cycle_r, perf_core_writes_per_cycle);
|
||||
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
@ -578,8 +575,8 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
perf_mem_stalls <= '0;
|
||||
perf_crsp_stalls <= '0;
|
||||
end else begin
|
||||
perf_core_reads <= perf_core_reads + `PERF_CTR_BITS'(perf_core_reads_per_cycle_r);
|
||||
perf_core_writes <= perf_core_writes + `PERF_CTR_BITS'(perf_core_writes_per_cycle_r);
|
||||
perf_core_reads <= perf_core_reads + `PERF_CTR_BITS'(perf_core_reads_per_cycle);
|
||||
perf_core_writes <= perf_core_writes + `PERF_CTR_BITS'(perf_core_writes_per_cycle);
|
||||
perf_read_misses <= perf_read_misses + `PERF_CTR_BITS'(perf_read_miss_per_cycle);
|
||||
perf_write_misses <= perf_write_misses + `PERF_CTR_BITS'(perf_write_miss_per_cycle);
|
||||
perf_mshr_stalls <= perf_mshr_stalls + `PERF_CTR_BITS'(perf_mshr_stall_per_cycle);
|
||||
|
|
|
@ -266,8 +266,8 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
|
||||
`ifdef PERF_ENABLE
|
||||
|
||||
wire [`CLOG2(DCACHE_NUM_REQS+1)-1:0] perf_dcache_rd_req_per_cycle, perf_dcache_rd_req_per_cycle_r;
|
||||
wire [`CLOG2(DCACHE_NUM_REQS+1)-1:0] perf_dcache_wr_req_per_cycle, perf_dcache_wr_req_per_cycle_r;
|
||||
wire [`CLOG2(DCACHE_NUM_REQS+1)-1:0] perf_dcache_rd_req_per_cycle;
|
||||
wire [`CLOG2(DCACHE_NUM_REQS+1)-1:0] perf_dcache_wr_req_per_cycle;
|
||||
wire [`CLOG2(DCACHE_NUM_REQS+1)-1:0] perf_dcache_rsp_per_cycle;
|
||||
|
||||
wire [1:0] perf_icache_pending_read_cycle;
|
||||
|
@ -283,7 +283,9 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
wire perf_icache_req_fire = icache_bus_if.req_valid & icache_bus_if.req_ready;
|
||||
wire perf_icache_rsp_fire = icache_bus_if.rsp_valid & icache_bus_if.rsp_ready;
|
||||
|
||||
wire [DCACHE_NUM_REQS-1:0] perf_dcache_rd_req_fire, perf_dcache_wr_req_fire, perf_dcache_rsp_fire;
|
||||
wire [DCACHE_NUM_REQS-1:0] perf_dcache_rd_req_fire, perf_dcache_rd_req_fire_r;
|
||||
wire [DCACHE_NUM_REQS-1:0] perf_dcache_wr_req_fire, perf_dcache_wr_req_fire_r;
|
||||
wire [DCACHE_NUM_REQS-1:0] perf_dcache_rsp_fire;
|
||||
|
||||
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
|
||||
assign perf_dcache_rd_req_fire[i] = dcache_bus_if[i].req_valid && ~dcache_bus_if[i].req_data.rw && dcache_bus_if[i].req_ready;
|
||||
|
@ -291,15 +293,15 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
assign perf_dcache_rsp_fire[i] = dcache_bus_if[i].rsp_valid && dcache_bus_if[i].rsp_ready;
|
||||
end
|
||||
|
||||
`POP_COUNT(perf_dcache_rd_req_per_cycle, perf_dcache_rd_req_fire);
|
||||
`POP_COUNT(perf_dcache_wr_req_per_cycle, perf_dcache_wr_req_fire);
|
||||
`POP_COUNT(perf_dcache_rsp_per_cycle, perf_dcache_rsp_fire);
|
||||
`BUFFER(perf_dcache_rd_req_fire_r, perf_dcache_rd_req_fire);
|
||||
`BUFFER(perf_dcache_wr_req_fire_r, perf_dcache_wr_req_fire);
|
||||
|
||||
`BUFFER(perf_dcache_rd_req_per_cycle_r, perf_dcache_rd_req_per_cycle);
|
||||
`BUFFER(perf_dcache_wr_req_per_cycle_r, perf_dcache_wr_req_per_cycle);
|
||||
`POP_COUNT(perf_dcache_rd_req_per_cycle, perf_dcache_rd_req_fire_r);
|
||||
`POP_COUNT(perf_dcache_wr_req_per_cycle, perf_dcache_wr_req_fire_r);
|
||||
`POP_COUNT(perf_dcache_rsp_per_cycle, perf_dcache_rsp_fire);
|
||||
|
||||
assign perf_icache_pending_read_cycle = perf_icache_req_fire - perf_icache_rsp_fire;
|
||||
assign perf_dcache_pending_read_cycle = perf_dcache_rd_req_per_cycle_r - perf_dcache_rsp_per_cycle;
|
||||
assign perf_dcache_pending_read_cycle = perf_dcache_rd_req_per_cycle - perf_dcache_rsp_per_cycle;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
@ -323,8 +325,8 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
perf_dcache_lat <= '0;
|
||||
end else begin
|
||||
perf_ifetches <= perf_ifetches + `PERF_CTR_BITS'(perf_icache_req_fire);
|
||||
perf_loads <= perf_loads + `PERF_CTR_BITS'(perf_dcache_rd_req_per_cycle_r);
|
||||
perf_stores <= perf_stores + `PERF_CTR_BITS'(perf_dcache_wr_req_per_cycle_r);
|
||||
perf_loads <= perf_loads + `PERF_CTR_BITS'(perf_dcache_rd_req_per_cycle);
|
||||
perf_stores <= perf_stores + `PERF_CTR_BITS'(perf_dcache_wr_req_per_cycle);
|
||||
perf_icache_lat <= perf_icache_lat + perf_icache_pending_reads;
|
||||
perf_dcache_lat <= perf_dcache_lat + perf_dcache_pending_reads;
|
||||
end
|
||||
|
|
|
@ -176,8 +176,9 @@ module VX_stream_xbar #(
|
|||
// we have a collision when there exists a valid transfer with multiple input candicates
|
||||
// we count the unique duplicates each cycle.
|
||||
|
||||
reg [NUM_INPUTS-1:0] per_cycle_collision, per_cycle_collision_r;
|
||||
wire [`CLOG2(NUM_INPUTS+1)-1:0] collision_count;
|
||||
reg [PERF_CTR_BITS-1:0] collisions_r;
|
||||
reg [NUM_INPUTS-1:0] per_cycle_collision;
|
||||
|
||||
always @(*) begin
|
||||
per_cycle_collision = 0;
|
||||
|
@ -190,16 +191,15 @@ module VX_stream_xbar #(
|
|||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire [`CLOG2(NUM_INPUTS+1)-1:0] collision_count, collision_count_r;
|
||||
`POP_COUNT(collision_count, per_cycle_collision);
|
||||
`BUFFER(collision_count_r, collision_count);
|
||||
|
||||
`BUFFER(per_cycle_collision_r, per_cycle_collision);
|
||||
`POP_COUNT(collision_count, per_cycle_collision_r);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
collisions_r <= '0;
|
||||
end else begin
|
||||
collisions_r <= collisions_r + PERF_CTR_BITS'(collision_count_r);
|
||||
collisions_r <= collisions_r + PERF_CTR_BITS'(collision_count);
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -229,14 +229,16 @@ module VX_shared_mem import VX_gpu_pkg::*; #(
|
|||
|
||||
`ifdef PERF_ENABLE
|
||||
// per cycle: reads, writes
|
||||
wire [`CLOG2(NUM_REQS+1)-1:0] perf_reads_per_cycle, perf_reads_per_cycle_r;
|
||||
wire [`CLOG2(NUM_REQS+1)-1:0] perf_writes_per_cycle, perf_writes_per_cycle_r;
|
||||
wire [`CLOG2(NUM_REQS+1)-1:0] perf_reads_per_cycle;
|
||||
wire [`CLOG2(NUM_REQS+1)-1:0] perf_writes_per_cycle;
|
||||
wire [`CLOG2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle;
|
||||
|
||||
wire [NUM_REQS-1:0] perf_reads_per_req = req_valid & req_ready & ~req_rw;
|
||||
wire [NUM_REQS-1:0] perf_writes_per_req = req_valid & req_ready & req_rw;
|
||||
wire [NUM_REQS-1:0] perf_reads_per_req, perf_writes_per_req;
|
||||
wire [NUM_REQS-1:0] perf_crsp_stall_per_req = rsp_valid & ~rsp_ready;
|
||||
|
||||
`BUFFER(perf_reads_per_req, req_valid & req_ready & ~req_rw);
|
||||
`BUFFER(perf_writes_per_req, req_valid & req_ready & req_rw);
|
||||
|
||||
`POP_COUNT(perf_reads_per_cycle, perf_reads_per_req);
|
||||
`POP_COUNT(perf_writes_per_cycle, perf_writes_per_req);
|
||||
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_req);
|
||||
|
@ -245,17 +247,14 @@ module VX_shared_mem import VX_gpu_pkg::*; #(
|
|||
reg [`PERF_CTR_BITS-1:0] perf_writes;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;
|
||||
|
||||
`BUFFER(perf_reads_per_cycle_r, perf_reads_per_cycle);
|
||||
`BUFFER(perf_writes_per_cycle_r, perf_writes_per_cycle);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_reads <= '0;
|
||||
perf_writes <= '0;
|
||||
perf_crsp_stalls <= '0;
|
||||
end else begin
|
||||
perf_reads <= perf_reads + `PERF_CTR_BITS'(perf_reads_per_cycle_r);
|
||||
perf_writes <= perf_writes + `PERF_CTR_BITS'(perf_writes_per_cycle_r);
|
||||
perf_reads <= perf_reads + `PERF_CTR_BITS'(perf_reads_per_cycle);
|
||||
perf_writes <= perf_writes + `PERF_CTR_BITS'(perf_writes_per_cycle);
|
||||
perf_crsp_stalls <= perf_crsp_stalls + `PERF_CTR_BITS'(perf_crsp_stall_per_cycle);
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue