mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 13:27:29 -04:00
Added performance counter for number of cycles when the rop is not used
This commit is contained in:
parent
1a871385a0
commit
283837dffb
10 changed files with 303 additions and 25 deletions
|
@ -146,6 +146,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
uint64_t rop_mem_reads = 0;
|
||||
uint64_t rop_mem_writes = 0;
|
||||
uint64_t rop_mem_lat = 0;
|
||||
uint64_t rop_inactive_cycles = 0;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -302,6 +303,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
rop_mem_reads = get_csr_64(staging_ptr, CSR_MPM_ROP_READS);
|
||||
rop_mem_writes = get_csr_64(staging_ptr, CSR_MPM_ROP_WRITES);
|
||||
rop_mem_lat = get_csr_64(staging_ptr, CSR_MPM_ROP_LAT);
|
||||
rop_inactive_cycles = get_csr_64(staging_ptr, CSR_MPM_ROP_INACTIVE_CYC);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
@ -345,9 +347,10 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
fprintf(stream, "PERF: tex memory latency=%d cycles\n", tex_avg_lat);
|
||||
#endif
|
||||
#ifdef EXT_ROP_ENABLE
|
||||
fprintf(stream, "PERF: rop memory reads=%ld\n", rop_mem_reads);
|
||||
fprintf(stream, "PERF: rop memory writes=%ld\n", rop_mem_writes);
|
||||
fprintf(stream, "PERF: rop memory latency=%ld\n", rop_mem_lat);
|
||||
fprintf(stream, "PERF: rop memory reads=%ld\n", rop_mem_reads);
|
||||
fprintf(stream, "PERF: rop memory writes=%ld\n", rop_mem_writes);
|
||||
fprintf(stream, "PERF: rop memory latency=%ld\n", rop_mem_lat);
|
||||
fprintf(stream, "PERF: rop inactive cycles=%ld\n", rop_inactive_cycles);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
|
@ -172,6 +172,7 @@ module VX_cluster #(
|
|||
`UNUSED_VAR (rop_perf_if.mem_reads)
|
||||
`UNUSED_VAR (rop_perf_if.mem_writes)
|
||||
`UNUSED_VAR (rop_perf_if.mem_latency)
|
||||
`UNUSED_VAR (rop_perf_if.inactive_cycles)
|
||||
`UNUSED_VAR (perf_ocache_if.reads)
|
||||
`UNUSED_VAR (perf_ocache_if.writes)
|
||||
`UNUSED_VAR (perf_ocache_if.read_misses)
|
||||
|
|
|
@ -290,6 +290,13 @@ module VX_csr_data #(
|
|||
`CSR_MPM_TEX_LAT : read_data_r = {`NUM_THREADS{tex_perf_if.mem_latency[31:0]}};
|
||||
`CSR_MPM_TEX_LAT_H : read_data_r = {`NUM_THREADS{32'(tex_perf_if.mem_latency[`PERF_CTR_BITS-1:32])}};
|
||||
`endif
|
||||
`ifdef EXT_RASTER_ENABLE
|
||||
// PERF: rasterunit
|
||||
`CSR_MPM_RAS_READS : read_data_r = {`NUM_THREADS{raster_perf_if.mem_reads[31:0]}};
|
||||
`CSR_MPM_RAS_READS_H : read_data_r = {`NUM_THREADS{32'(raster_perf_if.mem_reads[`PERF_CTR_BITS-1:32])}};
|
||||
`CSR_MPM_RAS_LAT : read_data_r = {`NUM_THREADS{raster_perf_if.mem_latency[31:0]}};
|
||||
`CSR_MPM_RAS_LAT_H : read_data_r = {`NUM_THREADS{32'(raster_perf_if.mem_latency[`PERF_CTR_BITS-1:32])}};
|
||||
`endif
|
||||
`ifdef EXT_ROP_ENABLE
|
||||
// PERF: ropunit
|
||||
`CSR_MPM_ROP_READS : read_data_r = {`NUM_THREADS{rop_perf_if.mem_reads[31:0]}};
|
||||
|
@ -298,12 +305,8 @@ module VX_csr_data #(
|
|||
`CSR_MPM_ROP_WRITES_H : read_data_r = {`NUM_THREADS{32'(rop_perf_if.mem_writes[`PERF_CTR_BITS-1:32])}};
|
||||
`CSR_MPM_ROP_LAT : read_data_r = {`NUM_THREADS{rop_perf_if.mem_latency[31:0]}};
|
||||
`CSR_MPM_ROP_LAT_H : read_data_r = {`NUM_THREADS{32'(rop_perf_if.mem_latency[`PERF_CTR_BITS-1:32])}};
|
||||
`ifdef EXT_RASTER_ENABLE
|
||||
// PERF: rasterunit
|
||||
`CSR_MPM_RAS_READS : read_data_r = {`NUM_THREADS{raster_perf_if.mem_reads[31:0]}};
|
||||
`CSR_MPM_RAS_READS_H : read_data_r = {`NUM_THREADS{32'(raster_perf_if.mem_reads[`PERF_CTR_BITS-1:32])}};
|
||||
`CSR_MPM_RAS_LAT : read_data_r = {`NUM_THREADS{raster_perf_if.mem_latency[31:0]}};
|
||||
`CSR_MPM_RAS_LAT_H : read_data_r = {`NUM_THREADS{32'(raster_perf_if.mem_latency[`PERF_CTR_BITS-1:32])}};
|
||||
`CSR_MPM_ROP_INACTIVE_CYC:read_data_r = {`NUM_THREADS{rop_perf_if.inactive_cycles[31:0]}};
|
||||
`CSR_MPM_ROP_INACTIVE_CYC_H: read_data_r = {`NUM_THREADS{32'(rop_perf_if.inactive_cycles[`PERF_CTR_BITS-1:32])}};
|
||||
`endif
|
||||
// PERF: reserved
|
||||
`CSR_MPM_RESERVED : read_data_r = '0;
|
||||
|
|
|
@ -33,7 +33,7 @@ module VX_mem_unit # (
|
|||
);
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_cache_if perf_icache_if(), perf_dcache_if(), perf_smem_if();
|
||||
VX_perf_cache_if perf_icache_if(), perf_dcache_if(), perf_tcache_if(), perf_smem_if();
|
||||
`endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -97,13 +97,16 @@
|
|||
`define CSR_MPM_ROP_READS_H 12'hB9D
|
||||
`define CSR_MPM_ROP_WRITES 12'hB1E // rop memory writes
|
||||
`define CSR_MPM_ROP_WRITES_H 12'hB9E
|
||||
`define CSR_MPM_ROP_LAT 12'hB1F // rop memory latency
|
||||
`define CSR_MPM_ROP_LAT_H 12'hB9F
|
||||
`define CSR_MPM_ROP_LAT 12'hB20 // rop memory latency
|
||||
`define CSR_MPM_ROP_LAT_H 12'hBA0
|
||||
`define CSR_MPM_ROP_INACTIVE_CYC 12'hB1F // rop inactive cycles
|
||||
`define CSR_MPM_ROP_INACTIVE_CYC_H 12'hB9F
|
||||
// PERF: rasterunit
|
||||
`define CSR_MPM_RAS_READS 12'hB20 // raster accesses
|
||||
`define CSR_MPM_RAS_READS_H 12'hBA0
|
||||
`define CSR_MPM_RAS_LAT 12'hB21 // raster latency
|
||||
`define CSR_MPM_RAS_LAT_H 12'hBA1
|
||||
`define CSR_MPM_RAS_READS 12'hB21 // raster accesses
|
||||
`define CSR_MPM_RAS_READS_H 12'hBA1
|
||||
`define CSR_MPM_RAS_LAT 12'hB22 // raster latency
|
||||
`define CSR_MPM_RAS_LAT_H 12'hBA2
|
||||
|
||||
|
||||
// Machine Information Registers
|
||||
`define CSR_MVENDORID 12'hF11
|
||||
|
|
|
@ -5,18 +5,20 @@ interface VX_rop_perf_if ();
|
|||
wire [`PERF_CTR_BITS-1:0] mem_reads;
|
||||
wire [`PERF_CTR_BITS-1:0] mem_writes;
|
||||
wire [`PERF_CTR_BITS-1:0] mem_latency;
|
||||
wire [`PERF_CTR_BITS-1:0] rop_inactive;
|
||||
wire [`PERF_CTR_BITS-1:0] inactive_cycles;
|
||||
|
||||
modport master (
|
||||
output mem_reads,
|
||||
output mem_writes,
|
||||
output mem_latency
|
||||
output mem_latency,
|
||||
output inactive_cycles
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input mem_reads,
|
||||
input mem_writes,
|
||||
input mem_latency
|
||||
input mem_latency,
|
||||
input inactive_cycles
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -41,19 +41,19 @@ module VX_rop_unit #(
|
|||
);
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`PERF_CTR_BITS-1:0] perf_rop_inactive;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_inactive_cycles;
|
||||
|
||||
wire perf_rop_inactive_cycle = ~rop_req_if.valid & rop_req_if.ready;
|
||||
wire perf_inactive_cycle = ~rop_req_if.valid & rop_req_if.ready;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_rop_inactive <= 0;
|
||||
perf_inactive_cycles <= 0;
|
||||
end else begin
|
||||
perf_rop_inactive <= perf_rop_inactive + `PERF_CTR_BITS(perf_rop_inactive_cycle);
|
||||
perf_inactive_cycles <= perf_inactive_cycles + `PERF_CTR_BITS'(perf_inactive_cycle);
|
||||
end
|
||||
end
|
||||
|
||||
assign rop_perf_if.rop_inactive = perf_rop_inactive;
|
||||
assign rop_perf_if.inactive_cycles = perf_inactive_cycles;
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -179,7 +179,7 @@ module VX_tex_unit #(
|
|||
wire [$clog2(`NUM_THREADS+1)-1:0] perf_mem_rsp_per_cycle;
|
||||
|
||||
wire [`NUM_THREADS-1:0] perf_mem_req_per_req = cache_req_if.valid & cache_req_if.ready;
|
||||
wire [`NUM_THREADS-1:0] perf_mem_rsp_per_req = cache_rsp_if.tmask & {`NUM_THREADS{cache_rsp_if.valid & cache_rsp_if.ready}};
|
||||
wire [`NUM_THREADS-1:0] perf_mem_rsp_per_req = cache_rsp_if.valid & cache_rsp_if.ready;
|
||||
|
||||
`POP_COUNT(perf_mem_req_per_cycle, perf_mem_req_per_req);
|
||||
`POP_COUNT(perf_mem_rsp_per_cycle, perf_mem_rsp_per_req);
|
||||
|
|
94
perf/rop/perf.sh
Executable file
94
perf/rop/perf.sh
Executable file
|
@ -0,0 +1,94 @@
|
|||
#!/bin/bash

# exit when any command fails; pipefail extends this to commands on the
# left-hand side of a pipeline, whose status would otherwise be ignored
# (only the last command of a pipeline is checked by `set -e` alone)
set -e
set -o pipefail

# ensure build
make -s
|
||||
|
||||
simple()
{
    # Runs the rop app on the rtlsim driver once per square image size and
    # stores only the output lines matching 'PERF' in ./perf/rop/rop_perf.log.
    # The log is truncated once, and consecutive runs are separated by a
    # banner line.
    local dim

    echo "begin rop tests"

    for dim in 8 16 32 64 128; do
        # separator goes between runs, not before the first one
        if [ "$dim" -ne 8 ]; then
            echo -e "\n**************************************\n"
        fi
        CONFIGS="-DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=rop --args="-rwhitebox_${dim}.png -w${dim} -h${dim}" --perf | grep 'PERF'
    done > ./perf/rop/rop_perf.log

    echo "rop tests done!"
}
|
||||
|
||||
depth_stencil()
{
    # Same sweep over image sizes as the plain test set, but with -d appended
    # to the app arguments; the complete (unfiltered) output of each run is
    # written to ./perf/rop/rop_perf.log, with a banner line between runs.
    local dim

    echo "begin rop tests (with depth-stencil)"

    for dim in 8 16 32 64 128; do
        # separator goes between runs, not before the first one
        if [ "$dim" -ne 8 ]; then
            echo -e "\n**************************************\n"
        fi
        CONFIGS="-DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=rop --args="-rwhitebox_${dim}.png -w${dim} -h${dim} -d" --perf
    done > ./perf/rop/rop_perf.log

    echo "rop tests done!"
}
|
||||
|
||||
blend()
{
    # Sweeps the rop app over the five image sizes with -b appended to the
    # app arguments; the complete (unfiltered) output of each run is written
    # to ./perf/rop/rop_perf.log, with a banner line between runs.
    local dim

    echo "begin rop tests (with blend)"

    for dim in 8 16 32 64 128; do
        # separator goes between runs, not before the first one
        if [ "$dim" -ne 8 ]; then
            echo -e "\n**************************************\n"
        fi
        CONFIGS="-DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=rop --args="-rwhitebox_${dim}.png -w${dim} -h${dim} -b" --perf
    done > ./perf/rop/rop_perf.log

    echo "rop tests done!"
}
|
||||
|
||||
depth_stencil_blend()
{
    # Sweeps the rop app over the five image sizes with both -b and -d
    # appended to the app arguments; the complete (unfiltered) output of each
    # run is written to ./perf/rop/rop_perf.log, with a banner line between
    # runs.
    local dim

    echo "begin rop tests (with depth-stencil & blend)"

    for dim in 8 16 32 64 128; do
        # separator goes between runs, not before the first one
        if [ "$dim" -ne 8 ]; then
            echo -e "\n**************************************\n"
        fi
        CONFIGS="-DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=rop --args="-rwhitebox_${dim}.png -w${dim} -h${dim} -b -d" --perf
    done > ./perf/rop/rop_perf.log

    echo "rop tests done!"
}
|
||||
|
||||
usage()
{
    # Print the switches accepted by this script on stdout.
    printf '%s\n' "usage: [-d] [-b] [-db] [-h|--help]"
}
|
||||
|
||||
# Pick the test variant from the first argument. Anything that is not one of
# the recognized switches (including no argument at all) runs the plain set.
case "$1" in
    -d )          depth_stencil
                  ;;
    -b )          blend
                  ;;
    -db )         depth_stencil_blend
                  ;;
    -h | --help ) usage
                  ;;
    * )           simple
                  ;;
esac

# Drop the consumed switch only when one was supplied. A bare `shift` with no
# positional parameters returns non-zero, which made an argument-less run end
# with a failure status (and abort under `set -e`) after the tests had passed.
if [ "$#" -gt 0 ]; then
    shift
fi
|
172
perf/rop/rop_perf.log
Normal file
172
perf/rop/rop_perf.log
Normal file
|
@ -0,0 +1,172 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DEXT_GFX_ENABLE
|
||||
running: CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DEXT_GFX_ENABLE make -C ./ci/../driver/rtlsim
|
||||
PERF: instrs=1005, cycles=3685, IPC=0.272727
|
||||
PERF: ibuffer stalls=14
|
||||
PERF: scoreboard stalls=421
|
||||
PERF: alu unit stalls=0
|
||||
PERF: lsu unit stalls=0
|
||||
PERF: csr unit stalls=0
|
||||
PERF: fpu unit stalls=0
|
||||
PERF: gpu unit stalls=0
|
||||
PERF: loads=124
|
||||
PERF: stores=107
|
||||
PERF: branches=185
|
||||
PERF: icache reads=685
|
||||
PERF: icache read misses=38 (hit ratio=94%)
|
||||
PERF: dcache reads=32
|
||||
PERF: dcache writes=37
|
||||
PERF: dcache read misses=7 (hit ratio=78%)
|
||||
PERF: dcache write misses=32 (hit ratio=13%)
|
||||
PERF: dcache bank stalls=0 (utilization=100%)
|
||||
PERF: dcache mshr stalls=0
|
||||
PERF: smem reads=62
|
||||
PERF: smem writes=59
|
||||
PERF: smem bank stalls=0 (utilization=100%)
|
||||
PERF: memory requests=103 (reads=41, writes=62)
|
||||
PERF: memory average latency=14 cycles
|
||||
PERF: tex memory reads=0
|
||||
PERF: tex memory latency=-2147483648 cycles
|
||||
PERF: rop memory reads=0
|
||||
PERF: rop memory writes=64
|
||||
PERF: rop memory latency=0
|
||||
PERF: rop inactive cycles=4102
|
||||
|
||||
**************************************
|
||||
|
||||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DEXT_GFX_ENABLE
|
||||
running: CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DEXT_GFX_ENABLE make -C ./ci/../driver/rtlsim
|
||||
PERF: instrs=2005, cycles=5275, IPC=0.380095
|
||||
PERF: ibuffer stalls=14
|
||||
PERF: scoreboard stalls=679
|
||||
PERF: alu unit stalls=0
|
||||
PERF: lsu unit stalls=0
|
||||
PERF: csr unit stalls=0
|
||||
PERF: fpu unit stalls=0
|
||||
PERF: gpu unit stalls=0
|
||||
PERF: loads=124
|
||||
PERF: stores=107
|
||||
PERF: branches=385
|
||||
PERF: icache reads=1185
|
||||
PERF: icache read misses=38 (hit ratio=96%)
|
||||
PERF: dcache reads=32
|
||||
PERF: dcache writes=37
|
||||
PERF: dcache read misses=7 (hit ratio=78%)
|
||||
PERF: dcache write misses=32 (hit ratio=13%)
|
||||
PERF: dcache bank stalls=0 (utilization=100%)
|
||||
PERF: dcache mshr stalls=0
|
||||
PERF: smem reads=62
|
||||
PERF: smem writes=59
|
||||
PERF: smem bank stalls=0 (utilization=100%)
|
||||
PERF: memory requests=103 (reads=41, writes=62)
|
||||
PERF: memory average latency=17 cycles
|
||||
PERF: tex memory reads=0
|
||||
PERF: tex memory latency=-2147483648 cycles
|
||||
PERF: rop memory reads=0
|
||||
PERF: rop memory writes=256
|
||||
PERF: rop memory latency=0
|
||||
PERF: rop inactive cycles=5596
|
||||
|
||||
**************************************
|
||||
|
||||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DEXT_GFX_ENABLE
|
||||
running: CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DEXT_GFX_ENABLE make -C ./ci/../driver/rtlsim
|
||||
PERF: instrs=5925, cycles=10831, IPC=0.547041
|
||||
PERF: ibuffer stalls=14
|
||||
PERF: scoreboard stalls=1675
|
||||
PERF: alu unit stalls=0
|
||||
PERF: lsu unit stalls=0
|
||||
PERF: csr unit stalls=0
|
||||
PERF: fpu unit stalls=0
|
||||
PERF: gpu unit stalls=0
|
||||
PERF: loads=124
|
||||
PERF: stores=107
|
||||
PERF: branches=1169
|
||||
PERF: icache reads=3145
|
||||
PERF: icache read misses=38 (hit ratio=98%)
|
||||
PERF: dcache reads=32
|
||||
PERF: dcache writes=37
|
||||
PERF: dcache read misses=7 (hit ratio=78%)
|
||||
PERF: dcache write misses=32 (hit ratio=13%)
|
||||
PERF: dcache bank stalls=0 (utilization=100%)
|
||||
PERF: dcache mshr stalls=0
|
||||
PERF: smem reads=62
|
||||
PERF: smem writes=59
|
||||
PERF: smem bank stalls=0 (utilization=100%)
|
||||
PERF: memory requests=103 (reads=41, writes=62)
|
||||
PERF: memory average latency=14 cycles
|
||||
PERF: tex memory reads=0
|
||||
PERF: tex memory latency=-2147483648 cycles
|
||||
PERF: rop memory reads=0
|
||||
PERF: rop memory writes=1024
|
||||
PERF: rop memory latency=0
|
||||
PERF: rop inactive cycles=10768
|
||||
|
||||
**************************************
|
||||
|
||||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DEXT_GFX_ENABLE
|
||||
running: CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DEXT_GFX_ENABLE make -C ./ci/../driver/rtlsim
|
||||
PERF: instrs=21445, cycles=33425, IPC=0.641586
|
||||
PERF: ibuffer stalls=14
|
||||
PERF: scoreboard stalls=5587
|
||||
PERF: alu unit stalls=0
|
||||
PERF: lsu unit stalls=0
|
||||
PERF: csr unit stalls=0
|
||||
PERF: fpu unit stalls=0
|
||||
PERF: gpu unit stalls=0
|
||||
PERF: loads=124
|
||||
PERF: stores=107
|
||||
PERF: branches=4273
|
||||
PERF: icache reads=10905
|
||||
PERF: icache read misses=38 (hit ratio=99%)
|
||||
PERF: dcache reads=32
|
||||
PERF: dcache writes=37
|
||||
PERF: dcache read misses=7 (hit ratio=78%)
|
||||
PERF: dcache write misses=32 (hit ratio=13%)
|
||||
PERF: dcache bank stalls=0 (utilization=100%)
|
||||
PERF: dcache mshr stalls=0
|
||||
PERF: smem reads=62
|
||||
PERF: smem writes=59
|
||||
PERF: smem bank stalls=0 (utilization=100%)
|
||||
PERF: memory requests=103 (reads=41, writes=62)
|
||||
PERF: memory average latency=16 cycles
|
||||
PERF: tex memory reads=0
|
||||
PERF: tex memory latency=-2147483648 cycles
|
||||
PERF: rop memory reads=0
|
||||
PERF: rop memory writes=4096
|
||||
PERF: rop memory latency=0
|
||||
PERF: rop inactive cycles=31826
|
||||
|
||||
**************************************
|
||||
|
||||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DEXT_GFX_ENABLE
|
||||
running: CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DEXT_GFX_ENABLE make -C ./ci/../driver/rtlsim
|
||||
PERF: instrs=83205, cycles=122919, IPC=0.676909
|
||||
PERF: ibuffer stalls=14
|
||||
PERF: scoreboard stalls=21091
|
||||
PERF: alu unit stalls=0
|
||||
PERF: lsu unit stalls=0
|
||||
PERF: csr unit stalls=0
|
||||
PERF: fpu unit stalls=0
|
||||
PERF: gpu unit stalls=0
|
||||
PERF: loads=124
|
||||
PERF: stores=107
|
||||
PERF: branches=16625
|
||||
PERF: icache reads=41785
|
||||
PERF: icache read misses=38 (hit ratio=99%)
|
||||
PERF: dcache reads=32
|
||||
PERF: dcache writes=37
|
||||
PERF: dcache read misses=7 (hit ratio=78%)
|
||||
PERF: dcache write misses=32 (hit ratio=13%)
|
||||
PERF: dcache bank stalls=0 (utilization=100%)
|
||||
PERF: dcache mshr stalls=0
|
||||
PERF: smem reads=62
|
||||
PERF: smem writes=59
|
||||
PERF: smem bank stalls=0 (utilization=100%)
|
||||
PERF: memory requests=103 (reads=41, writes=62)
|
||||
PERF: memory average latency=14 cycles
|
||||
PERF: tex memory reads=0
|
||||
PERF: tex memory latency=-2147483648 cycles
|
||||
PERF: rop memory reads=0
|
||||
PERF: rop memory writes=16384
|
||||
PERF: rop memory latency=0
|
||||
PERF: rop inactive cycles=115176
|
Loading…
Add table
Add a link
Reference in a new issue