Rework simulation stats

Signed-off-by: Eric Matthews <ematthew@sfu.ca>
This commit is contained in:
Eric Matthews 2022-05-27 16:28:17 -04:00
parent 3239e20360
commit 6aeac17b9d
17 changed files with 400 additions and 361 deletions

View file

@ -39,13 +39,7 @@ module branch_unit
output branch_results_t br_results,
output logic branch_flush,
exception_interface.unit exception,
//Trace signals
output logic tr_branch_correct,
output logic tr_branch_misspredict,
output logic tr_return_correct,
output logic tr_return_misspredict
exception_interface.unit exception
);
logic branch_issued_r;
@ -161,14 +155,4 @@ module branch_unit
////////////////////////////////////////////////////
//Assertions
////////////////////////////////////////////////////
//Trace Interface
generate if (ENABLE_TRACE_INTERFACE) begin
assign tr_branch_correct = instruction_is_completing & ~is_return & ~branch_flush;
assign tr_branch_misspredict = instruction_is_completing & ~is_return & branch_flush;
assign tr_return_correct = instruction_is_completing & is_return & ~branch_flush;
assign tr_return_misspredict = instruction_is_completing & is_return & branch_flush;
end
endgenerate
endmodule

View file

@ -45,8 +45,6 @@ module cva5
wishbone_interface.master dwishbone,
wishbone_interface.master iwishbone,
output trace_outputs_t tr,
l2_requester_interface.master l2,
input interrupt_t s_interrupt,
@ -193,41 +191,6 @@ module cva5
//LS
wb_packet_t wb_snoop;
//Trace Interface Signals
logic tr_early_branch_correction;
logic tr_operand_stall;
logic tr_unit_stall;
logic tr_no_id_stall;
logic tr_no_instruction_stall;
logic tr_other_stall;
logic tr_branch_operand_stall;
logic tr_alu_operand_stall;
logic tr_ls_operand_stall;
logic tr_div_operand_stall;
logic tr_alu_op;
logic tr_branch_or_jump_op;
logic tr_load_op;
logic tr_store_op;
logic tr_mul_op;
logic tr_div_op;
logic tr_misc_op;
logic tr_instruction_issued_dec;
logic [31:0] tr_instruction_pc_dec;
logic [31:0] tr_instruction_data_dec;
logic tr_branch_correct;
logic tr_branch_misspredict;
logic tr_return_correct;
logic tr_return_misspredict;
logic tr_load_conflict_delay;
logic tr_rs1_forwarding_needed;
logic tr_rs2_forwarding_needed;
logic tr_rs1_and_rs2_forwarding_needed;
////////////////////////////////////////////////////
//Implementation
@ -310,8 +273,7 @@ module cva5
.tlb_on (tlb_on),
.l1_request (l1_request[L1_ICACHE_ID]),
.l1_response (l1_response[L1_ICACHE_ID]),
.exception (1'b0),
.tr_early_branch_correction (tr_early_branch_correction)
.exception (1'b0)
);
branch_predictor #(.CONFIG(CONFIG))
@ -410,26 +372,7 @@ module cva5
.unit_issue (unit_issue),
.gc (gc),
.current_privilege (current_privilege),
.exception (exception[PRE_ISSUE_EXCEPTION]),
.tr_operand_stall (tr_operand_stall),
.tr_unit_stall (tr_unit_stall),
.tr_no_id_stall (tr_no_id_stall),
.tr_no_instruction_stall (tr_no_instruction_stall),
.tr_other_stall (tr_other_stall),
.tr_branch_operand_stall (tr_branch_operand_stall),
.tr_alu_operand_stall (tr_alu_operand_stall),
.tr_ls_operand_stall (tr_ls_operand_stall),
.tr_div_operand_stall (tr_div_operand_stall),
.tr_alu_op (tr_alu_op),
.tr_branch_or_jump_op (tr_branch_or_jump_op),
.tr_load_op (tr_load_op),
.tr_store_op (tr_store_op),
.tr_mul_op (tr_mul_op),
.tr_div_op (tr_div_op),
.tr_misc_op (tr_misc_op),
.tr_instruction_issued_dec (tr_instruction_issued_dec),
.tr_instruction_pc_dec (tr_instruction_pc_dec),
.tr_instruction_data_dec (tr_instruction_data_dec)
.exception (exception[PRE_ISSUE_EXCEPTION])
);
////////////////////////////////////////////////////
@ -458,11 +401,7 @@ module cva5
.branch_inputs (branch_inputs),
.br_results (br_results),
.branch_flush (branch_flush),
.exception (exception[BR_EXCEPTION]),
.tr_branch_correct (tr_branch_correct),
.tr_branch_misspredict (tr_branch_misspredict),
.tr_return_correct (tr_return_correct),
.tr_return_misspredict (tr_return_misspredict)
.exception (exception[BR_EXCEPTION])
);
@ -498,8 +437,7 @@ module cva5
.retire_port_valid(retire_port_valid),
.exception (exception[LS_EXCEPTION]),
.load_store_status(load_store_status),
.wb (unit_wb[UNIT_IDS.LS]),
.tr_load_conflict_delay (tr_load_conflict_delay)
.wb (unit_wb[UNIT_IDS.LS])
);
generate if (CONFIG.INCLUDE_S_MODE) begin : gen_dtlb_dmmu
@ -635,40 +573,5 @@ module cva5
////////////////////////////////////////////////////
//Assertions
////////////////////////////////////////////////////
//Trace Interface
generate if (ENABLE_TRACE_INTERFACE) begin : gen_cva5_trace
always_ff @(posedge clk) begin
tr.events.early_branch_correction <= tr_early_branch_correction;
tr.events.operand_stall <= tr_operand_stall;
tr.events.unit_stall <= tr_unit_stall;
tr.events.no_id_stall <= tr_no_id_stall;
tr.events.no_instruction_stall <= tr_no_instruction_stall;
tr.events.other_stall <= tr_other_stall;
tr.events.instruction_issued_dec <= tr_instruction_issued_dec;
tr.events.branch_operand_stall <= tr_branch_operand_stall;
tr.events.alu_operand_stall <= tr_alu_operand_stall;
tr.events.ls_operand_stall <= tr_ls_operand_stall;
tr.events.div_operand_stall <= tr_div_operand_stall;
tr.events.alu_op <= tr_alu_op;
tr.events.branch_or_jump_op <= tr_branch_or_jump_op;
tr.events.load_op <= tr_load_op;
tr.events.store_op <= tr_store_op;
tr.events.mul_op <= tr_mul_op;
tr.events.div_op <= tr_div_op;
tr.events.misc_op <= tr_misc_op;
tr.events.branch_correct <= tr_branch_correct;
tr.events.branch_misspredict <= tr_branch_misspredict;
tr.events.return_correct <= tr_return_correct;
tr.events.return_misspredict <= tr_return_misspredict;
tr.events.load_conflict_delay <= tr_load_conflict_delay;
tr.events.rs1_forwarding_needed <= tr_rs1_forwarding_needed;
tr.events.rs2_forwarding_needed <= tr_rs2_forwarding_needed;
tr.events.rs1_and_rs2_forwarding_needed <= tr_rs1_and_rs2_forwarding_needed;
tr.instruction_pc_dec <= tr_instruction_pc_dec;
tr.instruction_data_dec <= tr_instruction_data_dec;
end
end
endgenerate
endmodule

View file

@ -304,12 +304,6 @@ package cva5_config;
PRE_ISSUE_EXCEPTION = 2
} exception_sources_t;
////////////////////////////////////////////////////
//Trace Options
//Trace interface is necessary for verilator simulation
localparam ENABLE_TRACE_INTERFACE = 1;
////////////////////////////////////////////////////
//L1 Arbiter IDs
localparam L1_CONNECTIONS = 4;

View file

@ -262,46 +262,49 @@ package cva5_types;
logic external;
} interrupt_t;
typedef struct packed {
//Fetch
logic early_branch_correction;
typedef enum {
FETCH_EARLY_BR_CORRECTION_STAT,
FETCH_SUB_UNIT_STALL_STAT,
FETCH_ID_STALL_STAT,
FETCH_IC_HIT_STAT,
FETCH_IC_MISS_STAT,
FETCH_IC_ARB_STALL_STAT,
//Decode
logic operand_stall;
logic unit_stall;
logic no_id_stall;
logic no_instruction_stall;
logic other_stall;
logic instruction_issued_dec;
logic branch_operand_stall;
logic alu_operand_stall;
logic ls_operand_stall;
logic div_operand_stall;
FETCH_BP_BR_CORRECT_STAT,
FETCH_BP_BR_MISPREDICT_STAT,
FETCH_BP_RAS_CORRECT_STAT,
FETCH_BP_RAS_MISPREDICT_STAT,
//Instruction mix
logic alu_op;
logic branch_or_jump_op;
logic load_op;
logic store_op;
logic mul_op;
logic div_op;
logic misc_op;
ISSUE_NO_INSTRUCTION_STAT,
ISSUE_NO_ID_STAT,
ISSUE_FLUSH_STAT,
ISSUE_UNIT_BUSY_STAT,
ISSUE_OPERANDS_NOT_READY_STAT,
ISSUE_HOLD_STAT,
ISSUE_MULTI_SOURCE_STAT,
ISSUE_OPERAND_STALL_ON_LOAD_STAT,
ISSUE_OPERAND_STALL_ON_MULTIPLY_STAT,
ISSUE_OPERAND_STALL_ON_DIVIDE_STAT,
ISSUE_OPERAND_STALL_FOR_BRANCH_STAT,
ISSUE_STORE_WITH_FORWARDED_DATA_STAT,
ISSUE_DIVIDER_RESULT_REUSE_STAT,
//Branch Unit
logic branch_correct;
logic branch_misspredict;
logic return_correct;
logic return_misspredict;
LSU_LOAD_BLOCKED_BY_STORE_STAT,
LSU_SUB_UNIT_STALL_STAT,
LSU_DC_HIT_STAT,
LSU_DC_MISS_STAT,
LSU_DC_ARB_STALL_STAT
} stats_t;
//Load Store Unit
logic load_conflict_delay;
//Register File
logic rs1_forwarding_needed;
logic rs2_forwarding_needed;
logic rs1_and_rs2_forwarding_needed;
} cva5_trace_events_t;
typedef enum {
ALU_STAT,
BR_STAT,
MUL_STAT,
DIV_STAT,
LOAD_STAT,
STORE_STAT,
MISC_STAT
} instruction_mix_stats_t;
typedef struct packed {
logic [31:0] pc;
@ -309,10 +312,4 @@ package cva5_types;
logic valid;
} trace_retire_outputs_t;
typedef struct packed {
logic [31:0] instruction_pc_dec;
logic [31:0] instruction_data_dec;
cva5_trace_events_t events;
} trace_outputs_t;
endpackage

View file

@ -72,31 +72,8 @@ module decode_and_issue
input gc_outputs_t gc,
input logic [1:0] current_privilege,
exception_interface.unit exception,
//Trace signals
output logic tr_operand_stall,
output logic tr_unit_stall,
output logic tr_no_id_stall,
output logic tr_no_instruction_stall,
output logic tr_other_stall,
output logic tr_branch_operand_stall,
output logic tr_alu_operand_stall,
output logic tr_ls_operand_stall,
output logic tr_div_operand_stall,
output logic tr_alu_op,
output logic tr_branch_or_jump_op,
output logic tr_load_op,
output logic tr_store_op,
output logic tr_mul_op,
output logic tr_div_op,
output logic tr_misc_op,
output logic tr_instruction_issued_dec,
output logic [31:0] tr_instruction_pc_dec,
output logic [31:0] tr_instruction_data_dec
);
exception_interface.unit exception
);
logic [2:0] fn3;
logic [6:0] opcode;
@ -639,33 +616,4 @@ module decode_and_issue
////////////////////////////////////////////////////
//Assertions
////////////////////////////////////////////////////
//Trace Interface
generate if (ENABLE_TRACE_INTERFACE) begin : gen_decode_trace
assign tr_operand_stall = issue.stage_valid & ~gc.fetch_flush & ~gc.issue_hold & ~pre_issue_exception_pending & ~operands_ready & |issue_ready;
assign tr_unit_stall = issue_valid & ~gc.fetch_flush & ~|issue_ready;
assign tr_no_id_stall = (~issue.stage_valid & ~pc_id_available & ~gc.fetch_flush); //All instructions in execution pipeline
assign tr_no_instruction_stall = (pc_id_available & ~issue.stage_valid) | gc.fetch_flush;
assign tr_other_stall = issue.stage_valid & ~instruction_issued & ~(tr_operand_stall | tr_unit_stall | tr_no_id_stall | tr_no_instruction_stall);
assign tr_branch_operand_stall = tr_operand_stall & unit_needed_issue_stage[UNIT_IDS.BR];
assign tr_alu_operand_stall = tr_operand_stall & unit_needed_issue_stage[UNIT_IDS.ALU] & ~unit_needed_issue_stage[UNIT_IDS.BR];
assign tr_ls_operand_stall = tr_operand_stall & unit_needed_issue_stage[UNIT_IDS.LS];
assign tr_div_operand_stall = tr_operand_stall & unit_needed_issue_stage[UNIT_IDS.DIV];
//Instruction Mix
always_ff @(posedge clk) begin
tr_alu_op <= issue_to[UNIT_IDS.ALU];
tr_branch_or_jump_op <= issue_to[UNIT_IDS.BR];
tr_load_op <= issue_to[UNIT_IDS.LS] & is_load_r;
tr_store_op <= issue_to[UNIT_IDS.LS] & is_store_r;
tr_mul_op <= issue_to[UNIT_IDS.MUL];
tr_div_op <= issue_to[UNIT_IDS.DIV];
tr_misc_op <= issue_to[UNIT_IDS.CSR] | issue_to[UNIT_IDS.IEC];
end
assign tr_instruction_issued_dec = instruction_issued;
assign tr_instruction_pc_dec = issue.pc;
assign tr_instruction_data_dec = issue.instruction;
end endgenerate
endmodule

View file

@ -60,10 +60,7 @@ module fetch
wishbone_interface.master iwishbone,
input logic icache_on,
l1_arbiter_request_interface.master l1_request,
l1_arbiter_return_interface.master l1_response,
//Trace Interface
output logic tr_early_branch_correction
l1_arbiter_return_interface.master l1_response
);
localparam NUM_SUB_UNITS = int'(CONFIG.INCLUDE_ILOCAL_MEM) + int'(CONFIG.INCLUDE_ICACHE) + int'(CONFIG.INCLUDE_IBUS);
@ -308,8 +305,6 @@ module fetch
assign is_branch_or_jump = fetch_instruction[6:2] inside {JAL_T, JALR_T, BRANCH_T};
assign early_branch_flush = (valid_fetch_result & (|unit_data_valid)) & fetch_attr.is_predicted_branch_or_jump & (~is_branch_or_jump);
assign early_branch_flush_ras_adjust = (valid_fetch_result & (|unit_data_valid)) & fetch_attr.is_branch & (~is_branch_or_jump);
if (ENABLE_TRACE_INTERFACE)
assign tr_early_branch_correction = early_branch_flush;
end endgenerate
////////////////////////////////////////////////////
//End of Implementation

View file

@ -40,9 +40,7 @@ module load_store_queue //ID-based input buffer for Load/Store Unit
//Retire release
input id_t retire_ids [RETIRE_PORTS],
input logic retire_port_valid [RETIRE_PORTS],
output logic tr_possible_load_conflict_delay
input logic retire_port_valid [RETIRE_PORTS]
);
typedef struct packed {
@ -68,7 +66,7 @@ module load_store_queue //ID-based input buffer for Load/Store Unit
//Can accept requests so long as store queue is not needed or is not full
assign lsq.full = lsq.data_in.store & sq.full;
//Address hash for load-store collision checking
addr_hash lsq_addr_hash (
.clk (clk),
@ -149,11 +147,4 @@ module load_store_queue //ID-based input buffer for Load/Store Unit
////////////////////////////////////////////////////
//Assertions
////////////////////////////////////////////////////
//Trace Interface
generate if (ENABLE_TRACE_INTERFACE) begin : gen_lsq_trace
assign tr_possible_load_conflict_delay = lq.valid & (store_conflict | (sq.full & sq.valid));
end
endgenerate
endmodule

View file

@ -63,9 +63,7 @@ module load_store_unit
exception_interface.unit exception,
output load_store_status_t load_store_status,
unit_writeback_interface.unit wb,
output logic tr_load_conflict_delay
unit_writeback_interface.unit wb
);
localparam NUM_SUB_UNITS = int'(CONFIG.INCLUDE_DLOCAL_MEM) + int'(CONFIG.INCLUDE_PERIPHERAL_BUS) + int'(CONFIG.INCLUDE_DCACHE);
@ -130,12 +128,8 @@ module load_store_unit
fifo_interface #(.DATA_WIDTH($bits(load_attributes_t))) load_attributes();
load_store_queue_interface lsq();
logic tr_possible_load_conflict_delay;
////////////////////////////////////////////////////
//Implementation
////////////////////////////////////////////////////
////////////////////////////////////////////////////
//Alignment Exception
@ -232,8 +226,7 @@ module load_store_unit
.lsq (lsq),
.wb_snoop (wb_snoop),
.retire_ids (retire_ids),
.retire_port_valid (retire_port_valid),
.tr_possible_load_conflict_delay (tr_possible_load_conflict_delay)
.retire_port_valid (retire_port_valid)
);
assign shared_inputs = lsq.data_out;
assign lsq.pop = sub_unit_issue;
@ -436,11 +429,4 @@ module load_store_unit
// else $error("invalid L/S address");
// `endif
////////////////////////////////////////////////////
//Trace Interface
generate if (ENABLE_TRACE_INTERFACE) begin : gen_ls_trace
assign tr_load_conflict_delay = tr_possible_load_conflict_delay & units_ready;
end
endgenerate
endmodule

View file

@ -84,7 +84,6 @@ module cva5_wrapper_xilinx
avalon_interface m_avalon ();
wishbone_interface dwishbone ();
wishbone_interface iwishbone ();
trace_outputs_t tr;
logic timer_interrupt;
logic interrupt;

View file

@ -290,7 +290,6 @@ module litex_wrapper
avalon_interface m_avalon();
local_memory_interface instruction_bram();
local_memory_interface data_bram();
trace_outputs_t tr;
interrupt_t s_interrupt;
//L2 to Wishbone

177
test_benches/sim_stats.sv Normal file
View file

@ -0,0 +1,177 @@
/*
* Copyright © 2022 Eric Matthews, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
//Simulation-only statistics collector.
//While collection is enabled, every cycle each single-bit flag in `stats` is added to its
//own 64-bit counter, the per-retire-port instruction mix flags are summed per category,
//and the cycle / retired-instruction totals are advanced.
//When end_collection is asserted the counters are printed to stdout and, if a CSV log
//file was opened at time zero, written to that file, which is then closed.
module sim_stats

    import cva5_config::*;
    import cva5_types::*;

    #(
        //Number of single-bit event flags in `stats` (indexed by stats_t in the print functions)
        parameter NUM_OF_STATS = 32,
        //Number of instruction mix categories per retire port (indexed by instruction_mix_stats_t)
        //NOTE(review): print_stats indexes instruction_mix_stat_count up to MISC_STAT, so the
        //instantiation presumably has to override this default to cover every enum entry - confirm
        parameter NUM_INSTRUCTION_MIX_STATS = 5
    )
    (
        input logic clk,
        input logic rst,
        //Collection is enabled from the cycle after start_collection is seen...
        input logic start_collection,
        //...and disabled from the cycle after end_collection is seen (start takes priority).
        //end_collection also triggers the stats printout
        input logic end_collection,
        //One event flag per statistic, sampled every enabled cycle
        input logic stats [NUM_OF_STATS],
        //One flag per category for each retire port
        input logic [NUM_INSTRUCTION_MIX_STATS-1:0] instruction_mix_stats [RETIRE_PORTS],
        //Only retire.count is used here (instructions retired this cycle)
        input retire_packet_t retire
    );

    int log_file;
    logic en;

    logic [63:0] instructions_retired;
    logic [63:0] cycle_count;
    logic [63:0] stat_count [NUM_OF_STATS];
    logic [63:0] instruction_mix_stat_count [NUM_INSTRUCTION_MIX_STATS-1:0];
    //Per-category increment for this cycle, wide enough to hold RETIRE_PORTS
    logic [$clog2(RETIRE_PORTS):0] instruction_mix_inc [NUM_INSTRUCTION_MIX_STATS-1:0];

    ////////////////////////////////////////////////////
    //Reporting
    //Instructions per cycle over the collection window.
    //Reports zero rather than dividing by zero when no cycles were collected
    function real compute_ipc ();
        if (cycle_count == 0)
            return 0.0;
        return real'(instructions_retired)/real'(cycle_count);
    endfunction

    //Human readable report on stdout
    function void print_stats ();
        $display("Fetch---------------------------------------------------------");
        $display("Early Branch Correction : %-d", stat_count[FETCH_EARLY_BR_CORRECTION_STAT]);
        $display("Sub Unit Stall : %-d", stat_count[FETCH_SUB_UNIT_STALL_STAT]);
        $display("No ID available : %-d", stat_count[FETCH_ID_STALL_STAT]);
        $display("Instruction Cache");
        $display(" Hits : %-d", stat_count[FETCH_IC_HIT_STAT]);
        $display(" Misses : %-d", stat_count[FETCH_IC_MISS_STAT]);
        $display(" Arbiter stall : %-d", stat_count[FETCH_IC_ARB_STALL_STAT]);
        $display("Branch Predictor");
        $display(" Branches");
        $display(" Correct : %-d", stat_count[FETCH_BP_BR_CORRECT_STAT]);
        $display(" Mispredict : %-d", stat_count[FETCH_BP_BR_MISPREDICT_STAT]);
        $display(" Returns (RAS)");
        $display(" Correct : %-d", stat_count[FETCH_BP_RAS_CORRECT_STAT]);
        $display(" Mispredict : %-d", stat_count[FETCH_BP_RAS_MISPREDICT_STAT]);

        $display("Issue---------------------------------------------------------");
        $display("Stall Sources");
        $display(" No Instruction : %-d", stat_count[ISSUE_NO_INSTRUCTION_STAT]);
        $display(" Max IDs Issued : %-d", stat_count[ISSUE_NO_ID_STAT]);
        $display(" Flush : %-d", stat_count[ISSUE_FLUSH_STAT]);
        $display(" Unit Busy : %-d", stat_count[ISSUE_UNIT_BUSY_STAT]);
        $display(" Operands Not Ready : %-d", stat_count[ISSUE_OPERANDS_NOT_READY_STAT]);
        $display(" Hold : %-d", stat_count[ISSUE_HOLD_STAT]);
        $display(" Multi-Source : %-d", stat_count[ISSUE_MULTI_SOURCE_STAT]);
        $display("Operand Stall Waiting On");
        $display(" Load : %-d", stat_count[ISSUE_OPERAND_STALL_ON_LOAD_STAT]);
        $display(" Multiply : %-d", stat_count[ISSUE_OPERAND_STALL_ON_MULTIPLY_STAT]);
        $display(" Divide : %-d", stat_count[ISSUE_OPERAND_STALL_ON_DIVIDE_STAT]);
        $display("Operands Stall (Branch) : %-d", stat_count[ISSUE_OPERAND_STALL_FOR_BRANCH_STAT]);
        $display("Store with Forwarded Data : %-d", stat_count[ISSUE_STORE_WITH_FORWARDED_DATA_STAT]);
        $display("Divider Result Reuse : %-d", stat_count[ISSUE_DIVIDER_RESULT_REUSE_STAT]);

        $display("Load-Store Unit-----------------------------------------------");
        $display("Load Blocked by Store : %-d", stat_count[LSU_LOAD_BLOCKED_BY_STORE_STAT]);
        $display("Sub Unit Stall : %-d", stat_count[LSU_SUB_UNIT_STALL_STAT]);
        $display("Data Cache");
        $display(" Hits : %-d", stat_count[LSU_DC_HIT_STAT]);
        $display(" Misses : %-d", stat_count[LSU_DC_MISS_STAT]);
        $display(" Arbiter stall : %-d", stat_count[LSU_DC_ARB_STALL_STAT]);

        $display("Retire--------------------------------------------------------");
        $display("Instructions Retired : %-d", instructions_retired);
        $display("Runtime (cycles) : %-d", cycle_count);
        $display("IPC : %-f", compute_ipc());
        $display("Instruction Mix");
        $display(" Basic ALU : %-d", instruction_mix_stat_count[ALU_STAT]);
        $display(" Branch or Jump : %-d", instruction_mix_stat_count[BR_STAT]);
        $display(" Multiply : %-d", instruction_mix_stat_count[MUL_STAT]);
        $display(" Divide : %-d", instruction_mix_stat_count[DIV_STAT]);
        $display(" Load : %-d", instruction_mix_stat_count[LOAD_STAT]);
        $display(" Store : %-d", instruction_mix_stat_count[STORE_STAT]);
        $display(" Misc : %-d", instruction_mix_stat_count[MISC_STAT]);
        $display("");
    endfunction

    //Supplied by the C++ harness; an empty string means no CSV log was requested
    import "DPI-C" function string cva5_csv_log_file_name();

    //Machine readable report: one "name,value" line per counter, using the enum member
    //names as labels. Does nothing when no log file is open. The file is closed after
    //the dump.
    function void print_stats_csv ();
        stats_t stat_enum;
        instruction_mix_stats_t instruction_mix_stat_enum;

        if (log_file != 0) begin
            $fdisplay(log_file, "Instructions Retired,%-d", instructions_retired);
            $fdisplay(log_file, "Runtime (cycles),%-d", cycle_count);
            $fdisplay(log_file, "IPC,%-f", compute_ipc());
            foreach(stat_count[i]) begin
                stat_enum = stats_t'(i);
                $fdisplay(log_file, "%s,%-d", stat_enum.name(), stat_count[i]);
            end
            foreach(instruction_mix_stat_count[i]) begin
                instruction_mix_stat_enum = instruction_mix_stats_t'(i);
                $fdisplay(log_file, "%s,%-d", instruction_mix_stat_enum.name(), instruction_mix_stat_count[i]);
            end
            $fclose(log_file);
        end
    endfunction

    ////////////////////////////////////////////////////
    //Implementation
    //Open the CSV log only when the harness provided a file name
    initial begin
        if (cva5_csv_log_file_name() != "")
            log_file = $fopen(cva5_csv_log_file_name(), "w");
    end

    //Dump the counters in the cycle end_collection is observed.
    //The values printed are those accumulated up to (not including) this cycle
    always_ff @ (posedge clk) begin
        if (end_collection) begin
            print_stats();
            print_stats_csv();
        end
    end

    //Collection enable: set by start_collection, cleared by end_collection
    always_ff @ (posedge clk) begin
        if (rst)
            en <= 0;
        else
            en <= (en & ~end_collection) | start_collection;
    end

    //Sum each instruction mix category across all retire ports for this cycle
    always_comb begin
        instruction_mix_inc = '{default: 0};
        for (int i = 0; i < RETIRE_PORTS; i++) begin
            for (int j = 0; j < NUM_INSTRUCTION_MIX_STATS; j++) begin
                instruction_mix_inc[j] += ($clog2(RETIRE_PORTS)+1)'(instruction_mix_stats[i][j]);
            end
        end
    end

    //Counters. Reset takes priority over accumulation so that an enabled (or
    //uninitialized) `en` during reset cannot override the cleared values
    always_ff @ (posedge clk) begin
        if (rst) begin
            instructions_retired <= 0;
            cycle_count <= 0;
            foreach (stat_count[i])
                stat_count[i] <= 0;
            foreach (instruction_mix_stat_count[i])
                instruction_mix_stat_count[i] <= 0;
        end
        else if (en) begin
            instructions_retired <= instructions_retired + 64'(retire.count);
            cycle_count <= cycle_count + 1;
            foreach (stat_count[i])
                stat_count[i] <= stat_count[i] + 64'(stats[i]);
            foreach (instruction_mix_stat_count[i])
                instruction_mix_stat_count[i] <= instruction_mix_stat_count[i] + 64'(instruction_mix_inc[i]);
        end
    end

endmodule

View file

@ -186,8 +186,6 @@ module axi_l2_test # (
avalon_interface m_avalon();
wishbone_interface dwishbone();
trace_outputs_t tr;
l2_requester_interface l2[L2_NUM_PORTS-1:0]();
l2_memory_interface mem();

View file

@ -51,7 +51,7 @@ bool CVA5Tracer::has_terminated() {
bool CVA5Tracer::has_stalled() {
if (!tb->instruction_issued) {
if (!tb->retire_ports_valid[0]) {
if (stall_count > stall_limit) {
stall_count = 0;
std::cout << "\n\nError!!!!\n";
@ -70,31 +70,6 @@ bool CVA5Tracer::store_queue_empty() {
return tb->store_queue_empty;
}
void CVA5Tracer::reset_stats() {
for (int i=0; i < numEvents; i++)
event_counters[i] = 0;
}
void CVA5Tracer::update_stats() {
if (collect_stats) {
for (int i=0; i < numEvents; i++)
event_counters[i] += tb->cva5_events[i];
}
}
void CVA5Tracer::print_stats() {
std::cout << " CVA5 trace stats\n";
std::cout << "--------------------------------------------------------------\n";
for (int i=0; i < numEvents; i++)
std::cout << " " << eventNames[i] << ":" << event_counters[i] << std::endl;
std::cout << "--------------------------------------------------------------\n\n";
}
void CVA5Tracer::reset() {
tb->clk = 0;
tb->rst = 1;
@ -103,10 +78,7 @@ void CVA5Tracer::reset() {
}
tb->rst = 0;
reset_stats();
std::cout << "DONE System reset \n" << std::flush;
}
void CVA5Tracer::set_log_file(std::ofstream* logFile) {
@ -154,22 +126,9 @@ void CVA5Tracer::tick() {
verilatorWaveformTracer->dump(vluint32_t(cycle_count));
#endif
if (check_if_instruction_retired(BENCHMARK_START_COLLECTION_NOP)) {
reset_stats();
collect_stats = true;
}
else if (check_if_instruction_retired(BENCHMARK_RESUME_COLLECTION_NOP)) {
collect_stats = true;
}
else if (check_if_instruction_retired(BENCHMARK_END_COLLECTION_NOP)) {
collect_stats = false;
}
tb->clk = 1;
tb->eval();
axi_ddr->step();
update_stats();
update_UART();
update_memory();

View file

@ -39,39 +39,6 @@
#define ERROR_TERMINATION_NOP 0x00F00013U
#define SUCCESS_TERMINATION_NOP 0x00A00013U
template <typename T, int N>
constexpr int arraySize(T(&)[N]) { return N; }
static const char * const eventNames[] = {
"early_branch_correction",
"operand_stall",
"unit_stall",
"no_id_stall",
"no_instruction_stall",
"other_stall",
"instruction_issued_dec",
"branch_operand_stall",
"alu_operand_stall",
"ls_operand_stall",
"div_operand_stall",
"alu_op",
"branch_or_jump_op",
"load_op",
"store_op",
"mul_op",
"div_op",
"misc_op",
"branch_correct",
"branch_misspredict",
"return_correct",
"return_misspredict",
"load_conflict_delay",
"rs1_forwarding_needed",
"rs2_forwarding_needed",
"rs1_and_rs2_forwarding_needed"
};
static const int numEvents = arraySize(eventNames);
//Testbench with CVA5 trace outputs on toplevel
class CVA5Tracer {
public:
@ -81,8 +48,6 @@ public:
bool has_terminated();
bool has_stalled();
bool store_queue_empty();
void print_stats();
void reset_stats();
void reset();
void tick();
@ -107,16 +72,12 @@ private:
int stall_limit = 2000;
int stall_count = 0;
uint64_t cycle_count = 0;
uint64_t event_counters[numEvents];
bool collect_stats = false;
bool program_complete = false;
void update_stats();
void update_UART();
void update_memory();
uint32_t instruction_r;
uint32_t data_out_r;
};
#endif

View file

@ -3,16 +3,22 @@
#include <fstream>
#include "verilated.h"
#include "verilated_vcd_c.h"
#include "svdpi.h"
#include "Vcva5_sim__Dpi.h"
#include "Vcva5_sim.h"
#include "CVA5Tracer.h"
CVA5Tracer *cva5Tracer;
char* csv_log_name;
//For time index on assertions
double sc_time_stamp () {
return cva5Tracer->get_cycle_count();
}
//DPI-C callback imported by the simulation stats module to obtain the CSV log path.
//An empty string tells the SystemVerilog side that no CSV log was requested; it is
//also returned while csv_log_name has not been assigned yet, so a null pointer is
//never handed across the DPI boundary.
const char* cva5_csv_log_file_name () {
    return csv_log_name ? csv_log_name : "";
}
//#define TRACE_ON
using namespace std;
int main(int argc, char **argv) {
@ -42,11 +48,8 @@ int main(int argc, char **argv) {
exit(EXIT_FAILURE);
}
logFile.open (argv[1]);
sigFile.open (argv[2]);
//printf("HW INIT:%s \n", argv[3]);
programFile.open (argv[3]);
if (!logFile.is_open()) {
@ -58,6 +61,12 @@ int main(int argc, char **argv) {
exit(EXIT_FAILURE);
}
char default_csv = '\0';
if (argv[6]) {
csv_log_name = argv[6];
} else {
csv_log_name = &default_csv;
}
// Create an instance of our module under test
cva5Tracer = new CVA5Tracer(programFile);
cva5Tracer->set_log_file(&logFile);
@ -93,8 +102,8 @@ int main(int argc, char **argv) {
}
cout << "--------------------------------------------------------------\n";
cout << " Simulation Completed " << cva5Tracer->get_cycle_count() << " cycles.\n";
cva5Tracer->print_stats();
cout << " Simulation Completed\n";
cout << "--------------------------------------------------------------\n";
logFile.close();
sigFile.close();

View file

@ -167,12 +167,7 @@ module cva5_sim
output logic [31:0] retire_ports_instruction [RETIRE_PORTS],
output logic [31:0] retire_ports_pc [RETIRE_PORTS],
output logic retire_ports_valid [RETIRE_PORTS],
output logic store_queue_empty,
output logic instruction_issued,
output logic cva5_events [0:$bits(cva5_trace_events_t)-1],
output logic [31:0] instruction_pc_dec,
output logic [31:0] instruction_data_dec
output logic store_queue_empty
);
logic [3:0] WRITE_COUNTER_MAX;
@ -237,8 +232,6 @@ module cva5_sim
wishbone_interface dwishbone();
wishbone_interface iwishbone();
trace_outputs_t tr;
l2_requester_interface l2[L2_NUM_PORTS-1:0]();
l2_memory_interface mem();
@ -431,16 +424,162 @@ module cva5_sim
////////////////////////////////////////////////////
//Trace Interface
assign instruction_pc_dec = tr.instruction_pc_dec;
assign instruction_data_dec = tr.instruction_data_dec;
assign instruction_issued = tr.events.instruction_issued_dec;
logic [$bits(cva5_trace_events_t)-1:0] cva5_events_packed;
assign cva5_events_packed = tr.events;
localparam BENCHMARK_START_COLLECTION_NOP = 32'h00C00013;
localparam BENCHMARK_END_COLLECTION_NOP = 32'h00D00013;
logic start_collection;
logic end_collection;
//NOP detection
always_comb begin
foreach(cva5_events_packed[i])
cva5_events[$bits(cva5_trace_events_t)-1-i] = cva5_events_packed[i];
start_collection = 0;
end_collection = 0;
foreach(retire_ports_valid[i]) begin
start_collection |= retire_ports_valid[i] & (retire_ports_instruction[i] == BENCHMARK_START_COLLECTION_NOP);
end_collection |= retire_ports_valid[i] & (retire_ports_instruction[i] == BENCHMARK_END_COLLECTION_NOP);
end
end
//Hierarchy paths for major components
`define FETCH_P cpu.fetch_block
`define ICACHE_P cpu.fetch_block.gen_fetch_icache.i_cache
`define BRANCH_P cpu.branch_unit_block
`define ISSUE_P cpu.decode_and_issue_block
`define RENAME_P cpu.renamer_block
`define METADATA_P cpu.id_block
`define LS_P cpu.load_store_unit_block
`define LSQ_P cpu.load_store_unit_block.lsq_block
`define DCACHE_P cpu.load_store_unit_block.gen_ls_dcache.data_cache
stats_t stats_enum;
instruction_mix_stats_t instruction_mix_enum;
localparam NUM_STATS = stats_enum.num();
localparam NUM_INSTRUCTION_MIX_STATS = instruction_mix_enum.num();
logic stats [NUM_STATS];
logic is_mul [RETIRE_PORTS];
logic is_div [RETIRE_PORTS];
logic [NUM_INSTRUCTION_MIX_STATS-1:0] instruction_mix_stats [RETIRE_PORTS];
logic icache_hit;
logic icache_miss;
logic iarb_stall;
logic dcache_hit;
logic dcache_miss;
logic darb_stall;
//Issue stalls
logic base_no_instruction_stall;
logic base_no_id_sub_stall;
logic base_flush_sub_stall;
logic base_unit_busy_stall;
logic base_operands_stall;
logic base_hold_stall;
logic single_source_issue_stall;
logic [3:0] stall_source_count;
///////////////
//Issue phys_rd to unit mem
//Used for determining what outputs an operand stall is waiting on
logic [`ISSUE_P.NUM_UNITS-1:0] phys_addr_table [64];
always_ff @(posedge clk) begin
if (cpu.instruction_issued_with_rd)
phys_addr_table[`ISSUE_P.issue.phys_rd_addr] <= `ISSUE_P.unit_needed_issue_stage;
end
//Instruction cache event taps. The hierarchical paths only exist when the
//icache is generated, so the signals are tied off otherwise instead of being
//left undriven (they are always read by the stats assignment logic).
generate if (EXAMPLE_CONFIG.INCLUDE_ICACHE) begin
    assign icache_hit = `ICACHE_P.tag_hit;
    assign icache_miss = `ICACHE_P.second_cycle & ~`ICACHE_P.tag_hit;
    assign iarb_stall = `ICACHE_P.request_r & ~cpu.l1_request[L1_ICACHE_ID].ack;
end else begin
    assign icache_hit = 0;
    assign icache_miss = 0;
    assign iarb_stall = 0;
end endgenerate
//Data cache event taps. The hierarchical paths only exist when the dcache is
//generated, so the signals are tied off otherwise instead of being left
//undriven (they are always read by the stats assignment logic).
generate if (EXAMPLE_CONFIG.INCLUDE_DCACHE) begin
    assign dcache_hit = `DCACHE_P.read_hit;
    assign dcache_miss = `DCACHE_P.read_miss_complete;
    assign darb_stall = `DCACHE_P.arb_request_r;
end else begin
    assign dcache_hit = 0;
    assign dcache_miss = 0;
    assign darb_stall = 0;
end endgenerate
//Per-cycle event flags handed to the stats collector.
//Every entry of `stats` is a single-bit "this event happened this cycle" flag,
//derived by probing internal signals of the core through the hierarchy macros.
always_comb begin
    stats = '{default: '0};
    //Fetch
    stats[FETCH_EARLY_BR_CORRECTION_STAT] = `FETCH_P.early_branch_flush;
    stats[FETCH_SUB_UNIT_STALL_STAT] = `METADATA_P.pc_id_available & ~`FETCH_P.units_ready;
    stats[FETCH_ID_STALL_STAT] = ~`METADATA_P.pc_id_available;
    stats[FETCH_IC_HIT_STAT] = icache_hit;
    stats[FETCH_IC_MISS_STAT] = icache_miss;
    stats[FETCH_IC_ARB_STALL_STAT] = iarb_stall;

    //Branch predictor
    //A completing branch that did not cause a flush counts as correctly predicted;
    //returns are tracked separately from other branches
    stats[FETCH_BP_BR_CORRECT_STAT] = `BRANCH_P.instruction_is_completing & ~`BRANCH_P.is_return & ~`BRANCH_P.branch_flush;
    stats[FETCH_BP_BR_MISPREDICT_STAT] = `BRANCH_P.instruction_is_completing & ~`BRANCH_P.is_return & `BRANCH_P.branch_flush;
    stats[FETCH_BP_RAS_CORRECT_STAT] = `BRANCH_P.instruction_is_completing & `BRANCH_P.is_return & ~`BRANCH_P.branch_flush;
    stats[FETCH_BP_RAS_MISPREDICT_STAT] = `BRANCH_P.instruction_is_completing & `BRANCH_P.is_return & `BRANCH_P.branch_flush;

    //Issue stalls
    //Base conditions may overlap; the *_sub_stall terms refine the no-instruction case
    base_no_instruction_stall = ~`ISSUE_P.issue.stage_valid | cpu.gc.fetch_flush;
    base_no_id_sub_stall = (`METADATA_P.post_issue_count == MAX_IDS);
    base_flush_sub_stall = cpu.gc.fetch_flush;
    base_unit_busy_stall = `ISSUE_P.issue.stage_valid & ~|`ISSUE_P.issue_ready;
    base_operands_stall = `ISSUE_P.issue.stage_valid & ~`ISSUE_P.operands_ready;
    base_hold_stall = `ISSUE_P.issue.stage_valid & (cpu.gc.issue_hold | `ISSUE_P.pre_issue_exception_pending);

    //Number of top-level stall sources active this cycle
    stall_source_count = 4'(base_no_instruction_stall) + 4'(base_unit_busy_stall) + 4'(base_operands_stall) + 4'(base_hold_stall);
    single_source_issue_stall = (stall_source_count == 1);

    //Issue stall determination
    //A stall is attributed to a specific source only when it is the sole active source;
    //cycles with several active sources are counted under the multi-source stat instead
    stats[ISSUE_NO_INSTRUCTION_STAT] = base_no_instruction_stall & single_source_issue_stall;
    stats[ISSUE_NO_ID_STAT] = base_no_instruction_stall & base_no_id_sub_stall & single_source_issue_stall;
    stats[ISSUE_FLUSH_STAT] = base_no_instruction_stall & base_flush_sub_stall & single_source_issue_stall;
    stats[ISSUE_UNIT_BUSY_STAT] = base_unit_busy_stall & single_source_issue_stall;
    stats[ISSUE_OPERANDS_NOT_READY_STAT] = base_operands_stall & single_source_issue_stall;
    stats[ISSUE_HOLD_STAT] = base_hold_stall & single_source_issue_stall;
    stats[ISSUE_MULTI_SOURCE_STAT] = (base_no_instruction_stall | base_unit_busy_stall | base_operands_stall | base_hold_stall) & ~single_source_issue_stall;

    //Misc Issue stats
    stats[ISSUE_OPERAND_STALL_FOR_BRANCH_STAT] = stats[ISSUE_OPERANDS_NOT_READY_STAT] & `ISSUE_P.unit_needed_issue_stage[`ISSUE_P.UNIT_IDS.BR];
    stats[ISSUE_STORE_WITH_FORWARDED_DATA_STAT] = `ISSUE_P.issue_to[`ISSUE_P.UNIT_IDS.LS] & `ISSUE_P.is_store_r & `ISSUE_P.ls_inputs.forwarded_store;
    stats[ISSUE_DIVIDER_RESULT_REUSE_STAT] = `ISSUE_P.issue_to[`ISSUE_P.UNIT_IDS.DIV] & `ISSUE_P.gen_decode_div_inputs.div_op_reuse;

    //Issue Stall Source
    //For each conflicting source register, look up which unit was recorded
    //for that physical register in phys_addr_table
    for (int i = 0; i < REGFILE_READ_PORTS; i++) begin
        stats[ISSUE_OPERAND_STALL_ON_LOAD_STAT] |= `ISSUE_P.issue.stage_valid & phys_addr_table[`ISSUE_P.issue_phys_rs_addr[i]][`ISSUE_P.UNIT_IDS.LS] & `ISSUE_P.rs_conflict[i];
        stats[ISSUE_OPERAND_STALL_ON_MULTIPLY_STAT] |= EXAMPLE_CONFIG.INCLUDE_MUL & `ISSUE_P.issue.stage_valid & phys_addr_table[`ISSUE_P.issue_phys_rs_addr[i]][`ISSUE_P.UNIT_IDS.MUL] & `ISSUE_P.rs_conflict[i];
        stats[ISSUE_OPERAND_STALL_ON_DIVIDE_STAT] |= EXAMPLE_CONFIG.INCLUDE_DIV & `ISSUE_P.issue.stage_valid & phys_addr_table[`ISSUE_P.issue_phys_rs_addr[i]][`ISSUE_P.UNIT_IDS.DIV] & `ISSUE_P.rs_conflict[i];
    end

    //LS Stats
    stats[LSU_LOAD_BLOCKED_BY_STORE_STAT] = `LSQ_P.lq.valid & `LSQ_P.store_conflict;
    stats[LSU_SUB_UNIT_STALL_STAT] = `LS_P.lsq.valid & ~`LS_P.units_ready;
    stats[LSU_DC_HIT_STAT] = dcache_hit;
    stats[LSU_DC_MISS_STAT] = dcache_miss;
    stats[LSU_DC_ARB_STALL_STAT] = darb_stall;

    //Retire Instruction Mix
    for (int i = 0; i < RETIRE_PORTS; i++) begin
        //Bit 25 (funct7[0]) selects the multiply/divide group and bit 14 (funct3[2])
        //separates multiplies from divides, but only for the register-register
        //arithmetic opcode. For immediate, LUI and AUIPC encodings bit 25 is part of
        //the immediate, so the opcode must be checked here or those instructions
        //would be excluded from the ALU count without being counted anywhere else
        is_mul[i] = (retire_ports_instruction[i][6:2] inside {ARITH_T}) & retire_ports_instruction[i][25] & ~retire_ports_instruction[i][14];
        is_div[i] = (retire_ports_instruction[i][6:2] inside {ARITH_T}) & retire_ports_instruction[i][25] & retire_ports_instruction[i][14];

        instruction_mix_stats[i][ALU_STAT] = cpu.retire_port_valid[i] & (retire_ports_instruction[i][6:2] inside {ARITH_T, ARITH_IMM_T, AUIPC_T, LUI_T}) & ~(is_mul[i] | is_div[i]);
        instruction_mix_stats[i][BR_STAT] = cpu.retire_port_valid[i] & (retire_ports_instruction[i][6:2] inside {BRANCH_T, JAL_T, JALR_T});
        instruction_mix_stats[i][MUL_STAT] = cpu.retire_port_valid[i] & (retire_ports_instruction[i][6:2] inside {ARITH_T}) & is_mul[i];
        instruction_mix_stats[i][DIV_STAT] = cpu.retire_port_valid[i] & (retire_ports_instruction[i][6:2] inside {ARITH_T}) & is_div[i];
        //AMO_T appears in both the load and the store category
        instruction_mix_stats[i][LOAD_STAT] = cpu.retire_port_valid[i] & (retire_ports_instruction[i][6:2] inside {LOAD_T, AMO_T});
        instruction_mix_stats[i][STORE_STAT] = cpu.retire_port_valid[i] & (retire_ports_instruction[i][6:2] inside {STORE_T, AMO_T});
        instruction_mix_stats[i][MISC_STAT] = cpu.retire_port_valid[i] & (retire_ports_instruction[i][6:2] inside {SYSTEM_T, FENCE_T});
    end
end
sim_stats #(.NUM_OF_STATS(NUM_STATS), .NUM_INSTRUCTION_MIX_STATS(NUM_INSTRUCTION_MIX_STATS)) stats_block (
.clk (clk),
.rst (rst),
.start_collection (start_collection),
.end_collection (end_collection),
.stats (stats),
.instruction_mix_stats (instruction_mix_stats),
.retire (cpu.retire)
);
////////////////////////////////////////////////////
//Performs the lookups to provide the speculative architectural register file with
//standard register names for simulation purposes
@ -448,7 +587,7 @@ module cva5_sim
logic [31:0][31:0] sim_registers_unamed;
simulation_named_regfile sim_register;
typedef struct packed{
typedef struct packed{
phys_addr_t phys_addr;
logic [$clog2(EXAMPLE_CONFIG.NUM_WB_GROUPS)-1:0] wb_group;
} spec_table_t;

View file

@ -91,7 +91,7 @@ $(CVA5_SIM): $(CVA5_HW_SRCS) $(CVA5_SIM_SRCS)
-o cva5-sim \
$(VERILATOR_LINT_IGNORE) $(VERILATOR_CFLAGS) \
$(CVA5_SIM_SRCS) \
$(CVA5_HW_SRCS) $(VERILATOR_DIR)/cva5_sim.sv --top-module cva5_sim
$(CVA5_HW_SRCS) $(CVA5_DIR)/test_benches/sim_stats.sv $(VERILATOR_DIR)/cva5_sim.sv --top-module cva5_sim
$(MAKE) -C $(CVA5_SIM_DIR) -f Vcva5_sim.mk
.PHONY: clean-cva5-sim