mirror of
https://github.com/openhwgroup/cve2.git
synced 2025-04-20 20:19:08 -04:00
* rename files and modules to cve2 Signed-off-by: Szymon Bieganski <szymon.bieganski@oss.nxp.com> * updated tb files Signed-off-by: Szymon Bieganski <szymon.bieganski@oss.nxp.com> * remaining references to ibex: gitignore, examples, etc. Signed-off-by: Szymon Bieganski <szymon.bieganski@oss.nxp.com> Signed-off-by: Szymon Bieganski <szymon.bieganski@oss.nxp.com> Co-authored-by: Szymon Bieganski <szymon.bieganski@oss.nxp.com>
714 lines
33 KiB
Systemverilog
714 lines
33 KiB
Systemverilog
// Copyright lowRISC contributors.
|
|
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
|
|
// A formal testbench for the ICache. This gets bound into the actual ICache DUT.
|
|
|
|
`include "prim_assert.sv"
|
|
|
|
// A macro to emulate |-> (a syntax that Yosys doesn't currently support).
|
|
`define IMPLIES(a, b) ((b) || (!(a)))
|
|
|
|
`define IS_ONE_HOT(expr, width) \
|
|
!((expr) & ((expr) - {{(width)-1{1'b0}}, 1'b1}))
|
|
|
|
`define ASSUME_ZERO_IN_RESET(name) \
|
|
`ASSUME(name``_zero_in_reset, `IMPLIES(!rst_ni, ~|(name)), clk_i, 1'b0)
|
|
|
|
module formal_tb
|
|
import cve2_pkg::*;
|
|
#(
|
|
// DUT parameters
|
|
parameter bit BranchPredictor = 1'b0,
|
|
parameter bit ICacheECC = 1'b0,
|
|
parameter bit BranchCache = 1'b0,
|
|
|
|
// Internal parameters / localparams
|
|
parameter int unsigned NUM_FB = 4
|
|
) (
|
|
// Top-level ports
|
|
input logic clk_i,
|
|
input logic rst_ni,
|
|
input logic req_i,
|
|
input logic branch_i,
|
|
input logic branch_spec_i,
|
|
input logic predicted_branch_i,
|
|
input logic branch_mispredict_i,
|
|
input logic [31:0] addr_i,
|
|
input logic ready_i,
|
|
input logic valid_o,
|
|
input logic [31:0] rdata_o,
|
|
input logic [31:0] addr_o,
|
|
input logic err_o,
|
|
input logic err_plus2_o,
|
|
input logic instr_req_o,
|
|
input logic instr_gnt_i,
|
|
input logic [31:0] instr_addr_o,
|
|
input logic [BUS_SIZE-1:0] instr_rdata_i,
|
|
input logic instr_err_i,
|
|
input logic instr_pmp_err_i,
|
|
input logic instr_rvalid_i,
|
|
input logic icache_enable_i,
|
|
input logic icache_inval_i,
|
|
input logic busy_o,
|
|
|
|
// Internal signals
|
|
input logic [ADDR_W-1:0] prefetch_addr_q,
|
|
input logic [NUM_FB-1:0][NUM_FB-1:0] fill_older_q,
|
|
input logic [NUM_FB-1:0] fill_busy_q,
|
|
input logic [NUM_FB-1:0] fill_stale_q,
|
|
input logic [NUM_FB-1:0] fill_hit_q,
|
|
input logic [NUM_FB-1:0][IC_LINE_BEATS_W:0] fill_ext_cnt_q,
|
|
input logic [NUM_FB-1:0] fill_ext_hold_q,
|
|
input logic [NUM_FB-1:0] fill_ext_done_d,
|
|
input logic [NUM_FB-1:0] fill_ext_done_q,
|
|
input logic [NUM_FB-1:0][IC_LINE_BEATS_W:0] fill_rvd_cnt_q,
|
|
input logic [NUM_FB-1:0] fill_rvd_done,
|
|
input logic [NUM_FB-1:0][IC_LINE_BEATS_W:0] fill_out_cnt_q,
|
|
input logic [NUM_FB-1:0] fill_out_done,
|
|
input logic [NUM_FB-1:0] fill_ext_req,
|
|
input logic [NUM_FB-1:0] fill_rvd_exp,
|
|
input logic [NUM_FB-1:0] fill_data_sel,
|
|
input logic [NUM_FB-1:0] fill_data_reg,
|
|
input logic [NUM_FB-1:0][IC_LINE_BEATS_W-1:0] fill_ext_off,
|
|
input logic [NUM_FB-1:0][IC_LINE_BEATS_W:0] fill_rvd_beat,
|
|
input logic [NUM_FB-1:0] fill_out_arb,
|
|
input logic [NUM_FB-1:0] fill_rvd_arb,
|
|
input logic [NUM_FB-1:0][IC_LINE_BEATS-1:0] fill_err_q,
|
|
input logic skid_valid_q,
|
|
|
|
input logic [NUM_FB-1:0][ADDR_W-1:0] packed_fill_addr_q
|
|
);
|
|
|
|
logic [ADDR_W-1:0] line_step;
|
|
assign line_step = {{ADDR_W-IC_LINE_W-1{1'b0}},1'b1,{IC_LINE_W{1'b0}}};
|
|
|
|
// We are bound into the DUT. This means we don't control the clock and reset directly, but we
|
|
// still want to constrain rst_ni to reset the module at the start of time (for one cycle) and
|
|
// then stay high.
|
|
//
|
|
// Note that having a cycle with rst_ni low at the start of time means that we can safely use
|
|
// $past, $rose and $fell in calls to `ASSERT without any need for an "f_past_valid signal": they
|
|
// will only be evaluated from cycle 2 onwards.
|
|
logic [1:0] f_startup_count = 2'd0;
|
|
always_ff @(posedge clk_i) begin : reset_assertion
|
|
f_startup_count <= f_startup_count + ((f_startup_count == 2'd3) ? 2'd0 : 2'd1);
|
|
|
|
// Assume that rst_ni is low for the first cycle and not true after that.
|
|
assume (~((f_startup_count == 2'd0) ^ ~rst_ni));
|
|
|
|
// There is a feed-through path from branch_i to req_o which isn't squashed when in reset. Assume
|
|
// that branch_i isn't asserted when in reset.
|
|
assume (`IMPLIES(!rst_ni, !branch_i));
|
|
end
|
|
|
|
// Several of the protocol checks are only valid when there is a valid address. This is false
|
|
// after reset. It becomes true after any branch after reset and then false again on any returned
|
|
// error (because the straight-line address depends on the presumably-bogus rdata).
|
|
logic f_addr_valid;
|
|
always_ff @(posedge clk_i or negedge rst_ni) begin
|
|
if (!rst_ni) begin
|
|
f_addr_valid <= 1'b0;
|
|
end else begin
|
|
if (branch_i) begin
|
|
f_addr_valid <= 1'b1;
|
|
end else if (valid_o & ready_i & err_o) begin
|
|
f_addr_valid <= 1'b0;
|
|
end
|
|
end
|
|
end
|
|
|
|
// Reset assumptions
|
|
//
|
|
// We assume that req_i isn't asserted to the block when in reset (which avoids it making requests
|
|
// on the external bus).
|
|
`ASSUME_ZERO_IN_RESET(req_i)
|
|
|
|
// Parameter assumptions
|
|
//
|
|
// If BranchPredictor = 1'b0, the branch_mispredict_i signal will never go high
|
|
`ASSUME(no_mispred_without_branch_pred, `IMPLIES(branch_mispredict_i, BranchPredictor))
|
|
|
|
// Protocol assumptions
|
|
//
|
|
// These are assumptions based on the top-level ports. They somewhat mirror the assertions in the
|
|
// simulation-based protocol checkers (see code in dv/uvm/icache/dv).
|
|
|
|
// Assumptions about the core
|
|
//
|
|
// The branch address must always be even
|
|
`ASSUME(even_address, `IMPLIES(branch_i, ~addr_i[0]))
|
|
// The branch_spec signal must be driven if branch is
|
|
`ASSUME(gate_bs, `IMPLIES(branch_i, branch_spec_i))
|
|
// Ready will not be asserted when req_i is low
|
|
`ASSUME(ready_implies_req_i, `IMPLIES(ready_i, req_i))
|
|
|
|
// Assumptions about the instruction bus
|
|
//
|
|
// The instruction bus is an in-order pipelined slave. We make requests with instr_req_o. If a
|
|
// request is granted (with instr_gnt_i), then the request has gone out. Sometime in the future,
|
|
// it will come back, but we can make other requests in the meantime. Requests are answered (in
|
|
// order) by the bus asserting instr_rvalid_i.
|
|
//
|
|
// Check that we won't get any spurious responses, using a simple counter of the requests on the
|
|
// bus.
|
|
logic [31:0] f_reqs_on_bus;
|
|
always_ff @(posedge clk_i or negedge rst_ni) begin
|
|
if (!rst_ni) begin
|
|
f_reqs_on_bus <= 32'd0;
|
|
end else begin
|
|
if (instr_req_o & (instr_gnt_i & ~instr_pmp_err_i) & ~instr_rvalid_i)
|
|
f_reqs_on_bus <= f_reqs_on_bus + 32'd1;
|
|
else if (~(instr_req_o & instr_gnt_i & ~instr_pmp_err_i) & instr_rvalid_i)
|
|
f_reqs_on_bus <= f_reqs_on_bus - 32'd1;
|
|
end
|
|
end
|
|
`ASSUME(no_rvalid_without_pending_req, `IMPLIES(instr_rvalid_i, f_reqs_on_bus != 32'd0))
|
|
|
|
// Assume the bus doesn't grant a request which would make the counter wrap (needed for inductive
|
|
// proofs). Since the counter allows (1<<32)-1 requests, this shouldn't be an unreasonable
|
|
// assumption!
|
|
`ASSUME(no_gnt_when_bus_full, ~instr_gnt_i | ~&f_reqs_on_bus);
|
|
|
|
|
|
// Top-level assertions
|
|
//
|
|
// This section contains the assertions that prove the properties we care about. All should be
|
|
// about observable signals (so shouldn't contain any references to anything that isn't exposed as
|
|
// an input port).
|
|
|
|
// REQ stays high until GNT
|
|
//
|
|
// If instr_req_o goes high, we won't drive it low again until instr_gnt_i or instr_pmp_err_i is
|
|
// high (the latter signals that the outgoing request got squashed, so we can forget about it).
|
|
//
|
|
// Read this as "a negedge of instr_req_o implies that the transaction was granted or squashed on
|
|
// the previous cycle".
|
|
`ASSERT(req_to_gnt,
|
|
`IMPLIES($fell(instr_req_o), $past(instr_gnt_i | instr_pmp_err_i)))
|
|
|
|
// ADDR stability
|
|
//
|
|
// If instr_req_o goes high, the address at instr_addr_o will stay constant until the request is
|
|
// squashed or granted. The encoding below says "either the address is stable, the request has
|
|
// been squashed, we've had a grant or this is a new request".
|
|
`ASSERT(req_addr_stable,
|
|
$stable(instr_addr_o) | $past(instr_gnt_i | instr_pmp_err_i | ~instr_req_o))
|
|
|
|
// VALID until READY
|
|
//
|
|
// The handshake isn't quite standard, because the core can cancel it by signalling branch_i,
|
|
// redirecting the icache somewhere else. So we ask that once valid_o goes high, it won't be
|
|
// de-asserted unless either ready_i was high on the previous cycle (in which case, the icache
|
|
// sent an instruction to the core) or branch_i goes high.
|
|
//
|
|
// We also have no requirements on the valid/ready handshake if the address is unknown
|
|
// (!f_addr_valid).
|
|
`ASSERT(vld_to_rdy,
|
|
`IMPLIES(f_addr_valid & $fell(valid_o), $past(branch_i | ready_i)))
|
|
|
|
// ADDR stability
|
|
`ASSERT(addr_stable,
|
|
`IMPLIES(f_addr_valid & $past(valid_o & ~(ready_i | branch_i)), $stable(addr_o)))
|
|
|
|
// ERR stability
|
|
`ASSERT(err_stable,
|
|
`IMPLIES(f_addr_valid & $past(valid_o & ~(ready_i | branch_i)), $stable(err_o)))
|
|
`ASSERT(err_plus2_stable,
|
|
`IMPLIES(f_addr_valid & $past(valid_o & err_o & ~(ready_i | branch_i)), $stable(err_plus2_o)))
|
|
|
|
// ERR_PLUS2 implies uncompressed
|
|
`ASSERT(err_plus_2_implies_uncompressed,
|
|
`IMPLIES(valid_o & err_o & err_plus2_o, rdata_o[1:0] == 2'b11))
|
|
|
|
// RDATA stability
|
|
//
|
|
// If valid_o is true and err_o is false, the bottom 16 bits of rdata_o will stay constant until
|
|
// the core takes the data by asserting ready_i, or until the core branches or de-asserts req_i.
|
|
`ASSERT(rdata_stable_lo,
|
|
`IMPLIES(f_addr_valid & ~err_o & $past(valid_o & ~(ready_i | branch_i)),
|
|
$stable(rdata_o[15:0])))
|
|
`ASSERT(rdata_stable_hi,
|
|
`IMPLIES(f_addr_valid & ~err_o &
|
|
$past(valid_o & ~(ready_i | branch_i)) & (rdata_o[1:0] == 2'b11),
|
|
$stable(rdata_o[31:16])))
|
|
|
|
// Formal coverage points
|
|
//
|
|
// See a good result returned by the cache
|
|
`COVER(fetch_good_result, f_addr_valid & valid_o & ready_i & ~branch_i & ~err_o)
|
|
|
|
// See a bad result returned by the cache
|
|
`COVER(fetch_bad_result, f_addr_valid & valid_o & ready_i & ~branch_i & err_o)
|
|
|
|
// See a bad result for the upper word returned by the cache
|
|
`COVER(fetch_bad_result_2, f_addr_valid & valid_o & ready_i & ~branch_i & err_o & err_plus2_o)
|
|
|
|
// See 8 back-to-back fetches ("full throughput")
|
|
logic [31:0] f_b2b_counter;
|
|
always_ff @(posedge clk_i or negedge rst_ni) begin
|
|
if (!rst_ni) begin
|
|
f_b2b_counter <= 32'd0;
|
|
end else begin
|
|
if (valid_o & ready_i & ~branch_i) begin
|
|
f_b2b_counter <= f_b2b_counter + 32'd1;
|
|
end else begin
|
|
f_b2b_counter <= 32'd0;
|
|
end
|
|
end
|
|
end
|
|
`COVER(back_to_back_fetches, f_b2b_counter == 8);
|
|
|
|
// Internal (induction) assertions
|
|
//
|
|
// Code below this line can refer to internal signals of the DUT. The assertions shouldn't be
|
|
// needed for BMC checks, but will be required to constrain the state space used for k-induction.
|
|
|
|
for (genvar fb = 0; fb < NUM_FB; fb++) begin : g_fb_older_asserts
|
|
// If fill buffer i is busy then fill_older_q[i][j] means that that fill buffer j has an
|
|
// outstanding request which started before us (and should take precedence). We should check
|
|
// that this only happens if buffer j is indeed busy.
|
|
//
|
|
// fill_busy_q[i] -> fill_older_q[i][j] -> fill_busy_q[j]
|
|
//
|
|
// which we can encode as
|
|
//
|
|
// (fill_older_q[i][j] -> fill_busy_q[j]) | ~fill_busy_q[i]
|
|
// = (fill_busy_q[j] | ~fill_older_q[i][j]) | ~fill_busy_q[i]
|
|
//
|
|
// Grouping by j, we can rewrite this as:
|
|
`ASSERT(older_is_busy, &(fill_busy_q | ~fill_older_q[fb]) | ~fill_busy_q[fb])
|
|
|
|
// No fill buffer should ever think that it's older than itself
|
|
`ASSERT(older_anti_refl, !fill_older_q[fb][fb])
|
|
|
|
// The "older" relation should be anti-symmetric (a fill buffer can't be both older than, and
|
|
// younger than, another). This takes NUM_FB*(NUM_FB-1)/2 assertions, comparing each pair of
|
|
// buffers. Here, we do this by looping over the indices below fb.
|
|
//
|
|
// If I and J both think the other is older, then fill_older_q[I][J] and fill_older_q[J][I] will
|
|
// both be true. Check that doesn't happen.
|
|
for (genvar fb2 = 0; fb2 < fb; fb2++) begin : g_older_anti_symm_asserts
|
|
`ASSERT(older_anti_symm, ~(fill_older_q[fb][fb2] & fill_older_q[fb2][fb]))
|
|
end
|
|
|
|
// The older relation should be transitive (if i is older than j and j is older than k, then i
|
|
// is older than k). That is:
|
|
//
|
|
// (fill_busy_q[i] & fill_older_q[i][j]) ->
|
|
// (fill_busy_q[j] & fill_older_q[j][k]) ->
|
|
// (fill_busy_q[i] & fill_older_q[i][k])
|
|
//
|
|
// Note that the second fill_busy_q[i] holds trivially and fill_busy_q[j] holds because of
|
|
// order_is_busy, so this can be rewritten as:
|
|
//
|
|
// fill_busy_q[i] & fill_older_q[i][j] -> fill_older_q[j][k] -> fill_older_q[i][k]
|
|
//
|
|
// Converting A->B->C into (A&B)->C and then rewriting A->B as B|~A, this is equivalent to
|
|
//
|
|
// (fill_older_q[i][k] | ~fill_older_q[j][k]) | ~(fill_busy_q[i] & fill_older_q[i][j])
|
|
//
|
|
// Looping over i and j, we can simplify this as
|
|
//
|
|
// &(fill_older_q[i] | ~fill_older_q[j]) | ~(fill_busy_q[i] & fill_older_q[i][j])
|
|
//
|
|
for (genvar fb2 = 0; fb2 < NUM_FB; fb2++) begin : g_older_transitive_asserts
|
|
`ASSERT(older_transitive,
|
|
(&(fill_older_q[fb] | ~fill_older_q[fb2]) |
|
|
~(fill_busy_q[fb] & fill_older_q[fb][fb2])))
|
|
end
|
|
|
|
// The older relation should be total. This is a bit finicky because of fill buffers that aren't
|
|
// currently busy. Specifically, we want
|
|
//
|
|
// i != j -> fill_busy_q[i] -> fill_busy_q[j] -> (fill_older_q[i][j] | fill_older_q[j][i])
|
|
//
|
|
for (genvar fb2 = 0; fb2 < fb; fb2++) begin : g_older_total_asserts
|
|
`ASSERT(older_total,
|
|
`IMPLIES(fill_busy_q[fb] & fill_busy_q[fb2],
|
|
fill_older_q[fb][fb2] | fill_older_q[fb2][fb]))
|
|
end
|
|
end
|
|
|
|
// Assertions about fill-buffer counters
|
|
for (genvar fb = 0; fb < NUM_FB; fb++) begin : g_fb_counter_asserts
|
|
|
|
// We should never have fill_ext_hold_q[fb] if fill_ext_cnt_q[fb] == IC_LINE_BEATS (because we
|
|
// shouldn't have made a request after we filled up).
|
|
`ASSERT(no_fill_ext_hold_when_full,
|
|
`IMPLIES(fill_ext_hold_q[fb],
|
|
fill_ext_cnt_q[fb] < IC_LINE_BEATS[IC_LINE_BEATS_W:0]))
|
|
|
|
// Each fill buffer is supposed to make at most IC_LINE_BEATS requests (once we've filled the
|
|
// buffer, we shouldn't be asking for more).
|
|
`ASSERT(no_fill_ext_req_when_full,
|
|
`IMPLIES(fill_ext_req[fb],
|
|
(fill_ext_cnt_q[fb] < IC_LINE_BEATS[IC_LINE_BEATS_W:0])))
|
|
|
|
for (genvar fb2 = 0; fb2 < NUM_FB; fb2++) begin : g_older_counter_asserts
|
|
// Because we make requests from the oldest fill buffer first, a fill buffer should only have
|
|
// made any requests if every older fill buffer is done.
|
|
`ASSERT(older_ext_ordering,
|
|
`IMPLIES((fill_busy_q[fb] &&
|
|
(fill_ext_cnt_q[fb] != '0) &&
|
|
fill_older_q[fb][fb2] &&
|
|
fill_busy_q[fb2]),
|
|
fill_ext_done_q[fb2]))
|
|
|
|
// Similarly, if J is older than I then we should see fill_rvd_done[J] before
|
|
// fill_rvd_cnt_q[I] is nonzero.
|
|
`ASSERT(older_rvd_ordering,
|
|
`IMPLIES((fill_busy_q[fb] &&
|
|
(fill_rvd_cnt_q[fb] != '0) &&
|
|
fill_older_q[fb][fb2] &&
|
|
fill_busy_q[fb2]),
|
|
fill_rvd_done[fb2]))
|
|
end
|
|
|
|
// Tying together f_reqs_on_bus (the testbench request tracking) with the outstanding request
|
|
// count implied by fill_ext_cnt_q and fill_rvd_cnt_q.
|
|
//
|
|
// We expect the number of outstanding requests to be the sum of
|
|
//
|
|
// fill_rvd_cnt_q[fb] - fill_ext_cnt_q[fb]
|
|
//
|
|
// over all busy fill buffers.
|
|
logic [31:0] f_rvd_wo_ext_cnt;
|
|
always_comb begin
|
|
f_rvd_wo_ext_cnt = 32'd0;
|
|
for (int i = 0; i < NUM_FB; i++) begin
|
|
if (fill_busy_q[i])
|
|
f_rvd_wo_ext_cnt += {{32-(IC_LINE_BEATS_W+1){1'b0}},
|
|
fill_ext_cnt_q[i] - fill_rvd_cnt_q[i]};
|
|
end
|
|
end
|
|
`ASSERT(rvd_minus_ext_cnt, f_rvd_wo_ext_cnt == f_reqs_on_bus);
|
|
|
|
// We have to make a request before we get a response, so no fill buffer should have more
|
|
// responses than requests.
|
|
`ASSERT(fill_rvd_le_ext, fill_rvd_cnt_q[fb] <= fill_ext_cnt_q[fb])
|
|
|
|
// When data comes back from the instruction bus, it will be assigned to the oldest fill buffer
|
|
// that still expects to receive some. This is correct because we always make requests from the
|
|
// oldest fill buffer (and the instruction bus answers in-order).
|
|
|
|
// We should never expect to receive beats of data unless fill_rvd_cnt_q is less than
|
|
// fill_ext_cnt_q. Note that fill_rvd_exp can be true in this situation, but fill_rvd_arb
|
|
// shouldn't be.
|
|
`ASSERT(rvd_arb_implies_ext_ahead,
|
|
`IMPLIES(fill_rvd_arb[fb], fill_rvd_cnt_q[fb] < fill_ext_cnt_q[fb]))
|
|
|
|
// Similarly, each fill buffer expects to receive at most IC_LINE_BEATS responses
|
|
`ASSERT(no_fill_rvd_exp_when_full,
|
|
`IMPLIES(fill_rvd_exp[fb], fill_rvd_cnt_q[fb] < IC_LINE_BEATS[IC_LINE_BEATS_W:0]))
|
|
|
|
// There are several signals per fb which must be at most equal to IC_LINE_BEATS, but they are
|
|
// stored with $clog2(IC_LINE_BEATS_W) + 1 bits, so the signals can represent much bigger
|
|
// numbers.
|
|
`define ASSERT_MAX_LINE_BEATS(name) \
|
|
`ASSERT(name``_max, name[fb] <= IC_LINE_BEATS[IC_LINE_BEATS_W:0])
|
|
|
|
`ASSERT_MAX_LINE_BEATS(fill_ext_cnt_q)
|
|
`ASSERT_MAX_LINE_BEATS(fill_rvd_cnt_q)
|
|
`ASSERT_MAX_LINE_BEATS(fill_out_cnt_q)
|
|
|
|
// All fill-buffer addresses should be half-word aligned. This won't quite be true after an
|
|
// error (because the prefetch address can get messed up).
|
|
`ASSERT(fb_addr_hw_aligned,
|
|
`IMPLIES(f_addr_valid & fill_busy_q[fb] & ~fill_stale_q[fb],
|
|
~packed_fill_addr_q[fb][0]))
|
|
|
|
// The output counter shouldn't run ahead of the rvd counter unless there was a cache hit or an
|
|
// error. Note that the received counter counts from zero, whereas the output counter starts at
|
|
// the first beat to come out. We can adjust by using fill_rvd_beat, which starts at the first
|
|
// beat (like fill_out_cnt_q).
|
|
`ASSERT(fill_out_le_rvd,
|
|
`IMPLIES(fill_busy_q[fb] & ~fill_stale_q[fb] & ~branch_i,
|
|
fill_hit_q[fb] ||
|
|
|fill_err_q[fb] ||
|
|
(fill_out_cnt_q[fb] <= fill_rvd_beat[fb])))
|
|
end
|
|
|
|
// The prefetch address is the next address to prefetch. It should always be at least half-word
|
|
// aligned (it's populated by addr_i and then gets aligned to line boundaries afterwards)
|
|
`ASSERT(prefetch_addr_hw_aligned, `IMPLIES(f_addr_valid, ~prefetch_addr_q[0]))
|
|
|
|
// Define an analogue of fill_older_q, but only for buffers that are busy, not stale and think
|
|
// they have more data to return.
|
|
logic [NUM_FB-1:0] f_has_output;
|
|
logic [NUM_FB-1:0][NUM_FB-1:0] f_older_with_output, f_younger_with_output;
|
|
always_comb begin
|
|
f_has_output = '0;
|
|
f_older_with_output = '0;
|
|
for (int i = 0; i < NUM_FB; i++) begin
|
|
f_has_output[i] = fill_busy_q[i] & ~fill_stale_q[i] & ~fill_out_done[i];
|
|
end
|
|
for (int i = 0; i < NUM_FB; i++) begin
|
|
for (int j = 0; j < NUM_FB; j++) begin
|
|
f_older_with_output[i][j] = f_has_output[i] & f_has_output[j] & fill_older_q[i][j];
|
|
f_younger_with_output[j][i] = f_older_with_output[i][j];
|
|
end
|
|
end
|
|
end
|
|
|
|
// Find the oldest busy, non-stale fill buffer that doesn't think it's finished returning data.
|
|
// This is the one that should be outputting data. Grab its index and various associated
|
|
// addresses. Similarly with the youngest.
|
|
int unsigned f_oldest_fb, f_youngest_fb;
|
|
logic [ADDR_W-1:0] f_oldest_fill_addr_q, f_youngest_fill_addr_q;
|
|
logic [IC_LINE_BEATS_W:0] f_oldest_fill_out_cnt_q;
|
|
always_comb begin
|
|
f_oldest_fb = NUM_FB;
|
|
f_youngest_fb = NUM_FB;
|
|
f_oldest_fill_addr_q = '0;
|
|
f_oldest_fill_out_cnt_q = '0;
|
|
for (int i = 0; i < NUM_FB; i++) begin
|
|
if (f_has_output[i] & ~|(f_older_with_output[i])) begin
|
|
f_oldest_fb = i;
|
|
f_oldest_fill_addr_q = packed_fill_addr_q[i];
|
|
f_oldest_fill_out_cnt_q = fill_out_cnt_q[i];
|
|
end
|
|
if (f_has_output[i] & ~|(f_younger_with_output[i])) begin
|
|
f_youngest_fb = i;
|
|
f_youngest_fill_addr_q = packed_fill_addr_q[i];
|
|
end
|
|
end
|
|
end
|
|
|
|
logic [ADDR_W-1:0] f_oldest_fill_line_start, f_youngest_fill_line_start;
|
|
assign f_oldest_fill_line_start =
|
|
{f_oldest_fill_addr_q[ADDR_W-1:IC_LINE_W], {IC_LINE_W{1'b0}}};
|
|
assign f_youngest_fill_line_start =
|
|
{f_youngest_fill_addr_q[ADDR_W-1:IC_LINE_W], {IC_LINE_W{1'b0}}};
|
|
|
|
// Suppose we have at least one fill buffer with data that needs outputting. Consider the oldest
|
|
// such fill buffer (f_oldest_fb). Data flows as follows:
|
|
//
|
|
// fill buffer -> (skid buffer) -> output
|
|
//
|
|
// We always read from a 4-byte chunk of the fill buffer, whose "read address"
|
|
// (f_oldest_fill_beat_start) is
|
|
//
|
|
// f_oldest_fill_line_start + 4 * f_oldest_fill_out_cnt_q
|
|
//
|
|
// The interaction with the skid buffer is a little complicated. Here are the possible scenarios,
|
|
// where the 1st two columns (addr_o, skid_valid_q) determine the other two (f_skidded_addr,
|
|
// f_skidded_beat_addr).
|
|
//
|
|
// | addr_o | skid | notional | line start |
|
|
// | mod 4 | valid | read addr | + out cnt |
|
|
// |--------+-------+-----------+------------|
|
|
// | 0 | 0 | 0 | 0 |
|
|
// | 0 | 1 | 2 | 0 |
|
|
// | 2 | 0 | 2 | 0 |
|
|
// | 2 | 1 | 4 | 4 |
|
|
//
|
|
// These checks are ignored if the address is invalid (because of an error) or on the cycle where
|
|
// a branch comes in.
|
|
logic [ADDR_W-1:0] f_skidded_addr;
|
|
logic [ADDR_W-1:0] f_beat_addr;
|
|
logic [ADDR_W-1:0] f_skidded_beat_addr;
|
|
assign f_skidded_addr = addr_o + 2 * {{ADDR_W-1{1'b0}}, skid_valid_q};
|
|
assign f_beat_addr = {addr_o[ADDR_W-1:2], 2'b00};
|
|
assign f_skidded_beat_addr = {f_skidded_addr[ADDR_W-1:2], 2'b00};
|
|
|
|
logic [ADDR_W-1:0] f_oldest_fill_beat_start;
|
|
assign f_oldest_fill_beat_start = (f_oldest_fill_line_start +
|
|
{{ADDR_W-IC_LINE_BEATS_W-3{1'b0}},
|
|
f_oldest_fill_out_cnt_q, 2'b00});
|
|
|
|
`ASSERT(oldest_fb_addr,
|
|
`IMPLIES((f_oldest_fb < NUM_FB) && f_addr_valid && ~branch_i,
|
|
f_oldest_fill_beat_start == f_skidded_beat_addr))
|
|
|
|
// One other property that should hold for the oldest FB (only really relevant for branch targets)
|
|
// is that fill_addr_q <= f_skidded_addr. This avoids the model getting into a state where
|
|
// the counters think we fetched the top half of a line first (because fill_addr_q is something
|
|
// like 0x4) and that we're now reading the lower half, but addr_o is something like 0x2 and
|
|
// return results trash the output data.
|
|
`ASSERT(oldest_fb_addr_low,
|
|
`IMPLIES((f_oldest_fb < NUM_FB) && f_addr_valid && ~branch_i,
|
|
f_oldest_fill_addr_q <= f_skidded_addr))
|
|
|
|
// Track the address of the first beat for each fill buffer and its bottom address
|
|
logic [NUM_FB-1:0][ADDR_W-1:0] f_fill_beat_addr_q, f_fill_line_addr_q;
|
|
always_comb begin
|
|
f_fill_beat_addr_q = '0;
|
|
f_fill_line_addr_q = '0;
|
|
for (int i = 0; i < NUM_FB; i++) begin
|
|
f_fill_beat_addr_q[i] = {packed_fill_addr_q[i][ADDR_W-1:BUS_W], {BUS_W{1'b0}}};
|
|
f_fill_line_addr_q[i] = {packed_fill_addr_q[i][ADDR_W-1:IC_LINE_W], {IC_LINE_W{1'b0}}};
|
|
end
|
|
end
|
|
|
|
for (genvar fb = 0; fb < NUM_FB; fb++) begin : g_fb_addr_asserts
|
|
for (genvar fb2 = 0; fb2 < NUM_FB; fb2++) begin : g_fb_addr_asserts2
|
|
// We've checked that older is a total ordering on busy fill buffers (see older_total and the
|
|
// assertions immediately preceding it). That means it's also a total ordering on non-stale
|
|
// busy fill buffers (taking a subset of a total order doesn't change the fact it's total). We
|
|
// want to check if fb I immediately precedes fb J then J's address should immediately follow
|
|
// I's line.
|
|
//
|
|
// "fb is older than fb2 and nothing else comes in between" can be phrased as
|
|
// "fill_older_q[fb2] is exactly equal to fill_older_q[fb] plus the bit for fb" (note that
|
|
// older_anti_refl implies that won't be set). That would be:
|
|
//
|
|
// fill_older_q[fb2] == fill_older_q[fb] | ({{NUM_FB-1{1'b0}}, 1'b1} << fb)
|
|
//
|
|
// Since we are only interested in FBs that have more output data to write, we use
|
|
// f_older_with_output instead of fill_older_q.
|
|
`ASSERT(chained_fb_addr,
|
|
`IMPLIES((f_older_with_output[fb2] ==
|
|
(f_older_with_output[fb] | ({{NUM_FB-1{1'b0}}, 1'b1} << fb))),
|
|
packed_fill_addr_q[fb2] == f_fill_line_addr_q[fb] + line_step))
|
|
end
|
|
|
|
// If there is an older buffer than this one which has data to be output, this fill buffer's
|
|
// output count should be zero.
|
|
`ASSERT(younger_out_cnt_zero, `IMPLIES(|f_older_with_output[fb], fill_out_cnt_q[fb] == '0))
|
|
end
|
|
|
|
// Just as we check chaining between adjacent fill buffers' addresses, we expect prefetch_addr_q
|
|
// (used for the next fetch) to be the line after the youngest fill buffer.
|
|
`ASSERT(chained_prefetch_addr,
|
|
`IMPLIES(f_youngest_fb < NUM_FB,
|
|
prefetch_addr_q == f_youngest_fill_line_start + line_step))
|
|
|
|
// The output address can never be aligned when skid_valid is true. The state machine looks like
|
|
// this:
|
|
//
|
|
// | State | Next states |
|
|
// | (misaligned; skid_valid) | |
|
|
// |--------------------------+----------------------------|
|
|
// | (0; 0) | (0; 0) (uc instr) |
|
|
// | | (1; 1) (cmp instr) |
|
|
// | (1; 0) | (0; 0) (cmp instr) |
|
|
// | | (1; 1) (uc instr 1st half) |
|
|
// | (1; 1) | (0; 0) (cmp instr) |
|
|
// | | (1; 1) (uc instr) |
|
|
//
|
|
// The 1st two states are possible after branches. There's no arc to (0; 1).
|
|
`ASSERT(misaligned_when_skid_valid, `IMPLIES(skid_valid_q, addr_o[1]))
|
|
|
|
// If no fill buffers are waiting to output data, prefetch_addr_q holds the address that will be
|
|
// assigned to the next available fill buffer. This should equal the skidded version of addr_o
|
|
// (both addresses will have recently been set by a branch).
|
|
`ASSERT(no_fb_prefetch_addr,
|
|
`IMPLIES(f_addr_valid && (f_oldest_fb == NUM_FB), prefetch_addr_q == f_skidded_addr))
|
|
|
|
// fill_out_arb should be 1-hot
|
|
`ASSERT(fill_out_arb_one_hot, `IS_ONE_HOT(fill_out_arb, NUM_FB))
|
|
|
|
// fill_data_sel is not 1-hot, but it is when restricted to busy fill buffers that are not stale
|
|
// or done.
|
|
`ASSERT(fill_data_sel_one_hot,
|
|
`IS_ONE_HOT(fill_data_sel & fill_busy_q & ~fill_out_done & ~fill_stale_q, NUM_FB))
|
|
|
|
// fill_data_reg is based off of fill_data_sel and *should* be 1-hot (used for muxing)
|
|
`ASSERT(fill_data_reg_one_hot, `IS_ONE_HOT(fill_data_reg, NUM_FB))
|
|
|
|
|
|
// We can derive masks of beats that we think we have requested and received. fill_ext_cnt_q[fb]
|
|
// (fill_rvd_cnt_q[fb]) are the number of beats that we've requested (received). We started at
|
|
// beat fill_addr_q[fb][IC_LINE_W-1:BUS_W].
|
|
//
|
|
// To make it easy to track what's going on, we define auxiliary signals. f_fill_first_beat[fb] is
|
|
// the index of the first beat to be fetched for this fill buffer. This is non-zero if the fill
|
|
// buffer comes about from a branch and fill_addr_q[fb] starts after the first beat.
|
|
//
|
|
// f_fill_ext_end_beat[fb] (f_fill_rvd_end_beat[fb]) is the index of the first beat that hasn't
|
|
// been requested (received). This doesn't wrap around, so if IC_LINE_BEATS is 2, we started at
|
|
// beat 1 and have fetched 2 beats then it will be 3 (not 1).
|
|
//
|
|
// With these in hand, you can define f_fill_ext_mask[fb] (f_fill_rvd_mask[fb]), which has a bit
|
|
// per beat, which is set if the corresponding data has been requested (received). Writing b for
|
|
// the beat in question, s for the start beat, c for the count, e for the end beat (without
|
|
// wrapping) and w for the log of the number of beats in a line, the check becomes:
|
|
//
|
|
// (c != 0) && ((s <= b) ? (b < e) : (b + (1 << w) < e))
|
|
// = (c != 0) && (e > ((s <= b) ? b : (b + (1 << w))))
|
|
// = (c != 0) && (e > (b + ((s <= b) ? 0 : (1 << w))))
|
|
// = (c != 0) && (e > (b + (((s <= b) ? 0 : 1) << w)))
|
|
// = (c != 0) && (e > (b + ((s > b) << w)))
|
|
|
|
logic [NUM_FB-1:0][IC_LINE_BEATS_W-1:0] f_fill_first_beat;
|
|
logic [NUM_FB-1:0][IC_LINE_BEATS_W:0] f_fill_ext_end_beat, f_fill_rvd_end_beat;
|
|
logic [NUM_FB-1:0][IC_LINE_BEATS-1:0] f_fill_ext_mask, f_fill_rvd_mask;
|
|
|
|
always_comb begin
|
|
f_fill_first_beat = '0;
|
|
f_fill_ext_end_beat = '0;
|
|
f_fill_ext_mask = '0;
|
|
f_fill_rvd_end_beat = '0;
|
|
f_fill_rvd_mask = '0;
|
|
for (int i = 0; i < NUM_FB; i++) begin
|
|
f_fill_first_beat[i] = f_fill_beat_addr_q[i][IC_LINE_W-1:BUS_W];
|
|
f_fill_ext_end_beat[i] = {1'b0, f_fill_first_beat[i]} + fill_ext_cnt_q[i];
|
|
f_fill_rvd_end_beat[i] = {1'b0, f_fill_first_beat[i]} + fill_rvd_cnt_q[i];
|
|
for (int b = 0; b < IC_LINE_BEATS; b++) begin
|
|
f_fill_ext_mask[i][b] = ((|fill_ext_cnt_q[i]) &&
|
|
(f_fill_ext_end_beat[i] >
|
|
(b[IC_LINE_BEATS_W:0] +
|
|
{f_fill_first_beat[i] > b[IC_LINE_BEATS_W-1:0],
|
|
{IC_LINE_BEATS_W{1'b0}}})));
|
|
f_fill_rvd_mask[i][b] = (|fill_rvd_cnt_q[i] &&
|
|
(f_fill_rvd_end_beat[i] >
|
|
(b[IC_LINE_BEATS_W:0] +
|
|
{f_fill_first_beat[i] > b[IC_LINE_BEATS_W-1:0],
|
|
{IC_LINE_BEATS_W{1'b0}}})));
|
|
end
|
|
end
|
|
end
|
|
|
|
// Now we have that mask, we can assert that we only have fill errors on beats that we have
|
|
// fetched. That's not quite true (because of PMP errors). So what we really want to say is:
|
|
//
|
|
// If beat b has an error then either we have received data for beat b or we tried to get data
|
|
// for beat b and the memory request was squashed by a PMP error.
|
|
//
|
|
// The former case is easy (bit b should be set in f_fill_rvd_mask). In the latter case,
|
|
// fill_ext_done_d will be true, fill_ext_cnt_q will be less than IC_LINE_BEATS, and fill_ext_off
|
|
// (the next beat to fetch) will equal b. We define explicit masks for the bits allowed in each
|
|
// case.
|
|
logic [NUM_FB-1:0][IC_LINE_BEATS-1:0] f_rvd_err_mask, f_pmp_err_mask, f_err_mask;
|
|
always_comb begin
|
|
f_rvd_err_mask = '0;
|
|
f_pmp_err_mask = '0;
|
|
for (int i = 0; i < NUM_FB; i++) begin
|
|
f_rvd_err_mask[i] = f_fill_rvd_mask[i];
|
|
for (int b = 0; b < IC_LINE_BEATS; b++) begin
|
|
f_pmp_err_mask[i][b] = (fill_ext_done_d[i] &&
|
|
!fill_ext_cnt_q[i][IC_LINE_BEATS_W] &&
|
|
(fill_ext_off[i] == b[IC_LINE_BEATS_W-1:0]));
|
|
end
|
|
end
|
|
end
|
|
assign f_err_mask = f_rvd_err_mask | f_pmp_err_mask;
|
|
|
|
for (genvar fb = 0; fb < NUM_FB; fb++) begin : g_fb_error_beat_asserts
|
|
`ASSERT(err_is_recv_or_pmp,
|
|
`IMPLIES(fill_busy_q[fb], ~|(fill_err_q[fb] & ~f_err_mask[fb])))
|
|
end
|
|
|
|
// If there is data in the skid buffer, it either came from the previous line (and addr_o is the
|
|
// top of that line and f_skidded_addr is the start of our line) or it came from this line. In the
|
|
// latter case, we must have received the data that's in the skid buffer.
|
|
//
|
|
// If there is no valid fill buffer at the moment, any skid buffer data must be from the previous
|
|
// line.
|
|
`ASSERT(skid_is_rvd_wo_buffer,
|
|
`IMPLIES((f_oldest_fb == NUM_FB) && f_addr_valid && skid_valid_q,
|
|
f_skidded_addr[IC_LINE_W-1:0] == '0))
|
|
|
|
// If there is a valid fill buffer and the skidded address isn't the start of the line then we
|
|
// must either have received the beat of data the skid buffer came from, that beat should have an
|
|
// associated error or we must have had a cache hit.
|
|
`ASSERT(skid_is_rvd_with_buffer,
|
|
`IMPLIES(((f_oldest_fb < NUM_FB) && f_addr_valid &&
|
|
skid_valid_q && (f_skidded_addr[IC_LINE_W-1:0] != '0)),
|
|
f_fill_rvd_mask[f_oldest_fb][f_beat_addr[IC_LINE_W-1:BUS_W]] |
|
|
fill_err_q[f_oldest_fb][f_beat_addr[IC_LINE_W-1:BUS_W]] |
|
|
fill_hit_q[f_oldest_fb]))
|
|
|
|
|
|
endmodule
|