diff --git a/config/rv32gc/config.vh b/config/rv32gc/config.vh index b08bf06d8..39992605d 100644 --- a/config/rv32gc/config.vh +++ b/config/rv32gc/config.vh @@ -46,7 +46,7 @@ localparam ZIHPM_SUPPORTED = 1; localparam ZFH_SUPPORTED = 0; localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; -localparam ZICBOZ_SUPPORTED = 0; +localparam ZICBOZ_SUPPORTED = 1; localparam ZICBOP_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 0; localparam SVINVAL_SUPPORTED = 1; diff --git a/config/rv64gc/config.vh b/config/rv64gc/config.vh index 939ce72c8..ddbca789a 100644 --- a/config/rv64gc/config.vh +++ b/config/rv64gc/config.vh @@ -48,7 +48,7 @@ localparam ZIHPM_SUPPORTED = 1; localparam ZFH_SUPPORTED = 0; localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; -localparam ZICBOZ_SUPPORTED = 0; +localparam ZICBOZ_SUPPORTED = 1; localparam ZICBOP_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 0; localparam SVINVAL_SUPPORTED = 1; diff --git a/src/cache/cache.sv b/src/cache/cache.sv index 52b54029f..1714544ec 100644 --- a/src/cache/cache.sv +++ b/src/cache/cache.sv @@ -101,6 +101,7 @@ module cache import cvw::*; #(parameter cvw_t P, logic [LINELEN/8-1:0] LineByteMask; logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr; logic ZeroCacheLine; + logic CMOZeroHit; logic [LINELEN-1:0] PreLineWriteData; genvar index; @@ -119,7 +120,7 @@ module cache import cvw::*; #(parameter cvw_t P, // Array of cache ways, along with victim, hit, dirty, and read merging logic cacheway #(P, PA_BITS, XLEN, NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, READ_ONLY_CACHE) CacheWays[NUMWAYS-1:0]( .clk, .reset, .CacheEn, .CMOp, .CacheSet, .PAdr, .LineWriteData, .LineByteMask, - .SetValid, .ClearValid, .SetDirty, .ClearDirty, .ZeroCacheLine, .SelWriteback, .SelCMOWriteback, .VictimWay, + .SetValid, .ClearValid, .SetDirty, .ClearDirty, .CMOZeroHit, .SelWriteback, .SelCMOWriteback, .VictimWay, .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, 
.InvalidateCache); // Select victim way for associative caches @@ -225,7 +226,7 @@ module cache import cvw::*; #(parameter cvw_t P, .FlushStage, .CacheRW, .CacheAtomic, .Stall, .CacheHit, .LineDirty, .CacheStall, .CacheCommitted, .CacheMiss, .CacheAccess, .SelAdr, - .ClearDirty, .SetDirty, .SetValid, .ClearValid, .ZeroCacheLine, .SelWriteback, .SelCMOWriteback, .SelFlush, + .ClearDirty, .SetDirty, .SetValid, .ClearValid, .ZeroCacheLine, .CMOZeroHit, .SelWriteback, .SelCMOWriteback, .SelFlush, .FlushAdrCntEn, .FlushWayCntEn, .FlushCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelFetchBuffer, .InvalidateCache, .CMOp, .CacheEn, .LRUWriteEn); diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index 8f0e7aa2d..be5616acd 100644 --- a/src/cache/cachefsm.sv +++ b/src/cache/cachefsm.sv @@ -60,6 +60,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, output logic SetDirty, // Set the dirty bit in the selected way and set output logic ClearDirty, // Clear the dirty bit in the selected way and set output logic ZeroCacheLine, // Write zeros to all bytes of cacheline + output logic CMOZeroHit, // CMOZ hit output logic SelWriteback, // Overrides cached tag check to select a specific way and set for writeback output logic SelCMOWriteback, // Overrides cached tag check to select a specific way and set for writeback for both data and tag output logic LRUWriteEn, // Update the LRU state @@ -75,7 +76,10 @@ module cachefsm import cvw::*; #(parameter cvw_t P, logic AnyUpdateHit, AnyHit; logic AnyMiss; logic FlushFlag; - + logic CMOWritebackHit; + logic CMOZeroNoEviction; + logic CMOZeroEviction; + typedef enum logic [3:0]{STATE_READY, // hit states // miss states STATE_FETCH, @@ -93,8 +97,12 @@ module cachefsm import cvw::*; #(parameter cvw_t P, statetype CurrState, NextState; assign AnyMiss = (CacheRW[0] | CacheRW[1]) & ~CacheHit & ~InvalidateCache; // exclusion-tag: cache AnyMiss - assign AnyUpdateHit = (CacheRW[0]) & CacheHit; // exclusion-tag: icache storeAMO1 + 
assign AnyUpdateHit = (CacheRW[0]) & CacheHit; // exclusion-tag: icache storeAMO1 assign AnyHit = AnyUpdateHit | (CacheRW[1] & CacheHit); // exclusion-tag: icache AnyUpdateHit + assign CMOWritebackHit = (CMOp[1] | CMOp[2]) & CacheHit; + assign CMOZeroNoEviction = CMOp[3] & ~LineDirty; // (hit or miss) with no writeback store zeros now + assign CMOZeroEviction = CMOp[3] & LineDirty; // (hit or miss) with writeback dirty line + assign FlushFlag = FlushAdrFlag & FlushWayFlag; // outputs for the performance counters. @@ -117,8 +125,8 @@ module cachefsm import cvw::*; #(parameter cvw_t P, STATE_READY: if(InvalidateCache) NextState = STATE_READY; // exclusion-tag: dcache InvalidateCheck else if(FlushCache & ~READ_ONLY_CACHE) NextState = STATE_FLUSH; else if(AnyMiss & (READ_ONLY_CACHE | ~LineDirty)) NextState = STATE_FETCH; // exclusion-tag: icache FETCHStatement - else if(AnyMiss) /* & LineDirty */ NextState = STATE_WRITEBACK; // exclusion-tag: icache WRITEBACKStatement - else if((CMOp[1] | CMOp[2]) & CacheHit) NextState = STATE_CMO_WRITEBACK; + else if(AnyMiss | CMOZeroEviction) NextState = STATE_WRITEBACK; // exclusion-tag: icache WRITEBACKStatement + else if(CMOWritebackHit) NextState = STATE_CMO_WRITEBACK; else NextState = STATE_READY; STATE_FETCH: if(CacheBusAck) NextState = STATE_WRITE_LINE; else if(CacheBusAck) NextState = STATE_READY; @@ -127,7 +135,8 @@ module cachefsm import cvw::*; #(parameter cvw_t P, STATE_READ_HOLD: if(Stall) NextState = STATE_READ_HOLD; else NextState = STATE_READY; // exclusion-tag-start: icache case - STATE_WRITEBACK: if(CacheBusAck) NextState = STATE_FETCH; + STATE_WRITEBACK: if(CacheBusAck & ~CMOp[3]) NextState = STATE_FETCH; + else if(CacheBusAck) NextState = STATE_CMO_DONE; else NextState = STATE_WRITEBACK; // eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack. 
STATE_FLUSH: if(LineDirty) NextState = STATE_FLUSH_WRITEBACK; @@ -139,6 +148,8 @@ module cachefsm import cvw::*; #(parameter cvw_t P, STATE_CMO_WRITEBACK: if(CacheBusAck & (CMOp[1] | CMOp[2])) NextState = STATE_CMO_DONE; else NextState = STATE_CMO_WRITEBACK; + STATE_CMO_DONE: if(Stall) NextState = STATE_CMO_DONE; + else NextState = STATE_READY; // exclusion-tag-end: icache case default: NextState = STATE_READY; endcase @@ -146,7 +157,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, // com back to CPU assign CacheCommitted = (CurrState != STATE_READY) & ~(READ_ONLY_CACHE & (CurrState == STATE_READ_HOLD | CurrState == STATE_CMO_DONE)); - assign CacheStall = (CurrState == STATE_READY & (FlushCache | AnyMiss | ((CMOp[1] | CMOp[2]) & CacheHit))) | // exclusion-tag: icache StallStates + assign CacheStall = (CurrState == STATE_READY & (FlushCache | AnyMiss | CMOWritebackHit | CMOZeroEviction)) | // exclusion-tag: icache StallStates (CurrState == STATE_FETCH) | (CurrState == STATE_WRITEBACK) | (CurrState == STATE_WRITE_LINE) | // this cycle writes the sram, must keep stalling so the next cycle can read the next hit/miss unless its a write. 
@@ -154,21 +165,25 @@ module cachefsm import cvw::*; #(parameter cvw_t P, (CurrState == STATE_FLUSH_WRITEBACK) | (CurrState == STATE_CMO_WRITEBACK); // write enables internal to cache + assign CMOZeroHit = CurrState == STATE_READY & CMOp[3] & CacheHit ; assign SetValid = CurrState == STATE_WRITE_LINE | - (CurrState == STATE_READY & CMOp[3]); // *** RT: NOT completely right has to be a hit + (CurrState == STATE_READY & CMOZeroNoEviction) | + (P.ZICBOZ_SUPPORTED & CurrState == STATE_WRITEBACK & CacheBusAck & CMOp[3]); assign ClearValid = P.ZICBOM_SUPPORTED & ((CurrState == STATE_READY & CMOp[0] & CacheHit) | (CurrState == STATE_CMO_WRITEBACK & CMOp[2] & CacheBusAck)); // coverage off -item e 1 -fecexprrow 8 assign LRUWriteEn = (CurrState == STATE_READY & AnyHit) | (CurrState == STATE_WRITE_LINE) & ~FlushStage; // exclusion-tag-start: icache flushdirtycontrols - assign SetDirty = (CurrState == STATE_READY & (AnyUpdateHit | CMOp[3])) | // exclusion-tag: icache SetDirty *** NOT completely right has to be a hit for CMOp[3] - (CurrState == STATE_WRITE_LINE & (CacheRW[0])); + assign SetDirty = (CurrState == STATE_READY & (AnyUpdateHit | CMOZeroNoEviction)) | // exclusion-tag: icache SetDirty + (CurrState == STATE_WRITE_LINE & (CacheRW[0])) | + (P.ZICBOZ_SUPPORTED & CurrState == STATE_WRITEBACK & (CMOp[3] & CacheBusAck)); assign ClearDirty = (CurrState == STATE_WRITE_LINE & ~(CacheRW[0])) | // exclusion-tag: icache ClearDirty (CurrState == STATE_FLUSH & LineDirty) | // This is wrong in a multicore snoop cache protocal. Dirty must be cleared concurrently and atomically with writeback. For single core cannot clear after writeback on bus ack and change flushadr. Clears the wrong set. 
// Flush and eviction controls (P.ZICBOM_SUPPORTED & CurrState == STATE_CMO_WRITEBACK & (CMOp[1] | CMOp[2]) & CacheBusAck); - assign ZeroCacheLine = CurrState == STATE_READY & CMOp[3]; // *** RT: NOT completely right + assign ZeroCacheLine = P.ZICBOZ_SUPPORTED & ((CurrState == STATE_READY & CMOZeroNoEviction) | + (CurrState == STATE_WRITEBACK & (CMOp[3] & CacheBusAck))); assign SelWriteback = (CurrState == STATE_WRITEBACK & ~CacheBusAck) | (CurrState == STATE_READY & AnyMiss & LineDirty); assign SelCMOWriteback = CurrState == STATE_CMO_WRITEBACK; @@ -188,7 +203,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, // Bus interface controls assign CacheBusRW[1] = (CurrState == STATE_READY & AnyMiss & ~LineDirty) | // exclusion-tag: icache CacheBusRCauses (CurrState == STATE_FETCH & ~CacheBusAck) | - (CurrState == STATE_WRITEBACK & CacheBusAck); + (CurrState == STATE_WRITEBACK & CacheBusAck & ~CMOp[3]); assign CacheBusRW[0] = (CurrState == STATE_READY & AnyMiss & LineDirty) | // exclusion-tag: icache CacheBusW (CurrState == STATE_WRITEBACK & ~CacheBusAck) | (CurrState == STATE_FLUSH_WRITEBACK & ~CacheBusAck) | diff --git a/src/cache/cacheway.sv b/src/cache/cacheway.sv index 85d2b36ab..216cd82d2 100644 --- a/src/cache/cacheway.sv +++ b/src/cache/cacheway.sv @@ -41,10 +41,10 @@ module cacheway import cvw::*; #(parameter cvw_t P, input logic SetValid, // Set the valid bit in the selected way and set input logic ClearValid, // Clear the valid bit in the selected way and set input logic SetDirty, // Set the dirty bit in the selected way and set - input logic ZeroCacheLine, // Write zeros to all bytes of a cache line + input logic CMOZeroHit, // Write zeros to all bytes of a cache line input logic ClearDirty, // Clear the dirty bit in the selected way and set input logic SelWriteback, // Overrides cached tag check to select a specific way and set for writeback - input logic SelCMOWriteback, // Overrides cached tag check to select a specific way and set for writeback for 
both data and tag + input logic SelCMOWriteback,// Overrides cached tag check to select a specific way and set for writeback for both data and tag input logic SelFlush, // [0] Use SelAdr, [1] SRAM reads/writes from FlushAdr input logic VictimWay, // LRU selected this way as victim to evict input logic FlushWay, // This way is selected for flush and possible writeback if dirty @@ -81,7 +81,9 @@ module cacheway import cvw::*; #(parameter cvw_t P, logic SelNotHit2; if (P.ZICBOZ_SUPPORTED) begin : cbologic - assign SelNotHit2 = SetValid & ~(ZeroCacheLine & HitWay); + assign SelNotHit2 = SetValid & ~CMOZeroHit; + //assign SelNotHit2 = SetValid; + end else begin : cbologic assign SelNotHit2 = SetValid; end @@ -96,7 +98,8 @@ module cacheway import cvw::*; #(parameter cvw_t P, // nonzero ways will never see SelFlush=0 while FlushWay=1 since FlushWay only advances on a subset of SelFlush assertion cases. assign FlushWayEn = FlushWay & SelFlush; // *** RT: This is slopy. I should refactor to have the fsm issue two types of writeback commands - assign SelNonHit = FlushWayEn | SelNotHit2 | SelWriteback; + assign SelNonHit = FlushWayEn | SelNotHit2 | SelWriteback; // *** this is not correct + //assign SelNonHit = FlushWayEn | SelNotHit2 | SelWriteback; end else begin:flushlogic // no flush operation for read-only caches. 
assign SelTag = VictimWay; diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/Makefrag b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/Makefrag index 6b13612ce..bc5f454bb 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/Makefrag +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/Makefrag @@ -57,6 +57,7 @@ target_tests_nosim = \ WALLY-status-fp-enabled-01 \ WALLY-wfi-01 \ WALLY-cbom-01 \ + WALLY-cboz-01 \ # unclear why status-fp-enabled and wfi aren't simulating ok diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-cboz-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-cboz-01.reference_output new file mode 100644 index 000000000..644fa6f0b --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-cboz-01.reference_output @@ -0,0 +1,204 @@ +deadbeef # begin_signature +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +00000000 # destination 1 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 
+00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 # destination 2 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +ffffffff # signature The test writes -1 for correct answers and the a positive integer for incorrect copies. +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +0bad0bad +0bad0bad +0bad0bad +0bad0bad diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-cboz-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-cboz-01.S new file mode 100644 index 000000000..22b076261 --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-cboz-01.S @@ -0,0 +1,377 @@ +/////////////////////////////////////////// +// +// WALLY-cache-management-tests +// invalidate, clean, and flush +// +// Author: Rose Thompson +// +// Created 22 August 2023 +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +# Purpose: Tests the Zicboz cache instruction which all operate on cacheline +# granularity blocks of memory. The instruction cbo.zero allocates a cacheline +# and writes 0 to each byte. 
A dirty cacheline is overwritten; any data in main +# memory is overwritten. +# ----------- +# Copyright (c) 2020. RISC-V International. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# ----------- +# +# This assembly file tests the cbo.zero instruction of the RISC-V Zicboz extension. +# + +#include "model_test.h" +#include "arch_test.h" +RVTEST_ISA("RV64I_Zicboz_Zicbom") +# Test code region +.section .text.init +.globl rvtest_entry_point + +rvtest_entry_point: +RVMODEL_BOOT +RVTEST_CODE_BEGIN + +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True;",cbo.zero) + +RVMODEL_IO_WRITE_STR(x31, "# Test Begin\n") + +CBOZTest: + # *** TODO + # first need to discover the length of the cacheline. + # for now assume it is 64 bytes + + #addi sp, sp, -16 + #sd s0, 0(sp) + #sd ra, 8(sp) + + la s0, signature + + ################################################################################ + # Verify cbo.zero hit overwrites the cached line + ################################################################################ + + # theory of operation + # 1. Read several cachelines of data from memory into the d cache and copy to a second region of memory + # 2. Then verify the second region has the same data + # 3. Zero that second region with cbo.zero + # 4. Verify the second region is all zero.
+ + # step 1 (copy 64 doublewords from SourceData to Destination1) +CBOZTest_zero_step1: + la a0, SourceData + la a1, Destination1 + li a2, 64 + jal ra, memcpy8 + + # step 2 (compare the copy against the source) +CBOZTest_zero_step2: + la a0, SourceData + la a1, Destination1 + li a2, 64 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 (memcmp8 found no mismatch) + addi s0, s0, 8 + + # step 3 (cbo.zero at eight addresses 64 bytes apart) +CBOZTest_zero_step3: + la a1, Destination1 + cbo.zero (a1) + la a1, Destination1+64 + cbo.zero (a1) + la a1, Destination1+128 + cbo.zero (a1) + la a1, Destination1+192 + cbo.zero (a1) + la a1, Destination1+256 + cbo.zero (a1) + la a1, Destination1+320 + cbo.zero (a1) + la a1, Destination1+384 + cbo.zero (a1) + la a1, Destination1+448 + cbo.zero (a1) + +CBOZTest_zero_step4: + # step 4 (should be zero) + la a0, ZeroData + la a1, Destination1 + li a2, 64 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 (memcmp8 found no mismatch) + addi s0, s0, 8 + + ################################################################################ + # Verify cbo.zero miss overwrites + ################################################################################ + + # theory of operation + # 1. Read 1 cacheline of data from memory into the d cache and copy to a second region of memory + # 2. Then verify the second region has the same data + # 3. Flush that one line + # 4. Zero that one line + # 5.
Verify the second region is zero + + # step 1 (copy 8 doublewords from SourceData to Destination1) +CBOZTest_miss_zero_step1: + la a0, SourceData + la a1, Destination1 + li a2, 8 + jal ra, memcpy8 + + # step 2 (compare the copy against the source) +CBOZTest_miss_zero_step2: + la a0, SourceData + la a1, Destination1 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + + # steps 3 and 4 (flush the line, then zero it) +CBOZTest_miss_zero_step3: + la a1, Destination1 + cbo.flush (a1) + cbo.zero (a1) + +CBOZTest_miss_zero_step4: + # step 5 (should be zero) + la a0, ZeroData + la a1, Destination1 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + + ################################################################################ + # Verify cbo.zero miss with eviction overwrites + ################################################################################ + + # theory of operation + # 1. Read 1 cacheline of data from memory into the d cache and copy to a second region of memory + # 2. Repeat step 1 four more times at 4KiB intervals + # 3. Then verify the second region has the same data + # 4. Zero each line + # 5.
Verify the second region is zero + + # steps 1 and 2 (copy 8 doublewords to Destination2 and to four more addresses 4KiB apart) +CBOZTest_eviction_zero_step1_0: + la a0, SourceData + la a1, Destination2 + li a2, 8 + jal ra, memcpy8 + +CBOZTest_eviction_zero_step2_4096: + la a0, SourceData+8 + la a1, Destination2+4096 + li a2, 8 + jal ra, memcpy8 + +CBOZTest_eviction_zero_step2_8192: + la a0, SourceData+16 + la a1, Destination2+8192 + li a2, 8 + jal ra, memcpy8 + +CBOZTest_eviction_zero_step2_12288: + la a0, SourceData+24 + la a1, Destination2+12288 + li a2, 8 + jal ra, memcpy8 + +CBOZTest_eviction_zero_step2_16384: + la a0, SourceData+32 + la a1, Destination2+16384 + li a2, 8 + jal ra, memcpy8 + + # step 3 (verify each of the five copies against its source) +CBOZTest_eviction_zero_step3_0: + la a0, SourceData + la a1, Destination2 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + +CBOZTest_eviction_zero_step3_4096: + la a0, SourceData+8 + la a1, Destination2+4096 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + +CBOZTest_eviction_zero_step3_8192: + la a0, SourceData+16 + la a1, Destination2+8192 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + +CBOZTest_eviction_zero_step3_12288: + la a0, SourceData+24 + la a1, Destination2+12288 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + +CBOZTest_eviction_zero_step3_16384: + la a0, SourceData+32 + la a1, Destination2+16384 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + + # step 4 (cbo.zero each of the five lines) +CBOZTest_eviction_zero_step4: + la a1, Destination2 + cbo.zero (a1) + la a1, Destination2+4096 + cbo.zero (a1) + la a1, Destination2+8192 + cbo.zero (a1) + la a1, Destination2+12288 + cbo.zero (a1) + la a1, Destination2+16384 + cbo.zero (a1) + +CBOZTest_eviction_zero_step5_0: + # step 5 (should be zero) + la a0, ZeroData + la a1, Destination2 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + +CBOZTest_eviction_zero_step5_4096: + # step 5 (should be zero) + la a0, ZeroData + la a1, Destination2+4096 + li a2,
8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + +CBOZTest_eviction_zero_step5_8192: + # step 5 (should be zero) + la a0, ZeroData + la a1, Destination2+8192 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + +CBOZTest_eviction_zero_step5_12288: + # step 5 (should be zero) + la a0, ZeroData + la a1, Destination2+12288 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + +CBOZTest_eviction_zero_step5_16384: + # step 5 (should be zero) + la a0, ZeroData + la a1, Destination2+16384 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + + + #ld s0, 0(sp) + #ld ra, 8(sp) + #addi sp, sp, 16 + #ret +RVMODEL_HALT + + +.type memcpy8, @function +memcpy8: + # a0 is the source address + # a1 is the destination address + # a2 is the number of 8-byte words to copy (tested after each copy, so at least one word is always copied) + mv t0, a0 + mv t1, a1 + li t2, 0 +memcpy8_loop: + ld t3, 0(t0) + sd t3, 0(t1) + addi t0, t0, 8 + addi t1, t1, 8 + addi t2, t2, 1 + blt t2, a2, memcpy8_loop + ret + +.type memcmp8, @function +# returns in a0 the index of the first mismatching 8-byte word, or -1 if there is none +memcmp8: + # a0 is the address of the first region + # a1 is the address of the second region + # a2 is the number of 8-byte words to compare (tested after each compare, so at least one word is always compared) + mv t0, a0 + mv t1, a1 + li t2, 0 +memcmp8_loop: + ld t3, 0(t0) + ld t4, 0(t1) + bne t3, t4, memcmp8_ne + addi t0, t0, 8 + addi t1, t1, 8 + addi t2, t2, 1 + blt t2, a2, memcmp8_loop + li a0, -1 + ret +memcmp8_ne: + mv a0, t2 + ret + +RVTEST_CODE_END + + +RVTEST_DATA_BEGIN +# Input data section.
+#.data +.align 7 + +ZeroData: + .fill 128, 4, 0x0 +SourceData: + .int 0, 1, 2, 3, 4, 5, 6, 7 + .int 8, 9, 10, 11, 12, 13, 14, 15 + .int 16, 17, 18, 19, 20, 21, 22, 23 + .int 24, 25, 26, 27, 28, 29, 30, 31 + .int 32, 33, 34, 35, 36, 37, 38, 39 + .int 40, 41, 42, 43, 44, 45, 46, 47 + .int 48, 49, 50, 51, 52, 53, 54, 55 + .int 56, 57, 58, 59, 60, 61, 62, 63 + .int 64, 65, 66, 67, 68, 69, 70, 71 + .int 72, 73, 74, 75, 76, 77, 78, 79 + .int 80, 81, 82, 83, 84, 85, 86, 87 + .int 88, 89, 90, 91, 92, 93, 94, 95 + .int 96, 97, 98, 99, 100, 101, 102, 103 + .int 104, 105, 106, 107, 108, 109, 110, 111 + .int 112, 113, 114, 115, 116, 117, 118, 119 + .int 120, 121, 122, 123, 124, 125, 126, 127 + +RVTEST_DATA_END + +RVMODEL_DATA_BEGIN + .fill 28, 4, 0xdeadbeef # this is annoying, but RVMODEL_DATA_END and BEGIN insert + # 4 bytes. The destinations below need to be aligned to a cacheline + + .align 6 +Destination1: + .fill 128, 4, 0xdeadbeef +Destination2: + .fill 16, 4, 0xdeadbeef # NOTE(review): reserves only 64 bytes, but the eviction test also accesses Destination2+4096 through Destination2+16384; confirm that memory is safe to overwrite +signature: + .fill 32, 4, 0x0bad0bad + +RVMODEL_DATA_END +