Moved the simple branch predictor custom test into the gshare.S

coverage test.
2025-04-22 04:47:41 -04:00 · 2024-10-02 16:11:54 -05:00 · 2024-10-02 16:11:54 -05:00 · ebdef07301
commit ebdef07301
parent a56ef9e267
8 changed files with 51 additions and 601 deletions
--- a/tests/custom/simple/global_hist_test.s
+++ b/tests/custom/simple/global_hist_test.s
@ -1,3 +1,54 @@
+///////////////////////////////////////////
+// gshare.S
+//
+// Written: Rose Thompson rose@rosethompson.net
+//
+// Purpose: basic check that global history and gshare branch npredictors are working as expected.  Requires manual inspection.
+// TODO: *** Automate checking prediction accuracy.
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+// 
+// Copyright (C) 2021-24 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+// load code to initalize stack, handle interrupts, terminate
+
+#include "WALLY-init-lib.h"
+
+# run-elf.bash find this in project description
+main:
+
+        addi sp, sp, 8
+        sd ra, 0(sp)
+        jal ra, oneLoopTest
+        jal ra, global_hist_6_space_test
+        jal ra, global_hist_4_space_test
+        jal ra, global_hist_3_space_test
+        jal ra, global_hist_2_space_test
+        jal ra, global_hist_1_space_test
+        jal ra, global_hist_0_space_test
+        
+        fence.I
+
+finished:
+    j done
+
+        .data
+
 .section .text
 .globl oneLoopTest
 .type oneLoopTest, @function
--- a/tests/custom/simple/Makefile
+++ b/tests/custom/simple/Makefile
@ -1,19 +0,0 @@
-TARGETDIR	:= simple
-TARGET		:= $(TARGETDIR)/$(TARGETDIR).elf
-ROOT		:= ..
-LIBRARY_DIRS	:= ${ROOT}/crt0
-LIBRARY_FILES	:= crt0
-
-MARCH           :=-march=rv64imfdczicbom
-MABI            :=-mabi=lp64d
-LINKER          := ${ROOT}/linker8000-0000.x
-LINK_FLAGS      :=$(MARCH) $(MABI) -nostartfiles -Wl,-Map=$(TARGET).map -L $(RISCV)/riscv64-unknown-elf/lib
-
-CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany  -mstrict-align -O2
-CC=riscv64-unknown-elf-gcc
-DA=riscv64-unknown-elf-objdump -d
-
-
-include $(ROOT)/makefile.inc
-
-
--- a/tests/custom/simple/cbom.s
+++ b/tests/custom/simple/cbom.s
@ -1,449 +0,0 @@
-# Written: ross1728@gmail.com Rose Thompson 17 August 2023
-# Modified: 
-# Purpose: Tests the 3 Zicbom cache instructions which all operate on cacheline
-#          granularity blocks of memory.  Invalidate: Clears valid and dirty bits
-#          and does not write back.  Clean: Writes back dirty cacheline if needed
-#          and clears dirty bit.  Does NOT clear valid bit.  Flush:  Cleans and then
-#          Invalidates.  These operations apply to all caches in the memory system.
-#          The tests are divided into three parts one for the data cache, instruction cache
-#          and checks to verify the uncached regions of memory cause exceptions.
-# -----------
-# Copyright (c) 2020. RISC-V International. All rights reserved.
-# SPDX-License-Identifier: BSD-3-Clause
-# -----------
-#
-# This assembly file tests the fence.i instruction of the RISC-V Zifencei  extension.
-# 
-
-.section .text
-.globl CBOMTest
-.type CBOMTest, @function
-CBOMTest:
-        # *** TODO
-        # first need to discover the length of the cacheline.
-        # for now assume it is 64 bytes
-
-        addi sp, sp, -16
-        sd s0, 0(sp)
-        sd ra, 8(sp)
-
-	la s0, signature
-
-        ################################################################################
-        # INVALIDATE  D$
-        ################################################################################
-
-        # theory of operation
-        # 1. Read several cachelines of data from memory into the d cache and copy to a second region of memory
-        # 2. Then verify the second region has the same data
-        # 3. Invalidate the second region
-        # 4. Verify the second region has the original invalid data
-        # DON'T batch each step.  We want to see the transition between cachelines. The current should be invalidated
-        # but the next should have the copied data.
-
-        # step 1
-CBOMTest_inval_step1: 
-        la a0, SourceData
-        la a1, Destination1
-        li a2, 64
-        jal ra, memcpy8
-
-        # step 2
-CBOMTest_inval_step2: 
-        la a0, SourceData
-        la a1, Destination1
-        li a2, 64
-        jal ra, memcmp8
-        sd a0, 0(s0)     # should be -1
-        addi s0, s0, 8
-
-        # step 3
-CBOMTest_inval_step3: 
-        la a1, Destination1
-        cbo.inval (a1)
-        # step 4  (should be Invalid)
-        la a0, DeadBeafData1
-        la a1, Destination1
-        li a2, 8
-        jal ra, memcmp8
-        sd a0, 0(s0)    # should be -1
-        addi s0, s0, 8
-
-        # step 4 next line (should still be valid)
-CBOMTest_inval_step4: 
-        la a0, SourceData+64
-        la a1, Destination1+64
-        li a2, 8
-        jal ra, memcmp8
-        sd a0, 0(s0)    # should be -1
-        addi s0, s0, 8
-
-        # step 3 (Invalidate all remaining lines)
-CBOMTest_inval_step3_all: 
-        la a1, Destination1+64
-        cbo.inval (a1)
-        cbo.inval (a1)          # verify invalidating an already non present line does not cause an issue.
-        la a1, Destination1+128
-        cbo.inval (a1)
-        la a1, Destination1+192
-        cbo.inval (a1)
-        la a1, Destination1+256
-        cbo.inval (a1)
-        la a1, Destination1+320
-        cbo.inval (a1)
-        la a1, Destination1+384
-        cbo.inval (a1)
-        la a1, Destination1+448
-        cbo.inval (a1)
-	
-        # step 4 All should be invalid
-CBOMTest_inval_step4_all: 
-        la a0, DeadBeafData1
-        la a1, Destination1
-        li a2, 64
-        jal ra, memcmp8
-        sd a0, 0(s0)    # should be -1
-        addi s0, s0, 8
-
-        ################################################################################
-        # Clean D$
-        ################################################################################
-
-        # theory of operation
-        # 1. Read several cachelines of data from memory into the d cache and copy to a second region of memory
-        # 2. Then verify the second region has the same data
-        # 3. Invalidate the second region
-        # 4. Verify the second region has the original invalid data
-        # 5. Repeat step 1
-        # 6. Clean cachelines
-        # 7. Verify the second region has the same data
-        # 8. Invalidate the second region
-        # 9. Verify again but this time it should contain the same data
-        # DON'T batch each step.  We want to see the transition between cachelines. The current should be invalidated
-        # but the next should have the copied data.
-        
-        # step 1
-CBOMTest_clean_step1: 
-        la a0, SourceData
-        la a1, Destination2
-        li a2, 64
-        jal ra, memcpy8
-
-        # step 2
-CBOMTest_clean_step2:
-         la a0, SourceData
-        la a1, Destination2
-        li a2, 64
-        jal ra, memcmp8
-        sd a0, 0(s0)     # should be -1
-        addi s0, s0, 8
-
-        # step 3
-CBOMTest_clean_step3:
-        la a1, Destination2
-        cbo.inval (a1)
-        la a1, Destination2+64
-        cbo.inval (a1)
-        la a1, Destination2+128
-        cbo.inval (a1)
-        la a1, Destination2+192
-        cbo.inval (a1)
-        la a1, Destination2+256
-        cbo.inval (a1)
-        la a1, Destination2+320
-        cbo.inval (a1)
-        la a1, Destination2+384
-        cbo.inval (a1)
-        la a1, Destination2+448
-        cbo.inval (a1)
-        cbo.inval (a1)
-        cbo.inval (a1)
-        cbo.inval (a1)
-        cbo.inval (a1)
-        cbo.inval (a1)
-        cbo.inval (a1)
-        cbo.inval (a1)
-        cbo.inval (a1)
-        cbo.inval (a1)
-
-        # step 4 All should be invalid
-CBOMTest_clean_step4:
-        la a0, DeadBeafData1
-        la a1, Destination2
-        li a2, 64
-        jal ra, memcmp8
-        sd a0, 0(s0)    # should be -1
-        addi s0, s0, 8
-        
-        # step 5
-CBOMTest_clean_step5:
-        la a0, SourceData
-        la a1, Destination2
-        li a2, 64
-        jal ra, memcpy8
-
-        # step 6  only clean 1 line
-CBOMTest_clean_step6:
-        la a1, Destination2
-        cbo.clean (a1)
-	
-        # step 7  only check that 1 line
-CBOMTest_clean_step7:
-        la a0, SourceData
-        la a1, Destination2
-        li a2, 8
-        jal ra, memcmp8
-        sd a0, 0(s0)     # should be -1
-        addi s0, s0, 8
-
-        # step 8  invalidate that 1 line and the next
-CBOMTest_clean_step8:
-        la a1, Destination2
-        cbo.inval (a1)
-        la a1, Destination2+64
-        cbo.inval (a1)
-	
-        # step 9  that 1 line should contain the valid data
-CBOMTest_clean_step9_line1:
-        la a0, SourceData
-        la a1, Destination2
-        li a2, 8
-        jal ra, memcmp8
-        sd a0, 0(s0)     # should be -1
-        addi s0, s0, 8
-
-        # step 9 the next should contain the invalid data
-CBOMTest_clean_step9_line2:
-        la a0, DeadBeafData1
-        la a1, Destination2+64
-        li a2, 8
-        jal ra, memcmp8
-        sd a0, 0(s0)     # should be -1
-        addi s0, s0, 8
-
-        # step 5 # now recopy the one we just corrupted
-CBOMTest_clean_step5_recopy_line2:
-        la a0, SourceData+64
-        la a1, Destination2+64
-        li a2, 8
-        jal ra, memcpy8
-
-        # step 6 # clean the remaining
-CBOMTest_clean_step6_clean_all:
-        la a1, Destination2+64
-        cbo.clean (a1)
-        la a1, Destination2+128
-        cbo.clean (a1)
-        la a1, Destination2+192
-        cbo.clean (a1)
-        la a1, Destination2+256
-        cbo.clean (a1)
-        la a1, Destination2+320
-        cbo.clean (a1)
-        la a1, Destination2+384
-        cbo.clean (a1)
-        la a1, Destination2+448
-        cbo.clean (a1)
-        cbo.clean (a1)
-        cbo.clean (a1)
-        cbo.clean (a1)
-        cbo.clean (a1)
-        cbo.clean (a1)
-        cbo.clean (a1)
-        cbo.clean (a1)
-        cbo.clean (a1)
-	
-        # step 8 # invalidate all remaining
-CBOMTest_clean_step7_invalidate_all:
-        la a1, Destination2
-        cbo.inval (a1)
-        la a1, Destination2+64
-        cbo.inval (a1)
-        la a1, Destination2+128
-        cbo.inval (a1)
-        la a1, Destination2+192
-        cbo.inval (a1)
-        la a1, Destination2+256
-        cbo.inval (a1)
-        la a1, Destination2+320
-        cbo.inval (a1)
-        la a1, Destination2+384
-        cbo.inval (a1)
-        la a1, Destination2+448
-        cbo.inval (a1)
-	
-        # step 9 # check all
-CBOMTest_clean_step9_check_all:
-        la a0, SourceData
-        la a1, Destination2
-        li a2, 64
-        jal ra, memcmp8
-        sd a0, 0(s0)     # should be -1
-        addi s0, s0, 8
-
-        ################################################################################
-        # Flush D$ line
-        ################################################################################
-
-        # theory of operation
-        # 1. Read several cachelines of data from memory into the d cache and copy to a second region of memory
-        # 2. Then verify the second region has the same data
-        # 3. For flush there is no way to create a negative control. We will flush 1 cache line
-        # 4. Verify whole region
-        # 5. Flush the remaining lines
-        # 6. Verify whole region
-
-        # step 1
-CBOMTest_flush_step1: 
-        la a0, SourceData
-        la a1, Destination3
-        li a2, 64
-        jal ra, memcpy8
-
-        # step 2 All should be valid
-CBOMTest_flush_step2_verify:
-        la a0, SourceData
-        la a1, Destination3
-        li a2, 64
-        jal ra, memcmp8
-        sd a0, 0(s0)     # should be -1
-        addi s0, s0, 8
-
-        # step 3 # flush 1 line
-CBOMTest_flush_step3:
-        la a1, Destination3
-        cbo.flush (a1)
-
-        # step 4 
-CBOMTest_flush_step4_verify:
-        la a0, SourceData
-        la a1, Destination3
-        li a2, 64
-        jal ra, memcmp8
-        sd a0, 0(s0)     # should be -1
-        addi s0, s0, 8
-
-        # step 5
-CBOMTest_flush_step5_flush_all:
-        la a1, Destination3
-        cbo.flush (a1)
-        la a1, Destination3+64
-        cbo.flush (a1)
-        la a1, Destination3+128
-        cbo.flush (a1)
-        la a1, Destination3+192
-        cbo.flush (a1)
-        la a1, Destination3+256
-        cbo.flush (a1)
-        la a1, Destination3+320
-        cbo.flush (a1)
-        la a1, Destination3+384
-        cbo.flush (a1)
-        la a1, Destination3+448
-        cbo.flush (a1)
-        cbo.flush (a1)
-        cbo.flush (a1)
-        cbo.flush (a1)
-        cbo.flush (a1)
-        cbo.flush (a1)
-	
-        # step 6
-CBOMTest_flush_step6_verify:
-        la a0, SourceData
-        la a1, Destination3
-        li a2, 64
-        jal ra, memcmp8
-        sd a0, 0(s0)     # should be -1
-        addi s0, s0, 8
-        
-
-        ld s0, 0(sp)
-        ld ra, 8(sp)
-        addi sp, sp, 16
-        ret
-
-	
-.section .text
-.type memcpy8, @function
-memcpy8:        
-        # a0 is the source
-        # a1 is the dst
-        # a2 is the number of 8 byte words
-        mv t0, a0
-        mv t1, a1
-        li t2, 0
-memcpy8_loop:   
-        ld t3, 0(t0)
-        sd t3, 0(t1)
-        addi t0, t0, 8
-        addi t1, t1, 8
-        addi t2, t2, 1
-        blt t2, a2, memcpy8_loop
-        ret
-
-.section .text
-.type memcmp8, @function
-# returns which index mismatch, -1 if none
-memcmp8:        
-        # a0 is the source1
-        # a1 is the source2
-        # a2 is the number of 8 byte words
-        mv t0, a0
-        mv t1, a1
-        li t2, 0
-memcmp8_loop:
-        ld t3, 0(t0)
-        ld t4, 0(t1)
-        bne t3, t4, memcmp8_ne
-        addi t0, t0, 8
-        addi t1, t1, 8
-        addi t2, t2, 1
-        blt t2, a2, memcmp8_loop
-        li a0, -1
-        ret
-memcmp8_ne:
-        mv a0, t2
-        ret
-
-        
-
-.data
-.align 7
-
-DeadBeafData1:
-        .fill 64, 8, 0xdeadbeefdeadbeef
-SourceData:
-        .int 0, 1, 2, 3, 4, 5, 6, 7
-        .int 8, 9, 10, 11, 12, 13, 14, 15
-        .int 16, 17, 18, 19, 20, 21, 22, 23
-        .int 24, 25, 26, 27, 28, 29, 30, 31
-        .int 32, 33, 34, 35, 36, 37, 38, 39
-        .int 40, 41, 42, 43, 44, 45, 46, 47
-        .int 48, 49, 50, 51, 52, 53, 54, 55
-        .int 56, 57, 58, 59, 60, 61, 62, 63
-        .int 64, 65, 66, 67, 68, 69, 70, 71
-        .int 72, 73, 74, 75, 76, 77, 79, 79
-        .int 80, 81, 82, 83, 84, 85, 86, 87
-        .int 88, 89, 90, 91, 92, 93, 94, 95
-        .int 96, 97, 98, 99, 100, 101, 102, 103
-        .int 104, 105, 106, 107, 108, 109, 110, 111
-        .int 112, 113, 114, 115, 116, 117, 118, 119
-        .int 120, 121, 122, 123, 124, 125, 126, 127
-
-Destination1:
-        .fill 64, 8, 0xdeadbeefdeadbeef
-Destination2:   
-        .fill 64, 8, 0xdeadbeefdeadbeef
-Destination3:
-        .fill 64, 8, 0xdeadbeefdeadbeef
-Destination4:
-        .fill 64, 8, 0xdeadbeefdeadbeef
-
-signature:
-        .fill 16, 8, 0x0bad0bad0bad0bad
-
-
-ExceptedSignature:
-        .fill 13, 8, 0xFFFFFFFFFFFFFFFF
-        .fill 3,  8, 0x0bad0bad0bad0bad
-        
--- a/tests/custom/simple/fail.s
+++ b/tests/custom/simple/fail.s
@ -1,11 +0,0 @@
-# Rose Thompson
-# March 17, 2021
-# Oklahoma State University
-
-.section .text	
-.global fail
-.type fail, @function
-fail:
-	li gp, 1
-	li a0, -1
-	ecall
--- a/tests/custom/simple/header.h
+++ b/tests/custom/simple/header.h
@ -1,16 +0,0 @@
-#ifndef __header
-#define __header
-
-int fail();
-int simple_csrbr_test();
-int lbu_test();
-int icache_spill_test();
-void global_hist_0_space_test();
-void global_hist_1_space_test();
-void global_hist_2_space_test();
-void global_hist_3_space_test();
-void global_hist_4_space_test();
-void global_hist_6_space_test();
-void oneLoopTest();
-void CBOMTest();
-#endif
--- a/tests/custom/simple/lbu_test.s
+++ b/tests/custom/simple/lbu_test.s
@ -1,19 +0,0 @@
-.section .text	
-.global lbu_test
-.type lbu_test, @function
-
-lbu_test:
-
-	li t0, 0x80000
-	lbu t1, 0(t0)
-
-
-pass:
-	li a0, 0
-done:
-	ret
-
-fail:
-	li a0, -1
-	j done
-	
--- a/tests/custom/simple/main.c
+++ b/tests/custom/simple/main.c
@ -1,25 +0,0 @@
-#include "header.h"
-
-int main(){
-  //int res = icache_spill_test();
-  oneLoopTest();
-  global_hist_6_space_test();  
-  global_hist_4_space_test();  
-  global_hist_3_space_test();  
-  global_hist_2_space_test();
-  global_hist_1_space_test();
-  global_hist_0_space_test();
-  CBOMTest();
-  int res = 1;
-  if (res < 0) {
-    fail();
-    return 0;
-  }else {
-    if((res = lbu_test()) < 0) {
-      fail();
-      return 0;
-    }
-    res = simple_csrbr_test();
-    return 0;
-  }
-}
--- a/tests/custom/simple/sample.s
+++ b/tests/custom/simple/sample.s
@ -1,62 +0,0 @@
-.section .text	
-.global simple_csrbr_test
-.type simple_csrbr_test, @function
-
-simple_csrbr_test:	
-
-	# step 1 enable the performance counters
-	# by default the hardware enables all performance counters
-	# however we will eventually want to manually enable incase
-	# some other code disables them
-
-	# br count is counter 5
-	# br mp count is counter 4
-	li t0, 0x30
-	
-	csrrc x0, 0x320, t0  # clear bits 4 and 5 of inhibit register.
-
-	# step 2 read performance counters into general purpose registers
-
-	csrrw t2, 0xB05, x0 # t2 = BR COUNT (perf count 5)
-	csrrw t3, 0xB04, x0 # t3 = BRMP COUNT (perf count 4)
-
-	# step 3 simple loop to show the counters are updated.
-	li t0, 0   # this is the loop counter
-	li t1, 100 # this is the loop end condition
-
-	# for(t1 = 0; t1 < t0; t1++);
-
-loop:	
-	addi t0, t0, 1
-	blt t0, t1, loop
-
-loop_done:	
-	
-	# step 2 read performance counters into general purpose registers
-
-	csrrw t4, 0xB05, x0 # t4 = BR COUNT (perf count 5)
-	csrrw t5, 0xB04, x0 # t5 = BRMP COUNT (perf count 4)
-
-	sub t2, t4, t2 # this is the number of branch instructions committed.
-	sub t3, t5, t3 # this is the number of branch mispredictions committed.
-
-	# now check if the branch count equals 100 and if the branch
-	bne t4, t2, fail
-	li  t5, 3
-	bne t3, t5, fail
-	
-pass:
-	li a0, 0
-done:
-	li t0, 0x30
-	csrrs x0, 0x320, t0  # set bits 4 and 5
-	ecall
-	ret
-
-fail:
-	li a0, -1
-	j done
-
-.data 
-sample_data:
-.int 0