From 9747d122d2e056a8639902e4a0e3061e618f3caf Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 2 Sep 2023 12:56:36 -0700 Subject: [PATCH] tlbNAPOT hangs due to PBMT making instruction memory uncacheable, and spill logic not working there. Fixed TLBLRU to update recently used on TLBHit rather than CAMHit. Moved coverage exclusions to proper line in pmachecker --- sim/coverage | 2 ++ src/mmu/pmachecker.sv | 4 +-- src/mmu/tlb/tlb.sv | 2 +- src/mmu/tlb/tlblru.sv | 4 +-- tests/coverage/tlbNAPOT.S | 68 ++++++++++++++++++++++----------------- 5 files changed, 46 insertions(+), 34 deletions(-) create mode 100755 sim/coverage diff --git a/sim/coverage b/sim/coverage new file mode 100755 index 000000000..038253911 --- /dev/null +++ b/sim/coverage @@ -0,0 +1,2 @@ +# recompile coverage tests and run coverage including them +pushd $WALLY/tests/coverage; make; popd; ./regression-wally -coverage diff --git a/src/mmu/pmachecker.sv b/src/mmu/pmachecker.sv index 587f54dbd..119e88d8c 100644 --- a/src/mmu/pmachecker.sv +++ b/src/mmu/pmachecker.sv @@ -62,8 +62,8 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( // Nonidemdempotent means access could have side effect and must not be done speculatively or redundantly // I/O is nonidempotent. PBMT can override PMA; NC is idempotent and IO is non-idempotent - assign IdempotentRegion = SelRegions[10] | SelRegions[9] | SelRegions[8] | SelRegions[7] | SelRegions[6]; - assign Idempotent = (PBMemoryType == 2'b00) ? IdempotentRegion : (PBMemoryType == 2'b01); // exclusion-tag: unused-idempotent + assign IdempotentRegion = SelRegions[10] | SelRegions[9] | SelRegions[8] | SelRegions[7] | SelRegions[6]; // exclusion-tag: unused-idempotent + assign Idempotent = (PBMemoryType == 2'b00) ? IdempotentRegion : (PBMemoryType == 2'b01); // Atomic operations are only allowed on RAM assign AtomicAllowed = SelRegions[10] | SelRegions[8] | SelRegions[6]; // exclusion-tag: unused-atomic diff --git a/src/mmu/tlb/tlb.sv b/src/mmu/tlb/tlb.sv index 8910f7d53..9619c958d 100644 --- a/src/mmu/tlb/tlb.sv +++ b/src/mmu/tlb/tlb.sv @@ -110,7 +110,7 @@ module tlb import cvw::*; #(parameter cvw_t P, .TLBMiss, .TLBHit, .TLBPageFault, .UpdateDA, .SV39Mode, .Translate, .PTE_N, .PBMemoryType); - tlblru #(TLB_ENTRIES) lru(.clk, .reset, .TLBWrite, .TLBFlush, .Matches, .CAMHit, .WriteEnables); + tlblru #(TLB_ENTRIES) lru(.clk, .reset, .TLBWrite, .TLBFlush, .Matches, .TLBHit, .WriteEnables); tlbcam #(P, TLB_ENTRIES, P.VPN_BITS + P.ASID_BITS, P.VPN_SEGMENT_BITS) tlbcam(.clk, .reset, .VPN, .PageTypeWriteVal, .SV39Mode, .TLBFlush, .WriteEnables, .PTE_Gs, .PTE_NAPOTs, .SATP_ASID, .Matches, .HitPageType, .CAMHit); diff --git a/src/mmu/tlb/tlblru.sv b/src/mmu/tlb/tlblru.sv index 4cabb33ab..18014155a 100644 --- a/src/mmu/tlb/tlblru.sv +++ b/src/mmu/tlb/tlblru.sv @@ -32,7 +32,7 @@ module tlblru #(parameter TLB_ENTRIES = 8) ( input logic TLBWrite, input logic TLBFlush, input logic [TLB_ENTRIES-1:0] Matches, - input logic CAMHit, + input logic TLBHit, output logic [TLB_ENTRIES-1:0] WriteEnables ); @@ -50,5 +50,5 @@ module tlblru #(parameter TLB_ENTRIES = 8) ( assign RUBitsAccessed = AccessLines | RUBits; assign AllUsed = &RUBitsAccessed; // if all recently used, then clear to none assign RUBitsNext = AllUsed ? 0 : RUBitsAccessed; - flopenr #(TLB_ENTRIES) lrustate(clk, reset, (CAMHit | TLBWrite), RUBitsNext, RUBits); + flopenr #(TLB_ENTRIES) lrustate(clk, reset, (TLBHit | TLBWrite), RUBitsNext, RUBits); endmodule diff --git a/tests/coverage/tlbNAPOT.S b/tests/coverage/tlbNAPOT.S index 282420be9..09dc2abcd 100644 --- a/tests/coverage/tlbNAPOT.S +++ b/tests/coverage/tlbNAPOT.S @@ -31,6 +31,9 @@ # run-elf.bash find this in project description main: + li t5, 0x1 + slli t5, t5, 62 + csrs menvcfg, t5 # Page table root address at 0x80010000; SV48 li t5, 0x9000000000080010 csrw satp, t5 @@ -42,29 +45,24 @@ main: ecall li t4, 0x200000 # address step size - li t0, 0x80215240 # Test NAPOT pages + li a2, 0x80215240 # Test NAPOT pages jal a1, looptest - li t0, 0x80215240 # Test NAPOT pages + li a2, 0xC0215240 # Test ill-formed NAPOT pages jal a1, looptest - li t0, 0xC0215240 # Test ill-formed NAPOT pages - jal a1, looptest - li t0, 0xC0215240 # Test ill-formed NAPOT pages - jal a1, looptest - li t0, 0x40215240 # Test properly formed pages with 1 in PPN[3] that are not NAPOT - jal a1, looptest - li t0, 0x40215240 # Test properly formed pages with 1 in PPN[3] that are not NAPOT - jal a1, looptest - li t4, 0x1000 # address step size - li t0, 0x80216000 # Test NAPOT pages + li a2, 0x40215240 # Test properly formed pages with 1 in PPN[3] that are not NAPOT jal a1, looptest +# li t4, 0x1000 # address step size +# li a2, 0x80216000 # Test NAPOT pages +# jal a1, looptest j done looptest: + mv t0, a2 # base address li t2, 0 # i = 0 li t3, 35 # Max amount of Loops = 34 li t5, 0x8082 # return instruction opcode -loop: bge t2, t3, finished # exit loop if i >= loops +loop: bge t2, t3, looptesti # exit loop if i >= loops sw t5, 0(t0) # store a return at this address to exercise DTLB lw t1, 0(t0) # read it back fence.i # synchronize with I$ @@ -73,6 +71,18 @@ loop: bge t2, t3, finished # exit loop if i >= loops addi t2, t2, 1 j loop +looptesti: + mv t0, a2 # base address + li t2, 0 # i = 0 + fence.i # synchronize with I$ + +# Exercise itlb by jumping to each of the return statements +loopi: bge t2, t3, finished # exit loop if i >= loops + jalr ra, t0 # jump to the return statement to exercise the ITLB + add t0, t0, t4 + addi t2, t2, 1 + j loopi + finished: jr a1 @@ -135,25 +145,25 @@ pagetable: # Leaf page table at 0x80013000 with NAPOT pages .align 12 #80000000 - .8byte 0x80000000200020CF - .8byte 0x80000000200020CF - .8byte 0x80000000200020CF - .8byte 0x80000000200020CF + .8byte 0xA0000000200020CF + .8byte 0xA0000000200020CF + .8byte 0xA0000000200020CF + .8byte 0xA0000000200020CF - .8byte 0x80000000200020CF - .8byte 0x80000000200020CF - .8byte 0x80000000200020CF - .8byte 0x80000000200020CF + .8byte 0xA0000000200020CF + .8byte 0xA0000000200020CF + .8byte 0xA0000000200020CF + .8byte 0xA0000000200020CF - .8byte 0x80000000200020CF - .8byte 0x80000000200020CF - .8byte 0x80000000200020CF - .8byte 0x80000000200020CF + .8byte 0xA0000000200020CF + .8byte 0xA0000000200020CF + .8byte 0xA0000000200020CF + .8byte 0xA0000000200020CF - .8byte 0x80000000200020CF - .8byte 0x80000000200020CF - .8byte 0x80000000200020CF - .8byte 0x80000000200020CF + .8byte 0xA0000000200020CF + .8byte 0xA0000000200020CF + .8byte 0xA0000000200020CF + .8byte 0xA0000000200020CF .8byte 0x80000000200060CF .8byte 0x80000000200060CF