diff --git a/linux/testvector-generation/genTrace.gdb b/linux/testvector-generation/genTrace.gdb
new file mode 100755
index 000000000..e69e28ec5
--- /dev/null
+++ b/linux/testvector-generation/genTrace.gdb
@@ -0,0 +1,25 @@
+define genTrace
+    # Arguments
+    set $tcpPort=$arg0
+    set $vmlinux=$arg1
+
+    # GDB config
+    set pagination off
+    set logging overwrite on
+    set logging redirect on
+    set confirm off
+
+    # Connect to QEMU session
+    eval "target extended-remote :%d",$tcpPort
+
+    # Symbol Files
+    eval "file %s",$vmlinux
+
+    # Run until Linux login prompt
+    b do_idle
+    ignore 1 2
+    c
+
+    kill
+    q
+end
diff --git a/linux/testvector-generation/genTrace.sh b/linux/testvector-generation/genTrace.sh
new file mode 100755
index 000000000..82f9e48be
--- /dev/null
+++ b/linux/testvector-generation/genTrace.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# Boots the buildroot Linux image in QEMU under GDB control and pipes QEMU's
+# stderr through the parser scripts to produce the trace in $traceFile.
+tcpPort=1234
+imageDir=$RISCV/buildroot/output/images
+outDir=$RISCV/linux-testvectors
+recordFile="$outDir/all.qemu"
+traceFile="$outDir/all.txt"
+
+read -p "Warning: running this script will overwrite the contents of:
+ * $recordFile
+ * $traceFile
+Would you like to proceed? (y/n) " -n 1 -r
+echo
+if [[ $REPLY =~ ^[Yy]$ ]]
+then
+    # Create Output Directory
+    sudo mkdir -p "$outDir"
+    sudo chown cad "$outDir"
+    sudo touch "$recordFile"
+    sudo touch "$traceFile"
+    sudo chmod a+rw "$recordFile"
+    sudo chmod a+rw "$traceFile"
+
+    # Compile Devicetree from Source
+    dtc -I dts -O dtb ../devicetree/virt-trimmed.dts > ../devicetree/virt-trimmed.dtb
+
+    # QEMU Simulation
+    # Only QEMU's stderr enters the parser pipe (2>&1 >/dev/null); its stdout is discarded.
+    (qemu-system-riscv64 \
+    -M virt -dtb ../devicetree/virt-trimmed.dtb \
+    -nographic -serial /dev/null \
+    -bios "$imageDir/fw_jump.elf" -kernel "$imageDir/Image" -append "root=/dev/vda ro" -initrd "$imageDir/rootfs.cpio" \
+    -singlestep -rtc clock=vm -icount shift=0,align=off,sleep=on,rr=record,rrfile="$recordFile" \
+    -d nochain,cpu,in_asm \
+    -gdb tcp::$tcpPort -S \
+    2>&1 >/dev/null | ./parseQemuToGDB.py | ./parseGDBtoTrace.py | ./remove_dup.awk > "$traceFile") \
+    & riscv64-unknown-elf-gdb -quiet -x genTrace.gdb -ex "genTrace $tcpPort \"$imageDir/vmlinux\""
+
+    # Cleanup
+    sudo chown cad "$recordFile"
+    sudo chown cad "$traceFile"
+    sudo chmod o-w "$recordFile"
+    sudo chmod o-w "$traceFile"
+fi
+
diff --git a/linux/testvector-generation/parseGDBtoTrace.py b/linux/testvector-generation/parseGDBtoTrace.py
new file mode 100755
index 000000000..ab63330fd
--- /dev/null
+++ b/linux/testvector-generation/parseGDBtoTrace.py
@@ -0,0 +1,245 @@
+#! /usr/bin/python3
+import sys, fileinput, re
+
+# Ross Thompson
+# July 27, 2021
+# Rewrite of the linux trace parser.
+#
+# Reads instruction records from stdin and writes one trace line per
+# instruction to stdout.  A record starts at a line containing '=>' (raw
+# instruction bits and disassembly), continues with a PC line and one
+# "name hex decimal" line per register, and ends at a line containing '-----'.
+
+
+InstrStartDelim = '=>'
+InstrEndDelim = '-----'
+InterruptPrefix = 'riscv_cpu_do_interrupt'
+# Effective addresses are 64 bits wide and wrap around on overflow.
+AddrMask = 0xFFFFFFFFFFFFFFFF
+
+#InputFile = 'noparse.txt'
+#InputFile = sys.stdin
+#InputFile = 'temp.txt'
+#OutputFile = 'parsedAll.txt'
+
+HUMAN_READABLE = False
+
+def toDict(lst):
+    '''Converts the list of register lines to a dictionary {name: value}.
+
+    Each line is split on whitespace: field 0 is the register name and
+    field 2 its value in decimal.  The 'pc' entry is dropped; KeyError is
+    raised if the list has no 'pc' line.'''
+    dct = {}
+    for item in lst:
+        regTup = item.split()
+        dct[regTup[0]] = int(regTup[2], 10)
+    del dct['pc']
+    return dct
+
+def whichClass(text, Regs):
+    '''Which instruction class?
+
+    Returns (class, address, write register, read register).  The class is
+    'load', 'store', 'amo', 'lr', 'sc' or 'other', chosen from the start of
+    the disassembly text; fields that do not apply are None.'''
+    #print(text, Regs)
+    if text.startswith(('ld', 'lw', 'lh', 'lb')):
+        return ('load', WhatAddr(text, Regs), None, WhatMemDestSource(text))
+    elif text.startswith(('sd', 'sw', 'sh', 'sb')):
+        return ('store', WhatAddr(text, Regs), WhatMemDestSource(text), None)
+    elif text.startswith('amo'):
+        return ('amo', WhatAddrAMO(text, Regs), WhatMemDestSource(text), WhatMemDestSource(text))
+    elif text.startswith('lr'):
+        return ('lr', WhatAddrLR(text, Regs), None, WhatMemDestSource(text))
+    elif text.startswith('sc'):
+        return ('sc', WhatAddrSC(text, Regs), WhatMemDestSource(text), None)
+    else:
+        return ('other', None, None, None)
+
+def whatChanged(dct0, dct1):
+    '''Compares two dictionaries of instruction registers and returns the
+    entries of dct1 whose value differs from dct0.  Only the keys of dct0 are
+    compared; each of them must also be present in dct1.'''
+    return {key: dct1[key] for key in dct0 if dct1[key] != dct0[key]}
+
+def WhatMemDestSource(text):
+    '''What is the destination register. Used to compute where the read data is
+    on a load or the write data on a store.  This is the first operand: the
+    text between the mnemonic and the first comma.'''
+    return text.split()[1].split(',')[0]
+
+def WhatAddr(text, Regs):
+    '''What is the data memory address?
+
+    Parses the "imm(reg)" operand that follows the first comma and returns
+    imm + Regs[reg], wrapped to 64 bits.'''
+    Imm = text.split(',')[1]
+    (Imm, Src) = Imm.split('(')
+    Imm = int(Imm.strip(), 10)
+    Src = Src.strip(')').strip()
+    RegVal = Regs[Src]
+    return (Imm + RegVal) & AddrMask
+
+def WhatAddrAMO(text, Regs):
+    'What is the data memory address? It is the value of the register named in parentheses.'
+    Src = text.split('(')[1]
+    Src = Src.strip(')').strip()
+    return Regs[Src]
+
+def WhatAddrLR(text, Regs):
+    'What is the data memory address? It is the value of the register named in parentheses.'
+    Src = text.split('(')[1]
+    Src = Src.strip(')').strip()
+    return Regs[Src]
+
+def WhatAddrSC(text, Regs):
+    'What is the data memory address? It is the value of the register named in parentheses.'
+    Src = text.split('(')[1]
+    Src = Src.strip(')').strip()
+    return Regs[Src]
+
+def PrintInstr(instr, fp):
+    '''Writes one trace line for instr to the file object fp.
+
+    instr is [pc, instruction bits, disassembly, class, changed registers,
+    memory address, write data, read data].  Nothing is written when the
+    disassembly is None, and changed registers missing from RegNumber are
+    left out.'''
+    if instr[2] is None:
+        return
+    ChangedRegisters = instr[4]
+    GPR = ''
+    CSR = []
+    for key in ChangedRegisters:
+        # filter out csr which are not checked.
+        if(key in RegNumber):
+            if(RegNumber[key] < 32):
+                # GPR
+                if(HUMAN_READABLE):
+                    GPR = '{:-2d} {:016x}'.format(RegNumber[key], ChangedRegisters[key])
+                else:
+                    GPR = '{:d} {:x}'.format(RegNumber[key], ChangedRegisters[key])
+            else:
+                if(HUMAN_READABLE):
+                    CSR.extend([key, '{:016x}'.format(ChangedRegisters[key])])
+                else:
+                    CSR.extend([key, '{:x}'.format(ChangedRegisters[key])])
+
+    CSRStr = ' '.join(CSR)
+
+    #print(instr)
+
+    if HUMAN_READABLE:
+        fp.write('{:016x} {:08x} {:25s}'.format(instr[0], instr[1], instr[2]))
+        if(len(GPR) != 0):
+            fp.write(' GPR {}'.format(GPR))
+        if(instr[3] == 'load' or instr[3] == 'lr'):
+            fp.write(' MemR {:016x} {:016x} {:016x}'.format(instr[5], 0, instr[7]))
+        if(instr[3] == 'store'):
+            fp.write('\t\t\t MemW {:016x} {:016x} {:016x}'.format(instr[5], instr[6], 0))
+
+        if(len(CSR) != 0):
+            fp.write(' CSR {}'.format(CSRStr))
+    else:
+        fp.write('{:x} {:x} {:s}'.format(instr[0], instr[1], instr[2].replace(' ', '_')))
+        if(len(GPR) != 0):
+            fp.write(' GPR {}'.format(GPR))
+        if(instr[3] == 'load' or instr[3] == 'lr'):
+            fp.write(' MemR {:x} {:x} {:x}'.format(instr[5], 0, instr[7]))
+        if(instr[3] == 'store'):
+            fp.write(' MemW {:x} {:x} {:x}'.format(instr[5], instr[6], 0))
+
+        if(len(CSR) != 0):
+            fp.write(' CSR {}'.format(CSRStr))
+    fp.write('\n')
+
+# reg number
+RegNumber = {'zero': 0, 'ra': 1, 'sp': 2, 'gp': 3, 'tp': 4, 't0': 5, 't1': 6, 't2': 7, 's0': 8, 's1': 9, 'a0': 10, 'a1': 11, 'a2': 12, 'a3': 13, 'a4': 14, 'a5': 15, 'a6': 16, 'a7': 17, 's2': 18, 's3': 19, 's4': 20, 's5': 21, 's6': 22, 's7': 23, 's8': 24, 's9': 25, 's10': 26, 's11': 27, 't3': 28, 't4': 29, 't5': 30, 't6': 31, 'mhartid': 32, 'mstatus': 33, 'mip': 34, 'mie': 35, 'mideleg': 36, 'medeleg': 37, 'mtvec': 38, 'stvec': 39, 'mepc': 40, 'sepc': 41, 'mcause': 42, 'scause': 43, 'mtval': 44, 'stval': 45}
+# initial state
+CurrentInstr = ['0', '0', None, 'other', dict.fromkeys(RegNumber, 0), {}, None, None, None]
+
+#with open (InputFile, 'r') as InputFileFP:
+#lines = InputFileFP.readlines()
+lineNum = 0
+StartLine = 0
+EndLine = 0
+numInstrs = 0
+#instructions = []
+MemAdr = 0
+lines = []
+# Start every run with an empty interrupts.txt; entries are appended below.
+with open('interrupts.txt', 'w'):
+    pass
+
+for line in fileinput.input('-'):
+    if line.startswith(InterruptPrefix):
+        with open('interrupts.txt','a') as interrupts:
+            # Slice the prefix off: str.strip() takes a set of characters, not a
+            # prefix, and would also eat matching characters at the end of the line.
+            interrupts.write(str(numInstrs)+': '+line[len(InterruptPrefix):])
+        # NOTE(review): parsing stops at the first interrupt line -- confirm this
+        # is intended rather than skipping the line and carrying on.
+        break
+    lines.insert(lineNum, line)
+    if InstrStartDelim in line:
+        lineNum = 0
+        StartLine = lineNum
+    elif InstrEndDelim in line:
+        EndLine = lineNum
+        # maxsplit=1 so that a ':' inside the disassembly cannot break the unpacking.
+        (InstrBits, text) = lines[StartLine].split(':', 1)
+        InstrBits = int(InstrBits.strip('=> '), 16)
+        text = text.strip()
+        PC = int(lines[StartLine+1].split(':')[0][2:], 16)
+        Regs = toDict(lines[StartLine+2:EndLine])
+        (Class, Addr, WriteReg, ReadReg) = whichClass(text, Regs)
+        #print("CWR", Class, WriteReg, ReadReg)
+        PreviousInstr = CurrentInstr
+
+        Changed = whatChanged(PreviousInstr[4], Regs)
+
+        # Slots 7 and 8 hold register names; their values are looked up in the next record.
+        CurrentInstr = [PC, InstrBits, text, Class, Regs, Changed, Addr, WriteReg, ReadReg]
+
+        #print(CurrentInstr[0:4], PreviousInstr[5], CurrentInstr[6:7], PreviousInstr[8])
+
+        # pc, instrbits, text and class come from the last line.
+        MoveInstrToRegWriteLst = PreviousInstr[0:4]
+        # updated registers come from the current line.
+        MoveInstrToRegWriteLst.append(CurrentInstr[5]) # destination regs
+        # memory address if present comes from the last line.
+        MoveInstrToRegWriteLst.append(PreviousInstr[6]) # MemAdrM
+        # write data from the previous line
+        #MoveInstrToRegWriteLst.append(PreviousInstr[7]) # WriteDataM
+
+        if (PreviousInstr[7] is not None):
+            MoveInstrToRegWriteLst.append(Regs[PreviousInstr[7]]) # WriteDataM
+        else:
+            MoveInstrToRegWriteLst.append(None)
+
+        # read data from the current line
+        #MoveInstrToRegWriteLst.append(PreviousInstr[8]) # ReadDataM
+        if (PreviousInstr[8] is not None):
+            MoveInstrToRegWriteLst.append(Regs[PreviousInstr[8]]) # ReadDataM
+        else:
+            MoveInstrToRegWriteLst.append(None)
+
+        lines.clear()
+        #instructions.append(MoveInstrToRegWriteLst)
+        PrintInstr(MoveInstrToRegWriteLst, sys.stdout)
+        numInstrs += 1
+        if (numInstrs % 10000 == 0):
+            sys.stderr.write('Trace parser reached '+str(numInstrs/1.0e6)+' million instrs.\n')
+            sys.stderr.flush()
+    lineNum += 1
+
+
+#for instruction in instructions[1::]:
+
+
+#with open(OutputFile, 'w') as OutputFileFP:
+#    print('opened file')
+
+
+
diff --git a/linux/testvector-generation/parseQemuToGDB.py b/linux/testvector-generation/parseQemuToGDB.py
new file mode 100755
index 000000000..932761db7
--- /dev/null
+++ b/linux/testvector-generation/parseQemuToGDB.py
@@ -0,0 +1,163 @@
+#! /usr/bin/python3
+import fileinput, sys
+
+# Converts the QEMU log read through fileinput (files named on the command
+# line, else stdin) into one record per instruction on stdout: a '=>' line,
+# a PC line, one "name hex decimal" line per register and CSR, then '-----'.
+# Nothing is printed for instructions seen while inPageFault is set.
+
+sys.stderr.write("reminder: parseQemuToGDB.py takes input from stdin\n")
+parseState = "idle"
+beginPageFault = 0
+inPageFault = 0
+endPageFault = 0
+CSRs = {}
+pageFaultCSRs = {}
+regs = {}
+pageFaultRegs = {}
+instrs = {}
+instrCount = 0
+returnAdr = 0
+
+def printPC(l):
+    '''Prints the '=>' line (instruction bits and disassembly) and the PC line
+    for the instruction log line l, unless a page fault is in progress.'''
+    global instrCount
+    if not inPageFault:
+        inst = l.split()
+        if len(inst) > 3:
+            print(f'=> {inst[1]}:\t{inst[2]} {inst[3]}')
+        else:
+            print(f'=> {inst[1]}:\t{inst[2]}')
+        print(f'{inst[0]} 0x{inst[1]}')
+        instrCount += 1
+        if ((instrCount % 100000) == 0):
+            sys.stderr.write("QEMU parser reached "+str(instrCount)+" instrs\n")
+
+def printCSRs():
+    '''Prints every entry of CSRs as "name hex decimal" followed by the
+    '-----' record terminator, unless a page fault is in progress.'''
+    if not inPageFault:
+        for (csr,val) in CSRs.items():
+            print('{}{}{:#x} {}'.format(csr, ' '*(15-len(csr)), val, val))
+        print('-----')
+
+def parseCSRs(l):
+    '''Handles one log line while in the "CSRs" state: either records a CSR
+    value or, on the first register-file line, prints the pending instruction
+    and hands the line to parseRegs.'''
+    global parseState
+    if l.strip() and (not l.startswith("Disassembler")) and (not l.startswith("Please")):
+        # If we've hit the register file
+        if l.startswith(' x0/zero'):
+            parseState = "regFile"
+            if not inPageFault:
+                instr = instrs[CSRs["pc"]]
+                printPC(instr)
+            parseRegs(l)
+        # If we've hit a CSR
+        else:
+            fields = l.split()
+            csr = fields[0]
+            val = int(fields[1],16)
+            # Commented out this conditional because the pageFault instrs don't corrupt CSRs
+            #if inPageFault:
+                # Not sure if these CSRs should be updated or not during page fault.
+                #if l.startswith("mstatus") or l.startswith("mepc") or l.startswith("mcause") or l.startswith("mtval") or l.startswith("sepc") or l.startswith("scause") or l.startswith("stval"):
+                    # We do update some CSRs
+                #    CSRs[csr] = val
+                #else:
+                    # Others we preserve until changed later
+                #    pageFaultCSRs[csr] = val
+            #elif pageFaultCSRs and (csr in pageFaultCSRs):
+            #    if (val != pageFaultCSRs[csr]):
+            #        del pageFaultCSRs[csr]
+            #        CSRs[csr] = val
+            #else:
+            #    CSRs[csr] = val
+            #
+            # However SEPC and STVAL do get corrupted upon exiting
+            if endPageFault and ((csr == 'sepc') or (csr == 'stval')):
+                CSRs[csr] = returnAdr
+                pageFaultCSRs[csr] = val
+            elif pageFaultCSRs and (csr in pageFaultCSRs):
+                if (val != pageFaultCSRs[csr]):
+                    del pageFaultCSRs[csr]
+                    CSRs[csr] = val
+            else:
+                CSRs[csr] = val
+
+def parseRegs(l):
+    '''Handles one log line while in the "regFile" state: a line containing "pc"
+    starts the next CSR dump, a line of dashes ends the record, and any other
+    line is read as "xN/name value" pairs that are stored and printed.'''
+    global parseState
+    if "pc" in l:
+        printCSRs()
+        # New non-disassembled instruction
+        parseState = "CSRs"
+        parseCSRs(l)
+    elif l.startswith('--------'):
+        # End of disassembled instruction
+        printCSRs()
+        parseState = "idle"
+    else:
+        s = l.split()
+        for i in range(0,len(s),2):
+            if '/' in s[i]:
+                reg = s[i].split('/')[1]
+                val = int(s[i+1], 16)
+                if inPageFault:
+                    pageFaultRegs[reg] = val
+                else:
+                    if pageFaultRegs and (reg in pageFaultRegs):
+                        if (val != pageFaultRegs[reg]):
+                            del pageFaultRegs[reg]
+                            regs[reg] = val
+                    else:
+                        regs[reg] = val
+                    val = regs[reg]
+                    print('{}{}{:#x} {}'.format(reg, ' '*(15-len(reg)), val, val))
+            else:
+                sys.stderr.write("Whoops. Expected a list of reg file regs; got:\n"+l)
+
+#############
+# Main Code #
+#############
+interrupt_line=""
+for l in fileinput.input():
+    #sys.stderr.write(l)
+    if l.startswith('riscv_cpu_do_interrupt'):
+        sys.stderr.write(l)
+        interrupt_line = l.strip('\n')
+        continue
+    elif l.startswith('qemu-system-riscv64: QEMU: Terminated via GDBstub'):
+        break
+    elif l.startswith('IN:'):
+        # New disassembled instr
+        if interrupt_line:
+            print(interrupt_line)
+            interrupt_line=""
+        parseState = "instr"
+    elif (parseState == "instr") and l.startswith('0x'):
+        # New instruction
+        if interrupt_line:
+            print(interrupt_line)
+            interrupt_line=""
+        if "out of bounds" in l:
+            sys.stderr.write("Detected QEMU page fault error\n")
+            beginPageFault = not inPageFault
+            if beginPageFault:
+                returnAdr = int(l.split()[0][2:-1], 16)
+                sys.stderr.write('Saving SEPC of '+hex(returnAdr)+'\n')
+            inPageFault = 1
+        else:
+            endPageFault = inPageFault
+            inPageFault = 0
+        adr = int(l.split()[0][2:-1], 16)
+        instrs[adr] = l
+        parseState = "CSRs"
+    elif parseState == "CSRs":
+        parseCSRs(l)
+    elif parseState == "regFile":
+        parseRegs(l)
diff --git a/linux/testvector-generation/remove_dup.awk b/linux/testvector-generation/remove_dup.awk
new file mode 100755
index 000000000..7963d76a6
--- /dev/null
+++ b/linux/testvector-generation/remove_dup.awk
@@ -0,0 +1,23 @@
+#!/usr/bin/awk -f
+
+# For each run of consecutive lines that share the same first field, print
+# only the last line of the run.
+
+BEGIN{
+    old = "first"
+}
+
+{
+    if($1 != old){
+        if(old != "first"){
+            print oldAll
+        }
+    }
+    old=$1
+    oldAll=$0
+}
+
+END{
+    print oldAll
+}
+