mirror of
https://github.com/openhwgroup/cvw.git
synced 2025-04-23 05:17:20 -04:00
Merge branch 'openhwgroup:main' into add-linux
This commit is contained in:
commit
39ac6be103
119 changed files with 3081 additions and 2638 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -111,4 +111,5 @@ sim/imperas.log
|
|||
sim/results-error/
|
||||
sim/test1.rep
|
||||
sim/vsim.log
|
||||
tests/coverage/*.S
|
||||
tests/coverage/*.elf
|
||||
*.elf.memfile
|
333
Install
333
Install
|
@ -1,333 +0,0 @@
|
|||
Complete Wally Installation guide
|
||||
Formally RISC-V System on Chip Design Appendix D
|
||||
|
||||
Sections:
|
||||
1. RISC-V Tool Installation (Sys Admin)
|
||||
2. Core-v-wally Repo Installation
|
||||
3. Build and Run Regression Tests
|
||||
|
||||
Section 1 tool install should be done once by a system admin with root access. The specific details may need to be
|
||||
adjusted as some tools may already be present on the system. This guide assumes all compiled from source tools are
|
||||
installed at base diretory $RISCV.
|
||||
|
||||
* Tool-chain Installation (Sys Admin)
|
||||
|
||||
** TL;DR Open Source Tool-chain Installation
|
||||
|
||||
The installing details are involved, but can be skipped using the following script. wally-tool-chain-install.sh installs the open source tools to RISCV=/opt/riscv by default. Change by supplying an alternate path as an argument, (ie. wally-tool-chain-install.sh /mnt/disk1/riscv).
|
||||
This install script does NOT install buildroot or commercial EDA tools; Questa, Design Compiler, or Innovus.
|
||||
It must be run as root or with sudo.
|
||||
This script is tested for Ubuntu, 20.04 and 22.04
|
||||
|
||||
wally-tool-chain-install.sh
|
||||
|
||||
The step by step instructions include Red Hat 8 / Fedora.
|
||||
|
||||
** Detailed Tool-chain Instal Guide
|
||||
Section 2.1 described Wally platform requirements and Section 2.2 describes how a user gets started using Wally on a Linux server. This appendix describes how the system administrator installs RISC-V tools. Superuser privileges are necessary for many of the tools. Setting up all of the tools can be time-consuming and fussy, so this appendix also describes a fallback flow with Docker and Podman.
|
||||
|
||||
*** Open Source Software Installation
|
||||
|
||||
Compiling, assembling, and simulating RISC-V programs requires downloading and installing the following free tools:
|
||||
|
||||
1. The GCC cross-compiler
|
||||
2. A RISC-V simulator such as Spike, Sail, and/or QEMU
|
||||
3. Spike is easy to use but doesn’t support peripherals to boot Linux
|
||||
4. QEMU is faster and can boot Linux
|
||||
5. Sail is presently the official golden reference for RISC-V and is used by the riscof verification suite, but runs slowly and is painful to instal
|
||||
|
||||
This setup needs to be done once by the administrator
|
||||
|
||||
Note: The following directions assume you have an account called cad to install shared software and files. You can substitute a different user for cad if you prefer.
|
||||
|
||||
Note: Installing software in Linux is unreasonably touchy and varies with the flavor and version of your Linux distribution. Don’t be surprised if the installation directions have changed since the book was written or don’t work on your machine; you may need some ingenuity to adjust them. Browse the openhwgroup/core-v-wally repo and look at the README.md for the latest build instructions.
|
||||
|
||||
*** Create the $RISCV Directory
|
||||
|
||||
First, set up a directory for riscv software in some place such as /opt/riscv. We will call this shared directory $RISCV.
|
||||
|
||||
$ export RISCV=/opt/riscv
|
||||
$ sudo mkdir $RISCV
|
||||
$ sudo chown cad $RISCV
|
||||
$ sudo su cad (or root, if you don’t have a cad account)
|
||||
$ export RISCV=/opt/riscv
|
||||
$ chmod 755 $RISCV
|
||||
$ umask 0002
|
||||
$ cd $RISCV
|
||||
|
||||
*** Update Tools
|
||||
|
||||
Ubuntu users may need to install and update various tools.
|
||||
|
||||
$ sudo apt update
|
||||
$ sudo apt upgrade
|
||||
$ sudo apt install git gawk make texinfo bison flex build-essential python libz-dev libexpat-dev autoconf device-tree-compiler ninja-build libglib2.56-dev libpixman-1-dev build-essential ncurses-base ncurses-bin libncurses5-dev dialog
|
||||
|
||||
*** Install RISC-V GCC Cross-Compiler
|
||||
|
||||
To install GCC from source can take hours to compile. This configuration enables multilib to target many flavors of RISC-V. This book is tested with GCC 12.2 (tagged 2022.09.21), but will likely work with newer versions as well.
|
||||
|
||||
$ git clone https://github.com/riscv/riscv-gnu-toolchain
|
||||
$ cd riscv-gnu-toolchain
|
||||
$ git checkout 2022.09.21
|
||||
$ ./configure --prefix=$RISCV --enable-multilib --with-multilib-generator="rv32e-ilp32e--;rv32i-ilp32--;rv32im-ilp32--;rv32iac-ilp32--;rv32imac-ilp32--;rv32imafc-ilp32f--;rv32imafdc-ilp32d--;rv64i-lp64--;rv64ic-lp64--;rv64iac-lp64--;rv64imac-lp64--;rv64imafdc-lp64d--;rv64im-lp64--;"
|
||||
$ make --jobs
|
||||
|
||||
Note: make --jobs will reduce compile time by compiling in parallel. However, adding this option could dramatically increase the memory utilization of your local machine.
|
||||
|
||||
*** Install elf2hex
|
||||
|
||||
We also need the elf2hex utility to convert executable files into hexadecimal files for Verilog simulation. Install with:
|
||||
|
||||
$ cd $RISCV
|
||||
$ export PATH=$RISCV/riscv-gnu-toolchain/bin:$PATH
|
||||
$ git clone https://github.com/sifive/elf2hex.git
|
||||
$ cd elf2hex
|
||||
$ autoreconf -i
|
||||
$ ./configure --target=riscv64-unknown-elf --prefix=$RISCV
|
||||
$ make
|
||||
$ make install
|
||||
|
||||
Note: The exe2hex utility that comes with Spike doesn’t work for our purposes because it doesn’t handle programs that start at 0x80000000. The SiFive version above is touchy to install. For example, if Python version 2.x is in your path, it won’t install correctly. Also, be sure riscv64-unknown-elf-objcopy shows up in your path in $RISCV/riscv-gnu-toolchain/bin at the time of compilation, or elf2hex won’t work properly.
|
||||
|
||||
*** Install RISC-V Spike Simulator
|
||||
|
||||
Spike also takes a while to install and compile, but this can be done concurrently with the GCC installation. After the build, we need to change two Makefiles to support atomic instructions .
|
||||
|
||||
$ cd $RISCV
|
||||
$ git clone https://github.com/riscv-software-src/riscv-isa-sim
|
||||
$ mkdir riscv-isa-sim/build
|
||||
$ cd riscv-isa-sim/build
|
||||
$ ../configure --prefix=$RISCV --enable-commitlog
|
||||
$ make --jobs
|
||||
$ make install
|
||||
$ cd ../arch_test_target/spike/device
|
||||
$ sed -i 's/--isa=rv32ic/--isa=rv32iac/' rv32i_m/privilege/Makefile.include
|
||||
$ sed -i 's/--isa=rv64ic/--isa=rv64iac/' rv64i_m/privilege/Makefile.include
|
||||
|
||||
*** Install Sail Simulator
|
||||
|
||||
Sail is the new golden reference model for RISC-V. Sail is written in OCaml, which is an object-oriented extension of ML, which in turn is a functional programming language suited to formal verification. OCaml is installed with the opam OCcaml package manager. Sail has so many dependencies that it can be difficult to install.
|
||||
|
||||
On Ubuntu, apt-get makes opam installation fairly simple.
|
||||
|
||||
$ sudo apt-get install opam build-essential libgmp-dev z3 pkg-config zlib1g-dev
|
||||
|
||||
If you are on RedHat/Rocky Linux 8, installation is much more difficult because packages are not available in the default package manager and some need to be built from source.
|
||||
|
||||
$ sudo bash -c "sh <(curl -fsSL https://raw.githubusercontent.com/ocaml/opam/master/shell/install.sh)"
|
||||
When prompted, put it in /usr/bin
|
||||
$ sudo yum groupinstall 'Development Tools'
|
||||
$ sudo yum -y install gmp-devel
|
||||
$ sudo yum -y install zlib-devel
|
||||
$ git clone https://github.com/Z3Prover/z3.git
|
||||
$ cd z3
|
||||
$ python scripts/mk_make.py
|
||||
$ cd build
|
||||
$ make
|
||||
$ sudo make install
|
||||
$ cd ../..
|
||||
$ sudo pip3 install chardet==3.0.4
|
||||
$ sudo pip3 install urllib3==1.22
|
||||
|
||||
Once you have installed the packages on either Ubuntu or RedHat, use opam to install the OCaml compiler and Sail. Run as the cad user because you will be installing Sail in $RISCV.
|
||||
|
||||
$ sudo su cad
|
||||
$ opam init -y --disable-sandboxing
|
||||
$ opam switch create ocaml-base-compiler.4.06.1
|
||||
$ opam install sail -y
|
||||
|
||||
Now you can clone and compile Sail-RISCV. This will take a while.
|
||||
|
||||
$ eval $(opam config env)
|
||||
$ cd $RISCV
|
||||
$ git clone https://github.com/riscv/sail-riscv.git
|
||||
$ cd sail-riscv
|
||||
$ make
|
||||
$ ARCH=RV32 make
|
||||
$ ARCH=RV64 make
|
||||
$ exit
|
||||
$ sudo su
|
||||
$ export RISCV=/opt/riscv
|
||||
$ ln -s $RISCV/sail-riscv/c_emulator/riscv_sim_RV64 /usr/bin/riscv_sim_RV64
|
||||
$ ln -s $RISCV/sail-riscv/c_emulator/riscv_sim_RV32 /usr/bin/riscv_sim_RV32
|
||||
$ exit
|
||||
|
||||
*** Install riscof
|
||||
|
||||
riscof is a Python library used as the RISC-V compatibility framework test an implementation such as Wally or Spike against the Sail golden reference. It will be used to compile the riscv-arch-test suite.
|
||||
|
||||
It is most convenient if the sysadmin installs riscof into the server’s Python libraries:
|
||||
|
||||
$ sudo pip3 install testresources
|
||||
$ sudo pip3 install riscof --ignore-installed PyYAML
|
||||
|
||||
However, riscof can also be installed and run locally by individual users.
|
||||
|
||||
*** Install Verilator
|
||||
|
||||
Verilator is a free Verilog simulator with a good Lint tool used to catch errors in the SystemVerilog code. It is needed to run regression.
|
||||
$ sudo apt install verilator
|
||||
|
||||
*** Install QEMU Simulator
|
||||
|
||||
QEMU is another simulator used when booting Linux in Chapter 17. You can optionally install it using the following commands.
|
||||
|
||||
<SIDEBAR>
|
||||
The QEMU patch changes the VirtIO driver to match the Wally peripherals, and also adds print statements to log the state of the CSRs (see Section 2.5XREF).
|
||||
</END>
|
||||
|
||||
$ cd $RISCV
|
||||
$ git clone --recurse-submodules https://github.com/qemu/qemu
|
||||
$ cd qemu
|
||||
$ git checkout v6.2.0 # last version tested; newer versions might be ok
|
||||
$ ./configure --target-list=riscv64-softmmu --prefix=$RISCV
|
||||
$ make --jobs
|
||||
$ make install
|
||||
|
||||
*** Cross-Compile Buildroot Linux
|
||||
|
||||
Building Linux is only necessary for exploring the boot process in Chapter 17. Building and generating a trace is a time-consuming operation that could be skipped for now; you can return to this section later if you are interested in the Linux details.
|
||||
|
||||
Buildroot depends on configuration files in riscv-wally, so the cad user must install Wally first according to the instructions in Section 2.2.2. However, don’t source ~/wally-riscv/setup.sh because it will set LD_LIBRARY_PATH in a way to cause make to fail on buildroot.
|
||||
|
||||
To configure and build Buildroot:
|
||||
|
||||
$ cd $RISCV
|
||||
$ export WALLY=~/riscv-wally # make sure you haven’t sourced ~/riscv-wally/setup.sh by now
|
||||
$ git clone https://github.com/buildroot/buildroot.git
|
||||
$ cd buildroot
|
||||
$ git checkout 2021.05 # last tested working version
|
||||
$ cp -r $WALLY/linux/buildroot-config-src/wally ./board
|
||||
$ cp ./board/wally/main.config .config
|
||||
$ make --jobs
|
||||
|
||||
To generate disassembly files and the device tree, run another make script. Note that you can expect some warnings about phandle references while running dtc on wally-virt.dtb.
|
||||
|
||||
$ source ~/riscv-wally/setup.sh
|
||||
$ cd $WALLY/linux/buildroot-scripts
|
||||
$ make all
|
||||
|
||||
Note: When the make tasks complete, you’ll find source code in $RISCV/buildroot/output/build and the executables in $RISCV/buildroot/output/images.
|
||||
|
||||
*** Download Synthesis Libraries
|
||||
|
||||
For logic synthesis, we need a synthesis tool (see Section 3.XREF) and a cell library. Clone the OSU 12-track cell library for the Skywater 130 nm process:
|
||||
|
||||
$ cd $RISCV
|
||||
$ mkdir cad
|
||||
$ mkdir cad/lib
|
||||
$ cd cad/lib
|
||||
$ git clone https://foss-eda-tools.googlesource.com/skywater-pdk/libs/sky130_osu_sc_t12
|
||||
|
||||
** Installing EDA Tools
|
||||
|
||||
Electronic Design Automation (EDA) tools are vital to implementations of System on Chip architectures as well as validating different designs. Open-source and commercial tools exist for multiple strategies and although the one can spend a lifetime using combinations of different tools, only a small subset of tools is utilized for this text. The tools are chosen because of their ease in access as well as their repeatability for accomplishing many of the tasks utilized to design Wally. It is anticipated that additional tools may be documented later after this is text is published to improve use and access.
|
||||
|
||||
Siemens Quest is the primary tool utilized for simulating and validating Wally. For logic synthesis, you will need Synopsys Design Compiler. Questa and Design Compiler are commercial tools that require an educational or commercial license.
|
||||
|
||||
Note: Some EDA tools utilize LM_LICENSE_FILE for their environmental variable to point to their license server. Some operating systems may also utilize MGLS_LICENSE_FILE instead, therefore, it is important to read the user manual on the preferred environmental variable required to point to a user’s license file. Although there are different mechanisms to allow licenses to work, many companies commonly utilize the FlexLM (i.e., Flex-enabled) license server manager that runs off a node locked license.
|
||||
|
||||
Although most EDA tools are Linux-friendly, they tend to have issues when not installed on recommended OS flavors. Both Red Hat Enterprise Linux and SUSE Linux products typically tend to be recommended for installing commercial-based EDA tools and are recommended for utilizing complex simulation and architecture exploration. Questa can also be installed on Microsoft Windows as well as Mac OS with a Virtual Machine such as Parallels.
|
||||
|
||||
Siemens Questa
|
||||
|
||||
Siemens Questa simulates behavioral, RTL and gate-level HDL. To install Siemens Questa first go to a web browser and navigate to
|
||||
https://eda.sw.siemens.com/en-US/ic/questa/simulation/advanced-simulator/. Click Sign In and log in with your credentials and the product can easily be downloaded and installed. Some Windows-based installations also require gcc libraries that are typically provided as a compressed zip download through Siemens.
|
||||
|
||||
Synopsys Design Compiler (DC)
|
||||
|
||||
Many commercial synthesis and place and route tools require a common installer. These installers are provided by the EDA vendor and Synopsys has one called Synopsys Installer. To use Synopsys Installer, you will need to acquire a license through Synopsys that is typically Called Synopsys Common Licensing (SCL). Both the Synopsys Installer, license key file, and Design Compiler can all be downloaded through Synopsys Solvnet. First open a web browser, log into Synsopsy Solvnet, and download the installer and Design Compiler installation files. Then, install the Installer
|
||||
|
||||
$ firefox &
|
||||
Navigate to https://solvnet.synopsys.com
|
||||
Log in with your institution’s username and password
|
||||
Click on Downloads, then scroll down to Synopsys Installer
|
||||
Select the latest version (currently 5.4). Click Download Here, agree,
|
||||
Click on SynopsysInstaller_v5.4.run
|
||||
Return to downloads and also get Design Compiler (synthesis) latest version, and any others you want.
|
||||
Click on all parts and the .spf file, then click Download Files near the top
|
||||
move the SynopsysIntaller into /cad/synopsys/Installer_5.4 with 755 permission for cad,
|
||||
move other files into /cad/synopsys/downloads and work as user cad from here on
|
||||
$ cd /cad/synopsys/installer_5.4
|
||||
$ ./SynopsysInstaller_v5.4.run
|
||||
Accept default installation directory
|
||||
$ ./installer
|
||||
Enter source path as /cad/synopsys/downloads, and installation path as /cad/synopsys
|
||||
When prompted, enter your site ID
|
||||
Follow prompts
|
||||
|
||||
Installer can be utilized in graphical or text-based modes. It is far easier to use the text-based installation tool. To install DC, navigate to the location where your downloaded DC files are and type installer. You should be prompted with questions related to where you wish to have your files installed.
|
||||
|
||||
The Synopsys Installer automatically installs all downloaded product files into a single top-level target directory. You do not need to specify the installation directory for each product. For example, if you specify /import/programs/synopsys as the target directory, your installation directory structure might look like this after installation:
|
||||
|
||||
/import/programs/synopsys/syn/S-2021.06-SP1
|
||||
|
||||
Note: Although most parts of Wally, including the software used in this chapter and Questa simulation, will work on most modern Linux platforms, as of 2022, the Synopsys CAD tools for SoC design are only supported on RedHat Enterprise Linux 7.4 or 8 or SUSE Linux Enterprise Server (SLES) 12 or 15. Moreover, the RISC-V formal specification (sail-riscv) does not build gracefully on RHEL7.
|
||||
|
||||
The Verilog simulation has been tested with Siemens Questa/ModelSim. This package is available to universities worldwide as part of the Design Verification Bundle through the Siemens Academic Partner Program members for $990/year.
|
||||
|
||||
If you want to implement your own version of the chip, your tool and license complexity rises significantly. Logic synthesis uses Synopsys Design Compiler. Placement and routing uses Cadence Innovus. Both Synopsys and Cadence offer their tools at a steep discount to their university program members, but the cost is still several thousand dollars per year. Most research universities with integrated circuit design programs have Siemens, Synopsys, and Cadence licenses. You also need a process design kit (PDK) for a specific integrated circuit technology and its libraries. The open-source Google Skywater 130 nm PDK is sufficient to synthesize the core but lacks memories. Google presently funds some fabrication runs for universities. IMEC and Muse Semiconductor offers full access to multiproject wafer fabrication on the TSMC 28 nm process including logic, I/O, and memory libraries; this involves three non-disclosure agreements. Fabrication costs on the order of $10,000 for a batch of 1 mm2 chips.
|
||||
|
||||
Startups can expect to spend more than $1 million on CAD tools to get a chip to market. Commercial CAD tools are not realistically available to individuals without a university or company connection.
|
||||
|
||||
* Core-v-wally Repo Installation
|
||||
** TL;DR Repo Install
|
||||
cd
|
||||
git clone --recurse-submodules https://github.com/davidharrishmc/riscv-wally
|
||||
cd riscv-wally
|
||||
source ./setup.sh # may require some modification for your system. Always run once after opening a new terminal.
|
||||
|
||||
** Detailed Repo Install Guide
|
||||
|
||||
1. cd
|
||||
Return to home directory. The home directory is sufficent a location for students.
|
||||
However more advanced users may choose to clone wally into another directory.
|
||||
|
||||
2. git clone --recurse-submodules https://github.com/davidharrishmc/riscv-wally
|
||||
Clone the wally repository and all dependent submodules into subdirectory riscv-wally.
|
||||
|
||||
3. cd riscv-wally
|
||||
Change directory to the wally repos riscv-wally.
|
||||
|
||||
4. source ./setup.sh
|
||||
setup.sh is s configuration script which creates several environment variables.
|
||||
WALLY: Absolute directory path to this repo clone.
|
||||
MGLS_LICENSE_FILE: Siemens license server for questa sim (modelsim). If your computer
|
||||
is already configured for questa remove variable.
|
||||
SNPSLMD_LICENSE_FILE: Synopsys license server. If remove if already setup.
|
||||
PATH: PATH is extended to include the installation directories for Siemens questa and
|
||||
Synopsys design compiler. Remove if already setup.
|
||||
Adds riscv-gnu-toolchain and spike to PATH. Adjust if installed in another location.
|
||||
Or remove if already in the PATH variable.
|
||||
Adds path to wally repo specific tools. (Must include.)
|
||||
Adds path to verilator. Remove if already in path.
|
||||
RISCV: This is the location of the riscv tool chain and other wally requirements.
|
||||
See the Sys Admin section for details.
|
||||
|
||||
If using ubuntu 22.04 setup.sh can be reduced to
|
||||
|
||||
echo "Executing Wally setup.sh"
|
||||
|
||||
# Path to Wally repository
|
||||
#!/bin/bash
|
||||
|
||||
WALLY=$(dirname ${BASH_SOURCE[0]:-$0})
|
||||
export WALLY=$(cd "$WALLY" && pwd)
|
||||
echo \$WALLY set to ${WALLY}
|
||||
|
||||
# Path to RISC-V Tools
|
||||
export RISCV=/opt/riscv # change this if you installed the tools in a different location
|
||||
|
||||
# utility functions in Wally repository
|
||||
export PATH=$PATH:$RISCV/bin
|
||||
export PATH=$WALLY/bin:$PATH
|
||||
|
||||
* Build and Run Regression Tests
|
||||
Ensure the system tools are installed.
|
||||
|
||||
cd <to location of repo clone>
|
||||
make
|
||||
cd sim
|
||||
./regression-wally #(depends on having Questa installed)
|
||||
|
274
README.md
274
README.md
|
@ -11,15 +11,15 @@ Wally is described in an upcoming textbook, *RISC-V System-on-Chip Design*, by H
|
|||
|
||||
New users may wish to do the following setup to access the server via a GUI and use a text editor.
|
||||
|
||||
Git started with Git configuration and authentication: B.1 (replace with your name and email)
|
||||
$ git config --global user.name "Ben Bitdiddle"
|
||||
$ git config --global user.email "ben_bitdiddle@wally.edu"
|
||||
$ git config --global pull.rebase false
|
||||
Optional: Download and install x2go - A.1.1
|
||||
Optional: Download and install VSCode - A.4.2
|
||||
Optional: Make sure you can log into your server via x2go and via a terminal
|
||||
Terminal on Mac, cmd on Windows, xterm on Linux
|
||||
See A.1 about ssh -Y login from a terminal
|
||||
Git started with Git configuration and authentication: B.1
|
||||
$ git config --global user.name ″Ben Bitdiddle″
|
||||
$ git config --global user.email ″ben_bitdiddle@wally.edu″
|
||||
$ git config --global pull.rebase false
|
||||
|
||||
Then clone the repo, source setup, make the tests and run regression
|
||||
|
||||
|
@ -30,20 +30,20 @@ Then clone the repo, source setup, make the tests and run regression
|
|||
|
||||
On the Linux computer where you will be working, log in
|
||||
|
||||
Clone your fork of the repo and run the setup script.
|
||||
|
||||
$ cd
|
||||
$ git clone --recurse-submodules https://github.com/<yourgithubid>/cvw
|
||||
$ git remote add upstream https://github.com/openhwgroup/cvw
|
||||
$ cd cvw
|
||||
$ source ./setup.sh
|
||||
|
||||
Add the following lines to your .bashrc or .bash_profile to run the setup script each time you log in.
|
||||
|
||||
if [ -f ~/cvw/setup.sh ]; then
|
||||
source ~/cvw/setup.sh
|
||||
fi
|
||||
|
||||
Clone your fork of the repo, run the setup script, and build the tests:
|
||||
|
||||
$ cd
|
||||
$ git clone --recurse-submodules https://github.com/<yourgithubid>/cvw
|
||||
$ cd cvw
|
||||
$ source ./setup.sh
|
||||
$ make
|
||||
|
||||
Edit setup.sh and change the following lines to point to the path and license server for your Siemens Questa and Synopsys Design Compiler installation and license server. If you only have Questa, you can still simulate but cannot run logic synthesis.
|
||||
|
||||
export MGLS_LICENSE_FILE=1717@solidworks.eng.hmc.edu # Change this to your Siemens license server
|
||||
|
@ -51,253 +51,27 @@ Edit setup.sh and change the following lines to point to the path and license se
|
|||
export QUESTAPATH=/cad/mentor/questa_sim-2021.2_1/questasim/bin # Change this for your path to Questa
|
||||
export SNPSPATH=/cad/synopsys/SYN/bin # Change this for your path to Design Compiler
|
||||
|
||||
Run a regression simulation with Questa to prove everything is installed.
|
||||
If the tools are not yet installed on your server, follow the Toolchain Installation instructions in the section below.
|
||||
|
||||
Build the tests and run a regression simulation with Questa to prove everything is installed. Building tests will take a while.
|
||||
|
||||
$ make
|
||||
$ cd sim
|
||||
$ ./regression-wally (depends on having Questa installed)
|
||||
|
||||
# Toolchain Installation (Sys Admin)
|
||||
|
||||
This section describes the open source toolchain installation. These steps should only be done once by the system admin.
|
||||
This section describes the open source toolchain installation. The
|
||||
current version of the toolchain has been tested on Ubuntu and Red
|
||||
Hat/Rocky 8 Linux. Ubuntu works more smoothly and is recommended
|
||||
unless you have a compelling need for RedHat.
|
||||
|
||||
## TL;DR Open Source Tool-chain Installation
|
||||
|
||||
The full instalation details are involved can be be skipped using the following script, wally-tool-chain-install.sh.
|
||||
The script installs the open source tools to /opt/riscv by default. This can be changed by supply the path as the first argument. This script does not install buildroot (see the Detailed Tool-chain Install Guide in the following section) and does not install commercial EDA tools; Siemens Questa, Synopsys Design Compiler, or Cadence Innovus (see section Installing IDA Tools). It must be run as root or with sudo. This script is tested for Ubuntu, 20.04 and 22.04. Fedora and Red Hat can be installed in the Detailed Tool-chain Install Guide.
|
||||
|
||||
$ sudo wally-tool-chain-install.sh <optional, install directory, defaults to /opt/riscv>
|
||||
|
||||
## Detailed Toolchain Install Guide
|
||||
|
||||
This section describes how to install the tools needed for CORE-V-Wally. Superuser privileges are necessary for many of the tools. Setting up all of the tools can be time-consuming and fussy, so Appendix D also describes an option with a Docker container.
|
||||
|
||||
### Open Source Software Installation
|
||||
|
||||
Compiling, assembling, and simulating RISC-V programs requires downloading and installing the following free tools:
|
||||
|
||||
1. The GCC cross-compiler
|
||||
2. A RISC-V simulator such as Spike, Sail, and/or QEMU
|
||||
3. Spike is easy to use but doesn’t support peripherals to boot Linux
|
||||
4. QEMU is faster and can boot Linux
|
||||
5. Sail is presently the official golden reference for RISC-V and is used by the riscof verification suite, but runs slowly and is painful to instal
|
||||
|
||||
This setup needs to be done once by the administrator
|
||||
|
||||
Note: The following directions assume you have an account called cad to install shared software and files. You can substitute a different user for cad if you prefer.
|
||||
|
||||
Note: Installing software in Linux is unreasonably touchy and varies with the flavor and version of your Linux distribution. Don’t be surprised if the installation directions have changed since the book was written or don’t work on your machine; you may need some ingenuity to adjust them. Browse the openhwgroup/core-v-wally repo and look at the README.md for the latest build instructions.
|
||||
|
||||
### Create the $RISCV Directory
|
||||
|
||||
First, set up a directory for riscv software in some place such as /opt/riscv. We will call this shared directory $RISCV.
|
||||
|
||||
$ export RISCV=/opt/riscv
|
||||
$ sudo mkdir $RISCV
|
||||
$ sudo chown cad $RISCV
|
||||
$ sudo su cad (or root, if you don’t have a cad account)
|
||||
$ export RISCV=/opt/riscv
|
||||
$ chmod 755 $RISCV
|
||||
$ umask 0002
|
||||
$ cd $RISCV
|
||||
|
||||
### Update Tools
|
||||
|
||||
Ubuntu users may need to install and update various tools. Beware when cutting and pasting that some lines are long!
|
||||
|
||||
$ sudo apt update
|
||||
$ sudo apt upgrade
|
||||
$ sudo apt install git gawk make texinfo bison flex build-essential python3 zlib1g-dev libexpat-dev autoconf device-tree-compiler ninja-build libglib2.0-dev libpixman-1-dev build-essential ncurses-base ncurses-bin libncurses5-dev dialog
|
||||
|
||||
### Install RISC-V GCC Cross-Compiler
|
||||
|
||||
To install GCC from source can take hours to compile. This configuration enables multilib to target many flavors of RISC-V. This book is tested with GCC 12.2 (tagged 2023.01.31), but will likely work with newer versions as well.
|
||||
|
||||
$ git clone https://github.com/riscv/riscv-gnu-toolchain
|
||||
$ cd riscv-gnu-toolchain
|
||||
$ git checkout 2023.01.31
|
||||
$ ./configure --prefix=$RISCV --with-multilib-generator="rv32e-ilp32e--;rv32i-ilp32--;rv32im-ilp32--;rv32iac-ilp32--;rv32imac-ilp32--;rv32imafc-ilp32f--;rv32imafdc-ilp32d--;rv64i-lp64--;rv64ic-lp64--;rv64iac-lp64--;rv64imac-lp64--;rv64imafdc-lp64d--;rv64im-lp64--;"
|
||||
$ make --jobs
|
||||
|
||||
Note: make --jobs will reduce compile time by compiling in parallel. However, adding this option could dramatically increase the memory utilization of your local machine.
|
||||
|
||||
### Install elf2hex
|
||||
|
||||
We also need the elf2hex utility to convert executable files into hexadecimal files for Verilog simulation. Install with:
|
||||
|
||||
$ cd $RISCV
|
||||
$ export PATH=$RISCV/bin:$PATH
|
||||
$ git clone https://github.com/sifive/elf2hex.git
|
||||
$ cd elf2hex
|
||||
$ autoreconf -i
|
||||
$ ./configure --target=riscv64-unknown-elf --prefix=$RISCV
|
||||
$ make
|
||||
$ make install
|
||||
|
||||
Note: The exe2hex utility that comes with Spike doesn’t work for our purposes because it doesn’t handle programs that start at 0x80000000. The SiFive version above is touchy to install. For example, if Python version 2.x is in your path, it won’t install correctly. Also, be sure riscv64-unknown-elf-objcopy shows up in your path in $RISCV/riscv-gnu-toolchain/bin at the time of compilation, or elf2hex won’t work properly.
|
||||
|
||||
### Install RISC-V Spike Simulator
|
||||
|
||||
Spike also takes a while to install and compile, but this can be done concurrently with the GCC installation. After the build, we need to change two Makefiles to support atomic instructions .
|
||||
|
||||
$ cd $RISCV
|
||||
$ git clone https://github.com/riscv-software-src/riscv-isa-sim
|
||||
$ mkdir riscv-isa-sim/build
|
||||
$ cd riscv-isa-sim/build
|
||||
$ ../configure --prefix=$RISCV
|
||||
$ make --jobs
|
||||
$ make install
|
||||
$ cd ../arch_test_target/spike/device
|
||||
$ sed -i 's/--isa=rv32ic/--isa=rv32iac/' rv32i_m/privilege/Makefile.include
|
||||
$ sed -i 's/--isa=rv64ic/--isa=rv64iac/' rv64i_m/privilege/Makefile.include
|
||||
|
||||
### Install Sail Simulator
|
||||
|
||||
Sail is the new golden reference model for RISC-V. Sail is written in OCaml, which is an object-oriented extension of ML, which in turn is a functional programming language suited to formal verification. OCaml is installed with the opam OCcaml package manager. Sail has so many dependencies that it can be difficult to install.
|
||||
|
||||
On Ubuntu, apt-get makes opam installation fairly simple.
|
||||
|
||||
$ sudo apt-get install opam build-essential libgmp-dev z3 pkg-config zlib1g-dev
|
||||
|
||||
If you are on RedHat/Rocky Linux 8, installation is much more difficult because packages are not available in the default package manager and some need to be built from source.
|
||||
|
||||
$ sudo bash -c "sh <(curl -fsSL https://raw.githubusercontent.com/ocaml/opam/master/shell/install.sh)"
|
||||
When prompted, put it in /usr/bin
|
||||
$ sudo yum groupinstall 'Development Tools'
|
||||
$ sudo yum -y install gmp-devel
|
||||
$ sudo yum -y install zlib-devel
|
||||
$ git clone https://github.com/Z3Prover/z3.git
|
||||
$ cd z3
|
||||
$ python scripts/mk_make.py
|
||||
$ cd build
|
||||
$ make
|
||||
$ sudo make install
|
||||
$ cd ../..
|
||||
$ sudo pip3 install chardet==3.0.4
|
||||
$ sudo pip3 install urllib3==1.22
|
||||
|
||||
Once you have installed the packages on either Ubuntu or RedHat, use opam to install the OCaml compiler and Sail. Run as the cad user because you will be installing Sail in $RISCV.
|
||||
|
||||
$ sudo su cad
|
||||
$ opam init -y --disable-sandboxing
|
||||
$ opam switch create ocaml-base-compiler.4.06.1
|
||||
$ opam install sail -y
|
||||
|
||||
Now you can clone and compile Sail-RISCV. This will take a while.
|
||||
|
||||
$ eval $(opam config env)
|
||||
$ cd $RISCV
|
||||
$ git clone https://github.com/riscv/sail-riscv.git
|
||||
$ cd sail-riscv
|
||||
$ make
|
||||
$ ARCH=RV32 make
|
||||
$ ARCH=RV64 make
|
||||
$ exit
|
||||
$ sudo su
|
||||
$ export RISCV=/opt/riscv
|
||||
$ ln -sf $RISCV/sail-riscv/c_emulator/riscv_sim_RV64 /usr/bin/riscv_sim_RV64
|
||||
$ ln -sf $RISCV/sail-riscv/c_emulator/riscv_sim_RV32 /usr/bin/riscv_sim_RV32
|
||||
$ exit
|
||||
|
||||
### Install riscof
|
||||
|
||||
riscof is a Python library used as the RISC-V compatibility framework test an implementation such as Wally or Spike against the Sail golden reference. It will be used to compile the riscv-arch-test suite.
|
||||
|
||||
It is most convenient if the sysadmin installs riscof into the server’s Python libraries:
|
||||
|
||||
$ sudo pip3 install testresources
|
||||
$ sudo pip3 install riscof --ignore-installed PyYAML
|
||||
|
||||
However, riscof can also be installed and run locally by individual users.
|
||||
|
||||
### Other Python libraries
|
||||
|
||||
While a sysadmin is installing Python libraries, it's worth doing some more that will be needed by visualization scripts.
|
||||
|
||||
$ sudo pip3 install matplotlib scipy sklearn adjustText lief
|
||||
|
||||
### Install Verilator
|
||||
|
||||
Verilator is a free Verilog simulator with a good Lint tool used to catch errors in the SystemVerilog code. It is needed to run regression.
|
||||
$ sudo apt install verilator
|
||||
|
||||
### Install QEMU Simulator
|
||||
|
||||
QEMU is another simulator used when booting Linux in Chapter 17. You can optionally install it using the following commands.
|
||||
|
||||
<SIDEBAR>
|
||||
The QEMU patch changes the VirtIO driver to match the Wally peripherals, and also adds print statements to log the state of the CSRs (see Section 2.5XREF).
|
||||
</END>
|
||||
|
||||
$ cd $RISCV
|
||||
$ git clone --recurse-submodules https://github.com/qemu/qemu
|
||||
$ cd qemu
|
||||
$ git checkout v6.2.0 # last version tested; newer versions might be ok
|
||||
$ ./configure --target-list=riscv64-softmmu --prefix=$RISCV
|
||||
$ make --jobs
|
||||
$ make install
|
||||
|
||||
### Cross-Compile Buildroot Linux
|
||||
|
||||
Building Linux is only necessary for exploring the boot process in Chapter 17. Building and generating a trace is a time-consuming operation that could be skipped for now; you can return to this section later if you are interested in the Linux details.
|
||||
|
||||
Buildroot depends on configuration files in riscv-wally, so the cad user must install Wally first according to the instructions in Section 2.2.2. However, don’t source ~/wally-riscv/setup.sh because it will set LD_LIBRARY_PATH in a way to cause make to fail on buildroot.
|
||||
|
||||
To configure and build Buildroot:
|
||||
|
||||
$ cd $RISCV
|
||||
$ export WALLY=~/riscv-wally # make sure you haven’t sourced ~/riscv-wally/setup.sh by now
|
||||
$ git clone https://github.com/buildroot/buildroot.git
|
||||
$ cd buildroot
|
||||
$ git checkout 2021.05 # last tested working version
|
||||
$ cp -r $WALLY/linux/buildroot-config-src/wally ./board
|
||||
$ cp ./board/wally/main.config .config
|
||||
$ make --jobs
|
||||
|
||||
To generate disassembly files and the device tree, run another make script. Note that you can expect some warnings about phandle references while running dtc on wally-virt.dtb.
|
||||
|
||||
$ source ~/riscv-wally/setup.sh
|
||||
$ cd $WALLY/linux/buildroot-scripts
|
||||
$ make all
|
||||
|
||||
Note: When the make tasks complete, you’ll find source code in $RISCV/buildroot/output/build and the executables in $RISCV/buildroot/output/images.
|
||||
|
||||
### Generate load images for linux boot
|
||||
|
||||
The Questa linux boot uses preloaded bootram and ram memory. We use QEMU to generate these preloaded memory files. Files output in $RISCV/linux-testvectors
|
||||
|
||||
cd cvw/linux/testvector-generation
|
||||
./genInitMem.sh
|
||||
|
||||
This may require changing file permissions to the linux-testvectors directory.
|
||||
|
||||
### Generate QEMU linux trace
|
||||
|
||||
The linux testbench can instruction by instruction compare Wally's committed instructions against QEMU. To do this QEMU outputs a log file consisting of all instructions executed. Interrupts are handled by forcing the testbench to generate an interrupt at the same cycle as in QEMU. Generating this trace will take more than 24 hours.
|
||||
|
||||
cd cvw/linux/testvector-generation
|
||||
./genTrace.sh
|
||||
|
||||
### Download Synthesis Libraries
|
||||
|
||||
For logic synthesis, we need a synthesis tool (see Section 3.XREF) and a cell library. Clone the OSU 12-track cell library for the Skywater 130 nm process:
|
||||
|
||||
$ cd $RISCV
|
||||
$ mkdir cad
|
||||
$ mkdir cad/lib
|
||||
$ cd cad/lib
|
||||
$ git clone https://foss-eda-tools.googlesource.com/skywater-pdk/libs/sky130_osu_sc_t12
|
||||
|
||||
### Install github cli
|
||||
|
||||
The github cli allows users to directly issue pull requests from their fork back to openhwgroup/cvw using the command line.
|
||||
|
||||
$ type -p curl >/dev/null || sudo apt install curl -y
|
||||
$ curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg \ && sudo chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg \
|
||||
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
|
||||
&& sudo apt update \
|
||||
&& sudo apt install gh -y
|
||||
Ubuntu users can install the tools by running
|
||||
|
||||
$ sudo $WALLY/bin/wally-tool-chain-install.sh
|
||||
|
||||
See wally-tool-chain-install.sh for a detailed description of each component,
|
||||
or to issue the commands one at a time to install on the command line.
|
||||
## Installing EDA Tools
|
||||
|
||||
Electronic Design Automation (EDA) tools are vital to implementations of System on Chip architectures as well as validating different designs. Open-source and commercial tools exist for multiple strategies and although the one can spend a lifetime using combinations of different tools, only a small subset of tools is utilized for this text. The tools are chosen because of their ease in access as well as their repeatability for accomplishing many of the tasks utilized to design Wally. It is anticipated that additional tools may be documented later after this is text is published to improve use and access.
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
## Written: Ross Thompson ross1728@gmail.com
|
||||
## Created: 18 January 2023
|
||||
## Modified: 22 January 2023
|
||||
## Modified: 23 March 2023
|
||||
##
|
||||
## Purpose: Open source tool chain installation script
|
||||
##
|
||||
|
@ -26,22 +27,27 @@
|
|||
## and limitations under the License.
|
||||
################################################################################################
|
||||
|
||||
# Use /opt/riscv for installation - may require running script with sudo
|
||||
export RISCV="${1:-/opt/riscv}"
|
||||
export PATH=$PATH:$RISCV/bin
|
||||
|
||||
set -e # break on error
|
||||
|
||||
NUM_THREADS=1 # for low memory machines > 16GiB
|
||||
#NUM_THREADS=8 # for >= 32GiB
|
||||
# Modify accordingly for your machine
|
||||
# Increasing NUM_THREADS will speed up parallel compilation of the tools
|
||||
#NUM_THREADS=2 # for low memory machines > 16GiB
|
||||
NUM_THREADS=8 # for >= 32GiB
|
||||
#NUM_THREADS=16 # for >= 64GiB
|
||||
|
||||
sudo mkdir -p $RISCV
|
||||
|
||||
# UPDATE / UPGRADE
|
||||
apt update
|
||||
# Update and Upgrade tools (see https://itsfoss.com/apt-update-vs-upgrade/)
|
||||
apt update -y
|
||||
apt upgrade -y
|
||||
apt install -y git gawk make texinfo bison flex build-essential python3 libz-dev libexpat-dev autoconf device-tree-compiler ninja-build libpixman-1-dev ncurses-base ncurses-bin libncurses5-dev dialog curl wget ftp libgmp-dev libglib2.0-dev python3-pip pkg-config opam z3 zlib1g-dev verilator
|
||||
|
||||
# INSTALL
|
||||
apt install -y git gawk make texinfo bison flex build-essential python3 libz-dev libexpat-dev autoconf device-tree-compiler ninja-build libpixman-1-dev build-essential ncurses-base ncurses-bin libncurses5-dev dialog curl wget ftp libgmp-dev
|
||||
# Other python libraries used through the book.
|
||||
pip3 install matplotlib scipy scikit-learn adjustText lief
|
||||
|
||||
# needed for Ubuntu 22.04, gcc cross compiler expects python not python2 or python3.
|
||||
if ! command -v python &> /dev/null
|
||||
|
@ -50,7 +56,15 @@ then
|
|||
ln -sf /usr/bin/python3 /usr/bin/python
|
||||
fi
|
||||
|
||||
# gcc cross-compiler
|
||||
# gcc cross-compiler (https://github.com/riscv-collab/riscv-gnu-toolchain)
|
||||
# To install GCC from source can take hours to compile.
|
||||
#This configuration enables multilib to target many flavors of RISC-V.
|
||||
# This book is tested with GCC 12.2 (tagged 2023.01.31), but will likely work with newer versions as well.
|
||||
# Note that GCC12.2 has binutils 2.39, which has a known performance bug that causes
|
||||
# objdump to run 100x slower than in previous versions, causing riscof to make versy slowly.
|
||||
# However GCC12.x is needed for bit manipulation instructions. There is an open issue to fix this:
|
||||
# https://github.com/riscv-collab/riscv-gnu-toolchain/issues/1188
|
||||
|
||||
cd $RISCV
|
||||
git clone https://github.com/riscv/riscv-gnu-toolchain
|
||||
cd riscv-gnu-toolchain
|
||||
|
@ -59,9 +73,14 @@ git checkout 2023.01.31
|
|||
make -j ${NUM_THREADS}
|
||||
make install
|
||||
|
||||
# elf2hex
|
||||
# elf2hex (https://github.com/sifive/elf2hex)
|
||||
#The elf2hex utility to converts executable files into hexadecimal files for Verilog simulation.
|
||||
# Note: The exe2hex utility that comes with Spike doesn’t work for our purposes because it doesn’t
|
||||
# handle programs that start at 0x80000000. The SiFive version above is touchy to install.
|
||||
# For example, if Python version 2.x is in your path, it won’t install correctly.
|
||||
# Also, be sure riscv64-unknown-elf-objcopy shows up in your path in $RISCV/riscv-gnu-toolchain/bin
|
||||
# at the time of compilation, or elf2hex won’t work properly.
|
||||
cd $RISCV
|
||||
#export PATH=$RISCV/riscv-gnu-toolchain/bin:$PATH
|
||||
export PATH=$RISCV/bin:$PATH
|
||||
git clone https://github.com/sifive/elf2hex.git
|
||||
cd elf2hex
|
||||
|
@ -70,13 +89,8 @@ autoreconf -i
|
|||
make
|
||||
make install
|
||||
|
||||
# Update Python3.6 for QEMU
|
||||
apt-get -y update
|
||||
apt-get -y install python3-pip
|
||||
apt-get -y install pkg-config
|
||||
apt-get -y install libglib2.0-dev
|
||||
|
||||
# QEMU
|
||||
# QEMU (https://www.qemu.org/docs/master/system/target-riscv.html)
|
||||
cd $RISCV
|
||||
git clone --recurse-submodules https://github.com/qemu/qemu
|
||||
cd qemu
|
||||
|
@ -84,7 +98,9 @@ cd qemu
|
|||
make -j ${NUM_THREADS}
|
||||
make install
|
||||
|
||||
# Spike
|
||||
# Spike (https://github.com/riscv-software-src/riscv-isa-sim)
|
||||
# Spike also takes a while to install and compile, but this can be done concurrently
|
||||
#with the GCC installation. After the build, we need to change two Makefiles to support atomic instructions.
|
||||
cd $RISCV
|
||||
git clone https://github.com/riscv-software-src/riscv-isa-sim
|
||||
mkdir -p riscv-isa-sim/build
|
||||
|
@ -96,18 +112,25 @@ cd ../arch_test_target/spike/device
|
|||
sed -i 's/--isa=rv32ic/--isa=rv32iac/' rv32i_m/privilege/Makefile.include
|
||||
sed -i 's/--isa=rv64ic/--isa=rv64iac/' rv64i_m/privilege/Makefile.include
|
||||
|
||||
# SAIL
|
||||
cd $RISCV
|
||||
apt-get install -y opam build-essential libgmp-dev z3 pkg-config zlib1g-dev
|
||||
git clone https://github.com/Z3Prover/z3.git
|
||||
cd z3
|
||||
python scripts/mk_make.py
|
||||
cd build
|
||||
make -j ${NUM_THREADS}
|
||||
make install
|
||||
cd ../..
|
||||
pip3 install chardet==3.0.4
|
||||
pip3 install urllib3==1.22
|
||||
# Sail (https://github.com/riscv/sail-riscv)
|
||||
# Sail is the new golden reference model for RISC-V. Sail is written in OCaml, which
|
||||
# is an object-oriented extension of ML, which in turn is a functional programming
|
||||
# language suited to formal verification. OCaml is installed with the opam OCcaml
|
||||
# package manager. Sail has so many dependencies that it can be difficult to install.
|
||||
# This script works for Ubuntu.
|
||||
|
||||
# Do these commands only for RedHat / Rocky 8 to build from source.
|
||||
#cd $RISCV
|
||||
#git clone https://github.com/Z3Prover/z3.git
|
||||
#cd z3
|
||||
#python scripts/mk_make.py
|
||||
#cd build
|
||||
#make -j ${NUM_THREADS}
|
||||
#make install
|
||||
#cd ../..
|
||||
#pip3 install chardet==3.0.4
|
||||
#pip3 install urllib3==1.22
|
||||
|
||||
opam init -y --disable-sandboxing
|
||||
opam switch create ocaml-base-compiler.4.06.1
|
||||
opam install sail -y
|
||||
|
@ -127,13 +150,3 @@ ln -sf $RISCV/sail-riscv/c_emulator/riscv_sim_RV32 /usr/bin/riscv_sim_RV32
|
|||
pip3 install testresources
|
||||
pip3 install riscof --ignore-installed PyYAML
|
||||
|
||||
# Verilator
|
||||
apt install -y verilator
|
||||
|
||||
# install github cli (gh)
|
||||
type -p curl >/dev/null || sudo apt install curl -y
|
||||
curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg \
|
||||
&& sudo chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg \
|
||||
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
|
||||
&& sudo apt update \
|
||||
&& sudo apt install gh -y
|
||||
|
|
|
@ -145,10 +145,10 @@
|
|||
`define DIVCOPIES 32'h4
|
||||
|
||||
// bit manipulation
|
||||
`define ZBA_SUPPORTED 0
|
||||
`define ZBB_SUPPORTED 0
|
||||
`define ZBC_SUPPORTED 0
|
||||
`define ZBS_SUPPORTED 0
|
||||
`define ZBA_SUPPORTED 1
|
||||
`define ZBB_SUPPORTED 1
|
||||
`define ZBC_SUPPORTED 1
|
||||
`define ZBS_SUPPORTED 1
|
||||
|
||||
// Memory synthesis configuration
|
||||
`define USE_SRAM 0
|
||||
|
|
|
@ -148,10 +148,10 @@
|
|||
`define DIVCOPIES 32'h4
|
||||
|
||||
// bit manipulation
|
||||
`define ZBA_SUPPORTED 0
|
||||
`define ZBB_SUPPORTED 0
|
||||
`define ZBC_SUPPORTED 0
|
||||
`define ZBS_SUPPORTED 0
|
||||
`define ZBA_SUPPORTED 1
|
||||
`define ZBB_SUPPORTED 1
|
||||
`define ZBC_SUPPORTED 1
|
||||
`define ZBS_SUPPORTED 1
|
||||
|
||||
// Memory synthesis configuration
|
||||
`define USE_SRAM 0
|
||||
|
|
33
sim/Makefile
33
sim/Makefile
|
@ -1,3 +1,20 @@
|
|||
|
||||
all: riscoftests memfiles
|
||||
# *** Build old tests/imperas-riscv-tests for now;
|
||||
# Delete this part when the privileged tests transition over to tests/wally-riscv-arch-test
|
||||
# DH: 2/27/22 temporarily commented out imperas-riscv-tests because license expired
|
||||
#make -C ../tests/imperas-riscv-tests --jobs
|
||||
#make -C ../tests/imperas-riscv-tests XLEN=64 --jobs
|
||||
# Only compile Imperas tests if they are installed locally.
|
||||
# They are usually a symlink to $RISCV/imperas-riscv-tests and only
|
||||
# get compiled there manually during installation
|
||||
#make -C ../addins/imperas-riscv-tests
|
||||
#make -C ../addins/imperas-riscv-tests XLEN=64
|
||||
#cd ../addins/imperas-riscv-tests; elf2hex.sh
|
||||
#cd ../addins/imperas-riscv-tests; extractFunctionRadix.sh work/*/*/*.elf.objdump
|
||||
# Link Linux test vectors
|
||||
#cd ../tests/linux-testgen/linux-testvectors/;./tvLinker.sh
|
||||
|
||||
coverage:
|
||||
#make -C ../tests/coverage --jobs
|
||||
#iter-elf.bash --cover --search ../tests/coverage
|
||||
|
@ -21,22 +38,6 @@ coverage:
|
|||
# vcover report -recursive cov/cov.ucdb > cov/rv64gc_recursive.rpt
|
||||
vcover report -details -threshH 100 -html cov/cov.ucdb
|
||||
|
||||
all: riscoftests memfiles
|
||||
# *** Build old tests/imperas-riscv-tests for now;
|
||||
# Delete this part when the privileged tests transition over to tests/wally-riscv-arch-test
|
||||
# DH: 2/27/22 temporarily commented out imperas-riscv-tests because license expired
|
||||
#make -C ../tests/imperas-riscv-tests --jobs
|
||||
#make -C ../tests/imperas-riscv-tests XLEN=64 --jobs
|
||||
# Only compile Imperas tests if they are installed locally.
|
||||
# They are usually a symlink to $RISCV/imperas-riscv-tests and only
|
||||
# get compiled there manually during installation
|
||||
#make -C ../addins/imperas-riscv-tests
|
||||
#make -C ../addins/imperas-riscv-tests XLEN=64
|
||||
#cd ../addins/imperas-riscv-tests; elf2hex.sh
|
||||
#cd ../addins/imperas-riscv-tests; extractFunctionRadix.sh work/*/*/*.elf.objdump
|
||||
# Link Linux test vectors
|
||||
#cd ../tests/linux-testgen/linux-testvectors/;./tvLinker.sh
|
||||
|
||||
allclean: clean all
|
||||
|
||||
clean:
|
||||
|
|
|
@ -24,11 +24,22 @@
|
|||
#// and limitations under the License.
|
||||
#////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
# LZA (i<64) statement confuses coverage tool
|
||||
# This is ugly to exlcude the whole file - is there a better option
|
||||
coverage exclude -srcfile lzc.sv
|
||||
|
||||
|
||||
######################
|
||||
# Toggle exclusions
|
||||
# Not used because toggle coverage isn't measured
|
||||
######################
|
||||
|
||||
# Exclude DivBusyE from all design units because rv64gc uses the fdivsqrt unit for integer division
|
||||
coverage exclude -togglenode DivBusyE -du *
|
||||
#coverage exclude -togglenode DivBusyE -du *
|
||||
# Exclude QuotM and RemM from MDU because rv64gc uses the fdivsqrt rather tha div unit for integer division
|
||||
coverage exclude -togglenode /dut/core/mdu/mdu/QuotM
|
||||
coverage exclude -togglenode /dut/core/mdu/mdu/RemM
|
||||
#coverage exclude -togglenode /dut/core/mdu/mdu/QuotM
|
||||
#coverage exclude -togglenode /dut/core/mdu/mdu/RemM
|
||||
|
||||
# StallFCause is hardwired to 0
|
||||
coverage exclude -togglenode /dut/core/hzu/StallFCause
|
||||
#coverage exclude -togglenode /dut/core/hzu/StallFCause
|
||||
|
||||
|
|
|
@ -79,7 +79,7 @@ for test in tests64i:
|
|||
configs.append(tc)
|
||||
|
||||
tests32gcimperas = ["imperas32i", "imperas32f", "imperas32m", "imperas32c"] # unused
|
||||
tests32gc = ["arch32f", "arch32d", "arch32i", "arch32priv", "arch32c", "arch32m", "arch32zi", "wally32a", "wally32priv", "wally32periph"]
|
||||
tests32gc = ["arch32f", "arch32d", "arch32i", "arch32priv", "arch32c", "arch32m", "arch32zi", "arch32zba", "arch32zbb", "arch32zbc", "arch32zbs", "wally32a", "wally32priv", "wally32periph"]
|
||||
for test in tests32gc:
|
||||
tc = TestCase(
|
||||
name=test,
|
||||
|
@ -126,12 +126,18 @@ for test in ahbTests:
|
|||
grepstr="All tests ran without failures")
|
||||
configs.append(tc)
|
||||
|
||||
#tests64gc = ["arch64i", "arch64c", "arch64m"]
|
||||
tests64gc = ["arch64f", "arch64d", "arch64i", "arch64priv", "arch64c", "arch64m", "arch64zi", "wally64a", "wally64periph", "wally64priv"]
|
||||
tests64gc = ["arch64f", "arch64d", "arch64i", "arch64zba", "arch64zbb", "arch64zbc", "arch64zbs",
|
||||
"arch64priv", "arch64c", "arch64m", "arch64zi", "wally64a", "wally64periph", "wally64priv"]
|
||||
if (coverage): # delete all but 64gc tests when running coverage
|
||||
configs = []
|
||||
tests64gc = ["arch64f", "arch64d", "arch64i", "arch64priv", "arch64c", "arch64m", "arch64zi", "wally64a", "wally64periph", "wally64priv", "imperas64f", "imperas64d", "imperas64c", "imperas64i"]
|
||||
# tests64gc.append(["imperas64f", "imperas64d", "imperas64c", "imperas64i"])
|
||||
tests64gc = ["coverage64gc", "arch64i", "arch64priv", "arch64c", "arch64m",
|
||||
"arch64zi", "wally64a", "wally64periph", "wally64priv",
|
||||
"arch64zba", "arch64zbb", "arch64zbc", "arch64zbs",
|
||||
"imperas64f", "imperas64d", "imperas64c", "imperas64i"]
|
||||
# tests64gc = ["coverage64gc", "arch64f", "arch64d", "arch64i", "arch64priv", "arch64c", "arch64m",
|
||||
# "arch64zi", "wally64a", "wally64periph", "wally64priv",
|
||||
# "arch64zba", "arch64zbb", "arch64zbc", "arch64zbs",
|
||||
# "imperas64f", "imperas64d", "imperas64c", "imperas64i"]
|
||||
coverStr = '-coverage'
|
||||
else:
|
||||
coverStr = ''
|
||||
|
@ -156,7 +162,7 @@ def run_test_case(config):
|
|||
"""Run the given test case, and return 0 if the test suceeds and 1 if it fails"""
|
||||
logname = "logs/"+config.variant+"_"+config.name+".log"
|
||||
cmd = config.cmd.format(logname)
|
||||
print(cmd)
|
||||
# print(cmd)
|
||||
os.chdir(regressionDir)
|
||||
os.system(cmd)
|
||||
if search_log_for_text(config.grepstr, logname):
|
||||
|
|
22
src/cache/cache.sv
vendored
22
src/cache/cache.sv
vendored
|
@ -98,9 +98,9 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
|
|||
logic CacheEn;
|
||||
logic [CACHEWORDSPERLINE-1:0] MemPAdrDecoded;
|
||||
logic [LINELEN/8-1:0] LineByteMask, DemuxedByteMask, FetchBufferByteSel;
|
||||
logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr;
|
||||
logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr;
|
||||
|
||||
genvar index;
|
||||
genvar index;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Read Path
|
||||
|
@ -154,9 +154,9 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
|
|||
|
||||
// Bus address for fetch, writeback, or flush writeback
|
||||
mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
|
||||
.d1({Tag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
|
||||
.d2({Tag, FlushAdr, {OFFSETLEN{1'b0}}}),
|
||||
.s({SelFlush, SelWriteback}), .y(CacheBusAdr));
|
||||
.d1({Tag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
|
||||
.d2({Tag, FlushAdr, {OFFSETLEN{1'b0}}}),
|
||||
.s({SelFlush, SelWriteback}), .y(CacheBusAdr));
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Write Path
|
||||
|
@ -198,11 +198,11 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
|
|||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
cachefsm #(READ_ONLY_CACHE) cachefsm(.clk, .reset, .CacheBusRW, .CacheBusAck,
|
||||
.FlushStage, .CacheRW, .CacheAtomic, .Stall,
|
||||
.CacheHit, .LineDirty, .CacheStall, .CacheCommitted,
|
||||
.CacheMiss, .CacheAccess, .SelAdr,
|
||||
.ClearValid, .ClearDirty, .SetDirty, .SetValid, .SelWriteback, .SelFlush,
|
||||
.FlushAdrCntEn, .FlushWayCntEn, .FlushCntRst,
|
||||
.FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelFetchBuffer,
|
||||
.FlushStage, .CacheRW, .CacheAtomic, .Stall,
|
||||
.CacheHit, .LineDirty, .CacheStall, .CacheCommitted,
|
||||
.CacheMiss, .CacheAccess, .SelAdr,
|
||||
.ClearValid, .ClearDirty, .SetDirty, .SetValid, .SelWriteback, .SelFlush,
|
||||
.FlushAdrCntEn, .FlushWayCntEn, .FlushCntRst,
|
||||
.FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelFetchBuffer,
|
||||
.InvalidateCache, .CacheEn, .LRUWriteEn);
|
||||
endmodule
|
||||
|
|
76
src/cache/cachefsm.sv
vendored
76
src/cache/cachefsm.sv
vendored
|
@ -47,7 +47,7 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) (
|
|||
output logic [1:0] CacheBusRW, // [1] Read (cache line fetch) or [0] write bus (cache line writeback)
|
||||
// performance counter outputs
|
||||
output logic CacheMiss, // Cache miss
|
||||
output logic CacheAccess, // Cache access
|
||||
output logic CacheAccess, // Cache access
|
||||
|
||||
// cache internals
|
||||
input logic CacheHit, // Exactly 1 way hits
|
||||
|
@ -69,21 +69,21 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) (
|
|||
output logic CacheEn // Enable the cache memory arrays. Disable hold read data constant
|
||||
);
|
||||
|
||||
logic resetDelay;
|
||||
logic AMO, StoreAMO;
|
||||
logic AnyUpdateHit, AnyHit;
|
||||
logic AnyMiss;
|
||||
logic FlushFlag;
|
||||
logic resetDelay;
|
||||
logic AMO, StoreAMO;
|
||||
logic AnyUpdateHit, AnyHit;
|
||||
logic AnyMiss;
|
||||
logic FlushFlag;
|
||||
|
||||
typedef enum logic [3:0]{STATE_READY, // hit states
|
||||
// miss states
|
||||
STATE_FETCH,
|
||||
STATE_WRITEBACK,
|
||||
STATE_WRITE_LINE,
|
||||
STATE_READ_HOLD, // required for back to back reads. structural hazard on writting SRAM
|
||||
// flush cache
|
||||
STATE_FLUSH,
|
||||
STATE_FLUSH_WRITEBACK} statetype;
|
||||
// miss states
|
||||
STATE_FETCH,
|
||||
STATE_WRITEBACK,
|
||||
STATE_WRITE_LINE,
|
||||
STATE_READ_HOLD, // required for back to back reads. structural hazard on writting SRAM
|
||||
// flush cache
|
||||
STATE_FLUSH,
|
||||
STATE_FLUSH_WRITEBACK} statetype;
|
||||
|
||||
statetype CurrState, NextState;
|
||||
|
||||
|
@ -111,26 +111,26 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) (
|
|||
always_comb begin
|
||||
NextState = STATE_READY;
|
||||
case (CurrState)
|
||||
STATE_READY: if(InvalidateCache) NextState = STATE_READY;
|
||||
else if(FlushCache & ~READ_ONLY_CACHE) NextState = STATE_FLUSH;
|
||||
else if(AnyMiss & (READ_ONLY_CACHE | ~LineDirty)) NextState = STATE_FETCH;
|
||||
else if(AnyMiss & LineDirty) NextState = STATE_WRITEBACK;
|
||||
else NextState = STATE_READY;
|
||||
STATE_FETCH: if(CacheBusAck) NextState = STATE_WRITE_LINE;
|
||||
else NextState = STATE_FETCH;
|
||||
STATE_WRITE_LINE: NextState = STATE_READ_HOLD;
|
||||
STATE_READ_HOLD: if(Stall) NextState = STATE_READ_HOLD;
|
||||
else NextState = STATE_READY;
|
||||
STATE_WRITEBACK: if(CacheBusAck) NextState = STATE_FETCH;
|
||||
else NextState = STATE_WRITEBACK;
|
||||
STATE_READY: if(InvalidateCache) NextState = STATE_READY;
|
||||
else if(FlushCache & ~READ_ONLY_CACHE) NextState = STATE_FLUSH;
|
||||
else if(AnyMiss & (READ_ONLY_CACHE | ~LineDirty)) NextState = STATE_FETCH;
|
||||
else if(AnyMiss & LineDirty) NextState = STATE_WRITEBACK;
|
||||
else NextState = STATE_READY;
|
||||
STATE_FETCH: if(CacheBusAck) NextState = STATE_WRITE_LINE;
|
||||
else NextState = STATE_FETCH;
|
||||
STATE_WRITE_LINE: NextState = STATE_READ_HOLD;
|
||||
STATE_READ_HOLD: if(Stall) NextState = STATE_READ_HOLD;
|
||||
else NextState = STATE_READY;
|
||||
STATE_WRITEBACK: if(CacheBusAck) NextState = STATE_FETCH;
|
||||
else NextState = STATE_WRITEBACK;
|
||||
// eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack.
|
||||
STATE_FLUSH: if(LineDirty) NextState = STATE_FLUSH_WRITEBACK;
|
||||
else if (FlushFlag) NextState = STATE_READ_HOLD;
|
||||
else NextState = STATE_FLUSH;
|
||||
STATE_FLUSH_WRITEBACK: if(CacheBusAck & ~FlushFlag) NextState = STATE_FLUSH;
|
||||
else if(CacheBusAck) NextState = STATE_READ_HOLD;
|
||||
else NextState = STATE_FLUSH_WRITEBACK;
|
||||
default: NextState = STATE_READY;
|
||||
STATE_FLUSH: if(LineDirty) NextState = STATE_FLUSH_WRITEBACK;
|
||||
else if (FlushFlag) NextState = STATE_READ_HOLD;
|
||||
else NextState = STATE_FLUSH;
|
||||
STATE_FLUSH_WRITEBACK: if(CacheBusAck & ~FlushFlag) NextState = STATE_FLUSH;
|
||||
else if(CacheBusAck) NextState = STATE_READ_HOLD;
|
||||
else NextState = STATE_FLUSH_WRITEBACK;
|
||||
default: NextState = STATE_READY;
|
||||
endcase
|
||||
end
|
||||
|
||||
|
@ -156,14 +156,14 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) (
|
|||
(CurrState == STATE_READY & AnyMiss & LineDirty);
|
||||
|
||||
assign SelFlush = (CurrState == STATE_READY & FlushCache) |
|
||||
(CurrState == STATE_FLUSH) |
|
||||
(CurrState == STATE_FLUSH_WRITEBACK);
|
||||
(CurrState == STATE_FLUSH) |
|
||||
(CurrState == STATE_FLUSH_WRITEBACK);
|
||||
assign FlushAdrCntEn = (CurrState == STATE_FLUSH_WRITEBACK & FlushWayFlag & CacheBusAck) |
|
||||
(CurrState == STATE_FLUSH & FlushWayFlag & ~LineDirty);
|
||||
(CurrState == STATE_FLUSH & FlushWayFlag & ~LineDirty);
|
||||
assign FlushWayCntEn = (CurrState == STATE_FLUSH & ~LineDirty) |
|
||||
(CurrState == STATE_FLUSH_WRITEBACK & CacheBusAck);
|
||||
(CurrState == STATE_FLUSH_WRITEBACK & CacheBusAck);
|
||||
assign FlushCntRst = (CurrState == STATE_FLUSH & FlushFlag & ~LineDirty) |
|
||||
(CurrState == STATE_FLUSH_WRITEBACK & FlushFlag & CacheBusAck);
|
||||
(CurrState == STATE_FLUSH_WRITEBACK & FlushFlag & CacheBusAck);
|
||||
// Bus interface controls
|
||||
assign CacheBusRW[1] = (CurrState == STATE_READY & AnyMiss & ~LineDirty) |
|
||||
(CurrState == STATE_FETCH & ~CacheBusAck) |
|
||||
|
|
14
src/cache/cacheway.sv
vendored
14
src/cache/cacheway.sv
vendored
|
@ -30,7 +30,7 @@
|
|||
`include "wally-config.vh"
|
||||
|
||||
module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,
|
||||
OFFSETLEN = 5, INDEXLEN = 9, READ_ONLY_CACHE = 0) (
|
||||
OFFSETLEN = 5, INDEXLEN = 9, READ_ONLY_CACHE = 0) (
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic FlushStage, // Pipeline flush of second stage (prevent writes and bus operations)
|
||||
|
@ -86,8 +86,6 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,
|
|||
assign SelNonHit = FlushWayEn | SetValid | SelWriteback;
|
||||
|
||||
mux2 #(1) seltagmux(VictimWay, FlushWay, SelFlush, SelTag);
|
||||
//assign SelTag = VictimWay | FlushWay;
|
||||
//assign SelData = HitWay | FlushWayEn | VictimWayEn;
|
||||
|
||||
mux2 #(1) selectedwaymux(HitWay, SelTag, SelNonHit , SelData);
|
||||
|
||||
|
@ -95,10 +93,6 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,
|
|||
// Write Enable demux
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// RT: Can we merge these two muxes? This is also shared in cacheLRU.
|
||||
//mux3 #(1) selectwaymux(HitWay, VictimWay, FlushWay, {SelFlush, SetValid}, SelData);
|
||||
//mux3 #(1) selecteddatamux(HitWay, VictimWay, FlushWay, {SelFlush, SelNonHit}, SelData);
|
||||
|
||||
assign SetValidWay = SetValid & SelData;
|
||||
assign ClearValidWay = ClearValid & SelData;
|
||||
assign SetDirtyWay = SetDirty & SelData;
|
||||
|
@ -117,8 +111,6 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,
|
|||
.addr(CacheSet), .dout(ReadTag), .bwe('1),
|
||||
.din(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .we(SetValidEN));
|
||||
|
||||
|
||||
|
||||
// AND portion of distributed tag multiplexer
|
||||
assign TagWay = SelTag ? ReadTag : '0; // AND part of AOMux
|
||||
assign DirtyWay = SelTag & Dirty & ValidWay;
|
||||
|
@ -152,8 +144,8 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,
|
|||
always_ff @(posedge clk) begin // Valid bit array,
|
||||
if (reset) ValidBits <= #1 '0;
|
||||
if(CacheEn) begin
|
||||
ValidWay <= #1 ValidBits[CacheSet];
|
||||
if(InvalidateCache) ValidBits <= #1 '0;
|
||||
ValidWay <= #1 ValidBits[CacheSet];
|
||||
if(InvalidateCache) ValidBits <= #1 '0;
|
||||
else if (SetValidEN | (ClearValidWay & ~FlushStage)) ValidBits[CacheSet] <= #1 SetValidWay;
|
||||
end
|
||||
end
|
||||
|
|
6
src/cache/subcachelineread.sv
vendored
6
src/cache/subcachelineread.sv
vendored
|
@ -33,8 +33,8 @@ module subcachelineread #(parameter LINELEN, WORDLEN,
|
|||
parameter MUXINTERVAL )( // The number of bits between mux. Set to 16 for I$ to support compressed. Set to `LLEN for D$
|
||||
|
||||
input logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1 : 0] PAdr, // Physical address
|
||||
input logic [LINELEN-1:0] ReadDataLine,// Read data of the whole cacheline
|
||||
output logic [WORDLEN-1:0] ReadDataWord // read data of selected word.
|
||||
input logic [LINELEN-1:0] ReadDataLine,// Read data of the whole cacheline
|
||||
output logic [WORDLEN-1:0] ReadDataWord // read data of selected word.
|
||||
);
|
||||
|
||||
localparam WORDSPERLINE = LINELEN/MUXINTERVAL;
|
||||
|
@ -50,7 +50,7 @@ module subcachelineread #(parameter LINELEN, WORDLEN,
|
|||
|
||||
genvar index;
|
||||
for (index = 0; index < WORDSPERLINE; index++) begin:readdatalinesetsmux
|
||||
assign ReadDataLineSets[index] = ReadDataLinePad[(index*MUXINTERVAL)+WORDLEN-1 : (index*MUXINTERVAL)];
|
||||
assign ReadDataLineSets[index] = ReadDataLinePad[(index*MUXINTERVAL)+WORDLEN-1 : (index*MUXINTERVAL)];
|
||||
end
|
||||
|
||||
// variable input mux
|
||||
|
|
|
@ -35,7 +35,7 @@ module ahbcacheinterface #(
|
|||
parameter LINELEN, // Number of bits in cacheline
|
||||
parameter LLENPOVERAHBW // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation)
|
||||
)(
|
||||
input logic HCLK, HRESETn,
|
||||
input logic HCLK, HRESETn,
|
||||
// bus interface controls
|
||||
input logic HREADY, // AHB peripheral ready
|
||||
output logic [1:0] HTRANS, // AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ
|
||||
|
@ -56,7 +56,7 @@ module ahbcacheinterface #(
|
|||
input logic [1:0] CacheBusRW, // Cache bus operation, 01: writeback, 10: fetch
|
||||
output logic CacheBusAck, // Handshack to $ indicating bus transaction completed
|
||||
output logic [LINELEN-1:0] FetchBuffer, // Register to hold beats of cache line as the arrive from bus
|
||||
output logic [AHBWLOGBWPL-1:0] BeatCount, // Beat position within the cache line in the Address Phase
|
||||
output logic [AHBWLOGBWPL-1:0] BeatCount, // Beat position within the cache line in the Address Phase
|
||||
output logic SelBusBeat, // Tells the cache to select the word from ReadData or WriteData from BeatCount rather than PAdr
|
||||
|
||||
// uncached interface
|
||||
|
@ -76,10 +76,10 @@ module ahbcacheinterface #(
|
|||
logic [`PA_BITS-1:0] LocalHADDR; // Address after selecting between cached and uncached operation
|
||||
logic [AHBWLOGBWPL-1:0] BeatCountDelayed; // Beat within the cache line in the second (Data) cache stage
|
||||
logic CaptureEn; // Enable updating the Fetch buffer with valid data from HRDATA
|
||||
logic [`AHBW/8-1:0] BusByteMaskM; // Byte enables within a word. For cache request all 1s
|
||||
logic [`AHBW/8-1:0] BusByteMaskM; // Byte enables within a word. For cache request all 1s
|
||||
logic [`AHBW-1:0] PreHWDATA; // AHB Address phase write data
|
||||
|
||||
genvar index;
|
||||
genvar index;
|
||||
|
||||
// fetch buffer is made of BEATSPERLINE flip-flops
|
||||
for (index = 0; index < BEATSPERLINE; index++) begin:fetchbuffer
|
||||
|
@ -100,7 +100,7 @@ module ahbcacheinterface #(
|
|||
logic [`AHBW-1:0] AHBWordSets [(LLENPOVERAHBW)-1:0];
|
||||
genvar index;
|
||||
for (index = 0; index < LLENPOVERAHBW; index++) begin:readdatalinesetsmux
|
||||
assign AHBWordSets[index] = CacheReadDataWordM[(index*`AHBW)+`AHBW-1: (index*`AHBW)];
|
||||
assign AHBWordSets[index] = CacheReadDataWordM[(index*`AHBW)+`AHBW-1: (index*`AHBW)];
|
||||
end
|
||||
assign CacheReadDataWordAHB = AHBWordSets[BeatCount[$clog2(LLENPOVERAHBW)-1:0]];
|
||||
end else assign CacheReadDataWordAHB = CacheReadDataWordM[`AHBW-1:0];
|
||||
|
@ -118,5 +118,5 @@ module ahbcacheinterface #(
|
|||
buscachefsm #(BeatCountThreshold, AHBWLOGBWPL) AHBBuscachefsm(
|
||||
.HCLK, .HRESETn, .Flush, .BusRW, .Stall, .BusCommitted, .BusStall, .CaptureEn, .SelBusBeat,
|
||||
.CacheBusRW, .CacheBusAck, .BeatCount, .BeatCountDelayed,
|
||||
.HREADY, .HTRANS, .HWRITE, .HBURST);
|
||||
.HREADY, .HTRANS, .HWRITE, .HBURST);
|
||||
endmodule
|
||||
|
|
|
@ -32,29 +32,28 @@
|
|||
module ahbinterface #(
|
||||
parameter LSU = 0 // 1: LSU bus width is `XLEN, 0: IFU bus width is 32 bits
|
||||
)(
|
||||
input logic HCLK, HRESETn,
|
||||
input logic HCLK, HRESETn,
|
||||
// bus interface
|
||||
input logic HREADY, // AHB peripheral ready
|
||||
output logic [1:0] HTRANS, // AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ
|
||||
output logic HWRITE, // AHB 0: Read operation 1: Write operation
|
||||
input logic [`XLEN-1:0] HRDATA, // AHB read data
|
||||
output logic [`XLEN-1:0] HWDATA, // AHB write data
|
||||
output logic [`XLEN/8-1:0] HWSTRB, // AHB byte mask
|
||||
input logic HREADY, // AHB peripheral ready
|
||||
output logic [1:0] HTRANS, // AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ
|
||||
output logic HWRITE, // AHB 0: Read operation 1: Write operation
|
||||
input logic [`XLEN-1:0] HRDATA, // AHB read data
|
||||
output logic [`XLEN-1:0] HWDATA, // AHB write data
|
||||
output logic [`XLEN/8-1:0] HWSTRB, // AHB byte mask
|
||||
|
||||
// lsu/ifu interface
|
||||
input logic Stall, // Core pipeline is stalled
|
||||
input logic Flush, // Pipeline stage flush. Prevents bus transaction from starting
|
||||
input logic [1:0] BusRW, // Memory operation read/write control: 10: read, 01: write
|
||||
input logic [`XLEN/8-1:0] ByteMask, // Bytes enables within a word
|
||||
input logic [`XLEN-1:0] WriteData, // IEU write data for a store
|
||||
output logic BusStall, // Bus is busy with an in flight memory operation
|
||||
output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt
|
||||
input logic Stall, // Core pipeline is stalled
|
||||
input logic Flush, // Pipeline stage flush. Prevents bus transaction from starting
|
||||
input logic [1:0] BusRW, // Memory operation read/write control: 10: read, 01: write
|
||||
input logic [`XLEN/8-1:0] ByteMask, // Bytes enables within a word
|
||||
input logic [`XLEN-1:0] WriteData, // IEU write data for a store
|
||||
output logic BusStall, // Bus is busy with an in flight memory operation
|
||||
output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt
|
||||
output logic [(LSU ? `XLEN : 32)-1:0] FetchBuffer // Register to hold HRDATA after arriving from the bus
|
||||
);
|
||||
|
||||
logic CaptureEn;
|
||||
|
||||
localparam LEN = (LSU ? `XLEN : 32); // 32 bits for IFU, XLEN for LSU
|
||||
logic CaptureEn;
|
||||
localparam LEN = (LSU ? `XLEN : 32); // 32 bits for IFU, XLEN for LSU
|
||||
|
||||
flopen #(LEN) fb(.clk(HCLK), .en(CaptureEn), .d(HRDATA[LEN-1:0]), .q(FetchBuffer));
|
||||
|
||||
|
@ -70,4 +69,5 @@ module ahbinterface #(
|
|||
busfsm busfsm(.HCLK, .HRESETn, .Flush, .BusRW,
|
||||
.BusCommitted, .Stall, .BusStall, .CaptureEn, .HREADY,
|
||||
.HTRANS, .HWRITE);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -35,33 +35,33 @@ module buscachefsm #(
|
|||
parameter BeatCountThreshold, // Largest beat index
|
||||
parameter AHBWLOGBWPL // Log2 of BEATSPERLINE
|
||||
)(
|
||||
input logic HCLK,
|
||||
input logic HRESETn,
|
||||
input logic HCLK,
|
||||
input logic HRESETn,
|
||||
|
||||
// IEU interface
|
||||
input logic Stall, // Core pipeline is stalled
|
||||
input logic Flush, // Pipeline stage flush. Prevents bus transaction from starting
|
||||
input logic [1:0] BusRW, // Uncached memory operation read/write control: 10: read, 01: write
|
||||
output logic BusStall, // Bus is busy with an in flight memory operation
|
||||
output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt
|
||||
|
||||
// ahb cache interface locals.
|
||||
output logic CaptureEn, // Enable updating the Fetch buffer with valid data from HRDATA
|
||||
|
||||
// cache interface
|
||||
input logic [1:0] CacheBusRW, // Cache bus operation, 01: writeback, 10: fetch
|
||||
output logic CacheBusAck, // Handshack to $ indicating bus transaction completed
|
||||
input logic Stall, // Core pipeline is stalled
|
||||
input logic Flush, // Pipeline stage flush. Prevents bus transaction from starting
|
||||
input logic [1:0] BusRW, // Uncached memory operation read/write control: 10: read, 01: write
|
||||
output logic BusStall, // Bus is busy with an in flight memory operation
|
||||
output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt
|
||||
|
||||
// ahb cache interface locals.
|
||||
output logic CaptureEn, // Enable updating the Fetch buffer with valid data from HRDATA
|
||||
|
||||
// cache interface
|
||||
input logic [1:0] CacheBusRW, // Cache bus operation, 01: writeback, 10: fetch
|
||||
output logic CacheBusAck, // Handshack to $ indicating bus transaction completed
|
||||
|
||||
// lsu interface
|
||||
output logic [AHBWLOGBWPL-1:0] BeatCount, // Beat position within the cache line in the Address Phase
|
||||
output logic [AHBWLOGBWPL-1:0] BeatCountDelayed, // Beat within the cache line in the second (Data) cache stage
|
||||
output logic SelBusBeat, // Tells the cache to select the word from ReadData or WriteData from BeatCount rather than PAdr
|
||||
output logic SelBusBeat, // Tells the cache to select the word from ReadData or WriteData from BeatCount rather than PAdr
|
||||
|
||||
// BUS interface
|
||||
input logic HREADY, // AHB peripheral ready
|
||||
output logic [1:0] HTRANS, // AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ
|
||||
output logic HWRITE, // AHB 0: Read operation 1: Write operation
|
||||
output logic [2:0] HBURST // AHB burst length
|
||||
input logic HREADY, // AHB peripheral ready
|
||||
output logic [1:0] HTRANS, // AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ
|
||||
output logic HWRITE, // AHB 0: Read operation 1: Write operation
|
||||
output logic [2:0] HBURST // AHB burst length
|
||||
);
|
||||
|
||||
typedef enum logic [2:0] {ADR_PHASE, DATA_PHASE, MEM3, CACHE_FETCH, CACHE_WRITEBACK} busstatetype;
|
||||
|
@ -70,26 +70,26 @@ module buscachefsm #(
|
|||
busstatetype CurrState, NextState;
|
||||
|
||||
logic [AHBWLOGBWPL-1:0] NextBeatCount;
|
||||
logic FinalBeatCount;
|
||||
logic [2:0] LocalBurstType;
|
||||
logic BeatCntEn;
|
||||
logic BeatCntReset;
|
||||
logic CacheAccess;
|
||||
logic FinalBeatCount;
|
||||
logic [2:0] LocalBurstType;
|
||||
logic BeatCntEn;
|
||||
logic BeatCntReset;
|
||||
logic CacheAccess;
|
||||
|
||||
always_ff @(posedge HCLK)
|
||||
if (~HRESETn | Flush) CurrState <= #1 ADR_PHASE;
|
||||
else CurrState <= #1 NextState;
|
||||
|
||||
always_comb begin
|
||||
case(CurrState)
|
||||
ADR_PHASE: if (HREADY & |BusRW) NextState = DATA_PHASE;
|
||||
else if (HREADY & CacheBusRW[0]) NextState = CACHE_WRITEBACK;
|
||||
else if (HREADY & CacheBusRW[1]) NextState = CACHE_FETCH;
|
||||
else NextState = ADR_PHASE;
|
||||
DATA_PHASE: if(HREADY) NextState = MEM3;
|
||||
else NextState = DATA_PHASE;
|
||||
MEM3: if(Stall) NextState = MEM3;
|
||||
else NextState = ADR_PHASE;
|
||||
case(CurrState)
|
||||
ADR_PHASE: if (HREADY & |BusRW) NextState = DATA_PHASE;
|
||||
else if (HREADY & CacheBusRW[0]) NextState = CACHE_WRITEBACK;
|
||||
else if (HREADY & CacheBusRW[1]) NextState = CACHE_FETCH;
|
||||
else NextState = ADR_PHASE;
|
||||
DATA_PHASE: if(HREADY) NextState = MEM3;
|
||||
else NextState = DATA_PHASE;
|
||||
MEM3: if(Stall) NextState = MEM3;
|
||||
else NextState = ADR_PHASE;
|
||||
CACHE_FETCH: if(HREADY & FinalBeatCount & CacheBusRW[0]) NextState = CACHE_WRITEBACK;
|
||||
else if(HREADY & FinalBeatCount & CacheBusRW[1]) NextState = CACHE_FETCH;
|
||||
else if(HREADY & FinalBeatCount & ~|CacheBusRW) NextState = ADR_PHASE;
|
||||
|
@ -98,8 +98,8 @@ module buscachefsm #(
|
|||
else if(HREADY & FinalBeatCount & CacheBusRW[1]) NextState = CACHE_FETCH;
|
||||
else if(HREADY & FinalBeatCount & ~|CacheBusRW) NextState = ADR_PHASE;
|
||||
else NextState = CACHE_WRITEBACK;
|
||||
default: NextState = ADR_PHASE;
|
||||
endcase
|
||||
default: NextState = ADR_PHASE;
|
||||
endcase
|
||||
end
|
||||
|
||||
// IEU, LSU, and IFU controls
|
||||
|
@ -117,8 +117,8 @@ module buscachefsm #(
|
|||
assign CacheAccess = CurrState == CACHE_FETCH | CurrState == CACHE_WRITEBACK;
|
||||
|
||||
assign BusStall = (CurrState == ADR_PHASE & ((|BusRW) | (|CacheBusRW))) |
|
||||
//(CurrState == DATA_PHASE & ~BusRW[0]) | // *** replace the next line with this. Fails uart test but i think it's a test problem not a hardware problem.
|
||||
(CurrState == DATA_PHASE) |
|
||||
//(CurrState == DATA_PHASE & ~BusRW[0]) | // *** replace the next line with this. Fails uart test but i think it's a test problem not a hardware problem.
|
||||
(CurrState == DATA_PHASE) |
|
||||
(CurrState == CACHE_FETCH & ~HREADY) |
|
||||
(CurrState == CACHE_WRITEBACK & ~HREADY);
|
||||
assign BusCommitted = CurrState != ADR_PHASE;
|
||||
|
@ -144,7 +144,7 @@ module buscachefsm #(
|
|||
// communication to cache
|
||||
assign CacheBusAck = (CacheAccess & HREADY & FinalBeatCount);
|
||||
assign SelBusBeat = (CurrState == ADR_PHASE & (BusRW[0] | CacheBusRW[0])) |
|
||||
(CurrState == DATA_PHASE & BusRW[0]) |
|
||||
(CurrState == DATA_PHASE & BusRW[0]) |
|
||||
(CurrState == CACHE_WRITEBACK) |
|
||||
(CurrState == CACHE_FETCH);
|
||||
|
||||
|
|
|
@ -57,20 +57,20 @@ module busfsm (
|
|||
else CurrState <= #1 NextState;
|
||||
|
||||
always_comb begin
|
||||
case(CurrState)
|
||||
ADR_PHASE: if(HREADY & |BusRW) NextState = DATA_PHASE;
|
||||
else NextState = ADR_PHASE;
|
||||
DATA_PHASE: if(HREADY) NextState = MEM3;
|
||||
else NextState = DATA_PHASE;
|
||||
MEM3: if(Stall) NextState = MEM3;
|
||||
else NextState = ADR_PHASE;
|
||||
default: NextState = ADR_PHASE;
|
||||
endcase
|
||||
case(CurrState)
|
||||
ADR_PHASE: if(HREADY & |BusRW) NextState = DATA_PHASE;
|
||||
else NextState = ADR_PHASE;
|
||||
DATA_PHASE: if(HREADY) NextState = MEM3;
|
||||
else NextState = DATA_PHASE;
|
||||
MEM3: if(Stall) NextState = MEM3;
|
||||
else NextState = ADR_PHASE;
|
||||
default: NextState = ADR_PHASE;
|
||||
endcase
|
||||
end
|
||||
|
||||
assign BusStall = (CurrState == ADR_PHASE & |BusRW) |
|
||||
// (CurrState == DATA_PHASE & ~BusRW[0]); // possible optimization here. fails uart test, but i'm not sure the failure is valid.
|
||||
(CurrState == DATA_PHASE);
|
||||
// (CurrState == DATA_PHASE & ~BusRW[0]); // possible optimization here. fails uart test, but i'm not sure the failure is valid.
|
||||
(CurrState == DATA_PHASE);
|
||||
|
||||
assign BusCommitted = CurrState != ADR_PHASE;
|
||||
|
||||
|
|
|
@ -36,26 +36,26 @@
|
|||
module controllerinputstage #(
|
||||
parameter SAVE_ENABLED = 1 // 1: Save manager inputs if Save = 1, 0: Don't save inputs
|
||||
)(
|
||||
input logic HCLK,
|
||||
input logic HRESETn,
|
||||
input logic Save, // Two or more managers requesting (HTRANS != 00) at the same time. Save the non-granted manager inputs
|
||||
input logic Restore, // Restore a saved manager inputs when it is finally granted
|
||||
input logic Disable, // Supress HREADY to the non-granted manager
|
||||
output logic Request, // This manager is making a request
|
||||
input logic HCLK,
|
||||
input logic HRESETn,
|
||||
input logic Save, // Two or more managers requesting (HTRANS != 00) at the same time. Save the non-granted manager inputs
|
||||
input logic Restore, // Restore a saved manager inputs when it is finally granted
|
||||
input logic Disable, // Supress HREADY to the non-granted manager
|
||||
output logic Request, // This manager is making a request
|
||||
// controller input
|
||||
input logic [1:0] HTRANSIn, // Manager input. AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ
|
||||
input logic HWRITEIn, // Manager input. AHB 0: Read operation 1: Write operation
|
||||
input logic [2:0] HSIZEIn, // Manager input. AHB transaction width
|
||||
input logic [2:0] HBURSTIn, // Manager input. AHB burst length
|
||||
input logic [1:0] HTRANSIn, // Manager input. AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ
|
||||
input logic HWRITEIn, // Manager input. AHB 0: Read operation 1: Write operation
|
||||
input logic [2:0] HSIZEIn, // Manager input. AHB transaction width
|
||||
input logic [2:0] HBURSTIn, // Manager input. AHB burst length
|
||||
input logic [`PA_BITS-1:0] HADDRIn, // Manager input. AHB address
|
||||
output logic HREADYOut, // Indicate to manager the peripherial is not busy and another manager does not have priority
|
||||
output logic HREADYOut, // Indicate to manager the peripherial is not busy and another manager does not have priority
|
||||
// controller output
|
||||
output logic [1:0] HTRANSOut, // Aribrated manager transaction. AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ
|
||||
output logic HWRITEOut, // Aribrated manager transaction. AHB 0: Read operation 1: Write operation
|
||||
output logic [2:0] HSIZEOut, // Aribrated manager transaction. AHB transaction width
|
||||
output logic [2:0] HBURSTOut, // Aribrated manager transaction. AHB burst length
|
||||
output logic [1:0] HTRANSOut, // Aribrated manager transaction. AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ
|
||||
output logic HWRITEOut, // Aribrated manager transaction. AHB 0: Read operation 1: Write operation
|
||||
output logic [2:0] HSIZEOut, // Aribrated manager transaction. AHB transaction width
|
||||
output logic [2:0] HBURSTOut, // Aribrated manager transaction. AHB burst length
|
||||
output logic [`PA_BITS-1:0] HADDROut, // Aribrated manager transaction. AHB address
|
||||
input logic HREADYIn // Peripherial ready
|
||||
input logic HREADYIn // Peripherial ready
|
||||
);
|
||||
|
||||
logic HWRITESave;
|
||||
|
|
|
@ -52,27 +52,26 @@ module ebu (
|
|||
output logic LSUHREADY, // AHB peripheral. Never gated as LSU always has priority
|
||||
|
||||
// AHB-Lite external signals
|
||||
output logic HCLK, HRESETn,
|
||||
input logic HREADY, // AHB peripheral ready
|
||||
input logic HRESP, // AHB peripheral response. 0: OK 1: Error
|
||||
output logic [`PA_BITS-1:0] HADDR, // AHB address to peripheral after arbitration
|
||||
output logic [`AHBW-1:0] HWDATA, // AHB Write data after arbitration
|
||||
output logic [`XLEN/8-1:0] HWSTRB, // AHB byte write enables after arbitration
|
||||
output logic HWRITE, // AHB transaction direction after arbitration
|
||||
output logic [2:0] HSIZE, // AHB transaction size after arbitration
|
||||
output logic [2:0] HBURST, // AHB burst length after arbitration
|
||||
output logic [3:0] HPROT, // AHB protection. Wally does not use
|
||||
output logic [1:0] HTRANS, // AHB transaction request after arbitration
|
||||
output logic HMASTLOCK // AHB master lock. Wally does not use
|
||||
output logic HCLK, HRESETn,
|
||||
input logic HREADY, // AHB peripheral ready
|
||||
input logic HRESP, // AHB peripheral response. 0: OK 1: Error
|
||||
output logic [`PA_BITS-1:0] HADDR, // AHB address to peripheral after arbitration
|
||||
output logic [`AHBW-1:0] HWDATA, // AHB Write data after arbitration
|
||||
output logic [`XLEN/8-1:0] HWSTRB, // AHB byte write enables after arbitration
|
||||
output logic HWRITE, // AHB transaction direction after arbitration
|
||||
output logic [2:0] HSIZE, // AHB transaction size after arbitration
|
||||
output logic [2:0] HBURST, // AHB burst length after arbitration
|
||||
output logic [3:0] HPROT, // AHB protection. Wally does not use
|
||||
output logic [1:0] HTRANS, // AHB transaction request after arbitration
|
||||
output logic HMASTLOCK // AHB master lock. Wally does not use
|
||||
);
|
||||
|
||||
|
||||
logic LSUDisable;
|
||||
logic LSUSelect;
|
||||
logic LSUSelect;
|
||||
logic IFUSave;
|
||||
logic IFURestore;
|
||||
logic IFUDisable;
|
||||
logic IFUSelect;
|
||||
logic IFURestore;
|
||||
logic IFUDisable;
|
||||
logic IFUSelect;
|
||||
|
||||
logic [`PA_BITS-1:0] IFUHADDROut;
|
||||
logic [1:0] IFUHTRANSOut;
|
||||
|
@ -87,10 +86,8 @@ module ebu (
|
|||
logic LSUHWRITEOut;
|
||||
|
||||
logic IFUReq;
|
||||
logic LSUReq;
|
||||
logic LSUReq;
|
||||
|
||||
|
||||
|
||||
assign HCLK = clk;
|
||||
assign HRESETn = ~reset;
|
||||
|
||||
|
@ -129,7 +126,7 @@ module ebu (
|
|||
// HRDATA is sent to all controllers at the core level.
|
||||
|
||||
ebufsmarb ebufsmarb(.HCLK, .HRESETn, .HBURST, .HREADY, .LSUReq, .IFUReq, .IFUSave,
|
||||
.IFURestore, .IFUDisable, .IFUSelect, .LSUDisable, .LSUSelect);
|
||||
.IFURestore, .IFUDisable, .IFUSelect, .LSUDisable, .LSUSelect);
|
||||
|
||||
endmodule
|
||||
|
||||
|
|
|
@ -31,34 +31,33 @@
|
|||
`include "wally-config.vh"
|
||||
|
||||
module ebufsmarb (
|
||||
input logic HCLK,
|
||||
input logic HRESETn,
|
||||
input logic HCLK,
|
||||
input logic HRESETn,
|
||||
input logic [2:0] HBURST,
|
||||
// AHB burst length
|
||||
|
||||
input logic HREADY,
|
||||
input logic HREADY,
|
||||
|
||||
input logic LSUReq,
|
||||
input logic IFUReq,
|
||||
input logic LSUReq,
|
||||
input logic IFUReq,
|
||||
|
||||
output logic IFUSave,
|
||||
output logic IFURestore,
|
||||
output logic IFUDisable,
|
||||
output logic IFUSelect,
|
||||
output logic LSUDisable,
|
||||
output logic LSUSelect);
|
||||
|
||||
output logic IFUSave,
|
||||
output logic IFURestore,
|
||||
output logic IFUDisable,
|
||||
output logic IFUSelect,
|
||||
output logic LSUDisable,
|
||||
output logic LSUSelect);
|
||||
|
||||
typedef enum logic [1:0] {IDLE, ARBITRATE} statetype;
|
||||
typedef enum logic [1:0] {IDLE, ARBITRATE} statetype;
|
||||
statetype CurrState, NextState;
|
||||
|
||||
logic both; // Both the LSU and IFU request at the same time
|
||||
logic IFUReqD; // 1 cycle delayed IFU request. Part of arbitration
|
||||
logic FinalBeat, FinalBeatD; // Indicates the last beat of a burst
|
||||
logic BeatCntEn;
|
||||
logic [4-1:0] NextBeatCount, BeatCount; // Position within a burst transfer
|
||||
logic CntReset;
|
||||
logic [3:0] Threshold; // Number of beats derived from HBURST
|
||||
logic both; // Both the LSU and IFU request at the same time
|
||||
logic IFUReqD; // 1 cycle delayed IFU request. Part of arbitration
|
||||
logic FinalBeat, FinalBeatD; // Indicates the last beat of a burst
|
||||
logic BeatCntEn;
|
||||
logic [3:0] BeatCount; // Position within a burst transfer
|
||||
logic BeatCntReset;
|
||||
logic [3:0] Threshold; // Number of beats derived from HBURST
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Aribtration scheme
|
||||
|
@ -70,8 +69,8 @@ module ebufsmarb (
|
|||
flopenl #(.TYPE(statetype)) busreg(HCLK, ~HRESETn, 1'b1, NextState, IDLE, CurrState);
|
||||
always_comb
|
||||
case (CurrState)
|
||||
IDLE: if (both) NextState = ARBITRATE;
|
||||
else NextState = IDLE;
|
||||
IDLE: if (both) NextState = ARBITRATE;
|
||||
else NextState = IDLE;
|
||||
ARBITRATE: if (HREADY & FinalBeatD & ~(LSUReq & IFUReq)) NextState = IDLE;
|
||||
else NextState = ARBITRATE;
|
||||
default: NextState = IDLE;
|
||||
|
@ -91,31 +90,33 @@ module ebufsmarb (
|
|||
// This is necessary because the pipeline is stalled for the entire duration of both transactions,
|
||||
// and the LSU memory request will stil be active.
|
||||
flopr #(1) ifureqreg(HCLK, ~HRESETn, IFUReq, IFUReqD);
|
||||
assign LSUDisable = CurrState == ARBITRATE ? 1'b0 : (IFUReqD & ~(HREADY & FinalBeatD));
|
||||
assign LSUSelect = NextState == ARBITRATE ? 1'b1: LSUReq;
|
||||
assign LSUDisable = (CurrState == ARBITRATE) ? 1'b0 : (IFUReqD & ~(HREADY & FinalBeatD));
|
||||
assign LSUSelect = (NextState == ARBITRATE) ? 1'b1: LSUReq;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Burst mode logic
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
flopenr #(4) BeatCountReg(HCLK, ~HRESETn | CntReset | FinalBeat, BeatCntEn, NextBeatCount, BeatCount);
|
||||
assign NextBeatCount = BeatCount + 1'b1;
|
||||
|
||||
assign CntReset = NextState == IDLE;
|
||||
assign BeatCntReset = NextState == IDLE;
|
||||
assign FinalBeat = (BeatCount == Threshold); // Detect when we are waiting on the final access.
|
||||
assign BeatCntEn = (NextState == ARBITRATE & HREADY);
|
||||
|
||||
// Counting the beats in the EBU is only necessary when both the LSU and IFU request concurrently.
|
||||
// LSU has priority. HREADY serves double duty during a burst transaction. It indicates when the
|
||||
// beat completes and when the transaction finishes. However there is nothing external to
|
||||
// differentiate them. The EBU counts the HREADY beats so it knows when to switch to the IFU's
|
||||
// request.
|
||||
assign BeatCntEn = (NextState == ARBITRATE) & HREADY;
|
||||
counter #(4) BeatCounter(HCLK, ~HRESETn | BeatCntReset | FinalBeat, BeatCntEn, BeatCount);
|
||||
|
||||
// Used to store data from data phase of AHB.
|
||||
flopenr #(1) FinalBeatReg(HCLK, ~HRESETn | CntReset, BeatCntEn, FinalBeat, FinalBeatD);
|
||||
flopenr #(1) FinalBeatReg(HCLK, ~HRESETn | BeatCntReset, BeatCntEn, FinalBeat, FinalBeatD);
|
||||
|
||||
// unlike the bus fsm in lsu/ifu, we need to derive the number of beats from HBURST.
|
||||
always_comb begin
|
||||
case(HBURST)
|
||||
0: Threshold = 4'b0000;
|
||||
3: Threshold = 4'b0011; // INCR4
|
||||
5: Threshold = 4'b0111; // INCR8
|
||||
7: Threshold = 4'b1111; // INCR16
|
||||
default: Threshold = 4'b0000; // INCR without end.
|
||||
endcase
|
||||
end
|
||||
// unlike the bus fsm in lsu/ifu, we need to derive the number of beats from HBURST, Threshold = num beats - 1.
|
||||
// HBURST[2:1] Beats threshold
|
||||
// 00 1 0
|
||||
// 01 4 3
|
||||
// 10 8 7
|
||||
// 11 16 15
|
||||
always_comb
|
||||
if (HBURST[2:1] == 2'b00) Threshold = 4'b0000;
|
||||
else Threshold = (2 << HBURST[2:1]) - 1;
|
||||
endmodule
|
||||
|
|
|
@ -31,53 +31,53 @@ module fctrl (
|
|||
input logic clk,
|
||||
input logic reset,
|
||||
// input control signals
|
||||
input logic StallE, StallM, StallW, // stall signals
|
||||
input logic FlushE, FlushM, FlushW, // flush signals
|
||||
input logic IntDivE, // is inteteger division
|
||||
input logic [2:0] FRM_REGW, // rounding mode from CSR
|
||||
input logic [1:0] STATUS_FS, // is FPU enabled?
|
||||
input logic FDivBusyE, // is the divider busy
|
||||
// intruction
|
||||
input logic [31:0] InstrD, // the full instruction
|
||||
input logic [6:0] Funct7D, // bits 31:25 of instruction - may contain percision
|
||||
input logic [6:0] OpD, // bits 6:0 of instruction
|
||||
input logic [4:0] Rs2D, // bits 24:20 of instruction
|
||||
input logic [2:0] Funct3D, Funct3E, // bits 14:12 of instruction - may contain rounding mode
|
||||
// input mux selections
|
||||
output logic XEnD, YEnD, ZEnD, // enable inputs
|
||||
output logic XEnE, YEnE, ZEnE, // enable inputs
|
||||
// opperation mux selections
|
||||
output logic FCvtIntE, FCvtIntW, // convert to integer opperation
|
||||
output logic [2:0] FrmM, // FP rounding mode
|
||||
output logic [`FMTBITS-1:0] FmtE, FmtM, // FP format
|
||||
output logic [2:0] OpCtrlE, OpCtrlM, // Select which opperation to do in each component
|
||||
output logic FpLoadStoreM, // FP load or store instruction
|
||||
output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit
|
||||
output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that finish in the memory stage
|
||||
input logic StallE, StallM, StallW, // stall signals
|
||||
input logic FlushE, FlushM, FlushW, // flush signals
|
||||
input logic IntDivE, // is inteteger division
|
||||
input logic [2:0] FRM_REGW, // rounding mode from CSR
|
||||
input logic [1:0] STATUS_FS, // is FPU enabled?
|
||||
input logic FDivBusyE, // is the divider busy
|
||||
// intruction
|
||||
input logic [31:0] InstrD, // the full instruction
|
||||
input logic [6:0] Funct7D, // bits 31:25 of instruction - may contain percision
|
||||
input logic [6:0] OpD, // bits 6:0 of instruction
|
||||
input logic [4:0] Rs2D, // bits 24:20 of instruction
|
||||
input logic [2:0] Funct3D, Funct3E, // bits 14:12 of instruction - may contain rounding mode
|
||||
// input mux selections
|
||||
output logic XEnD, YEnD, ZEnD, // enable inputs
|
||||
output logic XEnE, YEnE, ZEnE, // enable inputs
|
||||
// opperation mux selections
|
||||
output logic FCvtIntE, FCvtIntW, // convert to integer opperation
|
||||
output logic [2:0] FrmM, // FP rounding mode
|
||||
output logic [`FMTBITS-1:0] FmtE, FmtM, // FP format
|
||||
output logic [2:0] OpCtrlE, OpCtrlM, // Select which opperation to do in each component
|
||||
output logic FpLoadStoreM, // FP load or store instruction
|
||||
output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit
|
||||
output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that finish in the memory stage
|
||||
// register control signals
|
||||
output logic FRegWriteE, FRegWriteM, FRegWriteW, // FP register write enable
|
||||
output logic FWriteIntE, FWriteIntM, // Write to integer register
|
||||
output logic [4:0] Adr1D, Adr2D, Adr3D, // adresses of each input
|
||||
output logic [4:0] Adr1E, Adr2E, Adr3E, // adresses of each input
|
||||
output logic FRegWriteE, FRegWriteM, FRegWriteW, // FP register write enable
|
||||
output logic FWriteIntE, FWriteIntM, // Write to integer register
|
||||
output logic [4:0] Adr1D, Adr2D, Adr3D, // adresses of each input
|
||||
output logic [4:0] Adr1E, Adr2E, Adr3E, // adresses of each input
|
||||
// other control signals
|
||||
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
|
||||
output logic FDivStartE, IDivStartE // Start division or squareroot
|
||||
output logic FDivStartE, IDivStartE // Start division or squareroot
|
||||
);
|
||||
|
||||
`define FCTRLW 12
|
||||
|
||||
logic [`FCTRLW-1:0] ControlsD; // control signals
|
||||
logic FRegWriteD; // FP register write enable
|
||||
logic FDivStartD; // start division/sqrt
|
||||
logic FWriteIntD; // integer register write enable
|
||||
logic [2:0] OpCtrlD; // Select which opperation to do in each component
|
||||
logic [1:0] PostProcSelD; // select result in the post processing unit
|
||||
logic [1:0] FResSelD; // Select one of the results that finish in the memory stage
|
||||
logic [2:0] FrmD, FrmE; // FP rounding mode
|
||||
logic [`FMTBITS-1:0] FmtD; // FP format
|
||||
logic [1:0] Fmt; // format - before possible reduction
|
||||
logic SupportedFmt; // is the format supported
|
||||
logic FCvtIntD, FCvtIntM; // convert to integer opperation
|
||||
logic [`FCTRLW-1:0] ControlsD; // control signals
|
||||
logic FRegWriteD; // FP register write enable
|
||||
logic FDivStartD; // start division/sqrt
|
||||
logic FWriteIntD; // integer register write enable
|
||||
logic [2:0] OpCtrlD; // Select which opperation to do in each component
|
||||
logic [1:0] PostProcSelD; // select result in the post processing unit
|
||||
logic [1:0] FResSelD; // Select one of the results that finish in the memory stage
|
||||
logic [2:0] FrmD, FrmE; // FP rounding mode
|
||||
logic [`FMTBITS-1:0] FmtD; // FP format
|
||||
logic [1:0] Fmt; // format - before possible reduction
|
||||
logic SupportedFmt; // is the format supported
|
||||
logic FCvtIntD, FCvtIntM; // convert to integer opperation
|
||||
|
||||
// FPU Instruction Decoder
|
||||
assign Fmt = Funct7D[1:0];
|
||||
|
@ -123,7 +123,7 @@ module fctrl (
|
|||
7'b00001??: ControlsD = `FCTRLW'b1_0_01_10_111_0_0_0; // fsub
|
||||
7'b00010??: ControlsD = `FCTRLW'b1_0_01_10_100_0_0_0; // fmul
|
||||
7'b00011??: ControlsD = `FCTRLW'b1_0_01_01_xx0_1_0_0; // fdiv
|
||||
7'b01011??: ControlsD = `FCTRLW'b1_0_01_01_xx1_1_0_0; // fsqrt
|
||||
7'b01011??: if (Rs2D == 5'b0000) ControlsD = `FCTRLW'b1_0_01_01_xx1_1_0_0; // fsqrt
|
||||
7'b00100??: case(Funct3D)
|
||||
3'b000: ControlsD = `FCTRLW'b1_0_00_xx_000_0_0_0; // fsgnj
|
||||
3'b001: ControlsD = `FCTRLW'b1_0_00_xx_001_0_0_0; // fsgnjn
|
||||
|
@ -141,7 +141,8 @@ module fctrl (
|
|||
3'b000: ControlsD = `FCTRLW'b0_1_00_xx_011_0_0_0; // fle
|
||||
default: ControlsD = `FCTRLW'b0_0_00_xx_000__0_1_0; // non-implemented instruction
|
||||
endcase
|
||||
7'b11100??: if (Funct3D == 3'b001) ControlsD = `FCTRLW'b0_1_10_xx_000_0_0_0; // fclass
|
||||
7'b11100??: if (Funct3D == 3'b001 & Rs2D == 5'b00000)
|
||||
ControlsD = `FCTRLW'b0_1_10_xx_000_0_0_0; // fclass
|
||||
else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_11_xx_000_0_0_0; // fmv.x.w to int reg
|
||||
else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_11_xx_000_0_0_0; // fmv.x.d to int reg
|
||||
else ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // non-implemented instruction
|
||||
|
|
|
@ -30,20 +30,20 @@
|
|||
`include "wally-config.vh"
|
||||
|
||||
module fcvt (
|
||||
input logic Xs, // input's sign
|
||||
input logic [`NE-1:0] Xe, // input's exponent
|
||||
input logic [`NF:0] Xm, // input's fraction
|
||||
input logic [`XLEN-1:0] Int, // integer input - from IEU
|
||||
input logic [2:0] OpCtrl, // choose which opperation (look below for values)
|
||||
input logic ToInt, // is fp->int (since it's writting to the integer register)
|
||||
input logic XZero, // is the input zero
|
||||
input logic [`FMTBITS-1:0] Fmt, // the input's precision (11=quad 01=double 00=single 10=half)
|
||||
output logic [`NE:0] Ce, // the calculated expoent
|
||||
output logic [`LOGCVTLEN-1:0] ShiftAmt, // how much to shift by
|
||||
input logic Xs, // input's sign
|
||||
input logic [`NE-1:0] Xe, // input's exponent
|
||||
input logic [`NF:0] Xm, // input's fraction
|
||||
input logic [`XLEN-1:0] Int, // integer input - from IEU
|
||||
input logic [2:0] OpCtrl, // choose which opperation (look below for values)
|
||||
input logic ToInt, // is fp->int (since it's writting to the integer register)
|
||||
input logic XZero, // is the input zero
|
||||
input logic [`FMTBITS-1:0] Fmt, // the input's precision (11=quad 01=double 00=single 10=half)
|
||||
output logic [`NE:0] Ce, // the calculated expoent
|
||||
output logic [`LOGCVTLEN-1:0] ShiftAmt, // how much to shift by
|
||||
output logic ResSubnormUf,// does the result underflow or is subnormal
|
||||
output logic Cs, // the result's sign
|
||||
output logic IntZero, // is the integer zero?
|
||||
output logic [`CVTLEN-1:0] LzcIn // input to the Leading Zero Counter (priority encoder)
|
||||
output logic Cs, // the result's sign
|
||||
output logic IntZero, // is the integer zero?
|
||||
output logic [`CVTLEN-1:0] LzcIn // input to the Leading Zero Counter (priority encoder)
|
||||
);
|
||||
|
||||
// OpCtrls:
|
||||
|
@ -60,7 +60,7 @@ module fcvt (
|
|||
logic [`XLEN-1:0] PosInt; // the positive integer input
|
||||
logic [`XLEN-1:0] TrimInt; // integer trimmed to the correct size
|
||||
logic [`NE-2:0] NewBias; // the bias of the final result
|
||||
logic [`NE-1:0] OldExp; // the old exponent
|
||||
logic [`NE-1:0] OldExp; // the old exponent
|
||||
logic Signed; // is the opperation with a signed integer?
|
||||
logic Int64; // is the integer 64 bits?
|
||||
logic IntToFp; // is the opperation an int->fp conversion?
|
||||
|
|
|
@ -29,49 +29,49 @@
|
|||
`include "wally-config.vh"
|
||||
|
||||
module fdivsqrt(
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic [`FMTBITS-1:0] FmtE,
|
||||
input logic XsE,
|
||||
input logic [`NF:0] XmE, YmE,
|
||||
input logic [`NE-1:0] XeE, YeE,
|
||||
input logic XInfE, YInfE,
|
||||
input logic XZeroE, YZeroE,
|
||||
input logic XNaNE, YNaNE,
|
||||
input logic FDivStartE, IDivStartE,
|
||||
input logic StallM,
|
||||
input logic FlushE,
|
||||
input logic SqrtE, SqrtM,
|
||||
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
|
||||
input logic [2:0] Funct3E, Funct3M,
|
||||
input logic IntDivE, W64E,
|
||||
output logic DivStickyM,
|
||||
output logic FDivBusyE, IFDivStartE, FDivDoneE,
|
||||
output logic [`NE+1:0] QeM,
|
||||
output logic [`DIVb:0] QmM,
|
||||
output logic [`XLEN-1:0] FIntDivResultM
|
||||
input logic XsE,
|
||||
input logic [`NF:0] XmE, YmE,
|
||||
input logic [`NE-1:0] XeE, YeE,
|
||||
input logic XInfE, YInfE,
|
||||
input logic XZeroE, YZeroE,
|
||||
input logic XNaNE, YNaNE,
|
||||
input logic FDivStartE, IDivStartE,
|
||||
input logic StallM,
|
||||
input logic FlushE,
|
||||
input logic SqrtE, SqrtM,
|
||||
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
|
||||
input logic [2:0] Funct3E, Funct3M,
|
||||
input logic IntDivE, W64E,
|
||||
output logic DivStickyM,
|
||||
output logic FDivBusyE, IFDivStartE, FDivDoneE,
|
||||
output logic [`NE+1:0] QeM,
|
||||
output logic [`DIVb:0] QmM,
|
||||
output logic [`XLEN-1:0] FIntDivResultM
|
||||
);
|
||||
|
||||
// Floating-point division and square root module, with optional integer division and remainder
|
||||
// Computes X/Y, sqrt(X), A/B, or A%B
|
||||
|
||||
logic [`DIVb+3:0] WS, WC; // Partial remainder components
|
||||
logic [`DIVb+3:0] X; // Iterator Initial Value (from dividend)
|
||||
logic [`DIVb-1:0] DPreproc, D; // Iterator Divisor
|
||||
logic [`DIVb:0] FirstU, FirstUM; // Intermediate result values
|
||||
logic [`DIVb+1:0] FirstC; // Step tracker
|
||||
logic Firstun; // Quotient selection
|
||||
logic WZeroE; // Early termination flag
|
||||
logic SpecialCaseM; // Divide by zero, square root of negative, etc.
|
||||
logic DivStartE; // Enable signal for flops during stall
|
||||
|
||||
// Integer div/rem signals
|
||||
logic BZeroM; // Denominator is zero
|
||||
logic IntDivM; // Integer operation
|
||||
logic [`DIVBLEN:0] nE, nM, mM; // Shift amounts
|
||||
logic NegQuotM, ALTBM, AsM, W64M; // Special handling for postprocessor
|
||||
logic [`XLEN-1:0] AM; // Original Numerator for postprocessor
|
||||
logic ISpecialCaseE; // Integer div/remainder special cases
|
||||
logic [`DIVb+3:0] WS, WC; // Partial remainder components
|
||||
logic [`DIVb+3:0] X; // Iterator Initial Value (from dividend)
|
||||
logic [`DIVb-1:0] DPreproc, D; // Iterator Divisor
|
||||
logic [`DIVb:0] FirstU, FirstUM; // Intermediate result values
|
||||
logic [`DIVb+1:0] FirstC; // Step tracker
|
||||
logic Firstun; // Quotient selection
|
||||
logic WZeroE; // Early termination flag
|
||||
logic SpecialCaseM; // Divide by zero, square root of negative, etc.
|
||||
logic DivStartE; // Enable signal for flops during stall
|
||||
|
||||
// Integer div/rem signals
|
||||
logic BZeroM; // Denominator is zero
|
||||
logic IntDivM; // Integer operation
|
||||
logic [`DIVBLEN:0] nE, nM, mM; // Shift amounts
|
||||
logic NegQuotM, ALTBM, AsM, W64M; // Special handling for postprocessor
|
||||
logic [`XLEN-1:0] AM; // Original Numerator for postprocessor
|
||||
logic ISpecialCaseE; // Integer div/remainder special cases
|
||||
|
||||
fdivsqrtpreproc fdivsqrtpreproc( // Preprocessor
|
||||
.clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
|
||||
|
@ -100,4 +100,4 @@ module fdivsqrt(
|
|||
// Int-specific
|
||||
.nM, .mM, .ALTBM, .AsM, .BZeroM, .NegQuotM, .W64M, .RemOpM(Funct3M[1]), .AM,
|
||||
.FIntDivResultM);
|
||||
endmodule
|
||||
endmodule
|
||||
|
|
|
@ -69,6 +69,6 @@ module fdivsqrtexpcalc(
|
|||
assign SExp = {SXExp[`NE+1], SXExp[`NE+1:1]} + {2'b0, Bias};
|
||||
|
||||
// correct exponent for subnormal input's normalization shifts
|
||||
assign DExp = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}) & {`NE+2{~XZero}}; // *** why Xzero? Is this a hack for postprocessor?
|
||||
assign DExp = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias});
|
||||
assign Qe = Sqrt ? SExp : DExp;
|
||||
endmodule
|
||||
|
|
|
@ -37,7 +37,7 @@ module fdivsqrtpostproc(
|
|||
input logic [`DIVb+1:0] FirstC,
|
||||
input logic SqrtE,
|
||||
input logic Firstun, SqrtM, SpecialCaseM, NegQuotM,
|
||||
input logic [`XLEN-1:0] AM,
|
||||
input logic [`XLEN-1:0] AM,
|
||||
input logic RemOpM, ALTBM, BZeroM, AsM, W64M,
|
||||
input logic [`DIVBLEN:0] nM, mM,
|
||||
output logic [`DIVb:0] QmM,
|
||||
|
@ -46,11 +46,11 @@ module fdivsqrtpostproc(
|
|||
output logic [`XLEN-1:0] FIntDivResultM
|
||||
);
|
||||
|
||||
logic [`DIVb+3:0] W, Sum, DM;
|
||||
logic [`DIVb:0] PreQmM;
|
||||
logic NegStickyM;
|
||||
logic weq0E, WZeroM;
|
||||
logic [`XLEN-1:0] IntDivResultM;
|
||||
logic [`DIVb+3:0] W, Sum, DM;
|
||||
logic [`DIVb:0] PreQmM;
|
||||
logic NegStickyM;
|
||||
logic weq0E, WZeroM;
|
||||
logic [`XLEN-1:0] IntDivResultM;
|
||||
|
||||
//////////////////////////
|
||||
// Execute Stage: Detect early termination for an exact result
|
||||
|
@ -134,4 +134,4 @@ module fdivsqrtpostproc(
|
|||
end else
|
||||
assign FIntDivResultM = IntDivResultM[`XLEN-1:0];
|
||||
end
|
||||
endmodule
|
||||
endmodule
|
||||
|
|
|
@ -29,35 +29,35 @@
|
|||
`include "wally-config.vh"
|
||||
|
||||
module fdivsqrtpreproc (
|
||||
input logic clk,
|
||||
input logic IFDivStartE,
|
||||
input logic [`NF:0] Xm, Ym,
|
||||
input logic [`NE-1:0] Xe, Ye,
|
||||
input logic clk,
|
||||
input logic IFDivStartE,
|
||||
input logic [`NF:0] Xm, Ym,
|
||||
input logic [`NE-1:0] Xe, Ye,
|
||||
input logic [`FMTBITS-1:0] Fmt,
|
||||
input logic Sqrt,
|
||||
input logic XZeroE,
|
||||
input logic [2:0] Funct3E,
|
||||
output logic [`NE+1:0] QeM,
|
||||
output logic [`DIVb+3:0] X,
|
||||
output logic [`DIVb-1:0] DPreproc,
|
||||
input logic Sqrt,
|
||||
input logic XZeroE,
|
||||
input logic [2:0] Funct3E,
|
||||
output logic [`NE+1:0] QeM,
|
||||
output logic [`DIVb+3:0] X,
|
||||
output logic [`DIVb-1:0] DPreproc,
|
||||
// Int-specific
|
||||
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
|
||||
input logic IntDivE, W64E,
|
||||
output logic ISpecialCaseE,
|
||||
output logic [`DIVBLEN:0] nE, nM, mM,
|
||||
output logic NegQuotM, ALTBM, IntDivM, W64M,
|
||||
output logic AsM, BZeroM,
|
||||
output logic [`XLEN-1:0] AM
|
||||
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
|
||||
input logic IntDivE, W64E,
|
||||
output logic ISpecialCaseE,
|
||||
output logic [`DIVBLEN:0] nE, nM, mM,
|
||||
output logic NegQuotM, ALTBM, IntDivM, W64M,
|
||||
output logic AsM, BZeroM,
|
||||
output logic [`XLEN-1:0] AM
|
||||
);
|
||||
|
||||
logic [`DIVb-1:0] XPreproc;
|
||||
logic [`DIVb:0] PreSqrtX;
|
||||
logic [`DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
|
||||
logic [`NE+1:0] QeE; // Quotient Exponent (FP only)
|
||||
logic [`DIVb-1:0] IFNormLenX, IFNormLenD; // Correctly-sized inputs for iterator
|
||||
logic [`DIVBLEN:0] mE, ell; // Leading zeros of inputs
|
||||
logic NumerZeroE; // Numerator is zero (X or A)
|
||||
logic AZeroE, BZeroE; // A or B is Zero for integer division
|
||||
logic [`DIVb-1:0] XPreproc;
|
||||
logic [`DIVb:0] PreSqrtX;
|
||||
logic [`DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
|
||||
logic [`NE+1:0] QeE; // Quotient Exponent (FP only)
|
||||
logic [`DIVb-1:0] IFNormLenX, IFNormLenD; // Correctly-sized inputs for iterator
|
||||
logic [`DIVBLEN:0] mE, ell; // Leading zeros of inputs
|
||||
logic NumerZeroE; // Numerator is zero (X or A)
|
||||
logic AZeroE, BZeroE; // A or B is Zero for integer division
|
||||
|
||||
if (`IDIV_ON_FPU) begin:intpreproc // Int Supported
|
||||
logic signedDiv, NegQuotE;
|
||||
|
|
|
@ -45,11 +45,11 @@ module fdivsqrtqsel2 (
|
|||
assign g = ps & pc;
|
||||
|
||||
assign magnitude = ~((ps[2]^pc[2]) & (ps[1]^pc[1]) &
|
||||
(ps[0]^pc[0]));
|
||||
(ps[0]^pc[0]));
|
||||
assign sign = (ps[3]^pc[3])^
|
||||
(ps[2] & pc[2] | ((ps[2]^pc[2]) &
|
||||
(ps[1]&pc[1] | ((ps[1]^pc[1]) &
|
||||
(ps[0]&pc[0])))));
|
||||
(ps[1]&pc[1] | ((ps[1]^pc[1]) &
|
||||
(ps[0]&pc[0])))));
|
||||
|
||||
// Produce digit = +1, 0, or -1
|
||||
assign up = magnitude & ~sign;
|
||||
|
|
|
@ -32,21 +32,21 @@ module fdivsqrtqsel4 (
|
|||
input logic [2:0] Dmsbs,
|
||||
input logic [4:0] Smsbs,
|
||||
input logic [7:0] WSmsbs, WCmsbs,
|
||||
input logic Sqrt, j1,
|
||||
input logic Sqrt, j1,
|
||||
output logic [3:0] udigit
|
||||
);
|
||||
logic [6:0] Wmsbs;
|
||||
logic [7:0] PreWmsbs;
|
||||
logic [2:0] A;
|
||||
logic [6:0] Wmsbs;
|
||||
logic [7:0] PreWmsbs;
|
||||
logic [2:0] A;
|
||||
|
||||
assign PreWmsbs = WCmsbs + WSmsbs;
|
||||
assign Wmsbs = PreWmsbs[7:1];
|
||||
// D = 0001.xxx...
|
||||
// Dmsbs = | |
|
||||
assign PreWmsbs = WCmsbs + WSmsbs;
|
||||
assign Wmsbs = PreWmsbs[7:1];
|
||||
// D = 0001.xxx...
|
||||
// Dmsbs = | |
|
||||
// W = xxxx.xxx...
|
||||
// Wmsbs = | |
|
||||
// Wmsbs = | |
|
||||
|
||||
logic [3:0] USel4[1023:0];
|
||||
logic [3:0] USel4[1023:0];
|
||||
|
||||
// Prepopulate selection table; this is constant at compile time
|
||||
always_comb begin
|
||||
|
@ -109,5 +109,5 @@ module fdivsqrtqsel4 (
|
|||
end else A = Dmsbs;
|
||||
|
||||
// Select quotient digit from lookup table based on A and W
|
||||
assign udigit = USel4[{A,Wmsbs}];
|
||||
assign udigit = USel4[{A,Wmsbs}];
|
||||
endmodule
|
||||
|
|
|
@ -32,19 +32,19 @@ module fdivsqrtqsel4cmp (
|
|||
input logic [2:0] Dmsbs,
|
||||
input logic [4:0] Smsbs,
|
||||
input logic [7:0] WSmsbs, WCmsbs,
|
||||
input logic SqrtE, j1,
|
||||
input logic SqrtE, j1,
|
||||
output logic [3:0] udigit
|
||||
);
|
||||
logic [6:0] Wmsbs;
|
||||
logic [7:0] PreWmsbs;
|
||||
logic [2:0] A;
|
||||
logic [6:0] Wmsbs;
|
||||
logic [7:0] PreWmsbs;
|
||||
logic [2:0] A;
|
||||
|
||||
assign PreWmsbs = WCmsbs + WSmsbs;
|
||||
assign Wmsbs = PreWmsbs[7:1];
|
||||
// D = 0001.xxx...
|
||||
// Dmsbs = | |
|
||||
assign PreWmsbs = WCmsbs + WSmsbs;
|
||||
assign Wmsbs = PreWmsbs[7:1];
|
||||
// D = 0001.xxx...
|
||||
// Dmsbs = | |
|
||||
// W = xxxx.xxx...
|
||||
// Wmsbs = | |
|
||||
// Wmsbs = | |
|
||||
|
||||
logic [6:0] mk2, mk1, mk0, mkm1;
|
||||
logic [6:0] mks2[7:0], mks1[7:0];
|
||||
|
@ -87,5 +87,5 @@ module fdivsqrtqsel4cmp (
|
|||
else if ($signed(Wmsbs) >= $signed(mk1)) udigit = 4'b0100; // choose 1
|
||||
else if ($signed(Wmsbs) >= $signed(mk0)) udigit = 4'b0000; // choose 0
|
||||
else if ($signed(Wmsbs) >= $signed(mkm1)) udigit = 4'b0010; // choose -1
|
||||
else udigit = 4'b0001; // choose -2
|
||||
else udigit = 4'b0001; // choose -2
|
||||
endmodule
|
||||
|
|
|
@ -31,32 +31,32 @@
|
|||
/* verilator lint_off UNOPTFLAT */
|
||||
module fdivsqrtstage2 (
|
||||
input logic [`DIVb-1:0] D,
|
||||
input logic [`DIVb+3:0] DBar,
|
||||
input logic [`DIVb:0] U, UM,
|
||||
input logic [`DIVb+3:0] WS, WC,
|
||||
input logic [`DIVb+3:0] DBar,
|
||||
input logic [`DIVb:0] U, UM,
|
||||
input logic [`DIVb+3:0] WS, WC,
|
||||
input logic [`DIVb+1:0] C,
|
||||
input logic SqrtE,
|
||||
output logic un,
|
||||
input logic SqrtE,
|
||||
output logic un,
|
||||
output logic [`DIVb+1:0] CNext,
|
||||
output logic [`DIVb:0] UNext, UMNext,
|
||||
output logic [`DIVb+3:0] WSNext, WCNext
|
||||
output logic [`DIVb:0] UNext, UMNext,
|
||||
output logic [`DIVb+3:0] WSNext, WCNext
|
||||
);
|
||||
/* verilator lint_on UNOPTFLAT */
|
||||
|
||||
logic [`DIVb+3:0] Dsel;
|
||||
logic up, uz;
|
||||
logic [`DIVb+3:0] F;
|
||||
logic [`DIVb+3:0] AddIn;
|
||||
logic [`DIVb+3:0] WSA, WCA;
|
||||
logic [`DIVb+3:0] Dsel;
|
||||
logic up, uz;
|
||||
logic [`DIVb+3:0] F;
|
||||
logic [`DIVb+3:0] AddIn;
|
||||
logic [`DIVb+3:0] WSA, WCA;
|
||||
|
||||
// Qmient Selection logic
|
||||
// Given partial remainder, select digit of +1, 0, or -1 (up, uz, un)
|
||||
// q encoding:
|
||||
// 1000 = +2
|
||||
// 0100 = +1
|
||||
// 0000 = 0
|
||||
// 0010 = -1
|
||||
// 0001 = -2
|
||||
// 1000 = +2
|
||||
// 0100 = +1
|
||||
// 0000 = 0
|
||||
// 0010 = -1
|
||||
// 0001 = -2
|
||||
fdivsqrtqsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], up, uz, un);
|
||||
|
||||
// Sqrt F generation. Extend C, U, UM to Q4.k
|
||||
|
|
|
@ -30,34 +30,34 @@
|
|||
|
||||
module fdivsqrtstage4 (
|
||||
input logic [`DIVb-1:0] D,
|
||||
input logic [`DIVb+3:0] DBar, D2, DBar2,
|
||||
input logic [`DIVb:0] U, UM,
|
||||
input logic [`DIVb+3:0] WS, WC,
|
||||
input logic [`DIVb+3:0] DBar, D2, DBar2,
|
||||
input logic [`DIVb:0] U,UM,
|
||||
input logic [`DIVb+3:0] WS, WC,
|
||||
input logic [`DIVb+1:0] C,
|
||||
input logic SqrtE, j1,
|
||||
input logic SqrtE, j1,
|
||||
output logic [`DIVb+1:0] CNext,
|
||||
output logic un,
|
||||
output logic [`DIVb:0] UNext, UMNext,
|
||||
output logic [`DIVb+3:0] WSNext, WCNext
|
||||
output logic un,
|
||||
output logic [`DIVb:0] UNext, UMNext,
|
||||
output logic [`DIVb+3:0] WSNext, WCNext
|
||||
);
|
||||
|
||||
logic [`DIVb+3:0] Dsel;
|
||||
logic [3:0] udigit;
|
||||
logic [`DIVb+3:0] F;
|
||||
logic [`DIVb+3:0] AddIn;
|
||||
logic [4:0] Smsbs;
|
||||
logic [2:0] Dmsbs;
|
||||
logic [7:0] WCmsbs, WSmsbs;
|
||||
logic CarryIn;
|
||||
logic [`DIVb+3:0] WSA, WCA;
|
||||
logic [`DIVb+3:0] Dsel;
|
||||
logic [3:0] udigit;
|
||||
logic [`DIVb+3:0] F;
|
||||
logic [`DIVb+3:0] AddIn;
|
||||
logic [4:0] Smsbs;
|
||||
logic [2:0] Dmsbs;
|
||||
logic [7:0] WCmsbs, WSmsbs;
|
||||
logic CarryIn;
|
||||
logic [`DIVb+3:0] WSA, WCA;
|
||||
|
||||
// Digit Selection logic
|
||||
// u encoding:
|
||||
// 1000 = +2
|
||||
// 0100 = +1
|
||||
// 0000 = 0
|
||||
// 0010 = -1
|
||||
// 0001 = -2
|
||||
// 1000 = +2
|
||||
// 0100 = +1
|
||||
// 0000 = 0
|
||||
// 0010 = -1
|
||||
// 0001 = -2
|
||||
assign Smsbs = U[`DIVb:`DIVb-4];
|
||||
assign Dmsbs = D[`DIVb-1:`DIVb-3];
|
||||
assign WCmsbs = WC[`DIVb+3:`DIVb-4];
|
||||
|
|
|
@ -32,12 +32,12 @@
|
|||
module fmalza #(WIDTH) (
|
||||
input logic [WIDTH-1:0] A, // addend
|
||||
input logic [2*`NF+1:0] Pm, // product
|
||||
input logic Cin, // carry in
|
||||
input logic Cin, // carry in
|
||||
input logic sub, // subtraction
|
||||
output logic [$clog2(WIDTH+1)-1:0] SCnt // normalization shift count for the positive result
|
||||
);
|
||||
|
||||
logic [WIDTH:0] F; // most significant bit of F indicates leading digit
|
||||
logic [WIDTH:0] F; // most significant bit of F indicates leading digit
|
||||
logic [WIDTH-1:0] B; // zero-extended product with same size as aligned A
|
||||
logic [WIDTH-1:0] P, G, K; // propagate, generate, kill for each column
|
||||
logic [WIDTH-1:0] Pp1, Gm1, Km1; // propagate shifted right by 1, generate/kill shifted left 1
|
||||
|
|
222
src/fpu/fpu.sv
222
src/fpu/fpu.sv
|
@ -29,40 +29,40 @@
|
|||
`include "wally-config.vh"
|
||||
|
||||
module fpu (
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
// Hazards
|
||||
input logic StallE, StallM, StallW, // stall signals (from HZU)
|
||||
input logic FlushE, FlushM, FlushW, // flush signals (from HZU)
|
||||
output logic FPUStallD, // Stall the decode stage (To HZU)
|
||||
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) (to HZU)
|
||||
// CSRs
|
||||
input logic [1:0] STATUS_FS, // Is floating-point enabled? (From privileged unit)
|
||||
input logic [2:0] FRM_REGW, // Rounding mode (from CSR)
|
||||
// Decode stage
|
||||
input logic [31:0] InstrD, // instruction (from IFU)
|
||||
// Execute stage
|
||||
input logic [2:0] Funct3E, // Funct fields of instruction specify type of operations
|
||||
input logic IntDivE, W64E, // Integer division on FPU
|
||||
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // Integer input for convert, move, and int div (from IEU)
|
||||
input logic [4:0] RdE, // which FP register to write to (from IEU)
|
||||
output logic FWriteIntE, // integer register write enable (to IEU)
|
||||
output logic FCvtIntE, // Convert to int (to IEU)
|
||||
// Memory stage
|
||||
input logic [2:0] Funct3M, // Funct fields of instruction specify type of operations
|
||||
input logic [4:0] RdM, // which FP register to write to (from IEU)
|
||||
output logic FRegWriteM, // FP register write enable (to privileged unit)
|
||||
output logic FpLoadStoreM, // Fp load instruction? (to LSU)
|
||||
output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to LSU)
|
||||
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register (to IEU)
|
||||
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction (to IFU)
|
||||
output logic [4:0] SetFflagsM, // FPU flags (to privileged unit)
|
||||
// Writeback stage
|
||||
input logic [4:0] RdW, // which FP register to write to (from IEU)
|
||||
input logic [`FLEN-1:0] ReadDataW, // Read data (from LSU)
|
||||
output logic [`XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU)
|
||||
output logic FCvtIntW, // select FCvtIntRes (to IEU)
|
||||
output logic [`XLEN-1:0] FIntDivResultW // Result from integer division (to IEU)
|
||||
input logic StallE, StallM, StallW, // stall signals (from HZU)
|
||||
input logic FlushE, FlushM, FlushW, // flush signals (from HZU)
|
||||
output logic FPUStallD, // Stall the decode stage (To HZU)
|
||||
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) (to HZU)
|
||||
// CSRs
|
||||
input logic [1:0] STATUS_FS, // Is floating-point enabled? (From privileged unit)
|
||||
input logic [2:0] FRM_REGW, // Rounding mode (from CSR)
|
||||
// Decode stage
|
||||
input logic [31:0] InstrD, // instruction (from IFU)
|
||||
// Execute stage
|
||||
input logic [2:0] Funct3E, // Funct fields of instruction specify type of operations
|
||||
input logic IntDivE, W64E, // Integer division on FPU
|
||||
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // Integer input for convert, move, and int div (from IEU)
|
||||
input logic [4:0] RdE, // which FP register to write to (from IEU)
|
||||
output logic FWriteIntE, // integer register write enable (to IEU)
|
||||
output logic FCvtIntE, // Convert to int (to IEU)
|
||||
// Memory stage
|
||||
input logic [2:0] Funct3M, // Funct fields of instruction specify type of operations
|
||||
input logic [4:0] RdM, // which FP register to write to (from IEU)
|
||||
output logic FRegWriteM, // FP register write enable (to privileged unit)
|
||||
output logic FpLoadStoreM, // Fp load instruction? (to LSU)
|
||||
output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to LSU)
|
||||
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register (to IEU)
|
||||
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction (to IFU)
|
||||
output logic [4:0] SetFflagsM, // FPU flags (to privileged unit)
|
||||
// Writeback stage
|
||||
input logic [4:0] RdW, // which FP register to write to (from IEU)
|
||||
input logic [`FLEN-1:0] ReadDataW, // Read data (from LSU)
|
||||
output logic [`XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU)
|
||||
output logic FCvtIntW, // select FCvtIntRes (to IEU)
|
||||
output logic [`XLEN-1:0] FIntDivResultW // Result from integer division (to IEU)
|
||||
);
|
||||
|
||||
// RISC-V FPU specifics:
|
||||
|
@ -70,97 +70,97 @@ module fpu (
|
|||
// - RISC-V detects underflow after rounding
|
||||
|
||||
// control signals
|
||||
logic FRegWriteW; // FP register write enable
|
||||
logic [2:0] FrmM; // FP rounding mode
|
||||
logic [`FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double
|
||||
logic FDivStartE, IDivStartE; // Start division or squareroot
|
||||
logic FWriteIntM; // Write to integer register
|
||||
logic [1:0] ForwardXE, ForwardYE, ForwardZE; // forwarding mux control signals
|
||||
logic [2:0] OpCtrlE, OpCtrlM; // Select which opperation to do in each component
|
||||
logic [1:0] FResSelE, FResSelM, FResSelW; // Select one of the results that finish in the memory stage
|
||||
logic [1:0] PostProcSelE, PostProcSelM; // select result in the post processing unit
|
||||
logic [4:0] Adr1D, Adr2D, Adr3D; // register adresses of each input
|
||||
logic [4:0] Adr1E, Adr2E, Adr3E; // register adresses of each input
|
||||
logic XEnD, YEnD, ZEnD; // X, Y, Z inputs used for current operation
|
||||
logic XEnE, YEnE, ZEnE; // X, Y, Z inputs used for current operation
|
||||
logic FRegWriteE; // Write floating-point register
|
||||
logic FRegWriteW; // FP register write enable
|
||||
logic [2:0] FrmM; // FP rounding mode
|
||||
logic [`FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double
|
||||
logic FDivStartE, IDivStartE; // Start division or squareroot
|
||||
logic FWriteIntM; // Write to integer register
|
||||
logic [1:0] ForwardXE, ForwardYE, ForwardZE; // forwarding mux control signals
|
||||
logic [2:0] OpCtrlE, OpCtrlM; // Select which opperation to do in each component
|
||||
logic [1:0] FResSelE, FResSelM, FResSelW; // Select one of the results that finish in the memory stage
|
||||
logic [1:0] PostProcSelE, PostProcSelM; // select result in the post processing unit
|
||||
logic [4:0] Adr1D, Adr2D, Adr3D; // register adresses of each input
|
||||
logic [4:0] Adr1E, Adr2E, Adr3E; // register adresses of each input
|
||||
logic XEnD, YEnD, ZEnD; // X, Y, Z inputs used for current operation
|
||||
logic XEnE, YEnE, ZEnE; // X, Y, Z inputs used for current operation
|
||||
logic FRegWriteE; // Write floating-point register
|
||||
|
||||
// regfile signals
|
||||
logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
|
||||
logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
|
||||
logic [`FLEN-1:0] XE; // Input 1 to the various units (after forwarding)
|
||||
logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding)
|
||||
logic [`FLEN-1:0] PreYE, YE; // Input 2 to the various units (after forwarding)
|
||||
logic [`FLEN-1:0] PreZE, ZE; // Input 3 to the various units (after forwarding)
|
||||
logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
|
||||
logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
|
||||
logic [`FLEN-1:0] XE; // Input 1 to the various units (after forwarding)
|
||||
logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding)
|
||||
logic [`FLEN-1:0] PreYE, YE; // Input 2 to the various units (after forwarding)
|
||||
logic [`FLEN-1:0] PreZE, ZE; // Input 3 to the various units (after forwarding)
|
||||
|
||||
// unpacking signals
|
||||
logic XsE, YsE, ZsE; // input's sign - execute stage
|
||||
logic XsM, YsM; // input's sign - memory stage
|
||||
logic [`NE-1:0] XeE, YeE, ZeE; // input's exponent - execute stage
|
||||
logic [`NE-1:0] ZeM; // input's exponent - memory stage
|
||||
logic [`NF:0] XmE, YmE, ZmE; // input's significand - execute stage
|
||||
logic [`NF:0] XmM, YmM, ZmM; // input's significand - memory stage
|
||||
logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage
|
||||
logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage
|
||||
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
|
||||
logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage
|
||||
logic XSubnormE; // is the input subnormal
|
||||
logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage
|
||||
logic XZeroM, YZeroM; // is the input zero - memory stage
|
||||
logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage
|
||||
logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage
|
||||
logic XExpMaxE; // is the exponent all ones (max value)
|
||||
logic [`FLEN-1:0] XPostBoxE; // X after fixing bad NaN box. Needed for 1-input operations
|
||||
logic XsE, YsE, ZsE; // input's sign - execute stage
|
||||
logic XsM, YsM; // input's sign - memory stage
|
||||
logic [`NE-1:0] XeE, YeE, ZeE; // input's exponent - execute stage
|
||||
logic [`NE-1:0] ZeM; // input's exponent - memory stage
|
||||
logic [`NF:0] XmE, YmE, ZmE; // input's significand - execute stage
|
||||
logic [`NF:0] XmM, YmM, ZmM; // input's significand - memory stage
|
||||
logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage
|
||||
logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage
|
||||
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
|
||||
logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage
|
||||
logic XSubnormE; // is the input subnormal
|
||||
logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage
|
||||
logic XZeroM, YZeroM; // is the input zero - memory stage
|
||||
logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage
|
||||
logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage
|
||||
logic XExpMaxE; // is the exponent all ones (max value)
|
||||
logic [`FLEN-1:0] XPostBoxE; // X after fixing bad NaN box. Needed for 1-input operations
|
||||
|
||||
// Fma Signals
|
||||
logic FmaAddSubE; // Multiply by 1.0 when adding or subtracting
|
||||
logic [1:0] FmaZSelE; // Select Z = Y when adding or subtracting, 0 when multiplying
|
||||
logic [3*`NF+3:0] SmE, SmM; // Sum significand
|
||||
logic FmaAStickyE, FmaAStickyM; // FMA addend sticky bit output
|
||||
logic [`NE+1:0] SeE,SeM; // Sum exponent
|
||||
logic InvAE, InvAM; // Invert addend
|
||||
logic AsE, AsM; // Addend sign
|
||||
logic PsE, PsM; // Product sign
|
||||
logic SsE, SsM; // Sum sign
|
||||
logic [$clog2(3*`NF+5)-1:0] SCntE, SCntM; // LZA sum leading zero count
|
||||
logic FmaAddSubE; // Multiply by 1.0 when adding or subtracting
|
||||
logic [1:0] FmaZSelE; // Select Z = Y when adding or subtracting, 0 when multiplying
|
||||
logic [3*`NF+3:0] SmE, SmM; // Sum significand
|
||||
logic FmaAStickyE, FmaAStickyM; // FMA addend sticky bit output
|
||||
logic [`NE+1:0] SeE,SeM; // Sum exponent
|
||||
logic InvAE, InvAM; // Invert addend
|
||||
logic AsE, AsM; // Addend sign
|
||||
logic PsE, PsM; // Product sign
|
||||
logic SsE, SsM; // Sum sign
|
||||
logic [$clog2(3*`NF+5)-1:0] SCntE, SCntM; // LZA sum leading zero count
|
||||
|
||||
// Cvt Signals
|
||||
logic [`NE:0] CeE, CeM; // convert intermediate expoent
|
||||
logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
|
||||
logic CvtResSubnormUfE, CvtResSubnormUfM; // does the result underflow or is subnormal
|
||||
logic CsE, CsM; // convert result sign
|
||||
logic IntZeroE, IntZeroM; // is the integer zero?
|
||||
logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
|
||||
logic [`XLEN-1:0] FCvtIntResM; // fcvt integer result (for IEU)
|
||||
logic [`NE:0] CeE, CeM; // convert intermediate expoent
|
||||
logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
|
||||
logic CvtResSubnormUfE, CvtResSubnormUfM; // does the result underflow or is subnormal
|
||||
logic CsE, CsM; // convert result sign
|
||||
logic IntZeroE, IntZeroM; // is the integer zero?
|
||||
logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
|
||||
logic [`XLEN-1:0] FCvtIntResM; // fcvt integer result (for IEU)
|
||||
|
||||
// divide signals
|
||||
logic [`DIVb:0] QmM; // fdivsqrt signifcand
|
||||
logic [`NE+1:0] QeM; // fdivsqrt exponent
|
||||
logic DivStickyM; // fdivsqrt sticky bit
|
||||
logic FDivDoneE, IFDivStartE; // fdivsqrt control signals
|
||||
logic [`XLEN-1:0] FIntDivResultM; // fdivsqrt integer division result (for IEU)
|
||||
logic [`DIVb:0] QmM; // fdivsqrt signifcand
|
||||
logic [`NE+1:0] QeM; // fdivsqrt exponent
|
||||
logic DivStickyM; // fdivsqrt sticky bit
|
||||
logic FDivDoneE, IFDivStartE; // fdivsqrt control signals
|
||||
logic [`XLEN-1:0] FIntDivResultM; // fdivsqrt integer division result (for IEU)
|
||||
|
||||
// result and flag signals
|
||||
logic [`XLEN-1:0] ClassResE; // classify result
|
||||
logic [`FLEN-1:0] CmpFpResE; // compare result to FPU (min/max)
|
||||
logic [`XLEN-1:0] CmpIntResE; // compare result to IEU (eq/lt/le)
|
||||
logic CmpNVE; // compare invalid flag (Not Valid)
|
||||
logic [`FLEN-1:0] SgnResE; // sign injection result
|
||||
logic [`XLEN-1:0] FIntResE; // FPU to IEU E-stage result (classify, compare, move)
|
||||
logic [`FLEN-1:0] PostProcResM; // Postprocessor output
|
||||
logic [4:0] PostProcFlgM; // Postprocessor flags
|
||||
logic PreNVE, PreNVM; // selected flag that is ready in the memory stage
|
||||
logic [`FLEN-1:0] FpResM, FpResW; // FPU preliminary result
|
||||
logic [`FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage
|
||||
logic [`FLEN-1:0] FResultW; // final FP result being written to the FP register
|
||||
logic [`XLEN-1:0] ClassResE; // classify result
|
||||
logic [`FLEN-1:0] CmpFpResE; // compare result to FPU (min/max)
|
||||
logic [`XLEN-1:0] CmpIntResE; // compare result to IEU (eq/lt/le)
|
||||
logic CmpNVE; // compare invalid flag (Not Valid)
|
||||
logic [`FLEN-1:0] SgnResE; // sign injection result
|
||||
logic [`XLEN-1:0] FIntResE; // FPU to IEU E-stage result (classify, compare, move)
|
||||
logic [`FLEN-1:0] PostProcResM; // Postprocessor output
|
||||
logic [4:0] PostProcFlgM; // Postprocessor flags
|
||||
logic PreNVE, PreNVM; // selected flag that is ready in the memory stage
|
||||
logic [`FLEN-1:0] FpResM, FpResW; // FPU preliminary result
|
||||
logic [`FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage
|
||||
logic [`FLEN-1:0] FResultW; // final FP result being written to the FP register
|
||||
|
||||
// other signals
|
||||
logic [`FLEN-1:0] AlignedSrcAE; // align SrcA from IEU to the floating point format for fmv
|
||||
logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed
|
||||
logic [`FLEN-1:0] BoxedOneE; // One value for Z for multiplication, with NaN boxing if needed
|
||||
logic StallUnpackedM; // Stall unpacker outputs during multicycle fdivsqrt
|
||||
logic [`FLEN-1:0] SgnExtXE; // Sign-extended X input for move to integer
|
||||
logic mvsgn; // sign bit for extending move
|
||||
logic [`FLEN-1:0] AlignedSrcAE; // align SrcA from IEU to the floating point format for fmv
|
||||
logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed
|
||||
logic [`FLEN-1:0] BoxedOneE; // One value for Z for multiplication, with NaN boxing if needed
|
||||
logic StallUnpackedM; // Stall unpacker outputs during multicycle fdivsqrt
|
||||
logic [`FLEN-1:0] SgnExtXE; // Sign-extended X input for move to integer
|
||||
logic mvsgn; // sign bit for extending move
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Decode Stage: fctrl decoder, read register file
|
||||
|
@ -180,7 +180,7 @@ module fpu (
|
|||
fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
|
||||
.a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]),
|
||||
.a4(RdW), .wd4(FResultW),
|
||||
.rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D));
|
||||
.rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D));
|
||||
|
||||
// D/E pipeline registers
|
||||
flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
|
||||
|
|
|
@ -29,8 +29,8 @@
|
|||
`include "wally-config.vh"
|
||||
|
||||
module fregfile (
|
||||
input logic clk, reset,
|
||||
input logic we4, // write enable
|
||||
input logic clk, reset,
|
||||
input logic we4, // write enable
|
||||
input logic [4:0] a1, a2, a3, a4, // adresses
|
||||
input logic [`FLEN-1:0] wd4, // write data
|
||||
output logic [`FLEN-1:0] rd1, rd2, rd3 // read data
|
||||
|
@ -46,7 +46,7 @@ module fregfile (
|
|||
|
||||
always_ff @(negedge clk) // or posedge reset)
|
||||
if (reset) for(i=0; i<32; i++) rf[i] <= 0;
|
||||
else if (we4) rf[a4] <= wd4;
|
||||
else if (we4) rf[a4] <= wd4;
|
||||
|
||||
assign #2 rd1 = rf[a1];
|
||||
assign #2 rd2 = rf[a2];
|
||||
|
|
|
@ -29,43 +29,43 @@
|
|||
`include "wally-config.vh"
|
||||
|
||||
module fsgninj (
|
||||
input logic Xs, Ys, // X and Y sign bits
|
||||
input logic [`FLEN-1:0] X, // X
|
||||
input logic [`FMTBITS-1:0] Fmt, // format
|
||||
input logic [1:0] OpCtrl, // operation control
|
||||
output logic [`FLEN-1:0] SgnRes // result
|
||||
input logic Xs, Ys, // X and Y sign bits
|
||||
input logic [`FLEN-1:0] X, // X
|
||||
input logic [`FMTBITS-1:0] Fmt, // format
|
||||
input logic [1:0] OpCtrl, // operation control
|
||||
output logic [`FLEN-1:0] SgnRes // result
|
||||
);
|
||||
|
||||
logic ResSgn; // result sign
|
||||
logic ResSgn; // result sign
|
||||
|
||||
// OpCtrl:
|
||||
// 00 - fsgnj - directly copy over sign value of Y
|
||||
// 01 - fsgnjn - negate sign value of Y
|
||||
// 10 - fsgnjx - XOR sign values of X and Y
|
||||
|
||||
// calculate the result's sign
|
||||
assign ResSgn = (OpCtrl[1] ? Xs : OpCtrl[0]) ^ Ys;
|
||||
|
||||
// format final result based on precision
|
||||
// - uses NaN-blocking format
|
||||
// - if there are any unsused bits the most significant bits are filled with 1s
|
||||
|
||||
// OpCtrl:
|
||||
// 00 - fsgnj - directly copy over sign value of Y
|
||||
// 01 - fsgnjn - negate sign value of Y
|
||||
// 10 - fsgnjx - XOR sign values of X and Y
|
||||
|
||||
// calculate the result's sign
|
||||
assign ResSgn = (OpCtrl[1] ? Xs : OpCtrl[0]) ^ Ys;
|
||||
|
||||
// format final result based on precision
|
||||
// - uses NaN-blocking format
|
||||
// - if there are any unsused bits the most significant bits are filled with 1s
|
||||
|
||||
if (`FPSIZES == 1)
|
||||
assign SgnRes = {ResSgn, X[`FLEN-2:0]};
|
||||
assign SgnRes = {ResSgn, X[`FLEN-2:0]};
|
||||
else if (`FPSIZES == 2)
|
||||
assign SgnRes = {~Fmt|ResSgn, X[`FLEN-2:`LEN1], Fmt ? X[`LEN1-1] : ResSgn, X[`LEN1-2:0]};
|
||||
assign SgnRes = {~Fmt|ResSgn, X[`FLEN-2:`LEN1], Fmt ? X[`LEN1-1] : ResSgn, X[`LEN1-2:0]};
|
||||
else if (`FPSIZES == 3) begin
|
||||
logic [2:0] SgnBits;
|
||||
logic [2:0] SgnBits;
|
||||
always_comb
|
||||
case (Fmt)
|
||||
`FMT: SgnBits = {ResSgn, X[`LEN1-1], X[`LEN2-1]};
|
||||
`FMT1: SgnBits = {1'b1, ResSgn, X[`LEN2-1]};
|
||||
`FMT1: SgnBits = {1'b1, ResSgn, X[`LEN2-1]};
|
||||
`FMT2: SgnBits = {2'b11, ResSgn};
|
||||
default: SgnBits = {3{1'bx}};
|
||||
endcase
|
||||
assign SgnRes = {SgnBits[2], X[`FLEN-2:`LEN1], SgnBits[1], X[`LEN1-2:`LEN2], SgnBits[0], X[`LEN2-2:0]};
|
||||
end else if (`FPSIZES == 4) begin
|
||||
logic [3:0] SgnBits;
|
||||
assign SgnRes = {SgnBits[2], X[`FLEN-2:`LEN1], SgnBits[1], X[`LEN1-2:`LEN2], SgnBits[0], X[`LEN2-2:0]};
|
||||
end else if (`FPSIZES == 4) begin
|
||||
logic [3:0] SgnBits;
|
||||
always_comb
|
||||
case (Fmt)
|
||||
`Q_FMT: SgnBits = {ResSgn, X[`D_LEN-1], X[`S_LEN-1], X[`H_LEN-1]};
|
||||
|
@ -73,7 +73,7 @@ module fsgninj (
|
|||
`S_FMT: SgnBits = {2'b11, ResSgn, X[`H_LEN-1]};
|
||||
`H_FMT: SgnBits = {3'b111, ResSgn};
|
||||
endcase
|
||||
assign SgnRes = {SgnBits[3], X[`Q_LEN-2:`D_LEN], SgnBits[2], X[`D_LEN-2:`S_LEN], SgnBits[1], X[`S_LEN-2:`H_LEN], SgnBits[0], X[`H_LEN-2:0]};
|
||||
end
|
||||
assign SgnRes = {SgnBits[3], X[`Q_LEN-2:`D_LEN], SgnBits[2], X[`D_LEN-2:`S_LEN], SgnBits[1], X[`S_LEN-2:`H_LEN], SgnBits[0], X[`H_LEN-2:0]};
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -30,13 +30,13 @@
|
|||
`include "wally-config.vh"
|
||||
|
||||
module arrs(
|
||||
input logic clk,
|
||||
input logic areset,
|
||||
input logic clk,
|
||||
input logic areset,
|
||||
output logic reset
|
||||
);
|
||||
|
||||
logic metaStable;
|
||||
logic resetB;
|
||||
logic metaStable;
|
||||
logic resetB;
|
||||
|
||||
always_ff @(posedge clk , posedge areset) begin
|
||||
if (areset) begin
|
||||
|
|
|
@ -27,9 +27,9 @@
|
|||
`include "wally-config.vh"
|
||||
|
||||
module clockgater (
|
||||
input logic E,
|
||||
input logic SE,
|
||||
input logic CLK,
|
||||
input logic E,
|
||||
input logic SE,
|
||||
input logic CLK,
|
||||
output logic ECLK
|
||||
);
|
||||
|
||||
|
@ -39,10 +39,10 @@ module clockgater (
|
|||
// VERY IMPORTANT.
|
||||
// This part functionally models a clock gater, but does not necessarily meet the timing constrains a real standard cell would.
|
||||
// Do not use this in synthesis!
|
||||
logic enable_q;
|
||||
logic enable_q;
|
||||
always_latch begin
|
||||
if(~CLK) begin
|
||||
enable_q <= E | SE;
|
||||
enable_q <= E | SE;
|
||||
end
|
||||
end
|
||||
assign ECLK = enable_q & CLK;
|
||||
|
|
|
@ -32,7 +32,7 @@ module lzc #(parameter WIDTH = 1) (
|
|||
|
||||
always_comb begin
|
||||
i = 0;
|
||||
while (~num[WIDTH-1-i] & (i < WIDTH)) i = i+1; // search for leading one
|
||||
while ((i < WIDTH) & ~num[WIDTH-1-i]) i = i+1; // search for leading one
|
||||
ZeroCnt = i[$clog2(WIDTH+1)-1:0];
|
||||
end
|
||||
endmodule
|
||||
|
|
|
@ -49,39 +49,39 @@ module ram1p1rwbe #(parameter DEPTH=64, WIDTH=44) (
|
|||
// ***************************************************************************
|
||||
// TRUE SRAM macro
|
||||
// ***************************************************************************
|
||||
if ((`USE_SRAM == 1) & (WIDTH == 128) & (DEPTH == 64)) begin // Cache data subarray
|
||||
if ((`USE_SRAM == 1) & (WIDTH == 128) & (DEPTH == 64)) begin // Cache data subarray
|
||||
genvar index;
|
||||
// 64 x 128-bit SRAM
|
||||
logic [WIDTH-1:0] BitWriteMask;
|
||||
for (index=0; index < WIDTH; index++)
|
||||
assign BitWriteMask[index] = bwe[index/8];
|
||||
// 64 x 128-bit SRAM
|
||||
logic [WIDTH-1:0] BitWriteMask;
|
||||
for (index=0; index < WIDTH; index++)
|
||||
assign BitWriteMask[index] = bwe[index/8];
|
||||
ram1p1rwbe_64x128 sram1A (.CLK(clk), .CEB(~ce), .WEB(~we),
|
||||
.A(addr), .D(din),
|
||||
.BWEB(~BitWriteMask), .Q(dout));
|
||||
.A(addr), .D(din),
|
||||
.BWEB(~BitWriteMask), .Q(dout));
|
||||
|
||||
end else if ((`USE_SRAM == 1) & (WIDTH == 44) & (DEPTH == 64)) begin // RV64 cache tag
|
||||
genvar index;
|
||||
// 64 x 44-bit SRAM
|
||||
logic [WIDTH-1:0] BitWriteMask;
|
||||
for (index=0; index < WIDTH; index++)
|
||||
assign BitWriteMask[index] = bwe[index/8];
|
||||
ram1p1rwbe_64x44 sram1B (.CLK(clk), .CEB(~ce), .WEB(~we),
|
||||
.A(addr), .D(din),
|
||||
.BWEB(~BitWriteMask), .Q(dout));
|
||||
genvar index;
|
||||
// 64 x 44-bit SRAM
|
||||
logic [WIDTH-1:0] BitWriteMask;
|
||||
for (index=0; index < WIDTH; index++)
|
||||
assign BitWriteMask[index] = bwe[index/8];
|
||||
ram1p1rwbe_64x44 sram1B (.CLK(clk), .CEB(~ce), .WEB(~we),
|
||||
.A(addr), .D(din),
|
||||
.BWEB(~BitWriteMask), .Q(dout));
|
||||
|
||||
end else if ((`USE_SRAM == 1) & (WIDTH == 22) & (DEPTH == 64)) begin // RV32 cache tag
|
||||
genvar index;
|
||||
// 64 x 22-bit SRAM
|
||||
logic [WIDTH-1:0] BitWriteMask;
|
||||
for (index=0; index < WIDTH; index++)
|
||||
assign BitWriteMask[index] = bwe[index/8];
|
||||
ram1p1rwbe_64x22 sram1B (.CLK(clk), .CEB(~ce), .WEB(~we),
|
||||
.A(addr), .D(din),
|
||||
.BWEB(~BitWriteMask), .Q(dout));
|
||||
genvar index;
|
||||
// 64 x 22-bit SRAM
|
||||
logic [WIDTH-1:0] BitWriteMask;
|
||||
for (index=0; index < WIDTH; index++)
|
||||
assign BitWriteMask[index] = bwe[index/8];
|
||||
ram1p1rwbe_64x22 sram1B (.CLK(clk), .CEB(~ce), .WEB(~we),
|
||||
.A(addr), .D(din),
|
||||
.BWEB(~BitWriteMask), .Q(dout));
|
||||
|
||||
// ***************************************************************************
|
||||
// READ first SRAM model
|
||||
// ***************************************************************************
|
||||
// ***************************************************************************
|
||||
// READ first SRAM model
|
||||
// ***************************************************************************
|
||||
end else begin: ram
|
||||
integer i;
|
||||
|
||||
|
@ -91,19 +91,18 @@ module ram1p1rwbe #(parameter DEPTH=64, WIDTH=44) (
|
|||
assign dout = RAM[addrd];
|
||||
|
||||
/* // Read
|
||||
always_ff @(posedge clk)
|
||||
if(ce) dout <= #1 mem[addr]; */
|
||||
|
||||
always_ff @(posedge clk)
|
||||
if(ce) dout <= #1 mem[addr]; */
|
||||
|
||||
// Write divided into part for bytes and part for extra msbs
|
||||
// Questa sim version 2022.3_2 does not allow multiple drivers for RAM when using always_ff.
|
||||
// Therefore these always blocks use the older always @(posedge clk)
|
||||
// Questa sim version 2022.3_2 does not allow multiple drivers for RAM when using always_ff.
|
||||
// Therefore these always blocks use the older always @(posedge clk)
|
||||
if(WIDTH >= 8)
|
||||
always @(posedge clk)
|
||||
if (ce & we)
|
||||
for(i = 0; i < WIDTH/8; i++)
|
||||
if(bwe[i]) RAM[addr][i*8 +: 8] <= #1 din[i*8 +: 8];
|
||||
|
||||
|
||||
if (WIDTH%8 != 0) // handle msbs if width not a multiple of 8
|
||||
always @(posedge clk)
|
||||
if (ce & we & bwe[WIDTH/8])
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
|
||||
module ram1p1rwbe_64x128(
|
||||
input logic CLK,
|
||||
input logic CEB,
|
||||
input logic CEB,
|
||||
input logic WEB,
|
||||
input logic [5:0] A,
|
||||
input logic [127:0] D,
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
|
||||
module ram1p1rwbe_64x22(
|
||||
input logic CLK,
|
||||
input logic CEB,
|
||||
input logic CEB,
|
||||
input logic WEB,
|
||||
input logic [5:0] A,
|
||||
input logic [21:0] D,
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
|
||||
module ram1p1rwbe_64x44(
|
||||
input logic CLK,
|
||||
input logic CEB,
|
||||
input logic CEB,
|
||||
input logic WEB,
|
||||
input logic [5:0] A,
|
||||
input logic [43:0] D,
|
||||
|
|
|
@ -44,96 +44,94 @@ module ram2p1r1wbe #(parameter DEPTH=1024, WIDTH=68) (
|
|||
output logic [WIDTH-1:0] rd1
|
||||
);
|
||||
|
||||
logic [WIDTH-1:0] mem[DEPTH-1:0];
|
||||
localparam SRAMWIDTH = 32;
|
||||
localparam SRAMNUMSETS = SRAMWIDTH/WIDTH;
|
||||
logic [WIDTH-1:0] mem[DEPTH-1:0];
|
||||
localparam SRAMWIDTH = 32;
|
||||
localparam SRAMNUMSETS = SRAMWIDTH/WIDTH;
|
||||
|
||||
// ***************************************************************************
|
||||
// TRUE Smem macro
|
||||
// ***************************************************************************
|
||||
|
||||
if ((`USE_SRAM == 1) & (WIDTH == 68) & (DEPTH == 1024)) begin
|
||||
|
||||
ram2p1r1wbe_1024x68 memory1(.CLKA(clk), .CLKB(clk),
|
||||
.CEBA(~ce1), .CEBB(~ce2),
|
||||
.WEBA('0), .WEBB(~we2),
|
||||
.AA(ra1), .AB(wa2),
|
||||
.DA('0),
|
||||
.DB(wd2),
|
||||
.BWEBA('0), .BWEBB('1),
|
||||
.QA(rd1),
|
||||
.QB());
|
||||
if ((`USE_SRAM == 1) & (WIDTH == 68) & (DEPTH == 1024)) begin
|
||||
|
||||
ram2p1r1wbe_1024x68 memory1(.CLKA(clk), .CLKB(clk),
|
||||
.CEBA(~ce1), .CEBB(~ce2),
|
||||
.WEBA('0), .WEBB(~we2),
|
||||
.AA(ra1), .AB(wa2),
|
||||
.DA('0),
|
||||
.DB(wd2),
|
||||
.BWEBA('0), .BWEBB('1),
|
||||
.QA(rd1),
|
||||
.QB());
|
||||
|
||||
end else if ((`USE_SRAM == 1) & (WIDTH == 36) & (DEPTH == 1024)) begin
|
||||
|
||||
ram2p1r1wbe_1024x36 memory1(.CLKA(clk), .CLKB(clk),
|
||||
.CEBA(~ce1), .CEBB(~ce2),
|
||||
.WEBA('0), .WEBB(~we2),
|
||||
.AA(ra1), .AB(wa2),
|
||||
.DA('0),
|
||||
.DB(wd2),
|
||||
.BWEBA('0), .BWEBB('1),
|
||||
.QA(rd1),
|
||||
.QB());
|
||||
end else if ((`USE_SRAM == 1) & (WIDTH == 36) & (DEPTH == 1024)) begin
|
||||
|
||||
ram2p1r1wbe_1024x36 memory1(.CLKA(clk), .CLKB(clk),
|
||||
.CEBA(~ce1), .CEBB(~ce2),
|
||||
.WEBA('0), .WEBB(~we2),
|
||||
.AA(ra1), .AB(wa2),
|
||||
.DA('0),
|
||||
.DB(wd2),
|
||||
.BWEBA('0), .BWEBB('1),
|
||||
.QA(rd1),
|
||||
.QB());
|
||||
|
||||
end else if ((`USE_SRAM == 1) & (WIDTH == 2) & (DEPTH == 1024)) begin
|
||||
end else if ((`USE_SRAM == 1) & (WIDTH == 2) & (DEPTH == 1024)) begin
|
||||
|
||||
logic [SRAMWIDTH-1:0] SRAMReadData;
|
||||
logic [SRAMWIDTH-1:0] SRAMWriteData;
|
||||
logic [SRAMWIDTH-1:0] RD1Sets[SRAMNUMSETS-1:0];
|
||||
logic [SRAMNUMSETS-1:0] SRAMBitMaskPre;
|
||||
logic [SRAMWIDTH-1:0] SRAMBitMask;
|
||||
logic [$clog2(DEPTH)-1:0] RA1Q;
|
||||
|
||||
|
||||
onehotdecoder #($clog2(SRAMNUMSETS)) oh1(wa2[$clog2(SRAMNUMSETS)-1:0], SRAMBitMaskPre);
|
||||
genvar index;
|
||||
for (index = 0; index < SRAMNUMSETS; index++) begin:readdatalinesetsmux
|
||||
assign RD1Sets[index] = SRAMReadData[(index*WIDTH)+WIDTH-1 : (index*WIDTH)];
|
||||
assign SRAMWriteData[index*2+1:index*2] = wd2;
|
||||
assign SRAMBitMask[index*2+1:index*2] = {2{SRAMBitMaskPre[index]}};
|
||||
end
|
||||
flopen #($clog2(DEPTH)) mem_reg1 (clk, ce1, ra1, RA1Q);
|
||||
assign rd1 = RD1Sets[RA1Q[$clog2(SRAMWIDTH)-1:0]];
|
||||
ram2p1r1wbe_64x32 memory2(.CLKA(clk), .CLKB(clk),
|
||||
.CEBA(~ce1), .CEBB(~ce2),
|
||||
.WEBA('0), .WEBB(~we2),
|
||||
.AA(ra1[$clog2(DEPTH)-1:$clog2(SRAMNUMSETS)]),
|
||||
.AB(wa2[$clog2(DEPTH)-1:$clog2(SRAMNUMSETS)]),
|
||||
.DA('0),
|
||||
.DB(SRAMWriteData),
|
||||
.BWEBA('0), .BWEBB(SRAMBitMask),
|
||||
.QA(SRAMReadData),
|
||||
.QB());
|
||||
logic [SRAMWIDTH-1:0] SRAMReadData;
|
||||
logic [SRAMWIDTH-1:0] SRAMWriteData;
|
||||
logic [SRAMWIDTH-1:0] RD1Sets[SRAMNUMSETS-1:0];
|
||||
logic [SRAMNUMSETS-1:0] SRAMBitMaskPre;
|
||||
logic [SRAMWIDTH-1:0] SRAMBitMask;
|
||||
logic [$clog2(DEPTH)-1:0] RA1Q;
|
||||
|
||||
onehotdecoder #($clog2(SRAMNUMSETS)) oh1(wa2[$clog2(SRAMNUMSETS)-1:0], SRAMBitMaskPre);
|
||||
genvar index;
|
||||
for (index = 0; index < SRAMNUMSETS; index++) begin:readdatalinesetsmux
|
||||
assign RD1Sets[index] = SRAMReadData[(index*WIDTH)+WIDTH-1 : (index*WIDTH)];
|
||||
assign SRAMWriteData[index*2+1:index*2] = wd2;
|
||||
assign SRAMBitMask[index*2+1:index*2] = {2{SRAMBitMaskPre[index]}};
|
||||
end
|
||||
flopen #($clog2(DEPTH)) mem_reg1 (clk, ce1, ra1, RA1Q);
|
||||
assign rd1 = RD1Sets[RA1Q[$clog2(SRAMWIDTH)-1:0]];
|
||||
ram2p1r1wbe_64x32 memory2(.CLKA(clk), .CLKB(clk),
|
||||
.CEBA(~ce1), .CEBB(~ce2),
|
||||
.WEBA('0), .WEBB(~we2),
|
||||
.AA(ra1[$clog2(DEPTH)-1:$clog2(SRAMNUMSETS)]),
|
||||
.AB(wa2[$clog2(DEPTH)-1:$clog2(SRAMNUMSETS)]),
|
||||
.DA('0),
|
||||
.DB(SRAMWriteData),
|
||||
.BWEBA('0), .BWEBB(SRAMBitMask),
|
||||
.QA(SRAMReadData),
|
||||
.QB());
|
||||
|
||||
end else begin
|
||||
|
||||
// ***************************************************************************
|
||||
// READ first SRAM model
|
||||
// ***************************************************************************
|
||||
integer i;
|
||||
|
||||
end else begin
|
||||
|
||||
// ***************************************************************************
|
||||
// READ first SRAM model
|
||||
// ***************************************************************************
|
||||
integer i;
|
||||
|
||||
// Read
|
||||
logic [$clog2(DEPTH)-1:0] ra1d;
|
||||
flopen #($clog2(DEPTH)) adrreg(clk, ce1, ra1, ra1d);
|
||||
assign rd1 = mem[ra1d];
|
||||
|
||||
/* // Read
|
||||
always_ff @(posedge clk)
|
||||
if(ce1) rd1 <= #1 mem[ra1]; */
|
||||
|
||||
// Write divided into part for bytes and part for extra msbs
|
||||
if(WIDTH >= 8)
|
||||
always @(posedge clk)
|
||||
if (ce2 & we2)
|
||||
for(i = 0; i < WIDTH/8; i++)
|
||||
if(bwe2[i]) mem[wa2][i*8 +: 8] <= #1 wd2[i*8 +: 8];
|
||||
|
||||
if (WIDTH%8 != 0) // handle msbs if width not a multiple of 8
|
||||
always @(posedge clk)
|
||||
if (ce2 & we2 & bwe2[WIDTH/8])
|
||||
mem[wa2][WIDTH-1:WIDTH-WIDTH%8] <= #1 wd2[WIDTH-1:WIDTH-WIDTH%8];
|
||||
|
||||
/* // Read
|
||||
always_ff @(posedge clk)
|
||||
if(ce1) rd1 <= #1 mem[ra1]; */
|
||||
|
||||
// Write divided into part for bytes and part for extra msbs
|
||||
if(WIDTH >= 8)
|
||||
always @(posedge clk)
|
||||
if (ce2 & we2)
|
||||
for(i = 0; i < WIDTH/8; i++)
|
||||
if(bwe2[i]) mem[wa2][i*8 +: 8] <= #1 wd2[i*8 +: 8];
|
||||
|
||||
if (WIDTH%8 != 0) // handle msbs if width not a multiple of 8
|
||||
always @(posedge clk)
|
||||
if (ce2 & we2 & bwe2[WIDTH/8])
|
||||
mem[wa2][WIDTH-1:WIDTH-WIDTH%8] <= #1 wd2[WIDTH-1:WIDTH-WIDTH%8];
|
||||
end
|
||||
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -27,8 +27,8 @@
|
|||
module ram2p1r1wbe_1024x36(
|
||||
input logic CLKA,
|
||||
input logic CLKB,
|
||||
input logic CEBA,
|
||||
input logic CEBB,
|
||||
input logic CEBA,
|
||||
input logic CEBB,
|
||||
input logic WEBA,
|
||||
input logic WEBB,
|
||||
input logic [9:0] AA,
|
||||
|
@ -43,12 +43,12 @@ module ram2p1r1wbe_1024x36(
|
|||
|
||||
// replace "generic1024x36RAM" with "TSDN..1024X36.." module from your memory vendor
|
||||
//generic1024x36RAM sramIP (.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB,
|
||||
// .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB);
|
||||
// .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB);
|
||||
// use part of a larger RAM to avoid generating more flavors of RAM
|
||||
logic [67:0] QAfull, QBfull;
|
||||
TSDN28HPCPA1024X68M4MW sramIP(.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB,
|
||||
.AA, .AB, .DA({32'b0, DA[35:0]}), .DB({32'b0, DB[35:0]}),
|
||||
.BWEBA({32'b0, BWEBA[35:0]}), .BWEBB({32'b0, BWEBB[35:0]}), .QA(QAfull), .QB(QBfull));
|
||||
.AA, .AB, .DA({32'b0, DA[35:0]}), .DB({32'b0, DB[35:0]}),
|
||||
.BWEBA({32'b0, BWEBA[35:0]}), .BWEBB({32'b0, BWEBB[35:0]}), .QA(QAfull), .QB(QBfull));
|
||||
assign QA = QAfull[35:0];
|
||||
assign QB = QBfull[35:0];
|
||||
|
||||
|
|
|
@ -27,8 +27,8 @@
|
|||
module ram2p1r1wbe_1024x68(
|
||||
input logic CLKA,
|
||||
input logic CLKB,
|
||||
input logic CEBA,
|
||||
input logic CEBB,
|
||||
input logic CEBA,
|
||||
input logic CEBB,
|
||||
input logic WEBA,
|
||||
input logic WEBB,
|
||||
input logic [9:0] AA,
|
||||
|
@ -43,8 +43,8 @@ module ram2p1r1wbe_1024x68(
|
|||
|
||||
// replace "generic1024x68RAM" with "TSDN..1024X68.." module from your memory vendor
|
||||
//generic1024x68RAM sramIP (.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB,
|
||||
// .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB);
|
||||
// .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB);
|
||||
TSDN28HPCPA1024X68M4MW sramIP(.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB,
|
||||
.AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB);
|
||||
.AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -27,8 +27,8 @@
|
|||
module ram2p1r1wbe_128x64(
|
||||
input logic CLKA,
|
||||
input logic CLKB,
|
||||
input logic CEBA,
|
||||
input logic CEBB,
|
||||
input logic CEBA,
|
||||
input logic CEBB,
|
||||
input logic WEBA,
|
||||
input logic WEBB,
|
||||
input logic [6:0] AA,
|
||||
|
@ -43,8 +43,8 @@ module ram2p1r1wbe_128x64(
|
|||
|
||||
// replace "generic128x64RAM" with "TSDN..128X64.." module from your memory vendor
|
||||
TSDN28HPCPA128X64M4FW sramIP (.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB,
|
||||
.AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB);
|
||||
.AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB);
|
||||
// generic128x64RAM sramIP (.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB,
|
||||
// .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB);
|
||||
// .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -27,8 +27,8 @@
|
|||
module ram2p1r1wbe_2048x64(
|
||||
input logic CLKA,
|
||||
input logic CLKB,
|
||||
input logic CEBA,
|
||||
input logic CEBB,
|
||||
input logic CEBA,
|
||||
input logic CEBB,
|
||||
input logic WEBA,
|
||||
input logic WEBB,
|
||||
input logic [8:0] AA,
|
||||
|
@ -43,8 +43,8 @@ module ram2p1r1wbe_2048x64(
|
|||
|
||||
// replace "generic2048x64RAM" with "TSDN..2048X64.." module from your memory vendor
|
||||
TSDN28HPCPA2048X64MMFW sramIP (.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB,
|
||||
.AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB);
|
||||
.AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB);
|
||||
// generic2048x64RAM sramIP (.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB,
|
||||
// .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB);
|
||||
// .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -27,8 +27,8 @@
|
|||
module ram2p1r1wbe_64x32(
|
||||
input logic CLKA,
|
||||
input logic CLKB,
|
||||
input logic CEBA,
|
||||
input logic CEBB,
|
||||
input logic CEBA,
|
||||
input logic CEBB,
|
||||
input logic WEBA,
|
||||
input logic WEBB,
|
||||
input logic [5:0] AA,
|
||||
|
@ -43,7 +43,7 @@ module ram2p1r1wbe_64x32(
|
|||
|
||||
// replace "generic64x32RAM" with "TSDN..64X32.." module from your memory vendor
|
||||
//generic64x32RAM sramIP (.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB,
|
||||
// .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB);
|
||||
// .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB);
|
||||
TSDN28HPCPA64X32M4MW sramIP(.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB,
|
||||
.AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB);
|
||||
.AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB);
|
||||
endmodule
|
||||
|
|
|
@ -28,8 +28,8 @@
|
|||
`include "wally-config.vh"
|
||||
|
||||
module rom1p1r #(parameter ADDR_WIDTH = 8,
|
||||
parameter DATA_WIDTH = 32,
|
||||
parameter PRELOAD_ENABLED = 0)
|
||||
parameter DATA_WIDTH = 32,
|
||||
parameter PRELOAD_ENABLED = 0)
|
||||
(input logic clk,
|
||||
input logic ce,
|
||||
input logic [ADDR_WIDTH-1:0] addr,
|
||||
|
@ -37,7 +37,7 @@ module rom1p1r #(parameter ADDR_WIDTH = 8,
|
|||
);
|
||||
|
||||
// Core Memory
|
||||
logic [DATA_WIDTH-1:0] ROM [(2**ADDR_WIDTH)-1:0];
|
||||
logic [DATA_WIDTH-1:0] ROM [(2**ADDR_WIDTH)-1:0];
|
||||
/* if ((`USE_SRAM == 1) & (ADDR_WDITH == 7) & (DATA_WIDTH == 64)) begin
|
||||
rom1p1r_128x64 rom1 (.CLK(clk), .CEB(~ce), .A(addr[6:0]), .Q(dout));
|
||||
|
||||
|
@ -46,55 +46,55 @@ module rom1p1r #(parameter ADDR_WIDTH = 8,
|
|||
|
||||
end else begin */
|
||||
always @ (posedge clk) begin
|
||||
if(ce) dout <= ROM[addr];
|
||||
if(ce) dout <= ROM[addr];
|
||||
end
|
||||
|
||||
// for FPGA, initialize with zero-stage bootloader
|
||||
if(PRELOAD_ENABLED)
|
||||
initial begin
|
||||
ROM[0] = 64'h9581819300002197;
|
||||
ROM[1] = 64'h4281420141014081;
|
||||
ROM[2] = 64'h4481440143814301;
|
||||
ROM[3] = 64'h4681460145814501;
|
||||
ROM[4] = 64'h4881480147814701;
|
||||
ROM[5] = 64'h4a814a0149814901;
|
||||
ROM[6] = 64'h4c814c014b814b01;
|
||||
ROM[7] = 64'h4e814e014d814d01;
|
||||
ROM[8] = 64'h0110011b4f814f01;
|
||||
ROM[9] = 64'h059b45011161016e;
|
||||
ROM[10] = 64'h0004063705fe0010;
|
||||
ROM[11] = 64'h05a000ef8006061b;
|
||||
ROM[12] = 64'h0ff003930000100f;
|
||||
ROM[13] = 64'h4e952e3110060e37;
|
||||
ROM[14] = 64'hc602829b0053f2b7;
|
||||
ROM[15] = 64'h2023fe02dfe312fd;
|
||||
ROM[16] = 64'h829b0053f2b7007e;
|
||||
ROM[17] = 64'hfe02dfe312fdc602;
|
||||
ROM[18] = 64'h4de31efd000e2023;
|
||||
ROM[19] = 64'h059bf1402573fdd0;
|
||||
ROM[20] = 64'h0000061705e20870;
|
||||
ROM[21] = 64'h0010029b01260613;
|
||||
ROM[22] = 64'h11010002806702fe;
|
||||
ROM[23] = 64'h84b2842ae426e822;
|
||||
ROM[24] = 64'h892ee04aec064511;
|
||||
ROM[25] = 64'h06e000ef07e000ef;
|
||||
ROM[26] = 64'h979334fd02905563;
|
||||
ROM[27] = 64'h07930177d4930204;
|
||||
ROM[28] = 64'h4089093394be2004;
|
||||
ROM[29] = 64'h04138522008905b3;
|
||||
ROM[30] = 64'h19e3014000ef2004;
|
||||
ROM[31] = 64'h64a2644260e2fe94;
|
||||
ROM[32] = 64'h6749808261056902;
|
||||
ROM[33] = 64'hdfed8b8510472783;
|
||||
ROM[34] = 64'h2423479110a73823;
|
||||
ROM[35] = 64'h10472783674910f7;
|
||||
ROM[36] = 64'h20058693ffed8b89;
|
||||
ROM[37] = 64'h05a1118737836749;
|
||||
ROM[38] = 64'hfed59be3fef5bc23;
|
||||
ROM[39] = 64'h1047278367498082;
|
||||
ROM[40] = 64'h47858082dfed8b85;
|
||||
ROM[41] = 64'h40a7853b4015551b;
|
||||
ROM[42] = 64'h808210a7a02367c9;
|
||||
ROM[0] = 64'h9581819300002197;
|
||||
ROM[1] = 64'h4281420141014081;
|
||||
ROM[2] = 64'h4481440143814301;
|
||||
ROM[3] = 64'h4681460145814501;
|
||||
ROM[4] = 64'h4881480147814701;
|
||||
ROM[5] = 64'h4a814a0149814901;
|
||||
ROM[6] = 64'h4c814c014b814b01;
|
||||
ROM[7] = 64'h4e814e014d814d01;
|
||||
ROM[8] = 64'h0110011b4f814f01;
|
||||
ROM[9] = 64'h059b45011161016e;
|
||||
ROM[10] = 64'h0004063705fe0010;
|
||||
ROM[11] = 64'h05a000ef8006061b;
|
||||
ROM[12] = 64'h0ff003930000100f;
|
||||
ROM[13] = 64'h4e952e3110060e37;
|
||||
ROM[14] = 64'hc602829b0053f2b7;
|
||||
ROM[15] = 64'h2023fe02dfe312fd;
|
||||
ROM[16] = 64'h829b0053f2b7007e;
|
||||
ROM[17] = 64'hfe02dfe312fdc602;
|
||||
ROM[18] = 64'h4de31efd000e2023;
|
||||
ROM[19] = 64'h059bf1402573fdd0;
|
||||
ROM[20] = 64'h0000061705e20870;
|
||||
ROM[21] = 64'h0010029b01260613;
|
||||
ROM[22] = 64'h11010002806702fe;
|
||||
ROM[23] = 64'h84b2842ae426e822;
|
||||
ROM[24] = 64'h892ee04aec064511;
|
||||
ROM[25] = 64'h06e000ef07e000ef;
|
||||
ROM[26] = 64'h979334fd02905563;
|
||||
ROM[27] = 64'h07930177d4930204;
|
||||
ROM[28] = 64'h4089093394be2004;
|
||||
ROM[29] = 64'h04138522008905b3;
|
||||
ROM[30] = 64'h19e3014000ef2004;
|
||||
ROM[31] = 64'h64a2644260e2fe94;
|
||||
ROM[32] = 64'h6749808261056902;
|
||||
ROM[33] = 64'hdfed8b8510472783;
|
||||
ROM[34] = 64'h2423479110a73823;
|
||||
ROM[35] = 64'h10472783674910f7;
|
||||
ROM[36] = 64'h20058693ffed8b89;
|
||||
ROM[37] = 64'h05a1118737836749;
|
||||
ROM[38] = 64'hfed59be3fef5bc23;
|
||||
ROM[39] = 64'h1047278367498082;
|
||||
ROM[40] = 64'h47858082dfed8b85;
|
||||
ROM[41] = 64'h40a7853b4015551b;
|
||||
ROM[42] = 64'h808210a7a02367c9;
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
|
||||
module rom1p1r_128x32(
|
||||
input logic CLK,
|
||||
input logic CEB,
|
||||
input logic CEB,
|
||||
input logic [6:0] A,
|
||||
output logic [31:0] Q
|
||||
);
|
||||
|
|
|
@ -25,14 +25,14 @@
|
|||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module rom1p1r_128x64(
|
||||
input logic CLK,
|
||||
input logic CEB,
|
||||
input logic CLK,
|
||||
input logic CEB,
|
||||
input logic [6:0] A,
|
||||
output logic [63:0] Q
|
||||
);
|
||||
|
||||
// replace "generic64x128RAM" with "TS3N..64X128.." module from your memory vendor
|
||||
ts3n28hpcpa128x64m8m romIP (.CLK, .CEB, .A, .Q);
|
||||
ts3n28hpcpa128x64m8m romIP (.CLK, .CEB, .A, .Q);
|
||||
// generic64x128ROM romIP (.CLK, .CEB, .A, .Q);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
///////////////////////////////////////////
|
||||
// alu.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu, Sarah.Harris@unlv.edu
|
||||
// Written: David_Harris@hmc.edu, Sarah.Harris@unlv.edu, kekim@hmc.edu
|
||||
// Created: 9 January 2021
|
||||
// Modified:
|
||||
// Modified: 3 March 2023
|
||||
//
|
||||
// Purpose: RISC-V Arithmetic/Logic Unit
|
||||
//
|
||||
|
@ -30,31 +30,34 @@
|
|||
`include "wally-config.vh"
|
||||
|
||||
module alu #(parameter WIDTH=32) (
|
||||
input logic [WIDTH-1:0] A, B, // Operands
|
||||
input logic [2:0] ALUControl, // With Funct3, indicates operation to perform
|
||||
input logic [2:0] Funct3, // With ALUControl, indicates operation to perform
|
||||
output logic [WIDTH-1:0] Result, // ALU result
|
||||
output logic [WIDTH-1:0] Sum); // Sum of operands
|
||||
input logic [WIDTH-1:0] A, B, // Operands
|
||||
input logic W64, // W64-type instruction
|
||||
input logic SubArith, // Subtraction or arithmetic shift
|
||||
input logic [2:0] ALUSelect, // ALU mux select signal
|
||||
input logic [1:0] BSelect, // Binary encoding of if it's a ZBA_ZBB_ZBC_ZBS instruction
|
||||
input logic [2:0] ZBBSelect, // ZBB mux select signal
|
||||
input logic [2:0] Funct3, // For BMU decoding
|
||||
input logic [1:0] CompFlags, // Comparator flags
|
||||
input logic [2:0] BALUControl, // ALU Control signals for B instructions in Execute Stage
|
||||
output logic [WIDTH-1:0] Result, // ALU result
|
||||
output logic [WIDTH-1:0] Sum); // Sum of operands
|
||||
|
||||
// CondInvB = ~B when subtracting, B otherwise. Shift = shift result. SLT/U = result of a slt/u instruction.
|
||||
// FullResult = ALU result before adjusting for a RV64 w-suffix instruction.
|
||||
logic [WIDTH-1:0] CondInvB, Shift, FullResult; // Intermediate results
|
||||
logic Carry, Neg; // Flags: carry out, negative
|
||||
logic LT, LTU; // Less than, Less than unsigned
|
||||
logic W64; // RV64 W-type instruction
|
||||
logic SubArith; // Performing subtraction or arithmetic right shift
|
||||
logic ALUOp; // 0 for address generation addition or 1 for regular ALU ops
|
||||
logic Asign, Bsign; // Sign bits of A, B
|
||||
|
||||
// Extract control signals from ALUControl.
|
||||
assign {W64, SubArith, ALUOp} = ALUControl;
|
||||
logic [WIDTH-1:0] CondMaskInvB, Shift, FullResult, ALUResult; // Intermediate Signals
|
||||
logic [WIDTH-1:0] CondMaskB; // Result of B mask select mux
|
||||
logic [WIDTH-1:0] CondShiftA; // Result of A shifted select mux
|
||||
logic [WIDTH-1:0] CondExtA; // Result of Zero Extend A select mux
|
||||
logic Carry, Neg; // Flags: carry out, negative
|
||||
logic LT, LTU; // Less than, Less than unsigned
|
||||
logic Asign, Bsign; // Sign bits of A, B
|
||||
|
||||
// Addition
|
||||
assign CondInvB = SubArith ? ~B : B;
|
||||
assign {Carry, Sum} = A + CondInvB + {{(WIDTH-1){1'b0}}, SubArith};
|
||||
assign CondMaskInvB = SubArith ? ~CondMaskB : CondMaskB;
|
||||
assign {Carry, Sum} = CondShiftA + CondMaskInvB + {{(WIDTH-1){1'b0}}, SubArith};
|
||||
|
||||
// Shifts
|
||||
shifter sh(.A, .Amt(B[`LOG_XLEN-1:0]), .Right(Funct3[2]), .Arith(SubArith), .W64, .Y(Shift));
|
||||
// Shifts (configurable for rotation)
|
||||
shifter sh(.A, .Amt(B[`LOG_XLEN-1:0]), .Right(Funct3[2]), .W64, .SubArith, .Y(Shift), .Rotate(BALUControl[2]));
|
||||
|
||||
// Condition code flags are based on subtraction output Sum = A-B.
|
||||
// Overflow occurs when the numbers being subtracted have the opposite sign
|
||||
|
@ -67,20 +70,31 @@ module alu #(parameter WIDTH=32) (
|
|||
assign LTU = ~Carry;
|
||||
|
||||
// Select appropriate ALU Result
|
||||
always_comb
|
||||
if (~ALUOp) FullResult = Sum; // Always add for ALUOp = 0 (address generation)
|
||||
else casez (Funct3) // Otherwise check Funct3
|
||||
3'b000: FullResult = Sum; // add or sub
|
||||
3'b?01: FullResult = Shift; // sll, sra, or srl
|
||||
3'b010: FullResult = {{(WIDTH-1){1'b0}}, LT}; // slt
|
||||
3'b011: FullResult = {{(WIDTH-1){1'b0}}, LTU}; // sltu
|
||||
3'b100: FullResult = A ^ B; // xor
|
||||
3'b110: FullResult = A | B; // or
|
||||
3'b111: FullResult = A & B; // and
|
||||
always_comb begin
|
||||
case (ALUSelect)
|
||||
3'b000: FullResult = Sum; // add or sub (including address generation)
|
||||
3'b001: FullResult = Shift; // sll, sra, or srl
|
||||
3'b010: FullResult = {{(WIDTH-1){1'b0}}, LT}; // slt
|
||||
3'b011: FullResult = {{(WIDTH-1){1'b0}}, LTU}; // sltu
|
||||
3'b100: FullResult = A ^ CondMaskInvB; // xor, xnor, binv
|
||||
3'b101: FullResult = (`ZBS_SUPPORTED | `ZBB_SUPPORTED) ? {{(WIDTH-1){1'b0}},{|(A & CondMaskB)}} : Shift; // bext (or IEU shift when BMU not supported)
|
||||
3'b110: FullResult = A | CondMaskInvB; // or, orn, bset
|
||||
3'b111: FullResult = A & CondMaskInvB; // and, bclr
|
||||
endcase
|
||||
end
|
||||
|
||||
// Support RV64I W-type addw/subw/addiw/shifts that discard upper 32 bits and sign-extend 32-bit result to 64 bits
|
||||
if (WIDTH == 64) assign Result = W64 ? {{32{FullResult[31]}}, FullResult[31:0]} : FullResult;
|
||||
else assign Result = FullResult;
|
||||
endmodule
|
||||
if (WIDTH == 64) assign ALUResult = W64 ? {{32{FullResult[31]}}, FullResult[31:0]} : FullResult;
|
||||
else assign ALUResult = FullResult;
|
||||
|
||||
// Final Result B instruction select mux
|
||||
if (`ZBC_SUPPORTED | `ZBS_SUPPORTED | `ZBA_SUPPORTED | `ZBB_SUPPORTED) begin : bitmanipalu
|
||||
bitmanipalu #(WIDTH) balu(.A, .B, .W64, .BSelect, .ZBBSelect,
|
||||
.Funct3, .CompFlags, .BALUControl, .ALUResult, .FullResult,
|
||||
.CondMaskB, .CondShiftA, .Result);
|
||||
end else begin
|
||||
assign Result = ALUResult;
|
||||
assign CondMaskB = B;
|
||||
assign CondShiftA = A;
|
||||
end
|
||||
endmodule
|
99
src/ieu/bmu/bitmanipalu.sv
Normal file
99
src/ieu/bmu/bitmanipalu.sv
Normal file
|
@ -0,0 +1,99 @@
|
|||
///////////////////////////////////////////
|
||||
// bitmanipalu.sv
|
||||
//
|
||||
// Written: Kevin Kim <kekim@hmc.edu>
|
||||
// Created: 23 March 2023
|
||||
// Modified: 23 March 2023
|
||||
//
|
||||
// Purpose: RISC-V Arithmetic/Logic Unit Bit-Manipulation Extension
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 15
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module bitmanipalu #(parameter WIDTH=32) (
|
||||
input logic [WIDTH-1:0] A, B, // Operands
|
||||
input logic W64, // W64-type instruction
|
||||
input logic [1:0] BSelect, // Binary encoding of if it's a ZBA_ZBB_ZBC_ZBS instruction
|
||||
input logic [2:0] ZBBSelect, // ZBB mux select signal
|
||||
input logic [2:0] Funct3, // Funct3 field of opcode indicates operation to perform
|
||||
input logic [1:0] CompFlags, // Comparator flags
|
||||
input logic [2:0] BALUControl, // ALU Control signals for B instructions in Execute Stage
|
||||
input logic [WIDTH-1:0] ALUResult, FullResult, // ALUResult, FullResult signals
|
||||
output logic [WIDTH-1:0] CondMaskB, // B is conditionally masked for ZBS instructions
|
||||
output logic [WIDTH-1:0] CondShiftA, // A is conditionally shifted for ShAdd instructions
|
||||
output logic [WIDTH-1:0] Result); // Result
|
||||
|
||||
logic [WIDTH-1:0] ZBBResult, ZBCResult; // ZBB, ZBC Result
|
||||
logic [WIDTH-1:0] MaskB; // BitMask of B
|
||||
logic [WIDTH-1:0] RevA; // Bit-reversed A
|
||||
logic Rotate; // Indicates if it is Rotate instruction
|
||||
logic Mask; // Indicates if it is ZBS instruction
|
||||
logic PreShift; // Inidicates if it is sh1add, sh2add, sh3add instruction
|
||||
logic [1:0] PreShiftAmt; // Amount to Pre-Shift A
|
||||
logic [WIDTH-1:0] CondZextA; // A Conditional Extend Intermediary Signal
|
||||
|
||||
// Extract control signals from bitmanip ALUControl.
|
||||
assign {Mask, PreShift} = BALUControl[1:0];
|
||||
|
||||
// Mask Generation Mux
|
||||
if (`ZBS_SUPPORTED) begin: zbsdec
|
||||
decoder #($clog2(WIDTH)) maskgen(B[$clog2(WIDTH)-1:0], MaskB);
|
||||
mux2 #(WIDTH) maskmux(B, MaskB, Mask, CondMaskB);
|
||||
end else assign CondMaskB = B;
|
||||
|
||||
// 0-3 bit Pre-Shift Mux
|
||||
if (`ZBA_SUPPORTED) begin: zbapreshift
|
||||
if (WIDTH == 64) begin
|
||||
mux2 #(64) zextmux(A, {{32{1'b0}}, A[31:0]}, W64, CondZextA);
|
||||
end else assign CondZextA = A;
|
||||
assign PreShiftAmt = Funct3[2:1] & {2{PreShift}};
|
||||
assign CondShiftA = CondZextA << (PreShiftAmt);
|
||||
end else begin
|
||||
assign PreShiftAmt = 2'b0;
|
||||
assign CondShiftA = A;
|
||||
end
|
||||
|
||||
// Bit reverse needed for some ZBB, ZBC instructions
|
||||
if (`ZBC_SUPPORTED | `ZBB_SUPPORTED) begin: bitreverse
|
||||
bitreverse #(WIDTH) brA(.A, .RevA);
|
||||
end
|
||||
|
||||
// ZBC Unit
|
||||
if (`ZBC_SUPPORTED) begin: zbc
|
||||
zbc #(WIDTH) ZBC(.A, .RevA, .B, .Funct3, .ZBCResult);
|
||||
end else assign ZBCResult = 0;
|
||||
|
||||
// ZBB Unit
|
||||
if (`ZBB_SUPPORTED) begin: zbb
|
||||
zbb #(WIDTH) ZBB(.A, .RevA, .B, .ALUResult, .W64, .lt(CompFlags[0]), .ZBBSelect, .ZBBResult);
|
||||
end else assign ZBBResult = 0;
|
||||
|
||||
// Result Select Mux
|
||||
always_comb
|
||||
case (BSelect)
|
||||
// 00: ALU, 01: ZBA/ZBS, 10: ZBB, 11: ZBC
|
||||
2'b00: Result = ALUResult;
|
||||
2'b01: Result = FullResult; // NOTE: We don't use ALUResult because ZBA/ZBS instructions don't sign extend the MSB of the right-hand word.
|
||||
2'b10: Result = ZBBResult;
|
||||
2'b11: Result = ZBCResult;
|
||||
endcase
|
||||
endmodule
|
42
src/ieu/bmu/bitreverse.sv
Normal file
42
src/ieu/bmu/bitreverse.sv
Normal file
|
@ -0,0 +1,42 @@
|
|||
|
||||
///////////////////////////////////////////
|
||||
// bitreverse.sv
|
||||
//
|
||||
// Written: Kevin Kim <kekim@hmc.edu> and Kip Macsai-Goren <kmacsaigoren@hmc.edu>
|
||||
// Created: 1 February 2023
|
||||
// Modified: 6 March 2023
|
||||
//
|
||||
// Purpose: Bit reverse submodule
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 15
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
`include "wally-config.vh"
|
||||
|
||||
module bitreverse #(parameter WIDTH=32) (
|
||||
input logic [WIDTH-1:0] A,
|
||||
output logic [WIDTH-1:0] RevA);
|
||||
|
||||
genvar i;
|
||||
for (i=0; i<WIDTH;i++) begin:loop
|
||||
assign RevA[WIDTH-i-1] = A[i];
|
||||
end
|
||||
endmodule
|
||||
|
||||
|
183
src/ieu/bmu/bmuctrl.sv
Normal file
183
src/ieu/bmu/bmuctrl.sv
Normal file
|
@ -0,0 +1,183 @@
|
|||
///////////////////////////////////////////
|
||||
// bmuctrl.sv
|
||||
//
|
||||
// Written: Kevin Kim <kekim@hmc.edu>
|
||||
// Created: 16 February 2023
|
||||
// Modified: 6 March 2023
|
||||
//
|
||||
// Purpose: Top level bit manipulation instruction decoder
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 15
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module bmuctrl(
|
||||
input logic clk, reset,
|
||||
// Decode stage control signals
|
||||
input logic StallD, FlushD, // Stall, flush Decode stage
|
||||
input logic [31:0] InstrD, // Instruction in Decode stage
|
||||
input logic ALUOpD, // Regular ALU Operation
|
||||
output logic [1:0] BSelectD, // Indicates if ZBA_ZBB_ZBC_ZBS instruction in one-hot encoding in Decode stage
|
||||
output logic [2:0] ZBBSelectD, // ZBB mux select signal in Decode stage NOTE: do we need this in decode?
|
||||
output logic BRegWriteD, // Indicates if it is a R type B instruction in Decode Stage
|
||||
output logic BALUSrcBD, // Indicates if it is an I/IW (non auipc) type B instruction in Decode Stage
|
||||
output logic BW64D, // Indiciates if it is a W type B instruction in Decode Stage
|
||||
output logic BSubArithD, // TRUE if ext, clr, andn, orn, xnor instruction in Decode Stage
|
||||
output logic IllegalBitmanipInstrD, // Indicates if it is unrecognized B instruction in Decode Stage
|
||||
// Execute stage control signals
|
||||
input logic StallE, FlushE, // Stall, flush Execute stage
|
||||
output logic [2:0] ALUSelectD, // ALU select
|
||||
output logic [1:0] BSelectE, // Indicates if ZBA_ZBB_ZBC_ZBS instruction in one-hot encoding
|
||||
output logic [2:0] ZBBSelectE, // ZBB mux select signal
|
||||
output logic BRegWriteE, // Indicates if it is a R type B instruction in Execute
|
||||
output logic BComparatorSignedE, // Indicates if comparator signed in Execute Stage
|
||||
output logic [2:0] BALUControlE // ALU Control signals for B instructions in Execute Stage
|
||||
);
|
||||
|
||||
logic [6:0] OpD; // Opcode in Decode stage
|
||||
logic [2:0] Funct3D; // Funct3 field in Decode stage
|
||||
logic [6:0] Funct7D; // Funct7 field in Decode stage
|
||||
logic [4:0] Rs2D; // Rs2 source register in Decode stage
|
||||
logic BComparatorSignedD; // Indicates if comparator signed (max, min instruction) in Decode Stage
|
||||
logic RotateD; // Indicates if rotate instruction in Decode Stage
|
||||
logic MaskD; // Indicates if zbs instruction in Decode Stage
|
||||
logic PreShiftD; // Indicates if sh1add, sh2add, sh3add instruction in Decode Stage
|
||||
logic [2:0] BALUControlD; // ALU Control signals for B instructions
|
||||
logic [2:0] BALUSelectD; // ALU Mux select signal in Decode Stage for BMU operations
|
||||
logic BALUOpD; // Indicates if it is an ALU B instruction in Decode Stage
|
||||
|
||||
`define BMUCTRLW 17
|
||||
|
||||
logic [`BMUCTRLW-1:0] BMUControlsD; // Main B Instructions Decoder control signals
|
||||
|
||||
// Extract fields
|
||||
assign OpD = InstrD[6:0];
|
||||
assign Funct3D = InstrD[14:12];
|
||||
assign Funct7D = InstrD[31:25];
|
||||
assign Rs2D = InstrD[24:20];
|
||||
|
||||
// Main Instruction Decoder
|
||||
always_comb begin
|
||||
// BALUSelect_BSelect_ZBBSelect_BRegWrite_BALUSrcB_BW64_BALUOp_BSubArithD_RotateD_MaskD_PreShiftD_IllegalBitmanipInstrD
|
||||
BMUControlsD = `BMUCTRLW'b000_00_000_0_0_0_0_0_0_0_0_1; // default: Illegal bmu instruction;
|
||||
if (`ZBA_SUPPORTED) begin
|
||||
casez({OpD, Funct7D, Funct3D})
|
||||
17'b0110011_0010000_010: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_0_1_0_0_0_1_0; // sh1add
|
||||
17'b0110011_0010000_100: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_0_1_0_0_0_1_0; // sh2add
|
||||
17'b0110011_0010000_110: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_0_1_0_0_0_1_0; // sh3add
|
||||
endcase
|
||||
if (`XLEN==64)
|
||||
casez({OpD, Funct7D, Funct3D})
|
||||
17'b0111011_0010000_010: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_1_1_0_0_0_1_0; // sh1add.uw
|
||||
17'b0111011_0010000_100: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_1_1_0_0_0_1_0; // sh2add.uw
|
||||
17'b0111011_0010000_110: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_1_1_0_0_0_1_0; // sh3add.uw
|
||||
17'b0111011_0000100_000: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_1_1_0_0_0_0_0; // add.uw
|
||||
17'b0011011_000010?_001: BMUControlsD = `BMUCTRLW'b001_01_000_1_1_1_1_0_0_0_0_0; // slli.uw
|
||||
endcase
|
||||
end
|
||||
if (`ZBB_SUPPORTED) begin
|
||||
casez({OpD, Funct7D, Funct3D})
|
||||
17'b0110011_0110000_001: BMUControlsD = `BMUCTRLW'b001_01_111_1_0_0_1_0_1_0_0_0; // rol
|
||||
17'b0110011_0110000_101: BMUControlsD = `BMUCTRLW'b001_01_111_1_0_0_1_0_1_0_0_0; // ror
|
||||
17'b0010011_0110000_001: if ((Rs2D[4:1] == 4'b0010))
|
||||
BMUControlsD = `BMUCTRLW'b000_10_001_1_1_0_1_0_0_0_0_0; // sign extend instruction
|
||||
else if ((Rs2D[4:2]==3'b000) & ~(Rs2D[1] & Rs2D[0]))
|
||||
BMUControlsD = `BMUCTRLW'b000_10_000_1_1_0_1_0_0_0_0_0; // count instruction
|
||||
17'b0110011_0000100_100: if (`XLEN == 32)
|
||||
BMUControlsD = `BMUCTRLW'b000_10_001_1_1_0_1_0_0_0_0_0; // zexth (rv32)
|
||||
17'b0110011_0100000_111: BMUControlsD = `BMUCTRLW'b111_01_111_1_0_0_1_1_0_0_0_0; // andn
|
||||
17'b0110011_0100000_110: BMUControlsD = `BMUCTRLW'b110_01_111_1_0_0_1_1_0_0_0_0; // orn
|
||||
17'b0110011_0100000_100: BMUControlsD = `BMUCTRLW'b100_01_111_1_0_0_1_1_0_0_0_0; // xnor
|
||||
17'b0010011_011010?_101: if ((`XLEN == 32 ^ Funct7D[0]) & (Rs2D == 5'b11000))
|
||||
BMUControlsD = `BMUCTRLW'b000_10_010_1_1_0_1_0_0_0_0_0; // rev8
|
||||
17'b0010011_0010100_101: if (Rs2D[4:0] == 5'b00111)
|
||||
BMUControlsD = `BMUCTRLW'b000_10_010_1_1_0_1_0_0_0_0_0; // orc.b
|
||||
17'b0110011_0000101_110: BMUControlsD = `BMUCTRLW'b000_10_111_1_0_0_1_0_0_0_0_0; // max
|
||||
17'b0110011_0000101_111: BMUControlsD = `BMUCTRLW'b000_10_111_1_0_0_1_0_0_0_0_0; // maxu
|
||||
17'b0110011_0000101_100: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_0_0_0_0_0; // min
|
||||
17'b0110011_0000101_101: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_0_0_0_0_0; // minu
|
||||
endcase
|
||||
if (`XLEN==32)
|
||||
casez({OpD, Funct7D, Funct3D})
|
||||
17'b0110011_0000100_100: BMUControlsD = `BMUCTRLW'b000_10_001_1_1_0_1_0_0_0_0_0; // zexth (rv32)
|
||||
17'b0010011_0110000_101: BMUControlsD = `BMUCTRLW'b001_00_111_1_1_0_1_0_1_0_0_0; // rori (rv32)
|
||||
endcase
|
||||
else if (`XLEN==64)
|
||||
casez({OpD, Funct7D, Funct3D})
|
||||
17'b0111011_0000100_100: BMUControlsD = `BMUCTRLW'b000_10_001_1_0_0_1_0_0_0_0_0; // zexth (rv64)
|
||||
17'b0111011_0110000_001: BMUControlsD = `BMUCTRLW'b001_00_111_1_0_1_1_0_1_0_0_0; // rolw
|
||||
17'b0111011_0110000_101: BMUControlsD = `BMUCTRLW'b001_00_111_1_0_1_1_0_1_0_0_0; // rorw
|
||||
17'b0010011_011000?_101: BMUControlsD = `BMUCTRLW'b001_00_111_1_1_0_1_0_1_0_0_0; // rori (rv64)
|
||||
17'b0011011_0110000_101: BMUControlsD = `BMUCTRLW'b001_00_111_1_1_1_1_0_1_0_0_0; // roriw
|
||||
17'b0011011_0110000_001: if ((Rs2D[4:2]==3'b000) & ~(Rs2D[1] & Rs2D[0]))
|
||||
BMUControlsD = `BMUCTRLW'b000_10_000_1_1_1_1_0_0_0_0_0; // count word instruction
|
||||
endcase
|
||||
end
|
||||
if (`ZBC_SUPPORTED)
|
||||
casez({OpD, Funct7D, Funct3D})
|
||||
17'b0110011_0000101_0??: BMUControlsD = `BMUCTRLW'b000_11_000_1_0_0_1_0_0_0_0_0; // ZBC instruction
|
||||
endcase
|
||||
if (`ZBS_SUPPORTED) begin // ZBS
|
||||
casez({OpD, Funct7D, Funct3D})
|
||||
17'b0110011_0100100_001: BMUControlsD = `BMUCTRLW'b111_01_000_1_0_0_1_1_0_1_0_0; // bclr
|
||||
17'b0110011_0100100_101: BMUControlsD = `BMUCTRLW'b101_01_000_1_0_0_1_1_0_1_0_0; // bext
|
||||
17'b0110011_0110100_001: BMUControlsD = `BMUCTRLW'b100_01_000_1_0_0_1_0_0_1_0_0; // binv
|
||||
17'b0110011_0010100_001: BMUControlsD = `BMUCTRLW'b110_01_000_1_0_0_1_0_0_1_0_0; // bset
|
||||
endcase
|
||||
if (`XLEN==32) // ZBS 64-bit
|
||||
casez({OpD, Funct7D, Funct3D})
|
||||
17'b0010011_0100100_001: BMUControlsD = `BMUCTRLW'b111_01_000_1_1_0_1_1_0_1_0_0; // bclri
|
||||
17'b0010011_0100100_101: BMUControlsD = `BMUCTRLW'b101_01_000_1_1_0_1_1_0_1_0_0; // bexti
|
||||
17'b0010011_0110100_001: BMUControlsD = `BMUCTRLW'b100_01_000_1_1_0_1_0_0_1_0_0; // binvi
|
||||
17'b0010011_0010100_001: BMUControlsD = `BMUCTRLW'b110_01_000_1_1_0_1_0_0_1_0_0; // bseti
|
||||
endcase
|
||||
else if (`XLEN==64) // ZBS 64-bit
|
||||
casez({OpD, Funct7D, Funct3D})
|
||||
17'b0010011_010010?_001: BMUControlsD = `BMUCTRLW'b111_01_000_1_1_0_1_1_0_1_0_0; // bclri (rv64)
|
||||
17'b0010011_010010?_101: BMUControlsD = `BMUCTRLW'b101_01_000_1_1_0_1_1_0_1_0_0; // bexti (rv64)
|
||||
17'b0010011_011010?_001: BMUControlsD = `BMUCTRLW'b100_01_000_1_1_0_1_0_0_1_0_0; // binvi (rv64)
|
||||
17'b0010011_001010?_001: BMUControlsD = `BMUCTRLW'b110_01_000_1_1_0_1_0_0_1_0_0; // bseti (rv64)
|
||||
endcase
|
||||
end
|
||||
if (`ZBB_SUPPORTED | `ZBS_SUPPORTED) // rv32i/64i shift instructions need BMU ALUSelect when BMU shifter is used
|
||||
casez({OpD, Funct7D, Funct3D})
|
||||
17'b0110011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_00_000_1_0_0_1_0_0_0_0_0; // sra, srl, sll
|
||||
17'b0010011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_00_000_1_1_0_1_0_0_0_0_0; // srai, srli, slli
|
||||
17'b0111011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_00_000_1_0_1_1_0_0_0_0_0; // sraw, srlw, sllw
|
||||
17'b0011011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_00_000_1_1_1_1_0_0_0_0_0; // sraiw, srliw, slliw
|
||||
endcase
|
||||
end
|
||||
|
||||
// Unpack Control Signals
|
||||
assign {BALUSelectD, BSelectD, ZBBSelectD, BRegWriteD,BALUSrcBD, BW64D, BALUOpD, BSubArithD, RotateD, MaskD, PreShiftD, IllegalBitmanipInstrD} = BMUControlsD;
|
||||
|
||||
// Pack BALUControl Signals
|
||||
assign BALUControlD = {RotateD, MaskD, PreShiftD};
|
||||
|
||||
// Comparator should perform signed comparison when min/max instruction. We have overlap in funct3 with some branch instructions so we use opcode to differentiate betwen min/max and branches
|
||||
assign BComparatorSignedD = (Funct3D[2]^Funct3D[0]) & ~OpD[6];
|
||||
|
||||
// Choose ALUSelect brom BMU for BMU operations, Funct3 for IEU operations, or 0 for addition
|
||||
assign ALUSelectD = BALUOpD ? BALUSelectD : (ALUOpD ? Funct3D : 3'b000);
|
||||
|
||||
// BMU Execute stage pipieline control register
|
||||
flopenrc#(10) controlregBMU(clk, reset, FlushE, ~StallE, {BSelectD, ZBBSelectD, BRegWriteD, BComparatorSignedD, BALUControlD}, {BSelectE, ZBBSelectE, BRegWriteE, BComparatorSignedE, BALUControlE});
|
||||
endmodule
|
46
src/ieu/bmu/byte.sv
Normal file
46
src/ieu/bmu/byte.sv
Normal file
|
@ -0,0 +1,46 @@
|
|||
///////////////////////////////////////////
|
||||
// byte.sv
|
||||
//
|
||||
// Written: Kevin Kim <kekim@hmc.edu>
|
||||
// Created: 1 February 2023
|
||||
// Modified: 6 March 2023
|
||||
//
|
||||
// Purpose: RISCV bitmanip byte-wise operation unit
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 15
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module byteUnit #(parameter WIDTH=32) (
|
||||
input logic [WIDTH-1:0] A, // Operands
|
||||
input logic ByteSelect, // LSB of Immediate
|
||||
output logic [WIDTH-1:0] ByteResult); // rev8, orcb result
|
||||
|
||||
logic [WIDTH-1:0] OrcBResult, Rev8Result;
|
||||
genvar i;
|
||||
|
||||
for (i=0;i<WIDTH;i+=8) begin:loop
|
||||
assign OrcBResult[i+7:i] = {8{|A[i+7:i]}};
|
||||
assign Rev8Result[WIDTH-i-1:WIDTH-i-8] = A[i+7:i];
|
||||
end
|
||||
|
||||
mux2 #(WIDTH) bytemux(Rev8Result, OrcBResult, ByteSelect, ByteResult);
|
||||
endmodule
|
51
src/ieu/bmu/clmul.sv
Normal file
51
src/ieu/bmu/clmul.sv
Normal file
|
@ -0,0 +1,51 @@
|
|||
///////////////////////////////////////////
|
||||
// clmul.sv
|
||||
//
|
||||
// Written: Kevin Kim <kekim@hmc.edu> and Kip Macsai-Goren <kmacsaigoren@hmc.edu>
|
||||
// Created: 1 February 2023
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: Carry-Less multiplication unit
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 15
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module clmul #(parameter WIDTH=32) (
|
||||
input logic [WIDTH-1:0] A, B, // Operands
|
||||
output logic [WIDTH-1:0] ClmulResult); // ZBS result
|
||||
|
||||
logic [(WIDTH*WIDTH)-1:0] s; // intermediary signals for carry-less multiply
|
||||
|
||||
integer i,j;
|
||||
|
||||
always_comb begin
|
||||
for (i=0;i<WIDTH;i++) begin: outer
|
||||
s[WIDTH*i]=A[0]&B[i];
|
||||
for (j=1;j<=i;j++) begin: inner
|
||||
s[WIDTH*i+j] = (A[j]&B[i-j])^s[WIDTH*i+j-1];
|
||||
end
|
||||
ClmulResult[i] = s[WIDTH*i+j-1];
|
||||
end
|
||||
end
|
||||
endmodule
|
||||
|
||||
|
65
src/ieu/bmu/cnt.sv
Normal file
65
src/ieu/bmu/cnt.sv
Normal file
|
@ -0,0 +1,65 @@
|
|||
|
||||
///////////////////////////////////////////
|
||||
// cnt.sv
|
||||
//
|
||||
// Written: Kevin Kim <kekim@hmc.edu>
|
||||
// Created: 4 February 2023
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: Count Instruction Submodule
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 15
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module cnt #(parameter WIDTH = 32) (
|
||||
input logic [WIDTH-1:0] A, RevA, // Operands
|
||||
input logic [4:0] B, // Last 5 bits of immediate
|
||||
input logic W64, // Indicates word operation
|
||||
output logic [WIDTH-1:0] CntResult // count result
|
||||
);
|
||||
|
||||
//count instructions
|
||||
logic [WIDTH-1:0] czResult; // count zeros result
|
||||
logic [WIDTH-1:0] cpopResult; // population count result
|
||||
logic [WIDTH-1:0] lzcA, popcntA;
|
||||
|
||||
//only in rv64
|
||||
if (WIDTH==64) begin
|
||||
//clz input select mux
|
||||
mux4 #(WIDTH) lzcmux64(A, {A[31:0],{32{1'b1}}}, RevA, {RevA[63:32],{32{1'b1}}}, {B[0],W64}, lzcA);
|
||||
//cpop select mux
|
||||
mux2 #(WIDTH) popcntmux64(A, {{32{1'b0}}, A[31:0]}, W64, popcntA);
|
||||
end
|
||||
//rv32
|
||||
else begin
|
||||
assign popcntA = A;
|
||||
mux2 #(WIDTH) lzcmux32(A, RevA, B[0], lzcA);
|
||||
end
|
||||
|
||||
lzc #(WIDTH) lzc(.num(lzcA), .ZeroCnt(czResult[$clog2(WIDTH):0]));
|
||||
popcnt #(WIDTH) popcntw(.num(popcntA), .PopCnt(cpopResult[$clog2(WIDTH):0]));
|
||||
// zero extend these results to fit into width
|
||||
assign czResult[WIDTH-1:$clog2(WIDTH)+1] = '0;
|
||||
assign cpopResult[WIDTH-1:$clog2(WIDTH)+1] = '0;
|
||||
|
||||
mux2 #(WIDTH) cntresultmux(czResult, cpopResult, B[1], CntResult);
|
||||
endmodule
|
45
src/ieu/bmu/ext.sv
Normal file
45
src/ieu/bmu/ext.sv
Normal file
|
@ -0,0 +1,45 @@
|
|||
|
||||
///////////////////////////////////////////
|
||||
// ext.sv
|
||||
//
|
||||
// Written: Kevin Kim <kekim@hmc.edu>
|
||||
// Created: 4 February 2023
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: Sign/Zero Extension Submodule
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 15
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module ext #(parameter WIDTH = 32) (
|
||||
input logic [WIDTH-1:0] A, // Operands
|
||||
input logic [1:0] ExtSelect, // B[2], B[0] of immediate
|
||||
output logic [WIDTH-1:0] ExtResult); // Extend Result
|
||||
|
||||
logic [WIDTH-1:0] sexthResult, zexthResult, sextbResult;
|
||||
|
||||
assign sexthResult = {{(WIDTH-16){A[15]}},A[15:0]};
|
||||
assign zexthResult = {{(WIDTH-16){1'b0}},A[15:0]};
|
||||
assign sextbResult = {{(WIDTH-8){A[7]}},A[7:0]};
|
||||
|
||||
mux3 #(WIDTH) extmux(sextbResult, sexthResult, zexthResult, ExtSelect, ExtResult);
|
||||
endmodule
|
44
src/ieu/bmu/popcnt.sv
Normal file
44
src/ieu/bmu/popcnt.sv
Normal file
|
@ -0,0 +1,44 @@
|
|||
|
||||
///////////////////////////////////////////
|
||||
// popccnt.sv
|
||||
// Written: Kevin Kim <kekim@hmc.edu>
|
||||
// Modified: 2/4/2023
|
||||
//
|
||||
// Purpose: Population Count
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 15
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module popcnt #(parameter WIDTH = 32) (
|
||||
input logic [WIDTH-1:0] num, // number to count total ones
|
||||
output logic [$clog2(WIDTH):0] PopCnt // the total number of ones
|
||||
);
|
||||
|
||||
logic [$clog2(WIDTH):0] sum;
|
||||
|
||||
always_comb begin
|
||||
sum = 0;
|
||||
for (int i=0;i<WIDTH;i++) begin:loop
|
||||
sum = (num[i]) ? sum + 1 : sum;
|
||||
end
|
||||
end
|
||||
|
||||
assign PopCnt = sum;
|
||||
endmodule
|
55
src/ieu/bmu/zbb.sv
Normal file
55
src/ieu/bmu/zbb.sv
Normal file
|
@ -0,0 +1,55 @@
|
|||
|
||||
///////////////////////////////////////////
|
||||
// zbb.sv
|
||||
//
|
||||
// Written: Kevin Kim <kekim@hmc.edu> and Kip Macsai-Goren <kmacsaigoren@hmc.edu>
|
||||
// Created: 2 February 2023
|
||||
// Modified: March 6 2023
|
||||
//
|
||||
// Purpose: RISC-V ZBB top level unit
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 15
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module zbb #(parameter WIDTH=32) (
|
||||
input logic [WIDTH-1:0] A, RevA, B, // Operands
|
||||
input logic [WIDTH-1:0] ALUResult, // ALU Result
|
||||
input logic W64, // Indicates word operation
|
||||
input logic lt, // lt flag
|
||||
input logic [2:0] ZBBSelect, // Indicates word operation
|
||||
output logic [WIDTH-1:0] ZBBResult); // ZBB result
|
||||
|
||||
logic [WIDTH-1:0] CntResult; // count result
|
||||
logic [WIDTH-1:0] MinMaxResult; // min,max result
|
||||
logic [WIDTH-1:0] ByteResult; // byte results
|
||||
logic [WIDTH-1:0] ExtResult; // sign/zero extend results
|
||||
|
||||
cnt #(WIDTH) cnt(.A, .RevA, .B(B[4:0]), .W64, .CntResult);
|
||||
byteUnit #(WIDTH) bu(.A, .ByteSelect(B[0]), .ByteResult);
|
||||
ext #(WIDTH) ext(.A, .ExtSelect({~B[2], {B[2] & B[0]}}), .ExtResult);
|
||||
|
||||
// ZBBSelect[2] differentiates between min(u) vs max(u) instruction
|
||||
mux2 #(WIDTH) minmaxmux(B, A, lt^ZBBSelect[2], MinMaxResult);
|
||||
|
||||
// ZBB Result select mux
|
||||
mux4 #(WIDTH) zbbresultmux(CntResult, ExtResult, ByteResult, MinMaxResult, ZBBSelect[1:0], ZBBResult);
|
||||
endmodule
|
54
src/ieu/bmu/zbc.sv
Normal file
54
src/ieu/bmu/zbc.sv
Normal file
|
@ -0,0 +1,54 @@
|
|||
///////////////////////////////////////////
|
||||
// zbc.sv
|
||||
//
|
||||
// Written: Kevin Kim <kekim@hmc.edu> and Kip Macsai-Goren <kmacsaigoren@hmc.edu>
|
||||
// Created: 2 February 2023
|
||||
// Modified: 3 March 2023
|
||||
//
|
||||
// Purpose: RISC-V ZBC top-level unit
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 15
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module zbc #(parameter WIDTH=32) (
|
||||
input logic [WIDTH-1:0] A, RevA, B, // Operands
|
||||
input logic [2:0] Funct3, // Indicates operation to perform
|
||||
output logic [WIDTH-1:0] ZBCResult); // ZBC result
|
||||
|
||||
logic [WIDTH-1:0] ClmulResult, RevClmulResult;
|
||||
logic [WIDTH-1:0] RevB;
|
||||
logic [WIDTH-1:0] x,y;
|
||||
logic [1:0] select;
|
||||
|
||||
assign select = ~Funct3[1:0];
|
||||
|
||||
bitreverse #(WIDTH) brB(.A(B), .RevA(RevB));
|
||||
|
||||
mux3 #(WIDTH) xmux({RevA[WIDTH-2:0], {1'b0}}, RevA, A, select, x);
|
||||
mux3 #(WIDTH) ymux({{1'b0},RevB[WIDTH-2:0]}, RevB, B, select, y);
|
||||
|
||||
clmul #(WIDTH) clm(.A(x), .B(y), .ClmulResult(ClmulResult));
|
||||
|
||||
bitreverse #(WIDTH) brClmulResult(.A(ClmulResult), .RevA(RevClmulResult));
|
||||
|
||||
mux2 #(WIDTH) zbcresultmux(ClmulResult, RevClmulResult, Funct3[1], ZBCResult);
|
||||
endmodule
|
|
@ -1,9 +1,9 @@
|
|||
///////////////////////////////////////////
|
||||
// controller.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu, Sarah.Harris@unlv.edu
|
||||
// Written: David_Harris@hmc.edu, Sarah.Harris@unlv.edu, kekim@hmc.edu
|
||||
// Created: 9 January 2021
|
||||
// Modified:
|
||||
// Modified: 3 March 2023
|
||||
//
|
||||
// Purpose: Top level controller module
|
||||
//
|
||||
|
@ -31,7 +31,7 @@
|
|||
|
||||
|
||||
module controller(
|
||||
input logic clk, reset,
|
||||
input logic clk, reset,
|
||||
// Decode stage control signals
|
||||
input logic StallD, FlushD, // Stall, flush Decode stage
|
||||
input logic [31:0] InstrD, // Instruction in Decode stage
|
||||
|
@ -41,22 +41,27 @@ module controller(
|
|||
output logic JumpD, // Jump instruction
|
||||
output logic BranchD, // Branch instruction
|
||||
// Execute stage control signals
|
||||
input logic StallE, FlushE, // Stall, flush Execute stage
|
||||
input logic StallE, FlushE, // Stall, flush Execute stage
|
||||
input logic [1:0] FlagsE, // Comparison flags ({eq, lt})
|
||||
input logic FWriteIntE, // Write integer register, coming from FPU controller
|
||||
output logic PCSrcE, // Select signal to choose next PC (for datapath and Hazard unit)
|
||||
output logic [2:0] ALUControlE, // ALU operation to perform
|
||||
output logic ALUSrcAE, ALUSrcBE, // ALU operands
|
||||
output logic ALUSrcAE, ALUSrcBE, // ALU operands
|
||||
output logic ALUResultSrcE, // Selects result to pass on to Memory stage
|
||||
output logic [2:0] ALUSelectE, // ALU mux select signal
|
||||
output logic MemReadE, CSRReadE, // Instruction reads memory, reads a CSR (needed for Hazard unit)
|
||||
output logic [2:0] Funct3E, // Instruction's funct3 field
|
||||
output logic IntDivE, // Integer divide
|
||||
output logic MDUE, // MDU (multiply/divide) operatio
|
||||
output logic W64E, // RV64 W-type operation
|
||||
output logic SubArithE, // Subtraction or arithmetic shift
|
||||
output logic JumpE, // jump instruction
|
||||
output logic BranchE, // Branch instruction
|
||||
output logic SCE, // Store Conditional instruction
|
||||
output logic BranchSignedE, // Branch comparison operands are signed (if it's a branch)
|
||||
output logic [1:0] BSelectE, // One-Hot encoding of if it's ZBA_ZBB_ZBC_ZBS instruction
|
||||
output logic [2:0] ZBBSelectE, // ZBB mux select signal in Execute stage
|
||||
output logic [2:0] BALUControlE, // ALU Control signals for B instructions in Execute Stage
|
||||
|
||||
// Memory stage control signals
|
||||
input logic StallM, FlushM, // Stall, flush Memory stage
|
||||
output logic [1:0] MemRWM, // Mem read/write: MemRWM[1] = 1 for read, MemRWM[0] = 1 for write
|
||||
|
@ -69,7 +74,7 @@ module controller(
|
|||
output logic FWriteIntM, // FPU controller writes integer register file
|
||||
// Writeback stage control signals
|
||||
input logic StallW, FlushW, // Stall, flush Writeback stage
|
||||
output logic RegWriteW, IntDivW, // Instruction writes a register, is an integer divide
|
||||
output logic RegWriteW, IntDivW, // Instruction writes a register, is an integer divide
|
||||
output logic [2:0] ResultSrcW, // Select source of result to write back to register file
|
||||
// Stall during CSRs
|
||||
output logic CSRWriteFenceM, // CSR write or fence instruction; needs to flush the following instructions
|
||||
|
@ -84,12 +89,16 @@ module controller(
|
|||
`define CTRLW 23
|
||||
|
||||
// pipelined control signals
|
||||
logic RegWriteD, RegWriteE; // RegWrite (register will be written)
|
||||
logic RegWriteD, RegWriteE; // RegWrite (register will be written)
|
||||
logic [2:0] ResultSrcD, ResultSrcE, ResultSrcM; // Select which result to write back to register file
|
||||
logic [1:0] MemRWD, MemRWE; // Store (write to memory)
|
||||
logic ALUOpD; // 0 for address generation, 1 for all other operations (must use Funct3)
|
||||
logic ALUOpD; // 0 for address generation, 1 for all other operations (must use Funct3)
|
||||
logic BaseW64D; // W64 for Base instructions specifically
|
||||
logic BaseRegWriteD; // Indicates if Base instruction register write instruction
|
||||
logic BaseSubArithD; // Indicates if Base instruction subtracts, sra, slt, sltu
|
||||
logic BaseALUSrcBD; // Base instruction ALU B source select signal
|
||||
logic [2:0] ALUControlD; // Determines ALU operation
|
||||
logic ALUSrcAD, ALUSrcBD; // ALU inputs
|
||||
logic ALUSrcAD, ALUSrcBD; // ALU inputs
|
||||
logic ALUResultSrcD, W64D, MDUD; // ALU result, is RV64 W-type, is multiply/divide instruction
|
||||
logic CSRZeroSrcD; // Ignore setting and clearing zeros to CSR
|
||||
logic CSRReadD; // CSR read instruction
|
||||
|
@ -100,22 +109,28 @@ module controller(
|
|||
logic PrivilegedD, PrivilegedE; // Privileged instruction
|
||||
logic InvalidateICacheE, FlushDCacheE;// Invalidate I$, flush D$
|
||||
logic [`CTRLW-1:0] ControlsD; // Main Instruction Decoder control signals
|
||||
logic SubArithD; // TRUE for R-type subtracts and sra, slt, sltu
|
||||
logic SubArithD; // TRUE for R-type subtracts and sra, slt, sltu or B-type ext clr, andn, orn, xnor
|
||||
logic subD, sraD, sltD, sltuD; // Indicates if is one of these instructions
|
||||
logic ALUOpE; // 0 for address generationm 1 for ALU operations
|
||||
logic BranchTakenE; // Branch is taken
|
||||
logic eqE, ltE; // Comparator outputs
|
||||
logic unused;
|
||||
logic BranchFlagE; // Branch flag to use (chosen between eq or lt)
|
||||
logic BranchFlagE; // Branch flag to use (chosen between eq or lt)
|
||||
logic IEURegWriteE; // Register write
|
||||
logic BRegWriteE; // Register write from BMU controller in Execute Stage
|
||||
logic IllegalERegAdrD; // RV32E attempts to write upper 16 registers
|
||||
logic [1:0] AtomicE; // Atomic instruction
|
||||
logic FenceD, FenceE; // Fence instruction
|
||||
logic SFenceVmaD; // sfence.vma instruction
|
||||
logic IntDivM; // Integer divide instruction
|
||||
logic [1:0] BSelectD; // One-Hot encoding if it's ZBA_ZBB_ZBC_ZBS instruction in decode stage
|
||||
logic [2:0] ZBBSelectD; // ZBB Mux Select Signal
|
||||
logic BComparatorSignedE; // Indicates if max, min (signed comarison) instruction in Execute Stage
|
||||
logic IFunctD, RFunctD, MFunctD; // Detect I, R, and M-type RV32IM/Rv64IM instructions
|
||||
logic LFunctD, SFunctD, BFunctD; // Detect load, store, branch instructions
|
||||
logic JFunctD; // detect jalr instruction
|
||||
logic FenceM; // Fence.I or sfence.VMA instruction in memory stage
|
||||
logic [2:0] ALUSelectD; // ALU Output selection mux control
|
||||
|
||||
// Extract fields
|
||||
assign OpD = InstrD[6:0];
|
||||
|
@ -131,29 +146,29 @@ module controller(
|
|||
logic Funct7ZeroD, Funct7b5D, IShiftD, INoShiftD;
|
||||
logic Funct7ShiftZeroD, Funct7Shiftb5D;
|
||||
|
||||
assign Funct7ZeroD = (Funct7D == 7'b0000000); // most R-type instructions
|
||||
assign Funct7b5D = (Funct7D == 7'b0100000); // srai, sub
|
||||
assign Funct7ZeroD = (Funct7D == 7'b0000000); // most R-type instructions
|
||||
assign Funct7b5D = (Funct7D == 7'b0100000); // srai, sub
|
||||
assign Funct7ShiftZeroD = (`XLEN==64) ? (Funct7D[6:1] == 6'b000000) : Funct7ZeroD;
|
||||
assign Funct7Shiftb5D = (`XLEN==64) ? (Funct7D[6:1] == 6'b010000) : Funct7b5D;
|
||||
assign IShiftD = (Funct3D == 3'b001 & Funct7ShiftZeroD) | (Funct3D == 3'b101 & (Funct7ShiftZeroD | Funct7Shiftb5D)); // slli, srli, srai, or w forms
|
||||
assign INoShiftD = ((Funct3D != 3'b001) & (Funct3D != 3'b101));
|
||||
assign IFunctD = IShiftD | INoShiftD;
|
||||
assign RFunctD = ((Funct3D == 3'b000 | Funct3D == 3'b101) & Funct7b5D) | Funct7ZeroD;
|
||||
assign MFunctD = (Funct7D == 7'b0000001) & (`M_SUPPORTED | (`ZMMUL_SUPPORTED & ~Funct3D[2])); // muldiv
|
||||
assign LFunctD = Funct3D == 3'b000 | Funct3D == 3'b001 | Funct3D == 3'b010 | Funct3D == 3'b100 | Funct3D == 3'b101 |
|
||||
((`XLEN == 64) & (Funct3D == 3'b011 | Funct3D == 3'b110));
|
||||
assign SFunctD = Funct3D == 3'b000 | Funct3D == 3'b001 | Funct3D == 3'b010 |
|
||||
((`XLEN == 64) & (Funct3D == 3'b011));
|
||||
assign BFunctD = (Funct3D[2:1] != 2'b01); // legal branches
|
||||
assign JFunctD = (Funct3D == 3'b000);
|
||||
assign IShiftD = (Funct3D == 3'b001 & Funct7ShiftZeroD) | (Funct3D == 3'b101 & (Funct7ShiftZeroD | Funct7Shiftb5D)); // slli, srli, srai, or w forms
|
||||
assign INoShiftD = ((Funct3D != 3'b001) & (Funct3D != 3'b101));
|
||||
assign IFunctD = IShiftD | INoShiftD;
|
||||
assign RFunctD = ((Funct3D == 3'b000 | Funct3D == 3'b101) & Funct7b5D) | Funct7ZeroD;
|
||||
assign MFunctD = (Funct7D == 7'b0000001) & (`M_SUPPORTED | (`ZMMUL_SUPPORTED & ~Funct3D[2])); // muldiv
|
||||
assign LFunctD = Funct3D == 3'b000 | Funct3D == 3'b001 | Funct3D == 3'b010 | Funct3D == 3'b100 | Funct3D == 3'b101 |
|
||||
((`XLEN == 64) & (Funct3D == 3'b011 | Funct3D == 3'b110));
|
||||
assign SFunctD = Funct3D == 3'b000 | Funct3D == 3'b001 | Funct3D == 3'b010 |
|
||||
((`XLEN == 64) & (Funct3D == 3'b011));
|
||||
assign BFunctD = (Funct3D[2:1] != 2'b01); // legal branches
|
||||
assign JFunctD = (Funct3D == 3'b000);
|
||||
end else begin:legalcheck2
|
||||
assign IFunctD = 1; // Don't bother to separate out shift decoding
|
||||
assign RFunctD = ~Funct7D[0]; // Not a multiply
|
||||
assign MFunctD = Funct7D[0] & (`M_SUPPORTED | (`ZMMUL_SUPPORTED & ~Funct3D[2])); // muldiv
|
||||
assign LFunctD = 1; // don't bother to check Funct3 for loads
|
||||
assign SFunctD = 1; // don't bother to check Funct3 for stores
|
||||
assign BFunctD = 1; // don't bother to check Funct3 for branches
|
||||
assign JFunctD = 1; // don't bother to check Funct3 for jumps
|
||||
assign IFunctD = 1; // Don't bother to separate out shift decoding
|
||||
assign RFunctD = ~Funct7D[0]; // Not a multiply
|
||||
assign MFunctD = Funct7D[0] & (`M_SUPPORTED | (`ZMMUL_SUPPORTED & ~Funct3D[2])); // muldiv
|
||||
assign LFunctD = 1; // don't bother to check Funct3 for loads
|
||||
assign SFunctD = 1; // don't bother to check Funct3 for stores
|
||||
assign BFunctD = 1; // don't bother to check Funct3 for branches
|
||||
assign JFunctD = 1; // don't bother to check Funct3 for jumps
|
||||
end
|
||||
|
||||
// Main Instruction Decoder
|
||||
|
@ -167,7 +182,7 @@ module controller(
|
|||
7'b0000111: ControlsD = `CTRLW'b0_000_01_10_001_0_0_0_0_0_0_0_0_0_00_1; // flw - only legal if FP supported
|
||||
7'b0001111: if (`ZIFENCEI_SUPPORTED)
|
||||
ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_0_1_0_00_0; // fence
|
||||
else
|
||||
else
|
||||
ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_0_0_0_00_0; // fence treated as nop
|
||||
7'b0010011: if (IFunctD)
|
||||
ControlsD = `CTRLW'b1_000_01_00_000_0_1_0_0_0_0_0_0_0_00_0; // I-type ALU
|
||||
|
@ -213,24 +228,61 @@ module controller(
|
|||
// Squash control signals if coming from an illegal compressed instruction
|
||||
// On RV32E, can't write to upper 16 registers. Checking reads to upper 16 is more costly so disregard them.
|
||||
assign IllegalERegAdrD = `E_SUPPORTED & `ZICSR_SUPPORTED & ControlsD[`CTRLW-1] & InstrD[11];
|
||||
assign IllegalBaseInstrD = ControlsD[0] | IllegalERegAdrD;
|
||||
assign {RegWriteD, ImmSrcD, ALUSrcAD, ALUSrcBD, MemRWD,
|
||||
ResultSrcD, BranchD, ALUOpD, JumpD, ALUResultSrcD, W64D, CSRReadD,
|
||||
//assign IllegalBaseInstrD = 1'b0;
|
||||
assign {BaseRegWriteD, ImmSrcD, ALUSrcAD, BaseALUSrcBD, MemRWD,
|
||||
ResultSrcD, BranchD, ALUOpD, JumpD, ALUResultSrcD, BaseW64D, CSRReadD,
|
||||
PrivilegedD, FenceXD, MDUD, AtomicD, unused} = IllegalIEUFPUInstrD ? `CTRLW'b0 : ControlsD;
|
||||
|
||||
|
||||
assign CSRZeroSrcD = InstrD[14] ? (InstrD[19:15] == 0) : (Rs1D == 0); // Is a CSR instruction using zero as the source?
|
||||
assign CSRWriteD = CSRReadD & !(CSRZeroSrcD & InstrD[13]); // Don't write if setting or clearing zeros
|
||||
assign SFenceVmaD = PrivilegedD & (InstrD[31:25] == 7'b0001001);
|
||||
assign FenceD = SFenceVmaD | FenceXD; // possible sfence.vma or fence.i
|
||||
|
||||
|
||||
// ALU Decoding is lazy, only using func7[5] to distinguish add/sub and srl/sra
|
||||
assign sltD = (Funct3D == 3'b010);
|
||||
assign sltuD = (Funct3D == 3'b011);
|
||||
assign sltuD = (Funct3D == 3'b011);
|
||||
assign subD = (Funct3D == 3'b000 & Funct7D[5] & OpD[5]); // OpD[5] needed to distinguish sub from addi
|
||||
assign sraD = (Funct3D == 3'b101 & Funct7D[5]);
|
||||
assign SubArithD = ALUOpD & (subD | sraD | sltD | sltuD); // TRUE for R-type subtracts and sra, slt, sltu
|
||||
assign ALUControlD = {W64D, SubArithD, ALUOpD};
|
||||
assign BaseSubArithD = ALUOpD & (subD | sraD | sltD | sltuD);
|
||||
|
||||
// bit manipulation Configuration Block
|
||||
if (`ZBS_SUPPORTED | `ZBA_SUPPORTED | `ZBB_SUPPORTED | `ZBC_SUPPORTED) begin: bitmanipi //change the conditional expression to OR any Z supported flags
|
||||
logic IllegalBitmanipInstrD; // Unrecognized B instruction
|
||||
logic BRegWriteD; // Indicates if it is a R type BMU instruction in decode stage
|
||||
logic BW64D; // Indicates if it is a W type BMU instruction in decode stage
|
||||
logic BSubArithD; // TRUE for BMU ext, clr, andn, orn, xnor
|
||||
logic BALUSrcBD; // BMU alu src select signal
|
||||
|
||||
bmuctrl bmuctrl(.clk, .reset, .StallD, .FlushD, .InstrD, .ALUOpD, .BSelectD, .ZBBSelectD,
|
||||
.BRegWriteD, .BALUSrcBD, .BW64D, .BSubArithD, .IllegalBitmanipInstrD, .StallE, .FlushE,
|
||||
.ALUSelectD, .BSelectE, .ZBBSelectE, .BRegWriteE, .BComparatorSignedE, .BALUControlE);
|
||||
if (`ZBA_SUPPORTED) begin
|
||||
// ALU Decoding is more comprehensive when ZBA is supported. slt and slti conflicts with sh1add, sh1add.uw
|
||||
assign sltD = (Funct3D == 3'b010 & (~(Funct7D[4]) | ~OpD[5])) ;
|
||||
end else assign sltD = (Funct3D == 3'b010);
|
||||
|
||||
// Combine base and bit manipulation signals
|
||||
assign IllegalBaseInstrD = (ControlsD[0] & IllegalBitmanipInstrD) | IllegalERegAdrD ;
|
||||
assign RegWriteD = BaseRegWriteD | BRegWriteD;
|
||||
assign W64D = BaseW64D | BW64D;
|
||||
assign ALUSrcBD = BaseALUSrcBD | BALUSrcBD;
|
||||
assign SubArithD = BaseSubArithD | BSubArithD; // TRUE If BMU or R-type instruction involves inverted operand
|
||||
|
||||
end else begin: bitmanipi
|
||||
assign ALUSelectD = ALUOpD ? Funct3D : 3'b000; // add for address generation when not doing ALU operation
|
||||
assign sltD = (Funct3D == 3'b010);
|
||||
assign IllegalBaseInstrD = ControlsD[0] | IllegalERegAdrD ;
|
||||
assign RegWriteD = BaseRegWriteD;
|
||||
assign W64D = BaseW64D;
|
||||
assign ALUSrcBD = BaseALUSrcBD;
|
||||
assign SubArithD = BaseSubArithD; // TRUE If B-type or R-type instruction involves inverted operand
|
||||
|
||||
// tie off unused bit manipulation signals
|
||||
assign BSelectE = 2'b00;
|
||||
assign BSelectD = 2'b00;
|
||||
assign ZBBSelectE = 3'b000;
|
||||
assign BComparatorSignedE = 1'b0;
|
||||
assign BALUControlE = 3'b0;
|
||||
end
|
||||
|
||||
// Fences
|
||||
// Ordinary fence is presently a nop
|
||||
|
@ -249,14 +301,15 @@ module controller(
|
|||
flopenrc #(1) controlregD(clk, reset, FlushD, ~StallD, 1'b1, InstrValidD);
|
||||
|
||||
// Execute stage pipeline control register and logic
|
||||
flopenrc #(28) controlregE(clk, reset, FlushE, ~StallE,
|
||||
{RegWriteD, ResultSrcD, MemRWD, JumpD, BranchD, ALUControlD, ALUSrcAD, ALUSrcBD, ALUResultSrcD, CSRReadD, CSRWriteD, PrivilegedD, Funct3D, W64D, MDUD, AtomicD, InvalidateICacheD, FlushDCacheD, FenceD, InstrValidD},
|
||||
{IEURegWriteE, ResultSrcE, MemRWE, JumpE, BranchE, ALUControlE, ALUSrcAE, ALUSrcBE, ALUResultSrcE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, W64E, MDUE, AtomicE, InvalidateICacheE, FlushDCacheE, FenceE, InstrValidE});
|
||||
flopenrc #(29) controlregE(clk, reset, FlushE, ~StallE,
|
||||
{ALUSelectD, RegWriteD, ResultSrcD, MemRWD, JumpD, BranchD, ALUSrcAD, ALUSrcBD, ALUResultSrcD, CSRReadD, CSRWriteD, PrivilegedD, Funct3D, W64D, SubArithD, MDUD, AtomicD, InvalidateICacheD, FlushDCacheD, FenceD, InstrValidD},
|
||||
{ALUSelectE, IEURegWriteE, ResultSrcE, MemRWE, JumpE, BranchE, ALUSrcAE, ALUSrcBE, ALUResultSrcE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, W64E, SubArithE, MDUE, AtomicE, InvalidateICacheE, FlushDCacheE, FenceE, InstrValidE});
|
||||
|
||||
// Branch Logic
|
||||
// The comparator handles both signed and unsigned branches using BranchSignedE
|
||||
// Hence, only eq and lt flags are needed
|
||||
assign BranchSignedE = ~(Funct3E[2:1] == 2'b11);
|
||||
// We also want comparator to handle signed comparison on a max/min bitmanip instruction
|
||||
assign BranchSignedE = (~(Funct3E[2:1] == 2'b11) & BranchE) | BComparatorSignedE;
|
||||
assign {eqE, ltE} = FlagsE;
|
||||
mux2 #(1) branchflagmux(eqE, ltE, Funct3E[2], BranchFlagE);
|
||||
assign BranchTakenE = BranchFlagE ^ Funct3E[0];
|
||||
|
|
|
@ -40,11 +40,16 @@ module datapath (
|
|||
input logic [2:0] Funct3E, // Funct3 field of instruction in Execute stage
|
||||
input logic StallE, FlushE, // Stall, flush Execute stage
|
||||
input logic [1:0] ForwardAE, ForwardBE, // Forward ALU operands from later stages
|
||||
input logic [2:0] ALUControlE, // Indicate operation ALU performs
|
||||
input logic W64E, // W64-type instruction
|
||||
input logic SubArithE, // Subtraction or arithmetic shift
|
||||
input logic ALUSrcAE, ALUSrcBE, // ALU operands
|
||||
input logic ALUResultSrcE, // Selects result to pass on to Memory stage
|
||||
input logic [2:0] ALUSelectE, // ALU mux select signal
|
||||
input logic JumpE, // Is a jump (j) instruction
|
||||
input logic BranchSignedE, // Branch comparison operands are signed (if it's a branch)
|
||||
input logic [1:0] BSelectE, // One hot encoding of ZBA_ZBB_ZBC_ZBS instruction
|
||||
input logic [2:0] ZBBSelectE, // ZBB mux select signal
|
||||
input logic [2:0] BALUControlE, // ALU Control signals for B instructions in Execute Stage
|
||||
output logic [1:0] FlagsE, // Comparison flags ({eq, lt})
|
||||
output logic [`XLEN-1:0] IEUAdrE, // Address computed by ALU
|
||||
output logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // ALU sources before the mux chooses between them and PCE to put in srcA/B
|
||||
|
@ -56,7 +61,7 @@ module datapath (
|
|||
output logic [`XLEN-1:0] WriteDataM, // Write data in Memory stage
|
||||
// Writeback stage signals
|
||||
input logic StallW, FlushW, // Stall, flush Writeback stage
|
||||
input logic RegWriteW, IntDivW, // Write register file, integer divide instruction
|
||||
input logic RegWriteW, IntDivW, // Write register file, integer divide instruction
|
||||
input logic SquashSCW, // Squash a store conditional when a conflict arose
|
||||
input logic [2:0] ResultSrcW, // Select source of result to write back to register file
|
||||
input logic [`XLEN-1:0] FCvtIntResW, // FPU convert fp to integer result
|
||||
|
@ -103,20 +108,20 @@ module datapath (
|
|||
flopenrc #(5) Rs1EReg(clk, reset, FlushE, ~StallE, Rs1D, Rs1E);
|
||||
flopenrc #(5) Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E);
|
||||
flopenrc #(5) RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE);
|
||||
|
||||
|
||||
mux3 #(`XLEN) faemux(R1E, ResultW, IFResultM, ForwardAE, ForwardedSrcAE);
|
||||
mux3 #(`XLEN) fbemux(R2E, ResultW, IFResultM, ForwardBE, ForwardedSrcBE);
|
||||
comparator #(`XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, BranchSignedE, FlagsE);
|
||||
mux2 #(`XLEN) srcamux(ForwardedSrcAE, PCE, ALUSrcAE, SrcAE);
|
||||
mux2 #(`XLEN) srcbmux(ForwardedSrcBE, ImmExtE, ALUSrcBE, SrcBE);
|
||||
alu #(`XLEN) alu(SrcAE, SrcBE, ALUControlE, Funct3E, ALUResultE, IEUAdrE);
|
||||
alu #(`XLEN) alu(SrcAE, SrcBE, W64E, SubArithE, ALUSelectE, BSelectE, ZBBSelectE, Funct3E, FlagsE, BALUControlE, ALUResultE, IEUAdrE);
|
||||
mux2 #(`XLEN) altresultmux(ImmExtE, PCLinkE, JumpE, AltResultE);
|
||||
mux2 #(`XLEN) ieuresultmux(ALUResultE, AltResultE, ALUResultSrcE, IEUResultE);
|
||||
|
||||
// Memory stage pipeline register
|
||||
flopenrc #(`XLEN) SrcAMReg(clk, reset, FlushM, ~StallM, SrcAE, SrcAM);
|
||||
flopenrc #(`XLEN) IEUResultMReg(clk, reset, FlushM, ~StallM, IEUResultE, IEUResultM);
|
||||
flopenrc #(5) RdMReg(clk, reset, FlushM, ~StallM, RdE, RdM);
|
||||
flopenrc #(5) RdMReg(clk, reset, FlushM, ~StallM, RdE, RdM);
|
||||
flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, ForwardedSrcBE, WriteDataM);
|
||||
|
||||
// Writeback stage pipeline register and logic
|
||||
|
@ -141,4 +146,4 @@ module datapath (
|
|||
// handle Store Conditional result if atomic extension supported
|
||||
if (`A_SUPPORTED) assign SCResultW = {{(`XLEN-1){1'b0}}, SquashSCW};
|
||||
else assign SCResultW = 0;
|
||||
endmodule
|
||||
endmodule
|
|
@ -34,7 +34,7 @@ module forward(
|
|||
input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW, // Source and destination registers
|
||||
input logic MemReadE, MDUE, CSRReadE, // Execute stage instruction is a load (MemReadE), divide (MDUE), or CSR read (CSRReadE)
|
||||
input logic RegWriteM, RegWriteW, // Instruction in Memory or Writeback stage writes register file
|
||||
input logic FCvtIntE, // FPU convert float to int
|
||||
input logic FCvtIntE, // FPU convert float to int
|
||||
input logic SCE, // Store Conditional instruction
|
||||
// Forwarding controls
|
||||
output logic [1:0] ForwardAE, ForwardBE, // Select signals for forwarding multiplexers
|
||||
|
|
|
@ -29,27 +29,27 @@
|
|||
`include "wally-config.vh"
|
||||
|
||||
module ieu (
|
||||
input logic clk, reset,
|
||||
input logic clk, reset,
|
||||
// Decode stage signals
|
||||
input logic [31:0] InstrD, // Instruction
|
||||
input logic IllegalIEUFPUInstrD, // Illegal instruction
|
||||
output logic IllegalBaseInstrD, // Illegal I-type instruction, or illegal RV32 access to upper 16 registers
|
||||
input logic [31:0] InstrD, // Instruction
|
||||
input logic IllegalIEUFPUInstrD, // Illegal instruction
|
||||
output logic IllegalBaseInstrD, // Illegal I-type instruction, or illegal RV32 access to upper 16 registers
|
||||
// Execute stage signals
|
||||
input logic [`XLEN-1:0] PCE, // PC
|
||||
input logic [`XLEN-1:0] PCLinkE, // PC + 4
|
||||
output logic PCSrcE, // Select next PC (between PC+4 and IEUAdrE)
|
||||
input logic FWriteIntE, FCvtIntE, // FPU writes to integer register file, FPU converts float to int
|
||||
output logic PCSrcE, // Select next PC (between PC+4 and IEUAdrE)
|
||||
input logic FWriteIntE, FCvtIntE, // FPU writes to integer register file, FPU converts float to int
|
||||
output logic [`XLEN-1:0] IEUAdrE, // Memory address
|
||||
output logic IntDivE, W64E, // Integer divide, RV64 W-type instruction
|
||||
output logic [2:0] Funct3E, // Funct3 instruction field
|
||||
output logic IntDivE, W64E, // Integer divide, RV64 W-type instruction
|
||||
output logic [2:0] Funct3E, // Funct3 instruction field
|
||||
output logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // ALU src inputs before the mux choosing between them and PCE to put in srcA/B
|
||||
output logic [4:0] RdE, // Destination register
|
||||
// Memory stage signals
|
||||
input logic SquashSCW, // Squash store conditional, from LSU
|
||||
output logic [1:0] MemRWM, // Read/write control goes to LSU
|
||||
output logic [1:0] AtomicM, // Atomic control goes to LSU
|
||||
input logic SquashSCW, // Squash store conditional, from LSU
|
||||
output logic [1:0] MemRWM, // Read/write control goes to LSU
|
||||
output logic [1:0] AtomicM, // Atomic control goes to LSU
|
||||
output logic [`XLEN-1:0] WriteDataM, // Write data to LSU
|
||||
output logic [2:0] Funct3M, // Funct3 (size and signedness) to LSU
|
||||
output logic [2:0] Funct3M, // Funct3 (size and signedness) to LSU
|
||||
output logic [`XLEN-1:0] SrcAM, // ALU SrcA to Privileged unit and FPU
|
||||
output logic [4:0] RdM, // Destination register
|
||||
input logic [`XLEN-1:0] FIntResM, // Integer result from FPU (fmv, fclass, fcmp)
|
||||
|
@ -66,23 +66,27 @@ module ieu (
|
|||
output logic [4:0] RdW, // Destination register
|
||||
input logic [`XLEN-1:0] ReadDataW, // LSU's read data
|
||||
// Hazard unit signals
|
||||
input logic StallD, StallE, StallM, StallW, // Stall signals from hazard unit
|
||||
input logic FlushD, FlushE, FlushM, FlushW, // Flush signals
|
||||
output logic FCvtIntStallD, LoadStallD, // Stall causes from IEU to hazard unit
|
||||
input logic StallD, StallE, StallM, StallW, // Stall signals from hazard unit
|
||||
input logic FlushD, FlushE, FlushM, FlushW, // Flush signals
|
||||
output logic FCvtIntStallD, LoadStallD, // Stall causes from IEU to hazard unit
|
||||
output logic MDUStallD, CSRRdStallD, StoreStallD,
|
||||
output logic CSRReadM, CSRWriteM, PrivilegedM,// CSR read, CSR write, is privileged instruction
|
||||
output logic CSRWriteFenceM // CSR write or fence instruction needs to flush subsequent instructions
|
||||
output logic CSRReadM, CSRWriteM, PrivilegedM,// CSR read, CSR write, is privileged instruction
|
||||
output logic CSRWriteFenceM // CSR write or fence instruction needs to flush subsequent instructions
|
||||
);
|
||||
|
||||
logic [2:0] ImmSrcD; // Select type of immediate extension
|
||||
logic [1:0] FlagsE; // Comparison flags ({eq, lt})
|
||||
logic [2:0] ALUControlE; // ALU control indicates function to perform
|
||||
logic ALUSrcAE, ALUSrcBE; // ALU source operands
|
||||
logic [2:0] ResultSrcW; // Selects result in Writeback stage
|
||||
logic ALUResultSrcE; // Selects ALU result to pass on to Memory stage
|
||||
logic [2:0] ALUSelectE; // ALU select mux signal
|
||||
logic SCE; // Store Conditional instruction
|
||||
logic FWriteIntM; // FPU writing to integer register file
|
||||
logic IntDivW; // Integer divide instruction
|
||||
logic [1:0] BSelectE; // Indicates if ZBA_ZBB_ZBC_ZBS instruction in one-hot encoding
|
||||
logic [2:0] ZBBSelectE; // ZBB Result Select Signal in Execute Stage
|
||||
logic [2:0] BALUControlE; // ALU Control signals for B instructions in Execute Stage
|
||||
logic SubArithE; // Subtraction or arithmetic shift
|
||||
|
||||
// Forwarding signals
|
||||
logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E; // Source and destination registers
|
||||
|
@ -92,19 +96,19 @@ module ieu (
|
|||
logic BranchSignedE; // Branch does signed comparison on operands
|
||||
logic MDUE; // Multiply/divide instruction
|
||||
|
||||
controller c(
|
||||
controller c(
|
||||
.clk, .reset, .StallD, .FlushD, .InstrD, .ImmSrcD,
|
||||
.IllegalIEUFPUInstrD, .IllegalBaseInstrD, .StallE, .FlushE, .FlagsE, .FWriteIntE,
|
||||
.PCSrcE, .ALUControlE, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .MemReadE, .CSRReadE,
|
||||
.Funct3E, .IntDivE, .MDUE, .W64E, .BranchD, .BranchE, .JumpD, .JumpE, .SCE, .BranchSignedE, .StallM, .FlushM, .MemRWM,
|
||||
.PCSrcE, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .ALUSelectE, .MemReadE, .CSRReadE,
|
||||
.Funct3E, .IntDivE, .MDUE, .W64E, .SubArithE, .BranchD, .BranchE, .JumpD, .JumpE, .SCE, .BranchSignedE, .BSelectE, .ZBBSelectE, .BALUControlE, .StallM, .FlushM, .MemRWM,
|
||||
.CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M,
|
||||
.RegWriteM, .FlushDCacheM, .InstrValidM, .InstrValidE, .InstrValidD, .FWriteIntM,
|
||||
.StallW, .FlushW, .RegWriteW, .IntDivW, .ResultSrcW, .CSRWriteFenceM, .InvalidateICacheM, .StoreStallD);
|
||||
|
||||
datapath dp(
|
||||
.clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE,
|
||||
.ALUControlE, .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .JumpE, .BranchSignedE,
|
||||
.PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE,
|
||||
.clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE, .W64E, .SubArithE,
|
||||
.Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .ALUSelectE, .JumpE, .BranchSignedE,
|
||||
.PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE, .BSelectE, .ZBBSelectE, .BALUControlE,
|
||||
.StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataM, .FCvtIntW,
|
||||
.StallW, .FlushW, .RegWriteW, .IntDivW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW,
|
||||
.CSRReadValW, .MDUResultW, .FIntDivResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW);
|
||||
|
|
|
@ -52,7 +52,7 @@ module regfile (
|
|||
|
||||
always_ff @(negedge clk)
|
||||
if (reset) for(i=1; i<NUMREGS; i++) rf[i] <= 0;
|
||||
else if (we3) rf[a3] <= wd3;
|
||||
else if (we3) rf[a3] <= wd3;
|
||||
|
||||
assign #2 rd1 = (a1 != 0) ? rf[a1] : 0;
|
||||
assign #2 rd2 = (a2 != 0) ? rf[a2] : 0;
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
///////////////////////////////////////////
|
||||
// shifter.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu, Sarah.Harris@unlv.edu
|
||||
// Written: David_Harris@hmc.edu, Sarah.Harris@unlv.edu, Kevin Kim <kekim@hmc.edu>
|
||||
// Created: 9 January 2021
|
||||
// Modified:
|
||||
// Modified: 6 February 2023
|
||||
//
|
||||
// Purpose: RISC-V 32/64 bit shifter
|
||||
//
|
||||
|
@ -30,48 +30,59 @@
|
|||
`include "wally-config.vh"
|
||||
|
||||
module shifter (
|
||||
input logic [`XLEN-1:0] A, // Source
|
||||
input logic [`LOG_XLEN-1:0] Amt, // Shift amount
|
||||
input logic Right, Arith, W64, // Shift right, arithmetic, RV64 W-type shift
|
||||
output logic [`XLEN-1:0] Y); // Shifted result
|
||||
input logic [`XLEN-1:0] A, // shift Source
|
||||
input logic [`LOG_XLEN-1:0] Amt, // Shift amount
|
||||
input logic Right, Rotate, W64, SubArith, // Shift right, rotate, W64-type operation, arithmetic shift
|
||||
output logic [`XLEN-1:0] Y); // Shifted result
|
||||
|
||||
logic [2*`XLEN-2:0] z, zshift; // Input to funnel shifter, shifted amount before truncated to 32 or 64 bits
|
||||
logic [`LOG_XLEN-1:0] amttrunc, offset; // Shift amount adjusted for RV64, right-shift amount
|
||||
logic [2*`XLEN-2:0] Z, ZShift; // Input to funnel shifter, shifted amount before truncated to 32 or 64 bits
|
||||
logic [`LOG_XLEN-1:0] TruncAmt, Offset; // Shift amount adjusted for RV64, right-shift amount
|
||||
logic Sign; // Sign bit for sign extension
|
||||
|
||||
// Handle left and right shifts with a funnel shifter.
|
||||
// For RV32, only 32-bit shifts are needed.
|
||||
// For RV64, 32- and 64-bit shifts are needed, with sign extension.
|
||||
|
||||
// Funnel shifter input (see CMOS VLSI Design 4e Section 11.8.1, note Table 11.11 shift types wrong)
|
||||
if (`XLEN==32) begin:shifter // RV32
|
||||
always_comb // funnel mux
|
||||
if (Right)
|
||||
if (Arith) z = {{31{A[31]}}, A};
|
||||
else z = {31'b0, A};
|
||||
else z = {A, 31'b0};
|
||||
assign amttrunc = Amt; // shift amount
|
||||
end else begin:shifter // RV64
|
||||
always_comb // funnel mux
|
||||
if (W64) begin // 32-bit shifts
|
||||
if (Right)
|
||||
if (Arith) z = {64'b0, {31{A[31]}}, A[31:0]};
|
||||
else z = {95'b0, A[31:0]};
|
||||
else z = {32'b0, A[31:0], 63'b0};
|
||||
end else begin
|
||||
if (Right)
|
||||
if (Arith) z = {{63{A[63]}}, A};
|
||||
else z = {63'b0, A};
|
||||
else z = {A, 63'b0};
|
||||
end
|
||||
assign amttrunc = W64 ? {1'b0, Amt[4:0]} : Amt; // 32- or 64-bit shift
|
||||
assign Sign = A[`XLEN-1] & SubArith; // sign bit for sign extension
|
||||
if (`XLEN==32) begin // rv32
|
||||
if (`ZBB_SUPPORTED) begin: rotfunnel32 //rv32 shifter with rotates
|
||||
always_comb // funnel mux
|
||||
case({Right, Rotate})
|
||||
2'b00: Z = {A[31:0], 31'b0};
|
||||
2'b01: Z = {A[31:0], A[31:1]};
|
||||
2'b10: Z = {{31{Sign}}, A[31:0]};
|
||||
2'b11: Z = {A[30:0], A[31:0]};
|
||||
endcase
|
||||
end else begin: norotfunnel32 //rv32 shifter without rotates
|
||||
always_comb // funnel mux
|
||||
if (Right) Z = {{31{Sign}}, A[31:0]};
|
||||
else Z = {A[31:0], 31'b0};
|
||||
end
|
||||
assign TruncAmt = Amt; // shift amount
|
||||
end else begin // rv64
|
||||
logic [`XLEN-1:0] A64;
|
||||
mux3 #(64) extendmux({{32{1'b0}}, A[31:0]}, {{32{A[31]}}, A[31:0]}, A, {~W64, SubArith}, A64); // bottom 32 bits are always A[31:0], so effectively a 32-bit upper mux
|
||||
if (`ZBB_SUPPORTED) begin: rotfunnel64 // rv64 shifter with rotates
|
||||
// shifter rotate source select mux
|
||||
logic [`XLEN-1:0] RotA; // rotate source
|
||||
mux2 #(`XLEN) rotmux(A, {A[31:0], A[31:0]}, W64, RotA); // W64 rotatons
|
||||
always_comb // funnel mux
|
||||
case ({Right, Rotate})
|
||||
2'b00: Z = {A64[63:0],{63'b0}};
|
||||
2'b01: Z = {RotA[63:0], RotA[63:1]};
|
||||
2'b10: Z = {{63{Sign}}, A64[63:0]};
|
||||
2'b11: Z = {RotA[62:0], RotA[63:0]};
|
||||
endcase
|
||||
end else begin: norotfunnel64 // rv64 shifter without rotates
|
||||
always_comb // funnel mux
|
||||
if (Right) Z = {{63{Sign}}, A64[63:0]};
|
||||
else Z = {A64[63:0], {63'b0}};
|
||||
end
|
||||
assign TruncAmt = W64 ? {1'b0, Amt[4:0]} : Amt; // 32- or 64-bit shift
|
||||
end
|
||||
|
||||
|
||||
// Opposite offset for right shifts
|
||||
assign offset = Right ? amttrunc : ~amttrunc;
|
||||
assign Offset = Right ? TruncAmt : ~TruncAmt;
|
||||
|
||||
// Funnel operation
|
||||
assign zshift = z >> offset;
|
||||
assign Y = zshift[`XLEN-1:0];
|
||||
assign ZShift = Z >> Offset;
|
||||
assign Y = ZShift[`XLEN-1:0];
|
||||
endmodule
|
||||
|
||||
|
||||
|
|
|
@ -31,10 +31,10 @@
|
|||
|
||||
module RASPredictor #(parameter int StackSize = 16 )(
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic StallF, StallD, StallE, StallM, FlushD, FlushE, FlushM,
|
||||
input logic BPReturnWrongD, // Prediction class is wrong
|
||||
input logic ReturnD,
|
||||
input logic reset,
|
||||
input logic StallF, StallD, StallE, StallM, FlushD, FlushE, FlushM,
|
||||
input logic BPReturnWrongD, // Prediction class is wrong
|
||||
input logic ReturnD,
|
||||
input logic ReturnE, CallE, // Instr class
|
||||
input logic BPReturnF,
|
||||
input logic [`XLEN-1:0] PCLinkE, // PC of instruction after a call
|
||||
|
@ -48,14 +48,14 @@ module RASPredictor #(parameter int StackSize = 16 )(
|
|||
logic [StackSize-1:0] [`XLEN-1:0] memory;
|
||||
integer index;
|
||||
|
||||
logic PopF;
|
||||
logic PushE;
|
||||
logic RepairD;
|
||||
logic IncrRepairD, DecRepairD;
|
||||
logic PopF;
|
||||
logic PushE;
|
||||
logic RepairD;
|
||||
logic IncrRepairD, DecRepairD;
|
||||
|
||||
logic DecrementPtr;
|
||||
logic FlushedReturnDE;
|
||||
logic WrongPredReturnD;
|
||||
logic DecrementPtr;
|
||||
logic FlushedReturnDE;
|
||||
logic WrongPredReturnD;
|
||||
|
||||
|
||||
assign PopF = BPReturnF & ~StallD & ~FlushD;
|
||||
|
@ -85,7 +85,7 @@ module RASPredictor #(parameter int StackSize = 16 )(
|
|||
always_ff @ (posedge clk) begin
|
||||
if(reset) begin
|
||||
for(index=0; index<StackSize; index++)
|
||||
memory[index] <= {`XLEN{1'b0}};
|
||||
memory[index] <= {`XLEN{1'b0}};
|
||||
end else if(PushE) begin
|
||||
memory[NextPtr] <= #1 PCLinkE;
|
||||
end
|
||||
|
|
|
@ -69,35 +69,33 @@ module bpred (
|
|||
output logic IClassWrongM // Class prediction is wrong
|
||||
);
|
||||
|
||||
logic [1:0] BPDirPredF;
|
||||
logic [1:0] BPDirPredF;
|
||||
|
||||
logic [`XLEN-1:0] BPBTAF, RASPCF;
|
||||
logic BPPCWrongE;
|
||||
logic IClassWrongE;
|
||||
logic BPDirPredWrongE;
|
||||
logic [`XLEN-1:0] BPBTAF, RASPCF;
|
||||
logic BPPCWrongE;
|
||||
logic IClassWrongE;
|
||||
logic BPDirPredWrongE;
|
||||
|
||||
logic BPPCSrcF;
|
||||
logic [`XLEN-1:0] BPPCF;
|
||||
logic [`XLEN-1:0] PC0NextF;
|
||||
logic [`XLEN-1:0] PCCorrectE;
|
||||
logic [3:0] WrongPredInstrClassD;
|
||||
logic BPPCSrcF;
|
||||
logic [`XLEN-1:0] BPPCF;
|
||||
logic [`XLEN-1:0] PC0NextF;
|
||||
logic [`XLEN-1:0] PCCorrectE;
|
||||
logic [3:0] WrongPredInstrClassD;
|
||||
|
||||
logic BTBTargetWrongE;
|
||||
logic RASTargetWrongE;
|
||||
logic BTBTargetWrongE;
|
||||
logic RASTargetWrongE;
|
||||
|
||||
logic [`XLEN-1:0] BPBTAD;
|
||||
logic [`XLEN-1:0] BPBTAD;
|
||||
|
||||
logic BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF;
|
||||
logic BPBranchF, BPJumpF, BPReturnF, BPCallF;
|
||||
logic BPBranchD, BPJumpD, BPReturnD, BPCallD;
|
||||
logic ReturnD, CallD;
|
||||
logic ReturnE, CallE;
|
||||
logic BranchM, JumpM, ReturnM, CallM;
|
||||
logic BranchW, JumpW, ReturnW, CallW;
|
||||
logic BPReturnWrongD;
|
||||
logic [`XLEN-1:0] BPBTAE;
|
||||
|
||||
|
||||
logic BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF;
|
||||
logic BPBranchF, BPJumpF, BPReturnF, BPCallF;
|
||||
logic BPBranchD, BPJumpD, BPReturnD, BPCallD;
|
||||
logic ReturnD, CallD;
|
||||
logic ReturnE, CallE;
|
||||
logic BranchM, JumpM, ReturnM, CallM;
|
||||
logic BranchW, JumpW, ReturnW, CallW;
|
||||
logic BPReturnWrongD;
|
||||
logic [`XLEN-1:0] BPBTAE;
|
||||
|
||||
// Part 1 branch direction prediction
|
||||
// look into the 2 port Sram model. something is wrong.
|
||||
|
@ -128,7 +126,7 @@ module bpred (
|
|||
gsharebasic #(`BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
|
||||
.PCNextF, .PCM, .BPDirPredF, .BPDirPredWrongE,
|
||||
.BranchE, .BranchM, .PCSrcE);
|
||||
|
||||
|
||||
end else if (`BPRED_TYPE == "BPLOCALPAg") begin:Predictor
|
||||
// *** Fix me
|
||||
/* -----\/----- EXCLUDED -----\/-----
|
||||
|
@ -149,25 +147,25 @@ module bpred (
|
|||
|
||||
btb #(`BTB_SIZE)
|
||||
TargetPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
|
||||
.PCNextF, .PCF, .PCD, .PCE, .PCM,
|
||||
.BPBTAF, .BPBTAD, .BPBTAE,
|
||||
.BTBIClassF({BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF}),
|
||||
.IClassWrongM, .IClassWrongE,
|
||||
.IEUAdrE, .IEUAdrM,
|
||||
.InstrClassD({CallD, ReturnD, JumpD, BranchD}),
|
||||
.InstrClassE({CallE, ReturnE, JumpE, BranchE}),
|
||||
.InstrClassM({CallM, ReturnM, JumpM, BranchM}),
|
||||
.InstrClassW({CallW, ReturnW, JumpW, BranchW}));
|
||||
.PCNextF, .PCF, .PCD, .PCE, .PCM,
|
||||
.BPBTAF, .BPBTAD, .BPBTAE,
|
||||
.BTBIClassF({BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF}),
|
||||
.IClassWrongM, .IClassWrongE,
|
||||
.IEUAdrE, .IEUAdrM,
|
||||
.InstrClassD({CallD, ReturnD, JumpD, BranchD}),
|
||||
.InstrClassE({CallE, ReturnE, JumpE, BranchE}),
|
||||
.InstrClassM({CallM, ReturnM, JumpM, BranchM}),
|
||||
.InstrClassW({CallW, ReturnW, JumpW, BranchW}));
|
||||
|
||||
icpred #(`INSTR_CLASS_PRED) icpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
|
||||
.PostSpillInstrRawF, .InstrD, .BranchD, .BranchE, .JumpD, .JumpE, .BranchM, .BranchW, .JumpM, .JumpW,
|
||||
.CallD, .CallE, .CallM, .CallW, .ReturnD, .ReturnE, .ReturnM, .ReturnW, .BTBCallF, .BTBReturnF, .BTBJumpF,
|
||||
.BTBBranchF, .BPCallF, .BPReturnF, .BPJumpF, .BPBranchF, .IClassWrongM, .IClassWrongE, .BPReturnWrongD);
|
||||
.PostSpillInstrRawF, .InstrD, .BranchD, .BranchE, .JumpD, .JumpE, .BranchM, .BranchW, .JumpM, .JumpW,
|
||||
.CallD, .CallE, .CallM, .CallW, .ReturnD, .ReturnE, .ReturnM, .ReturnW, .BTBCallF, .BTBReturnF, .BTBJumpF,
|
||||
.BTBBranchF, .BPCallF, .BPReturnF, .BPJumpF, .BPBranchF, .IClassWrongM, .IClassWrongE, .BPReturnWrongD);
|
||||
|
||||
// Part 3 RAS
|
||||
RASPredictor RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM,
|
||||
.BPReturnF, .ReturnD, .ReturnE, .CallE,
|
||||
.BPReturnWrongD, .RASPCF, .PCLinkE);
|
||||
.BPReturnF, .ReturnD, .ReturnE, .CallE,
|
||||
.BPReturnWrongD, .RASPCF, .PCLinkE);
|
||||
|
||||
// Check the prediction
|
||||
// if it is a CFI then check if the next instruction address (PCD) matches the branch's target or fallthrough address.
|
||||
|
@ -192,11 +190,11 @@ module bpred (
|
|||
// If the fence/csrw was predicted as a taken branch then we select PCF, rather PCE.
|
||||
// Effectively this is PCM+4 or the non-existant PCLinkM
|
||||
if(`INSTR_CLASS_PRED) mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(PCE, PCF, BPWrongM, NextValidPCE);
|
||||
else assign NextValidPCE = PCE;
|
||||
else assign NextValidPCE = PCE;
|
||||
|
||||
if(`ZICOUNTERS_SUPPORTED) begin
|
||||
logic [`XLEN-1:0] RASPCD, RASPCE;
|
||||
logic BTAWrongE, RASPredPCWrongE;
|
||||
logic [`XLEN-1:0] RASPCD, RASPCE;
|
||||
logic BTAWrongE, RASPredPCWrongE;
|
||||
// performance counters
|
||||
// 1. class (class wrong / minstret) (IClassWrongM / csr) // Correct now
|
||||
// 2. target btb (btb target wrong / class[0,1,3]) (btb target wrong / (br + j + jal)
|
||||
|
@ -207,15 +205,15 @@ module bpred (
|
|||
// could be wrong or the fall through address selected for branch predict not taken.
|
||||
// By pipeline the BTB's PC and RAS address through the pipeline we can measure the accuracy of
|
||||
// both without the above inaccuracies.
|
||||
// **** use BPBTAWrongM from BTB.
|
||||
// **** use BPBTAWrongM from BTB.
|
||||
assign BTAWrongE = (BPBTAE != IEUAdrE) & (BranchE | JumpE & ~ReturnE) & PCSrcE;
|
||||
assign RASPredPCWrongE = (RASPCE != IEUAdrE) & ReturnE & PCSrcE;
|
||||
|
||||
flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD);
|
||||
flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE);
|
||||
flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM,
|
||||
{BPDirPredWrongE, BTAWrongE, RASPredPCWrongE},
|
||||
{BPDirPredWrongM, BTAWrongM, RASPredPCWrongM});
|
||||
{BPDirPredWrongE, BTAWrongE, RASPredPCWrongE},
|
||||
{BPDirPredWrongM, BTAWrongM, RASPredPCWrongM});
|
||||
|
||||
end else begin
|
||||
assign {BTAWrongM, RASPredPCWrongM} = '0;
|
||||
|
|
|
@ -31,34 +31,34 @@
|
|||
`include "wally-config.vh"
|
||||
|
||||
module btb #(parameter Depth = 10 ) (
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic StallF, StallD, StallE, StallM, StallW, FlushD, FlushE, FlushM, FlushW,
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic StallF, StallD, StallE, StallM, StallW, FlushD, FlushE, FlushM, FlushW,
|
||||
input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM,// PC at various stages
|
||||
output logic [`XLEN-1:0] BPBTAF, // BTB's guess at PC
|
||||
output logic [`XLEN-1:0] BPBTAD,
|
||||
output logic [`XLEN-1:0] BPBTAE,
|
||||
output logic [3:0] BTBIClassF, // BTB's guess at instruction class
|
||||
output logic [3:0] BTBIClassF, // BTB's guess at instruction class
|
||||
// update
|
||||
input logic IClassWrongM, // BTB's instruction class guess was wrong
|
||||
input logic IClassWrongM, // BTB's instruction class guess was wrong
|
||||
input logic IClassWrongE,
|
||||
input logic [`XLEN-1:0] IEUAdrE, // Branch/jump target address to insert into btb
|
||||
input logic [`XLEN-1:0] IEUAdrM, // Branch/jump target address to insert into btb
|
||||
input logic [3:0] InstrClassD, // Instruction class to insert into btb
|
||||
input logic [3:0] InstrClassE, // Instruction class to insert into btb
|
||||
input logic [3:0] InstrClassM, // Instruction class to insert into btb
|
||||
input logic [3:0] InstrClassD, // Instruction class to insert into btb
|
||||
input logic [3:0] InstrClassE, // Instruction class to insert into btb
|
||||
input logic [3:0] InstrClassM, // Instruction class to insert into btb
|
||||
input logic [3:0] InstrClassW
|
||||
);
|
||||
|
||||
logic [Depth-1:0] PCNextFIndex, PCFIndex, PCDIndex, PCEIndex, PCMIndex, PCWIndex;
|
||||
logic [`XLEN-1:0] ResetPC;
|
||||
logic MatchD, MatchE, MatchM, MatchW, MatchX;
|
||||
logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF;
|
||||
logic [`XLEN+3:0] TableBTBPredF;
|
||||
logic [`XLEN-1:0] IEUAdrW;
|
||||
logic [Depth-1:0] PCNextFIndex, PCFIndex, PCDIndex, PCEIndex, PCMIndex, PCWIndex;
|
||||
logic [`XLEN-1:0] ResetPC;
|
||||
logic MatchD, MatchE, MatchM, MatchW, MatchX;
|
||||
logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF;
|
||||
logic [`XLEN+3:0] TableBTBPredF;
|
||||
logic [`XLEN-1:0] IEUAdrW;
|
||||
logic [`XLEN-1:0] PCW;
|
||||
logic BTBWrongE, BPBTAWrongE;
|
||||
logic BTBWrongM, BPBTAWrongM;
|
||||
logic BTBWrongE, BPBTAWrongE;
|
||||
logic BTBWrongM, BPBTAWrongM;
|
||||
|
||||
|
||||
// hashing function for indexing the PC
|
||||
|
@ -111,5 +111,4 @@ module btb #(parameter Depth = 10 ) (
|
|||
flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW);
|
||||
flopenr #(`XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW);
|
||||
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -42,30 +42,30 @@ module gshare #(parameter k = 10,
|
|||
input logic BPBranchF, BranchD, BranchE, BranchM, BranchW, PCSrcE
|
||||
);
|
||||
|
||||
logic MatchF, MatchD, MatchE, MatchM, MatchW;
|
||||
logic MatchX;
|
||||
logic MatchF, MatchD, MatchE, MatchM, MatchW;
|
||||
logic MatchX;
|
||||
|
||||
logic [1:0] TableBPDirPredF, BPDirPredD, BPDirPredE, FwdNewDirPredF;
|
||||
logic [1:0] NewBPDirPredE, NewBPDirPredM, NewBPDirPredW;
|
||||
logic [1:0] TableBPDirPredF, BPDirPredD, BPDirPredE, FwdNewDirPredF;
|
||||
logic [1:0] NewBPDirPredE, NewBPDirPredM, NewBPDirPredW;
|
||||
|
||||
logic [k-1:0] IndexNextF, IndexF, IndexD, IndexE, IndexM, IndexW;
|
||||
logic [k-1:0] IndexNextF, IndexF, IndexD, IndexE, IndexM, IndexW;
|
||||
|
||||
logic [k-1:0] GHRF, GHRD, GHRE, GHRM;
|
||||
logic [k-1:0] GHRNextM, GHRNextF;
|
||||
logic PCSrcM;
|
||||
logic [k-1:0] GHRF, GHRD, GHRE, GHRM;
|
||||
logic [k-1:0] GHRNextM, GHRNextF;
|
||||
logic PCSrcM;
|
||||
|
||||
if(TYPE == 1) begin
|
||||
assign IndexNextF = GHRNextF ^ {PCNextF[k+1] ^ PCNextF[1], PCNextF[k:2]};
|
||||
assign IndexF = GHRF ^ {PCF[k+1] ^ PCF[1], PCF[k:2]};
|
||||
assign IndexD = GHRD ^ {PCD[k+1] ^ PCD[1], PCD[k:2]};
|
||||
assign IndexE = GHRE ^ {PCE[k+1] ^ PCE[1], PCE[k:2]};
|
||||
assign IndexM = GHRM ^ {PCM[k+1] ^ PCM[1], PCM[k:2]};
|
||||
assign IndexNextF = GHRNextF ^ {PCNextF[k+1] ^ PCNextF[1], PCNextF[k:2]};
|
||||
assign IndexF = GHRF ^ {PCF[k+1] ^ PCF[1], PCF[k:2]};
|
||||
assign IndexD = GHRD ^ {PCD[k+1] ^ PCD[1], PCD[k:2]};
|
||||
assign IndexE = GHRE ^ {PCE[k+1] ^ PCE[1], PCE[k:2]};
|
||||
assign IndexM = GHRM ^ {PCM[k+1] ^ PCM[1], PCM[k:2]};
|
||||
end else if(TYPE == 0) begin
|
||||
assign IndexNextF = GHRNextF;
|
||||
assign IndexF = GHRF;
|
||||
assign IndexD = GHRD;
|
||||
assign IndexE = GHRE;
|
||||
assign IndexM = GHRM;
|
||||
assign IndexNextF = GHRNextF;
|
||||
assign IndexF = GHRF;
|
||||
assign IndexD = GHRD;
|
||||
assign IndexE = GHRE;
|
||||
assign IndexM = GHRM;
|
||||
end
|
||||
|
||||
flopenrc #(k) IndexWReg(clk, reset, FlushW, ~StallW, IndexM, IndexW);
|
||||
|
@ -79,7 +79,7 @@ module gshare #(parameter k = 10,
|
|||
assign FwdNewDirPredF = MatchD ? {2{BPDirPredD[1]}} :
|
||||
MatchE ? {NewBPDirPredE} :
|
||||
MatchM ? {NewBPDirPredM} :
|
||||
NewBPDirPredW ;
|
||||
NewBPDirPredW ;
|
||||
|
||||
assign BPDirPredF = MatchX ? FwdNewDirPredF : TableBPDirPredF;
|
||||
|
||||
|
|
|
@ -42,20 +42,20 @@ module gsharebasic #(parameter k = 10,
|
|||
input logic BranchE, BranchM, PCSrcE
|
||||
);
|
||||
|
||||
logic [k-1:0] IndexNextF, IndexM;
|
||||
logic [1:0] BPDirPredD, BPDirPredE;
|
||||
logic [1:0] NewBPDirPredE, NewBPDirPredM;
|
||||
logic [k-1:0] IndexNextF, IndexM;
|
||||
logic [1:0] BPDirPredD, BPDirPredE;
|
||||
logic [1:0] NewBPDirPredE, NewBPDirPredM;
|
||||
|
||||
logic [k-1:0] GHRF, GHRD, GHRE, GHRM, GHR;
|
||||
logic [k-1:0] GHRNext;
|
||||
logic PCSrcM;
|
||||
logic [k-1:0] GHRF, GHRD, GHRE, GHRM, GHR;
|
||||
logic [k-1:0] GHRNext;
|
||||
logic PCSrcM;
|
||||
|
||||
if(TYPE == 1) begin
|
||||
assign IndexNextF = GHR ^ {PCNextF[k+1] ^ PCNextF[1], PCNextF[k:2]};
|
||||
assign IndexM = GHRM ^ {PCM[k+1] ^ PCM[1], PCM[k:2]};
|
||||
assign IndexNextF = GHR ^ {PCNextF[k+1] ^ PCNextF[1], PCNextF[k:2]};
|
||||
assign IndexM = GHRM ^ {PCM[k+1] ^ PCM[1], PCM[k:2]};
|
||||
end else if(TYPE == 0) begin
|
||||
assign IndexNextF = GHRNext;
|
||||
assign IndexM = GHRM;
|
||||
assign IndexNextF = GHRNext;
|
||||
assign IndexM = GHRM;
|
||||
end
|
||||
|
||||
ram2p1r1wbe #(2**k, 2) PHT(.clk(clk),
|
||||
|
|
|
@ -45,16 +45,16 @@ module icpred #(parameter INSTR_CLASS_PRED = 1)(
|
|||
output logic IClassWrongM, BPReturnWrongD, IClassWrongE
|
||||
);
|
||||
|
||||
logic IClassWrongD;
|
||||
logic BPBranchD, BPJumpD, BPReturnD, BPCallD;
|
||||
logic IClassWrongD;
|
||||
logic BPBranchD, BPJumpD, BPReturnD, BPCallD;
|
||||
|
||||
if (!INSTR_CLASS_PRED) begin : DirectClassDecode
|
||||
// This section is mainly for testing, verification, and PPA comparison.
|
||||
// An alternative to using the BTB to store the instruction class is to partially decode
|
||||
// the instructions in the Fetch stage into, Call, Return, Jump, and Branch instructions.
|
||||
// This logic is not described in the text book as of 23 February 2023.
|
||||
logic ccall, cj, cjr, ccallr, CJumpF, CBranchF;
|
||||
logic NCJumpF, NCBranchF;
|
||||
logic ccall, cj, cjr, ccallr, CJumpF, CBranchF;
|
||||
logic NCJumpF, NCBranchF;
|
||||
|
||||
if(`C_SUPPORTED) begin
|
||||
logic [4:0] CompressedOpcF;
|
||||
|
@ -75,10 +75,10 @@ module icpred #(parameter INSTR_CLASS_PRED = 1)(
|
|||
assign BPBranchF = NCBranchF | (`C_SUPPORTED & CBranchF);
|
||||
assign BPJumpF = NCJumpF | (`C_SUPPORTED & (CJumpF));
|
||||
assign BPReturnF = (NCJumpF & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01) | // returnurn must returnurn to ra or r5
|
||||
(`C_SUPPORTED & (ccallr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01));
|
||||
(`C_SUPPORTED & (ccallr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01));
|
||||
|
||||
assign BPCallF = (NCJumpF & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // call(r) must link to ra or x5
|
||||
(`C_SUPPORTED & (ccall | (ccallr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01)));
|
||||
(`C_SUPPORTED & (ccall | (ccallr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01)));
|
||||
|
||||
end else begin
|
||||
// This section connects the BTB's instruction class prediction.
|
||||
|
|
196
src/ifu/ifu.sv
196
src/ifu/ifu.sv
|
@ -28,116 +28,116 @@
|
|||
`include "wally-config.vh"
|
||||
|
||||
module ifu (
|
||||
input logic clk, reset,
|
||||
input logic StallF, StallD, StallE, StallM, StallW,
|
||||
input logic FlushD, FlushE, FlushM, FlushW,
|
||||
output logic IFUStallF, // IFU stalsl pipeline during a multicycle operation
|
||||
input logic clk, reset,
|
||||
input logic StallF, StallD, StallE, StallM, StallW,
|
||||
input logic FlushD, FlushE, FlushM, FlushW,
|
||||
output logic IFUStallF, // IFU stalsl pipeline during a multicycle operation
|
||||
// Command from CPU
|
||||
input logic InvalidateICacheM, // Clears all instruction cache valid bits
|
||||
input logic CSRWriteFenceM, // CSR write or fence instruction, PCNextF = the next valid PC (typically PCE)
|
||||
input logic InstrValidD, InstrValidE, InstrValidM,
|
||||
input logic BranchD, BranchE,
|
||||
input logic JumpD, JumpE,
|
||||
// Bus interface
|
||||
output logic [`PA_BITS-1:0] IFUHADDR, // Bus address from IFU to EBU
|
||||
input logic [`XLEN-1:0] HRDATA, // Bus read data from IFU to EBU
|
||||
input logic IFUHREADY, // Bus ready from IFU to EBU
|
||||
output logic IFUHWRITE, // Bus write operation from IFU to EBU
|
||||
output logic [2:0] IFUHSIZE, // Bus operation size from IFU to EBU
|
||||
output logic [2:0] IFUHBURST, // Bus burst from IFU to EBU
|
||||
output logic [1:0] IFUHTRANS, // Bus transaction type from IFU to EBU
|
||||
input logic InvalidateICacheM, // Clears all instruction cache valid bits
|
||||
input logic CSRWriteFenceM, // CSR write or fence instruction, PCNextF = the next valid PC (typically PCE)
|
||||
input logic InstrValidD, InstrValidE, InstrValidM,
|
||||
input logic BranchD, BranchE,
|
||||
input logic JumpD, JumpE,
|
||||
// Bus interface
|
||||
output logic [`PA_BITS-1:0] IFUHADDR, // Bus address from IFU to EBU
|
||||
input logic [`XLEN-1:0] HRDATA, // Bus read data from IFU to EBU
|
||||
input logic IFUHREADY, // Bus ready from IFU to EBU
|
||||
output logic IFUHWRITE, // Bus write operation from IFU to EBU
|
||||
output logic [2:0] IFUHSIZE, // Bus operation size from IFU to EBU
|
||||
output logic [2:0] IFUHBURST, // Bus burst from IFU to EBU
|
||||
output logic [1:0] IFUHTRANS, // Bus transaction type from IFU to EBU
|
||||
|
||||
output logic [`XLEN-1:0] PCSpillF, // PCF with possible + 2 to handle spill to HPTW
|
||||
output logic [`XLEN-1:0] PCSpillF, // PCF with possible + 2 to handle spill to HPTW
|
||||
// Execute
|
||||
output logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
|
||||
input logic PCSrcE, // Executation stage branch is taken
|
||||
input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address
|
||||
input logic [`XLEN-1:0] IEUAdrM, // The branch/jump target address
|
||||
output logic [`XLEN-1:0] PCE, // Execution stage instruction address
|
||||
output logic BPWrongE, // Prediction is wrong
|
||||
output logic BPWrongM, // Prediction is wrong
|
||||
output logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
|
||||
input logic PCSrcE, // Executation stage branch is taken
|
||||
input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address
|
||||
input logic [`XLEN-1:0] IEUAdrM, // The branch/jump target address
|
||||
output logic [`XLEN-1:0] PCE, // Execution stage instruction address
|
||||
output logic BPWrongE, // Prediction is wrong
|
||||
output logic BPWrongM, // Prediction is wrong
|
||||
// Mem
|
||||
output logic CommittedF, // I$ or bus memory operation started, delay interrupts
|
||||
input logic [`XLEN-1:0] UnalignedPCNextF, // The next PCF, but not aligned to 2 bytes.
|
||||
output logic [`XLEN-1:0] PC2NextF, // Selected PC between branch prediction and next valid PC if CSRWriteFence
|
||||
output logic [31:0] InstrD, // The decoded instruction in Decode stage
|
||||
output logic [31:0] InstrM, // The decoded instruction in Memory stage
|
||||
output logic [31:0] InstrOrigM, // Original compressed or uncompressed instruction in Memory stage for Illegal Instruction MTVAL
|
||||
output logic [`XLEN-1:0] PCM, // Memory stage instruction address
|
||||
output logic CommittedF, // I$ or bus memory operation started, delay interrupts
|
||||
input logic [`XLEN-1:0] UnalignedPCNextF, // The next PCF, but not aligned to 2 bytes.
|
||||
output logic [`XLEN-1:0] PC2NextF, // Selected PC between branch prediction and next valid PC if CSRWriteFence
|
||||
output logic [31:0] InstrD, // The decoded instruction in Decode stage
|
||||
output logic [31:0] InstrM, // The decoded instruction in Memory stage
|
||||
output logic [31:0] InstrOrigM, // Original compressed or uncompressed instruction in Memory stage for Illegal Instruction MTVAL
|
||||
output logic [`XLEN-1:0] PCM, // Memory stage instruction address
|
||||
// branch predictor
|
||||
output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br
|
||||
output logic BPDirPredWrongM, // Prediction direction is wrong
|
||||
output logic BTAWrongM, // Prediction target wrong
|
||||
output logic RASPredPCWrongM, // RAS prediction is wrong
|
||||
output logic IClassWrongM, // Class prediction is wrong
|
||||
output logic ICacheStallF, // I$ busy with multicycle operation
|
||||
output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br
|
||||
output logic BPDirPredWrongM, // Prediction direction is wrong
|
||||
output logic BTAWrongM, // Prediction target wrong
|
||||
output logic RASPredPCWrongM, // RAS prediction is wrong
|
||||
output logic IClassWrongM, // Class prediction is wrong
|
||||
output logic ICacheStallF, // I$ busy with multicycle operation
|
||||
// Faults
|
||||
input logic IllegalBaseInstrD, // Illegal non-compressed instruction
|
||||
input logic IllegalFPUInstrD, // Illegal FP instruction
|
||||
output logic InstrPageFaultF, // Instruction page fault
|
||||
output logic IllegalIEUFPUInstrD, // Illegal instruction including compressed & FP
|
||||
output logic InstrMisalignedFaultM, // Branch target not aligned to 4 bytes if no compressed allowed (2 bytes if allowed)
|
||||
input logic IllegalBaseInstrD, // Illegal non-compressed instruction
|
||||
input logic IllegalFPUInstrD, // Illegal FP instruction
|
||||
output logic InstrPageFaultF, // Instruction page fault
|
||||
output logic IllegalIEUFPUInstrD, // Illegal instruction including compressed & FP
|
||||
output logic InstrMisalignedFaultM, // Branch target not aligned to 4 bytes if no compressed allowed (2 bytes if allowed)
|
||||
// mmu management
|
||||
input logic [1:0] PrivilegeModeW, // Priviledge mode in Writeback stage
|
||||
input logic [`XLEN-1:0] PTE, // Hardware page table walker (HPTW) writes Page table entry (PTE) to ITLB
|
||||
input logic [1:0] PageType, // Hardware page table walker (HPTW) writes PageType to ITLB
|
||||
input logic ITLBWriteF, // Writes PTE and PageType to ITLB
|
||||
input logic [`XLEN-1:0] SATP_REGW, // Location of the root page table and page table configuration
|
||||
input logic STATUS_MXR, // Status CSR: make executable page readable
|
||||
input logic STATUS_SUM, // Status CSR: Supervisor access to user memory
|
||||
input logic STATUS_MPRV, // Status CSR: modify machine privilege
|
||||
input logic [1:0] STATUS_MPP, // Status CSR: previous machine privilege level
|
||||
input logic sfencevmaM, // Virtual memory address fence, invalidate TLB entries
|
||||
output logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk
|
||||
output logic InstrUpdateDAF, // ITLB hit needs to update dirty or access bits
|
||||
input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP configuration from privileged unit
|
||||
input var logic [`PA_BITS-3:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP address from privileged unit
|
||||
output logic InstrAccessFaultF, // Instruction access fault
|
||||
output logic ICacheAccess, // Report I$ read to performance counters
|
||||
output logic ICacheMiss // Report I$ miss to performance counters
|
||||
input logic [1:0] PrivilegeModeW, // Priviledge mode in Writeback stage
|
||||
input logic [`XLEN-1:0] PTE, // Hardware page table walker (HPTW) writes Page table entry (PTE) to ITLB
|
||||
input logic [1:0] PageType, // Hardware page table walker (HPTW) writes PageType to ITLB
|
||||
input logic ITLBWriteF, // Writes PTE and PageType to ITLB
|
||||
input logic [`XLEN-1:0] SATP_REGW, // Location of the root page table and page table configuration
|
||||
input logic STATUS_MXR, // Status CSR: make executable page readable
|
||||
input logic STATUS_SUM, // Status CSR: Supervisor access to user memory
|
||||
input logic STATUS_MPRV, // Status CSR: modify machine privilege
|
||||
input logic [1:0] STATUS_MPP, // Status CSR: previous machine privilege level
|
||||
input logic sfencevmaM, // Virtual memory address fence, invalidate TLB entries
|
||||
output logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk
|
||||
output logic InstrUpdateDAF, // ITLB hit needs to update dirty or access bits
|
||||
input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP configuration from privileged unit
|
||||
input var logic [`PA_BITS-3:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP address from privileged unit
|
||||
output logic InstrAccessFaultF, // Instruction access fault
|
||||
output logic ICacheAccess, // Report I$ read to performance counters
|
||||
output logic ICacheMiss // Report I$ miss to performance counters
|
||||
);
|
||||
|
||||
localparam [31:0] nop = 32'h00000013; // instruction for NOP
|
||||
localparam [31:0] nop = 32'h00000013; // instruction for NOP
|
||||
|
||||
logic [`XLEN-1:0] PCNextF; // Next PCF, selected from Branch predictor, Privilege, or PC+2/4
|
||||
logic BranchMisalignedFaultE; // Branch target not aligned to 4 bytes if no compressed allowed (2 bytes if allowed)
|
||||
logic [`XLEN-1:0] PCPlus2or4F; // PCF + 2 (CompressedF) or PCF + 4 (Non-compressed)
|
||||
logic [`XLEN-1:0] PCSpillNextF; // Next PCF after possible + 2 to handle spill
|
||||
logic [`XLEN-1:0] PCLinkD; // PCF2or4F delayed 1 cycle. This is next PC after a control flow instruction (br or j)
|
||||
logic [`XLEN-1:2] PCPlus4F; // PCPlus4F is always PCF + 4. Fancy way to compute PCPlus2or4F
|
||||
logic [`XLEN-1:0] PCD; // Decode stage instruction address
|
||||
logic [`XLEN-1:0] NextValidPCE; // The PC of the next valid instruction in the pipeline after csr write or fence
|
||||
logic [`XLEN-1:0] PCF; // Fetch stage instruction address
|
||||
logic [`PA_BITS-1:0] PCPF; // Physical address after address translation
|
||||
logic [`XLEN+1:0] PCFExt; //
|
||||
logic [`XLEN-1:0] PCNextF; // Next PCF, selected from Branch predictor, Privilege, or PC+2/4
|
||||
logic BranchMisalignedFaultE; // Branch target not aligned to 4 bytes if no compressed allowed (2 bytes if allowed)
|
||||
logic [`XLEN-1:0] PCPlus2or4F; // PCF + 2 (CompressedF) or PCF + 4 (Non-compressed)
|
||||
logic [`XLEN-1:0] PCSpillNextF; // Next PCF after possible + 2 to handle spill
|
||||
logic [`XLEN-1:0] PCLinkD; // PCF2or4F delayed 1 cycle. This is next PC after a control flow instruction (br or j)
|
||||
logic [`XLEN-1:2] PCPlus4F; // PCPlus4F is always PCF + 4. Fancy way to compute PCPlus2or4F
|
||||
logic [`XLEN-1:0] PCD; // Decode stage instruction address
|
||||
logic [`XLEN-1:0] NextValidPCE; // The PC of the next valid instruction in the pipeline after csr write or fence
|
||||
logic [`XLEN-1:0] PCF; // Fetch stage instruction address
|
||||
logic [`PA_BITS-1:0] PCPF; // Physical address after address translation
|
||||
logic [`XLEN+1:0] PCFExt; //
|
||||
|
||||
logic [31:0] IROMInstrF; // Instruction from the IROM
|
||||
logic [31:0] ICacheInstrF; // Instruction from the I$
|
||||
logic [31:0] InstrRawF; // Instruction from the IROM, I$, or bus
|
||||
logic CompressedF; // The fetched instruction is compressed
|
||||
logic CompressedD; // The decoded instruction is compressed
|
||||
logic CompressedE; // The execution instruction is compressed
|
||||
logic CompressedM; // The execution instruction is compressed
|
||||
logic [31:0] PostSpillInstrRawF; // Fetch instruction after merge two halves of spill
|
||||
logic [31:0] InstrRawD; // Non-decompressed instruction in the Decode stage
|
||||
logic IllegalIEUInstrD; // IEU Instruction (regular or compressed) is not good
|
||||
logic [31:0] IROMInstrF; // Instruction from the IROM
|
||||
logic [31:0] ICacheInstrF; // Instruction from the I$
|
||||
logic [31:0] InstrRawF; // Instruction from the IROM, I$, or bus
|
||||
logic CompressedF; // The fetched instruction is compressed
|
||||
logic CompressedD; // The decoded instruction is compressed
|
||||
logic CompressedE; // The execution instruction is compressed
|
||||
logic CompressedM; // The execution instruction is compressed
|
||||
logic [31:0] PostSpillInstrRawF; // Fetch instruction after merge two halves of spill
|
||||
logic [31:0] InstrRawD; // Non-decompressed instruction in the Decode stage
|
||||
logic IllegalIEUInstrD; // IEU Instruction (regular or compressed) is not good
|
||||
|
||||
logic [1:0] IFURWF; // IFU alreays read IFURWF = 10
|
||||
logic [31:0] InstrE; // Instruction in the Execution stage
|
||||
logic [31:0] NextInstrD, NextInstrE; // Instruction into the next stage after possible stage flush
|
||||
logic [1:0] IFURWF; // IFU alreays read IFURWF = 10
|
||||
logic [31:0] InstrE; // Instruction in the Execution stage
|
||||
logic [31:0] NextInstrD, NextInstrE; // Instruction into the next stage after possible stage flush
|
||||
|
||||
|
||||
logic CacheableF; // PMA indicates instruction address is cacheable
|
||||
logic SelSpillNextF; // In a spill, stall pipeline and gate local stallF
|
||||
logic BusStall; // Bus interface busy with multicycle operation
|
||||
logic IFUCacheBusStallD; // EIther I$ or bus busy with multicycle operation
|
||||
logic GatedStallD; // StallD gated by selected next spill
|
||||
logic CacheableF; // PMA indicates instruction address is cacheable
|
||||
logic SelSpillNextF; // In a spill, stall pipeline and gate local stallF
|
||||
logic BusStall; // Bus interface busy with multicycle operation
|
||||
logic IFUCacheBusStallD; // EIther I$ or bus busy with multicycle operation
|
||||
logic GatedStallD; // StallD gated by selected next spill
|
||||
// branch predictor signal
|
||||
logic [`XLEN-1:0] PC1NextF; // Branch predictor next PCF
|
||||
logic BusCommittedF; // Bus memory operation in flight, delay interrupts
|
||||
logic CacheCommittedF; // I$ memory operation started, delay interrupts
|
||||
logic SelIROM; // PMA indicates instruction address is in the IROM
|
||||
logic [15:0] InstrRawE, InstrRawM;
|
||||
logic [`XLEN-1:0] PC1NextF; // Branch predictor next PCF
|
||||
logic BusCommittedF; // Bus memory operation in flight, delay interrupts
|
||||
logic CacheCommittedF; // I$ memory operation started, delay interrupts
|
||||
logic SelIROM; // PMA indicates instruction address is in the IROM
|
||||
logic [15:0] InstrRawE, InstrRawM;
|
||||
|
||||
assign PCFExt = {2'b00, PCSpillF};
|
||||
|
||||
|
@ -208,13 +208,13 @@ module ifu (
|
|||
// delay the interrupt until the LSU is in a clean state.
|
||||
assign CommittedF = CacheCommittedF | BusCommittedF;
|
||||
|
||||
logic IgnoreRequest;
|
||||
logic IgnoreRequest;
|
||||
assign IgnoreRequest = ITLBMissF | FlushD;
|
||||
|
||||
// The IROM uses untranslated addresses, so it is not compatible with virtual memory.
|
||||
if (`IROM_SUPPORTED) begin : irom
|
||||
logic IROMce;
|
||||
assign IROMce = ~GatedStallD | reset;
|
||||
logic IROMce;
|
||||
assign IROMce = ~GatedStallD | reset;
|
||||
assign IFURWF = 2'b10;
|
||||
irom irom(.clk, .ce(IROMce), .Adr(PCSpillNextF[`XLEN-1:0]), .IROMInstrF);
|
||||
end else begin
|
||||
|
|
|
@ -27,10 +27,10 @@
|
|||
`include "wally-config.vh"
|
||||
|
||||
module irom(
|
||||
input logic clk,
|
||||
input logic ce, // Chip Enable. 0: Holds IROMInstrF constant
|
||||
input logic clk,
|
||||
input logic ce, // Chip Enable. 0: Holds IROMInstrF constant
|
||||
input logic [`XLEN-1:0] Adr, // PCNextFSpill
|
||||
output logic [31:0] IROMInstrF // Instruction read data
|
||||
output logic [31:0] IROMInstrF // Instruction read data
|
||||
);
|
||||
|
||||
localparam XLENBYTES = `XLEN/8;
|
||||
|
@ -38,16 +38,16 @@ module irom(
|
|||
localparam OFFSET = $clog2(XLENBYTES);
|
||||
|
||||
logic [`XLEN-1:0] IROMInstrFFull;
|
||||
logic [31:0] RawIROMInstrF;
|
||||
logic [31:0] RawIROMInstrF;
|
||||
|
||||
logic [1:0] AdrD;
|
||||
logic [1:0] AdrD;
|
||||
flopen #(2) AdrReg(clk, ce, Adr[2:1], AdrD);
|
||||
|
||||
rom1p1r #(ADDR_WDITH, `XLEN) rom(.clk, .ce, .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(IROMInstrFFull));
|
||||
if (`XLEN == 32) assign RawIROMInstrF = IROMInstrFFull;
|
||||
else begin
|
||||
// IROM is aligned to XLEN words, but instructions are 32 bits. Select between the two
|
||||
// haves. Adr is the Next PCF not PCF so we delay 1 cycle.
|
||||
// IROM is aligned to XLEN words, but instructions are 32 bits. Select between the two
|
||||
// haves. Adr is the Next PCF not PCF so we delay 1 cycle.
|
||||
assign RawIROMInstrF = AdrD[1] ? IROMInstrFFull[63:32] : IROMInstrFFull[31:0];
|
||||
end
|
||||
// If the memory addres is aligned to 2 bytes return the upper 2 bytes in the lower 2 bytes.
|
||||
|
|
|
@ -34,31 +34,32 @@
|
|||
module spill #(
|
||||
parameter CACHE_ENABLED // Changes spill threshold to 1 if there is no cache
|
||||
)(input logic clk,
|
||||
input logic reset,
|
||||
input logic StallD, FlushD,
|
||||
input logic reset,
|
||||
input logic StallD, FlushD,
|
||||
input logic [`XLEN-1:0] PCF, // 2 byte aligned PC in Fetch stage
|
||||
input logic [`XLEN-1:2] PCPlus4F, // PCF + 4
|
||||
input logic [`XLEN-1:0] PCNextF, // The next PCF
|
||||
input logic [31:0] InstrRawF, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed
|
||||
input logic IFUCacheBusStallD, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched
|
||||
input logic ITLBMissF, // ITLB miss, ignore memory request
|
||||
input logic InstrUpdateDAF, // Ignore memory request if the hptw support write and a DA page fault occurs (hptw is still active)
|
||||
input logic [31:0] InstrRawF, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed
|
||||
input logic IFUCacheBusStallD, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched
|
||||
input logic ITLBMissF, // ITLB miss, ignore memory request
|
||||
input logic InstrUpdateDAF, // Ignore memory request if the hptw support write and a DA page fault occurs (hptw is still active)
|
||||
output logic [`XLEN-1:0] PCSpillNextF, // The next PCF for one of the two memory addresses of the spill
|
||||
output logic [`XLEN-1:0] PCSpillF, // PCF for one of the two memory addresses of the spill
|
||||
output logic SelSpillNextF, // During the transition between the two spill operations, the IFU should stall the pipeline
|
||||
output logic [31:0] PostSpillInstrRawF,// The final 32 bit instruction after merging the two spilled fetches into 1 instruction
|
||||
output logic CompressedF); // The fetched instruction is compressed
|
||||
output logic SelSpillNextF, // During the transition between the two spill operations, the IFU should stall the pipeline
|
||||
output logic [31:0] PostSpillInstrRawF,// The final 32 bit instruction after merging the two spilled fetches into 1 instruction
|
||||
output logic CompressedF); // The fetched instruction is compressed
|
||||
|
||||
// Spill threshold occurs when all the cache offset PC bits are 1 (except [0]). Without a cache this is just PCF[1]
|
||||
typedef enum logic [1:0] {STATE_READY, STATE_SPILL} statetype;
|
||||
statetype CurrState, NextState;
|
||||
localparam SPILLTHRESHOLD = CACHE_ENABLED ? `ICACHE_LINELENINBITS/32 : 1;
|
||||
logic [`XLEN-1:0] PCPlus2F;
|
||||
logic TakeSpillF;
|
||||
logic SpillF;
|
||||
logic SelSpillF;
|
||||
logic SpillSaveF;
|
||||
logic [15:0] InstrFirstHalfF;
|
||||
|
||||
statetype CurrState, NextState;
|
||||
logic [`XLEN-1:0] PCPlus2F;
|
||||
logic TakeSpillF;
|
||||
logic SpillF;
|
||||
logic SelSpillF;
|
||||
logic SpillSaveF;
|
||||
logic [15:0] InstrFirstHalfF;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// PC logic
|
||||
|
@ -109,7 +110,7 @@ module spill #(
|
|||
|
||||
// Need to use always comb to avoid pessimistic x propagation if PostSpillInstrRawF is x
|
||||
always_comb
|
||||
if (PostSpillInstrRawF[1:0] != 2'b11) CompressedF = 1'b1;
|
||||
else CompressedF = 1'b0;
|
||||
if (PostSpillInstrRawF[1:0] != 2'b11) CompressedF = 1'b1;
|
||||
else CompressedF = 1'b0;
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -30,14 +30,14 @@
|
|||
`include "wally-config.vh"
|
||||
|
||||
module dtim(
|
||||
input logic clk,
|
||||
input logic FlushW,
|
||||
input logic ce, // Chip Enable. 0: Holds ReadDataWordM
|
||||
input logic [1:0] MemRWM, // Read/Write control
|
||||
input logic clk,
|
||||
input logic FlushW,
|
||||
input logic ce, // Chip Enable. 0: Holds ReadDataWordM
|
||||
input logic [1:0] MemRWM, // Read/Write control
|
||||
input logic [`PA_BITS-1:0] DTIMAdr, // No stall: Execution stage memory address. Stall: Memory stage memory address
|
||||
input logic [`LLEN-1:0] WriteDataM, // Write data from IEU
|
||||
input logic [`LLEN-1:0] WriteDataM, // Write data from IEU
|
||||
input logic [`LLEN/8-1:0] ByteMaskM, // Selects which bytes within a word to write
|
||||
output logic [`LLEN-1:0] ReadDataWordM // Read data before subword selection
|
||||
output logic [`LLEN-1:0] ReadDataWordM // Read data before subword selection
|
||||
);
|
||||
|
||||
logic we;
|
||||
|
|
|
@ -37,17 +37,17 @@ module lrsc(
|
|||
input logic MemReadM, // Memory read
|
||||
input logic [1:0] PreLSURWM, // Memory operation from the HPTW or IEU [1]: read, [0]: write
|
||||
output logic [1:0] LSURWM, // Memory operation after potential squash of SC
|
||||
input logic [1:0] LSUAtomicM, // Atomic memory operaiton
|
||||
input logic [1:0] LSUAtomicM, // Atomic memory operaiton
|
||||
input logic [`PA_BITS-1:0] PAdrM, // Physical memory address
|
||||
output logic SquashSCW // Squash the store conditional by not allowing rf write
|
||||
);
|
||||
|
||||
// possible bug: *** double check if PreLSURWM needs to be flushed by ignorerequest.
|
||||
// Handle atomic load reserved / store conditional
|
||||
logic [`PA_BITS-1:2] ReservationPAdrW;
|
||||
logic ReservationValidM, ReservationValidW;
|
||||
logic lrM, scM, WriteAdrMatchM;
|
||||
logic SquashSCM;
|
||||
logic [`PA_BITS-1:2] ReservationPAdrW;
|
||||
logic ReservationValidM, ReservationValidW;
|
||||
logic lrM, scM, WriteAdrMatchM;
|
||||
logic SquashSCM;
|
||||
|
||||
assign lrM = MemReadM & LSUAtomicM[0];
|
||||
assign scM = PreLSURWM[0] & LSUAtomicM[0];
|
||||
|
@ -56,7 +56,7 @@ module lrsc(
|
|||
assign LSURWM = SquashSCM ? 2'b00 : PreLSURWM;
|
||||
always_comb begin // ReservationValidM (next value of valid reservation)
|
||||
if (lrM) ReservationValidM = 1; // set valid on load reserve
|
||||
// if we implement multiple harts invalidate reservation if another hart stores to this reservation.
|
||||
// if we implement multiple harts invalidate reservation if another hart stores to this reservation.
|
||||
else if (scM) ReservationValidM = 0; // clear valid on store to same address or any sc
|
||||
else ReservationValidM = ReservationValidW; // otherwise don't change valid
|
||||
end
|
||||
|
|
108
src/lsu/lsu.sv
108
src/lsu/lsu.sv
|
@ -78,61 +78,61 @@ module lsu (
|
|||
output logic [`XLEN/8-1:0] LSUHWSTRB, // Bus byte write enables from LSU to EBU
|
||||
// page table walker
|
||||
input logic [`XLEN-1:0] SATP_REGW, // SATP (supervisor address translation and protection) CSR
|
||||
input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, // STATUS CSR bits: make executable readable, supervisor user memory, machine privilege
|
||||
input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, // STATUS CSR bits: make executable readable, supervisor user memory, machine privilege
|
||||
input logic [1:0] STATUS_MPP, // Machine previous privilege mode
|
||||
input logic [`XLEN-1:0] PCSpillF, // Fetch PC
|
||||
input logic [`XLEN-1:0] PCSpillF, // Fetch PC
|
||||
input logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk
|
||||
input logic InstrUpdateDAF, // ITLB hit needs to update dirty or access bits
|
||||
input logic InstrUpdateDAF, // ITLB hit needs to update dirty or access bits
|
||||
output logic [`XLEN-1:0] PTE, // Page table entry write to ITLB
|
||||
output logic [1:0] PageType, // Type of page table entry to write to ITLB
|
||||
output logic ITLBWriteF, // Write PTE to ITLB
|
||||
output logic SelHPTW, // During a HPTW walk the effective privilege mode becomes S_MODE
|
||||
input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP configuration from privileged unit
|
||||
input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP configuration from privileged unit
|
||||
input var logic [`PA_BITS-3:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0] // PMP address from privileged unit
|
||||
);
|
||||
|
||||
logic [`XLEN+1:0] IEUAdrExtM; // Memory stage address zero-extended to PA_BITS or XLEN whichever is longer
|
||||
logic [`XLEN+1:0] IEUAdrExtE; // Execution stage address zero-extended to PA_BITS or XLEN whichever is longer
|
||||
logic [`PA_BITS-1:0] PAdrM; // Physical memory address
|
||||
logic [`XLEN+1:0] IHAdrM; // Either IEU or HPTW memory address
|
||||
logic [`XLEN+1:0] IEUAdrExtM; // Memory stage address zero-extended to PA_BITS or XLEN whichever is longer
|
||||
logic [`XLEN+1:0] IEUAdrExtE; // Execution stage address zero-extended to PA_BITS or XLEN whichever is longer
|
||||
logic [`PA_BITS-1:0] PAdrM; // Physical memory address
|
||||
logic [`XLEN+1:0] IHAdrM; // Either IEU or HPTW memory address
|
||||
|
||||
logic [1:0] PreLSURWM; // IEU or HPTW Read/Write signal
|
||||
logic [1:0] LSURWM; // IEU or HPTW Read/Write signal gated by LR/SC
|
||||
logic [2:0] LSUFunct3M; // IEU or HPTW memory operation size
|
||||
logic [6:0] LSUFunct7M; // AMO function gated by HPTW
|
||||
logic [1:0] LSUAtomicM; // AMO signal gated by HPTW
|
||||
logic [1:0] PreLSURWM; // IEU or HPTW Read/Write signal
|
||||
logic [1:0] LSURWM; // IEU or HPTW Read/Write signal gated by LR/SC
|
||||
logic [2:0] LSUFunct3M; // IEU or HPTW memory operation size
|
||||
logic [6:0] LSUFunct7M; // AMO function gated by HPTW
|
||||
logic [1:0] LSUAtomicM; // AMO signal gated by HPTW
|
||||
|
||||
logic GatedStallW; // Hazard unit StallW gated when SelHPTW = 1
|
||||
|
||||
logic BusStall; // Bus interface busy with multicycle operation
|
||||
logic HPTWStall; // HPTW busy with multicycle operation
|
||||
logic GatedStallW; // Hazard unit StallW gated when SelHPTW = 1
|
||||
|
||||
logic BusStall; // Bus interface busy with multicycle operation
|
||||
logic HPTWStall; // HPTW busy with multicycle operation
|
||||
|
||||
logic CacheableM; // PMA indicates memory address is cacheable
|
||||
logic BusCommittedM; // Bus memory operation in flight, delay interrupts
|
||||
logic DCacheCommittedM; // D$ memory operation started, delay interrupts
|
||||
logic CacheableM; // PMA indicates memory address is cacheable
|
||||
logic BusCommittedM; // Bus memory operation in flight, delay interrupts
|
||||
logic DCacheCommittedM; // D$ memory operation started, delay interrupts
|
||||
|
||||
logic [`LLEN-1:0] DTIMReadDataWordM; // DTIM read data
|
||||
logic [`LLEN-1:0] DCacheReadDataWordM; // D$ read data
|
||||
logic [`LLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data
|
||||
logic [`LLEN-1:0] LittleEndianReadDataWordM; // Endian-swapped read data
|
||||
logic [`LLEN-1:0] ReadDataWordM; // Read data before subword selection
|
||||
logic [`LLEN-1:0] ReadDataM; // Final read data
|
||||
logic [`LLEN-1:0] DTIMReadDataWordM; // DTIM read data
|
||||
logic [`LLEN-1:0] DCacheReadDataWordM; // D$ read data
|
||||
logic [`LLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data
|
||||
logic [`LLEN-1:0] LittleEndianReadDataWordM; // Endian-swapped read data
|
||||
logic [`LLEN-1:0] ReadDataWordM; // Read data before subword selection
|
||||
logic [`LLEN-1:0] ReadDataM; // Final read data
|
||||
|
||||
logic [`XLEN-1:0] IHWriteDataM; // IEU or HPTW write data
|
||||
logic [`XLEN-1:0] IMAWriteDataM; // IEU, HPTW, or AMO write data
|
||||
logic [`LLEN-1:0] IMAFWriteDataM; // IEU, HPTW, AMO, or FPU write data
|
||||
logic [`LLEN-1:0] LittleEndianWriteDataM; // Ending-swapped write data
|
||||
logic [`LLEN-1:0] LSUWriteDataM; // Final write data
|
||||
logic [(`LLEN-1)/8:0] ByteMaskM; // Selects which bytes within a word to write
|
||||
logic [`XLEN-1:0] IHWriteDataM; // IEU or HPTW write data
|
||||
logic [`XLEN-1:0] IMAWriteDataM; // IEU, HPTW, or AMO write data
|
||||
logic [`LLEN-1:0] IMAFWriteDataM; // IEU, HPTW, AMO, or FPU write data
|
||||
logic [`LLEN-1:0] LittleEndianWriteDataM; // Ending-swapped write data
|
||||
logic [`LLEN-1:0] LSUWriteDataM; // Final write data
|
||||
logic [(`LLEN-1)/8:0] ByteMaskM; // Selects which bytes within a word to write
|
||||
|
||||
logic DTLBMissM; // DTLB miss causes HPTW walk
|
||||
logic DTLBWriteM; // Writes PTE and PageType to DTLB
|
||||
logic DataUpdateDAM; // DTLB hit needs to update dirty or access bits
|
||||
logic LSULoadAccessFaultM; // Load acces fault
|
||||
logic LSUStoreAmoAccessFaultM; // Store access fault
|
||||
logic IgnoreRequestTLB; // On either ITLB or DTLB miss, ignore miss so HPTW can handle
|
||||
logic IgnoreRequest; // On FlushM or TLB miss ignore memory operation
|
||||
logic SelDTIM; // Select DTIM rather than bus or D$
|
||||
logic DTLBMissM; // DTLB miss causes HPTW walk
|
||||
logic DTLBWriteM; // Writes PTE and PageType to DTLB
|
||||
logic DataUpdateDAM; // DTLB hit needs to update dirty or access bits
|
||||
logic LSULoadAccessFaultM; // Load acces fault
|
||||
logic LSUStoreAmoAccessFaultM; // Store access fault
|
||||
logic IgnoreRequestTLB; // On either ITLB or DTLB miss, ignore miss so HPTW can handle
|
||||
logic IgnoreRequest; // On FlushM or TLB miss ignore memory operation
|
||||
logic SelDTIM; // Select DTIM rather than bus or D$
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -164,8 +164,8 @@ module lsu (
|
|||
assign PreLSURWM = MemRWM;
|
||||
assign IHAdrM = IEUAdrExtM;
|
||||
assign LSUFunct3M = Funct3M;
|
||||
assign LSUFunct7M = Funct7M;
|
||||
assign LSUAtomicM = AtomicM;
|
||||
assign LSUFunct7M = Funct7M;
|
||||
assign LSUAtomicM = AtomicM;
|
||||
assign IHWriteDataM = WriteDataM;
|
||||
assign LoadAccessFaultM = LSULoadAccessFaultM;
|
||||
assign StoreAmoAccessFaultM = LSUStoreAmoAccessFaultM;
|
||||
|
@ -194,7 +194,7 @@ module lsu (
|
|||
.PhysicalAddress(PAdrM), .TLBMiss(DTLBMissM), .Cacheable(CacheableM), .Idempotent(), .SelTIM(SelDTIM),
|
||||
.InstrAccessFaultF(), .LoadAccessFaultM(LSULoadAccessFaultM),
|
||||
.StoreAmoAccessFaultM(LSUStoreAmoAccessFaultM), .InstrPageFaultF(), .LoadPageFaultM,
|
||||
.StoreAmoPageFaultM,
|
||||
.StoreAmoPageFaultM,
|
||||
.LoadMisalignedFaultM, .StoreAmoMisalignedFaultM, // *** these faults need to be supressed during hptw.
|
||||
.UpdateDA(DataUpdateDAM),
|
||||
.AtomicAccessM(|LSUAtomicM), .ExecuteAccessF(1'b0),
|
||||
|
@ -227,7 +227,7 @@ module lsu (
|
|||
logic [1:0] DTIMMemRWM;
|
||||
|
||||
// The DTIM uses untranslated addresses, so it is not compatible with virtual memory.
|
||||
mux2 #(`PA_BITS) DTIMAdrMux(IEUAdrExtE[`PA_BITS-1:0], IEUAdrExtM[`PA_BITS-1:0], MemRWM[0], DTIMAdr);
|
||||
mux2 #(`PA_BITS) DTIMAdrMux(IEUAdrExtE[`PA_BITS-1:0], IEUAdrExtM[`PA_BITS-1:0], MemRWM[0], DTIMAdr);
|
||||
assign DTIMMemRWM = SelDTIM & ~IgnoreRequestTLB ? LSURWM : '0;
|
||||
// **** fix ReadDataWordM to be LLEN. ByteMask is wrong length.
|
||||
// **** create config to support DTIM with floating point.
|
||||
|
@ -250,18 +250,18 @@ module lsu (
|
|||
logic [AHBWLOGBWPL-1:0] BeatCount; // Position within a cacheline. ahbcacheinterface to cache
|
||||
logic DCacheBusAck; // ahbcacheinterface completed fetch or writeback
|
||||
logic SelBusBeat; // ahbcacheinterface selects postion in cacheline with BeatCount
|
||||
logic [1:0] CacheBusRW; // Cache sends request to ahbcacheinterface
|
||||
logic [1:0] BusRW; // Uncached bus memory access
|
||||
logic [1:0] CacheBusRW; // Cache sends request to ahbcacheinterface
|
||||
logic [1:0] BusRW; // Uncached bus memory access
|
||||
logic CacheableOrFlushCacheM; // Memory address is cacheable or operation is a cache flush
|
||||
logic [1:0] CacheRWM; // Cache read (10), write (01), AMO (11)
|
||||
logic [1:0] CacheAtomicM; // Cache AMO
|
||||
logic FlushDCache; // Suppress d cache flush if there is an ITLB miss.
|
||||
logic [1:0] CacheRWM; // Cache read (10), write (01), AMO (11)
|
||||
logic [1:0] CacheAtomicM; // Cache AMO
|
||||
logic FlushDCache; // Suppress d cache flush if there is an ITLB miss.
|
||||
|
||||
assign BusRW = ~CacheableM & ~IgnoreRequestTLB & ~SelDTIM ? LSURWM : '0;
|
||||
assign CacheableOrFlushCacheM = CacheableM | FlushDCacheM;
|
||||
assign CacheRWM = CacheableM & ~IgnoreRequestTLB & ~SelDTIM ? LSURWM : '0;
|
||||
assign CacheAtomicM = CacheableM & ~IgnoreRequestTLB & ~SelDTIM ? LSUAtomicM : '0;
|
||||
assign FlushDCache = FlushDCacheM & ~(IgnoreRequestTLB | SelHPTW);
|
||||
assign FlushDCache = FlushDCacheM & ~(IgnoreRequestTLB | SelHPTW);
|
||||
|
||||
cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN),
|
||||
.NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`LLEN), .READ_ONLY_CACHE(0)) dcache(
|
||||
|
@ -285,8 +285,8 @@ module lsu (
|
|||
.Cacheable(CacheableOrFlushCacheM), .BusRW, .Stall(GatedStallW),
|
||||
.BusStall, .BusCommitted(BusCommittedM));
|
||||
|
||||
// Mux between the 3 sources of read data, 0: cache, 1: Bus, 2: DTIM
|
||||
// Uncache bus access may be smaller width than LLEN. Duplicate LLENPOVERAHBW times.
|
||||
// Mux between the 3 sources of read data, 0: cache, 1: Bus, 2: DTIM
|
||||
// Uncache bus access may be smaller width than LLEN. Duplicate LLENPOVERAHBW times.
|
||||
// *** DTIMReadDataWordM should be increased to LLEN.
|
||||
// pma should generate exception for LLEN read to periph.
|
||||
mux3 #(`LLEN) UnCachedDataMux(.d0(DCacheReadDataWordM), .d1({LLENPOVERAHBW{FetchBuffer[`XLEN-1:0]}}),
|
||||
|
@ -305,7 +305,7 @@ module lsu (
|
|||
.HWSTRB(LSUHWSTRB), .BusRW, .ByteMask(ByteMaskM), .WriteData(LSUWriteDataM),
|
||||
.Stall(GatedStallW), .BusStall, .BusCommitted(BusCommittedM), .FetchBuffer(FetchBuffer));
|
||||
|
||||
// Mux between the 2 sources of read data, 0: Bus, 1: DTIM
|
||||
// Mux between the 2 sources of read data, 0: Bus, 1: DTIM
|
||||
if(`DTIM_SUPPORTED) mux2 #(`XLEN) ReadDataMux2(FetchBuffer, DTIMReadDataWordM, SelDTIM, ReadDataWordMuxM);
|
||||
else assign ReadDataWordMuxM = FetchBuffer[`XLEN-1:0];
|
||||
assign LSUHBURST = 3'b0;
|
||||
|
@ -338,7 +338,7 @@ module lsu (
|
|||
// Subword Accesses
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
subwordread subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM,
|
||||
.FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM);
|
||||
.FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM);
|
||||
subwordwrite subwordwrite(.LSUFunct3M, .IMAFWriteDataM, .LittleEndianWriteDataM);
|
||||
|
||||
// Compute byte masks
|
||||
|
|
|
@ -31,16 +31,16 @@
|
|||
|
||||
module subwordread
|
||||
(
|
||||
input logic [`LLEN-1:0] ReadDataWordMuxM,
|
||||
input logic [2:0] PAdrM,
|
||||
input logic [2:0] Funct3M,
|
||||
input logic [`LLEN-1:0] ReadDataWordMuxM,
|
||||
input logic [2:0] PAdrM,
|
||||
input logic [2:0] Funct3M,
|
||||
input logic FpLoadStoreM,
|
||||
input logic BigEndianM,
|
||||
output logic [`LLEN-1:0] ReadDataM
|
||||
);
|
||||
|
||||
logic [7:0] ByteM;
|
||||
logic [15:0] HalfwordM;
|
||||
logic [7:0] ByteM;
|
||||
logic [15:0] HalfwordM;
|
||||
logic [2:0] PAdrSwap;
|
||||
// Funct3M[2] is the unsigned bit. mask upper bits.
|
||||
// Funct3M[1:0] is the size of the memory access.
|
||||
|
@ -87,11 +87,11 @@ module subwordread
|
|||
3'b001: ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh
|
||||
3'b010: ReadDataM = {{`LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw
|
||||
3'b011: ReadDataM = {{`LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld
|
||||
3'b100: ReadDataM = {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu
|
||||
// 3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128
|
||||
3'b101: ReadDataM = {{`LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu
|
||||
3'b110: ReadDataM = {{`LLEN-32{1'b0}}, WordM[31:0]}; // lwu
|
||||
default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen
|
||||
3'b100: ReadDataM = {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu
|
||||
//3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128
|
||||
3'b101: ReadDataM = {{`LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu
|
||||
3'b110: ReadDataM = {{`LLEN-32{1'b0}}, WordM[31:0]}; // lwu
|
||||
default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen
|
||||
endcase
|
||||
|
||||
end else begin:swrmux // 32-bit
|
||||
|
@ -114,13 +114,13 @@ module subwordread
|
|||
// sign extension
|
||||
always_comb
|
||||
case(Funct3M)
|
||||
3'b000: ReadDataM = {{`LLEN-8{ByteM[7]}}, ByteM}; // lb
|
||||
3'b001: ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh
|
||||
3'b010: ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw
|
||||
3'b011: ReadDataM = ReadDataWordMuxM; // fld
|
||||
3'b100: ReadDataM = {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu
|
||||
3'b101: ReadDataM = {{`LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu
|
||||
default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen
|
||||
3'b000: ReadDataM = {{`LLEN-8{ByteM[7]}}, ByteM}; // lb
|
||||
3'b001: ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh
|
||||
3'b010: ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw
|
||||
3'b011: ReadDataM = ReadDataWordMuxM; // fld
|
||||
3'b100: ReadDataM = {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu
|
||||
3'b101: ReadDataM = {{`LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu
|
||||
default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen
|
||||
endcase
|
||||
end
|
||||
endmodule
|
||||
|
|
|
@ -36,7 +36,7 @@ module div(
|
|||
input logic IntDivE, // integer division/remainder instruction of any type
|
||||
input logic DivSignedE, // signed division
|
||||
input logic W64E, // W-type instructions (divw, divuw, remw, remuw)
|
||||
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // Forwarding mux outputs for Source A and B
|
||||
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE,// Forwarding mux outputs for Source A and B
|
||||
output logic DivBusyE, // Divide is busy - stall pipeline
|
||||
output logic [`XLEN-1:0] QuotM, RemM // Quotient and remainder outputs
|
||||
);
|
||||
|
@ -76,7 +76,7 @@ module div(
|
|||
mux2 #(`XLEN) dinmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31]&DivSignedE}}, ForwardedSrcBE[31:0]}, W64E, DinE);
|
||||
end else begin // RV32 has no W-type instructions
|
||||
assign XinE = ForwardedSrcAE;
|
||||
assign DinE = ForwardedSrcBE;
|
||||
assign DinE = ForwardedSrcBE;
|
||||
end
|
||||
|
||||
// Extract sign bits and check fo division by zero
|
||||
|
|
100
src/mdu/mdu.sv
100
src/mdu/mdu.sv
|
@ -29,62 +29,62 @@
|
|||
`include "wally-config.vh"
|
||||
|
||||
module mdu(
|
||||
input logic clk, reset,
|
||||
input logic StallM, StallW,
|
||||
input logic FlushE, FlushM, FlushW,
|
||||
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // inputs A and B from IEU forwarding mux output
|
||||
input logic [2:0] Funct3E, Funct3M, // type of MDU operation
|
||||
input logic IntDivE, W64E, // Integer division/remainder, and W-type instrutions
|
||||
output logic [`XLEN-1:0] MDUResultW, // multiply/divide result
|
||||
output logic DivBusyE // busy signal to stall pipeline in Execute stage
|
||||
input logic clk, reset,
|
||||
input logic StallM, StallW,
|
||||
input logic FlushE, FlushM, FlushW,
|
||||
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // inputs A and B from IEU forwarding mux output
|
||||
input logic [2:0] Funct3E, Funct3M, // type of MDU operation
|
||||
input logic IntDivE, W64E, // Integer division/remainder, and W-type instrutions
|
||||
output logic [`XLEN-1:0] MDUResultW, // multiply/divide result
|
||||
output logic DivBusyE // busy signal to stall pipeline in Execute stage
|
||||
);
|
||||
|
||||
logic [`XLEN*2-1:0] ProdM; // double-width product from mul
|
||||
logic [`XLEN-1:0] QuotM, RemM; // quotient and remainder from intdivrestoring
|
||||
logic [`XLEN-1:0] PrelimResultM; // selected result before W truncation
|
||||
logic [`XLEN-1:0] MDUResultM; // result after W truncation
|
||||
logic W64M; // W-type instruction
|
||||
logic [`XLEN*2-1:0] ProdM; // double-width product from mul
|
||||
logic [`XLEN-1:0] QuotM, RemM; // quotient and remainder from intdivrestoring
|
||||
logic [`XLEN-1:0] PrelimResultM; // selected result before W truncation
|
||||
logic [`XLEN-1:0] MDUResultM; // result after W truncation
|
||||
logic W64M; // W-type instruction
|
||||
|
||||
// Multiplier
|
||||
mul mul(.clk, .reset, .StallM, .FlushM, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .ProdM);
|
||||
// Multiplier
|
||||
mul mul(.clk, .reset, .StallM, .FlushM, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .ProdM);
|
||||
|
||||
// Divider
|
||||
// Start a divide when a new division instruction is received and the divider isn't already busy or finishing
|
||||
// When IDIV_ON_FPU is set, use the FPU divider instead
|
||||
// In ZMMUL, with M_SUPPORTED = 0, omit the divider
|
||||
if ((`IDIV_ON_FPU) || (!`M_SUPPORTED)) begin:nodiv
|
||||
assign QuotM = 0;
|
||||
assign RemM = 0;
|
||||
assign DivBusyE = 0;
|
||||
end else begin:div
|
||||
div div(.clk, .reset, .StallM, .FlushE, .DivSignedE(~Funct3E[0]), .W64E, .IntDivE,
|
||||
.ForwardedSrcAE, .ForwardedSrcBE, .DivBusyE, .QuotM, .RemM);
|
||||
end
|
||||
|
||||
// Result multiplexer
|
||||
// For ZMMUL, QuotM and RemM are tied to 0, so the mux automatically simplifies
|
||||
always_comb
|
||||
case (Funct3M)
|
||||
3'b000: PrelimResultM = ProdM[`XLEN-1:0]; // mul
|
||||
3'b001: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; // mulh
|
||||
3'b010: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; // mulhsu
|
||||
3'b011: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; // mulhu
|
||||
3'b100: PrelimResultM = QuotM; // div
|
||||
3'b101: PrelimResultM = QuotM; // divu
|
||||
3'b110: PrelimResultM = RemM; // rem
|
||||
3'b111: PrelimResultM = RemM; // remu
|
||||
endcase
|
||||
// Divider
|
||||
// Start a divide when a new division instruction is received and the divider isn't already busy or finishing
|
||||
// When IDIV_ON_FPU is set, use the FPU divider instead
|
||||
// In ZMMUL, with M_SUPPORTED = 0, omit the divider
|
||||
if ((`IDIV_ON_FPU) || (!`M_SUPPORTED)) begin:nodiv
|
||||
assign QuotM = 0;
|
||||
assign RemM = 0;
|
||||
assign DivBusyE = 0;
|
||||
end else begin:div
|
||||
div div(.clk, .reset, .StallM, .FlushE, .DivSignedE(~Funct3E[0]), .W64E, .IntDivE,
|
||||
.ForwardedSrcAE, .ForwardedSrcBE, .DivBusyE, .QuotM, .RemM);
|
||||
end
|
||||
|
||||
// Result multiplexer
|
||||
// For ZMMUL, QuotM and RemM are tied to 0, so the mux automatically simplifies
|
||||
always_comb
|
||||
case (Funct3M)
|
||||
3'b000: PrelimResultM = ProdM[`XLEN-1:0]; // mul
|
||||
3'b001: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; // mulh
|
||||
3'b010: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; // mulhsu
|
||||
3'b011: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; // mulhu
|
||||
3'b100: PrelimResultM = QuotM; // div
|
||||
3'b101: PrelimResultM = QuotM; // divu
|
||||
3'b110: PrelimResultM = RemM; // rem
|
||||
3'b111: PrelimResultM = RemM; // remu
|
||||
endcase
|
||||
|
||||
// Handle sign extension for W-type instructions
|
||||
flopenrc #(1) W64MReg(clk, reset, FlushM, ~StallM, W64E, W64M);
|
||||
if (`XLEN == 64) begin:resmux // RV64 has W-type instructions
|
||||
assign MDUResultM = W64M ? {{32{PrelimResultM[31]}}, PrelimResultM[31:0]} : PrelimResultM;
|
||||
end else begin:resmux // RV32 has no W-type instructions
|
||||
assign MDUResultM = PrelimResultM;
|
||||
end
|
||||
// Handle sign extension for W-type instructions
|
||||
flopenrc #(1) W64MReg(clk, reset, FlushM, ~StallM, W64E, W64M);
|
||||
if (`XLEN == 64) begin:resmux // RV64 has W-type instructions
|
||||
assign MDUResultM = W64M ? {{32{PrelimResultM[31]}}, PrelimResultM[31:0]} : PrelimResultM;
|
||||
end else begin:resmux // RV32 has no W-type instructions
|
||||
assign MDUResultM = PrelimResultM;
|
||||
end
|
||||
|
||||
// Writeback stage pipeline register
|
||||
flopenrc #(`XLEN) MDUResultWReg(clk, reset, FlushW, ~StallW, MDUResultM, MDUResultW);
|
||||
// Writeback stage pipeline register
|
||||
flopenrc #(`XLEN) MDUResultWReg(clk, reset, FlushW, ~StallW, MDUResultM, MDUResultW);
|
||||
endmodule // mdu
|
||||
|
||||
|
||||
|
|
354
src/mmu/hptw.sv
354
src/mmu/hptw.sv
|
@ -32,118 +32,118 @@
|
|||
`include "wally-config.vh"
|
||||
|
||||
module hptw (
|
||||
input logic clk, reset,
|
||||
input logic [`XLEN-1:0] SATP_REGW, // includes SATP.MODE to determine number of levels in page table
|
||||
input logic [`XLEN-1:0] PCSpillF, // addresses to translate
|
||||
input logic [`XLEN+1:0] IEUAdrExtM, // addresses to translate
|
||||
input logic [1:0] MemRWM, AtomicM,
|
||||
// system status
|
||||
input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV,
|
||||
input logic [1:0] STATUS_MPP,
|
||||
input logic [1:0] PrivilegeModeW,
|
||||
input logic [`XLEN-1:0] ReadDataM, // page table entry from LSU
|
||||
input logic [`XLEN-1:0] WriteDataM,
|
||||
input logic DCacheStallM, // stall from LSU
|
||||
input logic [2:0] Funct3M,
|
||||
input logic [6:0] Funct7M,
|
||||
input logic ITLBMissF,
|
||||
input logic DTLBMissM,
|
||||
input logic FlushW,
|
||||
input logic InstrUpdateDAF,
|
||||
input logic DataUpdateDAM,
|
||||
output logic [`XLEN-1:0] PTE, // page table entry to TLBs
|
||||
output logic [1:0] PageType, // page type to TLBs
|
||||
output logic ITLBWriteF, DTLBWriteM, // write TLB with new entry
|
||||
output logic [1:0] PreLSURWM,
|
||||
output logic [`XLEN+1:0] IHAdrM,
|
||||
output logic [`XLEN-1:0] IHWriteDataM,
|
||||
output logic [1:0] LSUAtomicM,
|
||||
output logic [2:0] LSUFunct3M,
|
||||
output logic [6:0] LSUFunct7M,
|
||||
output logic IgnoreRequestTLB,
|
||||
output logic SelHPTW,
|
||||
output logic HPTWStall,
|
||||
input logic LSULoadAccessFaultM, LSUStoreAmoAccessFaultM,
|
||||
output logic LoadAccessFaultM, StoreAmoAccessFaultM, HPTWInstrAccessFaultM
|
||||
input logic clk, reset,
|
||||
input logic [`XLEN-1:0] SATP_REGW, // includes SATP.MODE to determine number of levels in page table
|
||||
input logic [`XLEN-1:0] PCSpillF, // addresses to translate
|
||||
input logic [`XLEN+1:0] IEUAdrExtM, // addresses to translate
|
||||
input logic [1:0] MemRWM, AtomicM,
|
||||
// system status
|
||||
input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV,
|
||||
input logic [1:0] STATUS_MPP,
|
||||
input logic [1:0] PrivilegeModeW,
|
||||
input logic [`XLEN-1:0] ReadDataM, // page table entry from LSU
|
||||
input logic [`XLEN-1:0] WriteDataM,
|
||||
input logic DCacheStallM, // stall from LSU
|
||||
input logic [2:0] Funct3M,
|
||||
input logic [6:0] Funct7M,
|
||||
input logic ITLBMissF,
|
||||
input logic DTLBMissM,
|
||||
input logic FlushW,
|
||||
input logic InstrUpdateDAF,
|
||||
input logic DataUpdateDAM,
|
||||
output logic [`XLEN-1:0] PTE, // page table entry to TLBs
|
||||
output logic [1:0] PageType, // page type to TLBs
|
||||
output logic ITLBWriteF, DTLBWriteM, // write TLB with new entry
|
||||
output logic [1:0] PreLSURWM,
|
||||
output logic [`XLEN+1:0] IHAdrM,
|
||||
output logic [`XLEN-1:0] IHWriteDataM,
|
||||
output logic [1:0] LSUAtomicM,
|
||||
output logic [2:0] LSUFunct3M,
|
||||
output logic [6:0] LSUFunct7M,
|
||||
output logic IgnoreRequestTLB,
|
||||
output logic SelHPTW,
|
||||
output logic HPTWStall,
|
||||
input logic LSULoadAccessFaultM, LSUStoreAmoAccessFaultM,
|
||||
output logic LoadAccessFaultM, StoreAmoAccessFaultM, HPTWInstrAccessFaultM
|
||||
);
|
||||
|
||||
typedef enum logic [3:0] {L0_ADR, L0_RD,
|
||||
L1_ADR, L1_RD,
|
||||
L2_ADR, L2_RD,
|
||||
L3_ADR, L3_RD,
|
||||
LEAF, IDLE, UPDATE_PTE} statetype;
|
||||
L1_ADR, L1_RD,
|
||||
L2_ADR, L2_RD,
|
||||
L3_ADR, L3_RD,
|
||||
LEAF, IDLE, UPDATE_PTE} statetype;
|
||||
|
||||
logic DTLBWalk; // register TLBs translation miss requests
|
||||
logic [`PPN_BITS-1:0] BasePageTablePPN;
|
||||
logic [`PPN_BITS-1:0] CurrentPPN;
|
||||
logic Executable, Writable, Readable, Valid, PTE_U;
|
||||
logic Misaligned, MegapageMisaligned;
|
||||
logic ValidPTE, LeafPTE, ValidLeafPTE, ValidNonLeafPTE;
|
||||
logic StartWalk;
|
||||
logic TLBMiss;
|
||||
logic PRegEn;
|
||||
logic [1:0] NextPageType;
|
||||
logic DTLBWalk; // register TLBs translation miss requests
|
||||
logic [`PPN_BITS-1:0] BasePageTablePPN;
|
||||
logic [`PPN_BITS-1:0] CurrentPPN;
|
||||
logic Executable, Writable, Readable, Valid, PTE_U;
|
||||
logic Misaligned, MegapageMisaligned;
|
||||
logic ValidPTE, LeafPTE, ValidLeafPTE, ValidNonLeafPTE;
|
||||
logic StartWalk;
|
||||
logic TLBMiss;
|
||||
logic PRegEn;
|
||||
logic [1:0] NextPageType;
|
||||
logic [`SVMODE_BITS-1:0] SvMode;
|
||||
logic [`XLEN-1:0] TranslationVAdr;
|
||||
logic [`XLEN-1:0] NextPTE;
|
||||
logic UpdatePTE;
|
||||
logic HPTWUpdateDA;
|
||||
logic [`PA_BITS-1:0] HPTWReadAdr;
|
||||
logic SelHPTWAdr;
|
||||
logic [`XLEN+1:0] HPTWAdrExt;
|
||||
logic ITLBMissOrUpdateDAF;
|
||||
logic DTLBMissOrUpdateDAM;
|
||||
logic [`XLEN-1:0] TranslationVAdr;
|
||||
logic [`XLEN-1:0] NextPTE;
|
||||
logic UpdatePTE;
|
||||
logic HPTWUpdateDA;
|
||||
logic [`PA_BITS-1:0] HPTWReadAdr;
|
||||
logic SelHPTWAdr;
|
||||
logic [`XLEN+1:0] HPTWAdrExt;
|
||||
logic ITLBMissOrUpdateDAF;
|
||||
logic DTLBMissOrUpdateDAM;
|
||||
logic LSUAccessFaultM;
|
||||
logic [`PA_BITS-1:0] HPTWAdr;
|
||||
logic [1:0] HPTWRW;
|
||||
logic [2:0] HPTWSize; // 32 or 64 bit access
|
||||
statetype WalkerState, NextWalkerState, InitialWalkerState;
|
||||
logic [`PA_BITS-1:0] HPTWAdr;
|
||||
logic [1:0] HPTWRW;
|
||||
logic [2:0] HPTWSize; // 32 or 64 bit access
|
||||
statetype WalkerState, NextWalkerState, InitialWalkerState;
|
||||
|
||||
// map hptw access faults onto either the original LSU load/store fault or instruction access fault
|
||||
assign LSUAccessFaultM = LSULoadAccessFaultM | LSUStoreAmoAccessFaultM;
|
||||
assign LoadAccessFaultM = WalkerState == IDLE ? LSULoadAccessFaultM : LSUAccessFaultM & DTLBWalk & MemRWM[1] & ~MemRWM[0];
|
||||
assign StoreAmoAccessFaultM = WalkerState == IDLE ? LSUStoreAmoAccessFaultM : LSUAccessFaultM & DTLBWalk & MemRWM[0];
|
||||
assign HPTWInstrAccessFaultM = WalkerState == IDLE ? 1'b0: LSUAccessFaultM & ~DTLBWalk;
|
||||
assign LSUAccessFaultM = LSULoadAccessFaultM | LSUStoreAmoAccessFaultM;
|
||||
assign LoadAccessFaultM = WalkerState == IDLE ? LSULoadAccessFaultM : LSUAccessFaultM & DTLBWalk & MemRWM[1] & ~MemRWM[0];
|
||||
assign StoreAmoAccessFaultM = WalkerState == IDLE ? LSUStoreAmoAccessFaultM : LSUAccessFaultM & DTLBWalk & MemRWM[0];
|
||||
assign HPTWInstrAccessFaultM = WalkerState == IDLE ? 1'b0: LSUAccessFaultM & ~DTLBWalk;
|
||||
|
||||
// Extract bits from CSRs and inputs
|
||||
assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS];
|
||||
assign BasePageTablePPN = SATP_REGW[`PPN_BITS-1:0];
|
||||
assign TLBMiss = (DTLBMissOrUpdateDAM | ITLBMissOrUpdateDAF);
|
||||
// Extract bits from CSRs and inputs
|
||||
assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS];
|
||||
assign BasePageTablePPN = SATP_REGW[`PPN_BITS-1:0];
|
||||
assign TLBMiss = (DTLBMissOrUpdateDAM | ITLBMissOrUpdateDAF);
|
||||
|
||||
// Determine which address to translate
|
||||
mux2 #(`XLEN) vadrmux(PCSpillF, IEUAdrExtM[`XLEN-1:0], DTLBWalk, TranslationVAdr);
|
||||
assign CurrentPPN = PTE[`PPN_BITS+9:10];
|
||||
// Determine which address to translate
|
||||
mux2 #(`XLEN) vadrmux(PCSpillF, IEUAdrExtM[`XLEN-1:0], DTLBWalk, TranslationVAdr);
|
||||
assign CurrentPPN = PTE[`PPN_BITS+9:10];
|
||||
|
||||
// State flops
|
||||
flopenr #(1) TLBMissMReg(clk, reset, StartWalk, DTLBMissOrUpdateDAM, DTLBWalk); // when walk begins, record whether it was for DTLB (or record 0 for ITLB)
|
||||
assign PRegEn = HPTWRW[1] & ~DCacheStallM | UpdatePTE;
|
||||
flopenr #(`XLEN) PTEReg(clk, reset, PRegEn, NextPTE, PTE); // Capture page table entry from data cache
|
||||
// State flops
|
||||
flopenr #(1) TLBMissMReg(clk, reset, StartWalk, DTLBMissOrUpdateDAM, DTLBWalk); // when walk begins, record whether it was for DTLB (or record 0 for ITLB)
|
||||
assign PRegEn = HPTWRW[1] & ~DCacheStallM | UpdatePTE;
|
||||
flopenr #(`XLEN) PTEReg(clk, reset, PRegEn, NextPTE, PTE); // Capture page table entry from data cache
|
||||
|
||||
// Assign PTE descriptors common across all XLEN values
|
||||
// For non-leaf PTEs, D, A, U bits are reserved and ignored. They do not cause faults while walking the page table
|
||||
assign {PTE_U, Executable, Writable, Readable, Valid} = PTE[4:0];
|
||||
assign LeafPTE = Executable | Writable | Readable;
|
||||
assign ValidPTE = Valid & ~(Writable & ~Readable);
|
||||
assign ValidLeafPTE = ValidPTE & LeafPTE;
|
||||
assign ValidNonLeafPTE = ValidPTE & ~LeafPTE;
|
||||
// Assign PTE descriptors common across all XLEN values
|
||||
// For non-leaf PTEs, D, A, U bits are reserved and ignored. They do not cause faults while walking the page table
|
||||
assign {PTE_U, Executable, Writable, Readable, Valid} = PTE[4:0];
|
||||
assign LeafPTE = Executable | Writable | Readable;
|
||||
assign ValidPTE = Valid & ~(Writable & ~Readable);
|
||||
assign ValidLeafPTE = ValidPTE & LeafPTE;
|
||||
assign ValidNonLeafPTE = ValidPTE & ~LeafPTE;
|
||||
|
||||
if(`SVADU_SUPPORTED) begin : hptwwrites
|
||||
logic ReadAccess, WriteAccess;
|
||||
logic InvalidRead, InvalidWrite, InvalidOp;
|
||||
logic UpperBitsUnequal;
|
||||
logic OtherPageFault;
|
||||
logic [1:0] EffectivePrivilegeMode;
|
||||
logic ImproperPrivilege;
|
||||
logic SaveHPTWAdr, SelHPTWWriteAdr;
|
||||
logic [`PA_BITS-1:0] HPTWWriteAdr;
|
||||
logic SetDirty;
|
||||
logic Dirty, Accessed;
|
||||
logic [`XLEN-1:0] AccessedPTE;
|
||||
logic ReadAccess, WriteAccess;
|
||||
logic InvalidRead, InvalidWrite, InvalidOp;
|
||||
logic UpperBitsUnequal;
|
||||
logic OtherPageFault;
|
||||
logic [1:0] EffectivePrivilegeMode;
|
||||
logic ImproperPrivilege;
|
||||
logic SaveHPTWAdr, SelHPTWWriteAdr;
|
||||
logic [`PA_BITS-1:0] HPTWWriteAdr;
|
||||
logic SetDirty;
|
||||
logic Dirty, Accessed;
|
||||
logic [`XLEN-1:0] AccessedPTE;
|
||||
|
||||
assign AccessedPTE = {PTE[`XLEN-1:8], (SetDirty | PTE[7]), 1'b1, PTE[5:0]}; // set accessed bit, conditionally set dirty bit
|
||||
mux2 #(`XLEN) NextPTEMux(ReadDataM, AccessedPTE, UpdatePTE, NextPTE);
|
||||
assign AccessedPTE = {PTE[`XLEN-1:8], (SetDirty | PTE[7]), 1'b1, PTE[5:0]}; // set accessed bit, conditionally set dirty bit
|
||||
mux2 #(`XLEN) NextPTEMux(ReadDataM, AccessedPTE, UpdatePTE, NextPTE);
|
||||
flopenr #(`PA_BITS) HPTWAdrWriteReg(clk, reset, SaveHPTWAdr, HPTWReadAdr, HPTWWriteAdr);
|
||||
|
||||
|
||||
assign SaveHPTWAdr = WalkerState == L0_ADR;
|
||||
assign SelHPTWWriteAdr = UpdatePTE | HPTWRW[0];
|
||||
mux2 #(`PA_BITS) HPTWWriteAdrMux(HPTWReadAdr, HPTWWriteAdr, SelHPTWWriteAdr, HPTWAdr);
|
||||
|
@ -158,11 +158,11 @@ module hptw (
|
|||
((EffectivePrivilegeMode == `S_MODE) & PTE_U & (~STATUS_SUM & DTLBWalk));
|
||||
|
||||
// Check for page faults
|
||||
vm64check vm64check(.SATP_MODE(SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]), .VAdr(TranslationVAdr),
|
||||
.SV39Mode(), .UpperBitsUnequal);
|
||||
vm64check vm64check(.SATP_MODE(SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]), .VAdr(TranslationVAdr),
|
||||
.SV39Mode(), .UpperBitsUnequal);
|
||||
assign InvalidRead = ReadAccess & ~Readable & (~STATUS_MXR | ~Executable);
|
||||
assign InvalidWrite = WriteAccess & ~Writable;
|
||||
assign InvalidOp = DTLBWalk ? (InvalidRead | InvalidWrite) : ~Executable;
|
||||
assign InvalidOp = DTLBWalk ? (InvalidRead | InvalidWrite) : ~Executable;
|
||||
assign OtherPageFault = ImproperPrivilege | InvalidOp | UpperBitsUnequal | Misaligned | ~Valid;
|
||||
|
||||
// hptw needs to know if there is a Dirty or Access fault occuring on this
|
||||
|
@ -181,62 +181,62 @@ module hptw (
|
|||
assign HPTWRW[0] = '0;
|
||||
end
|
||||
|
||||
// Enable and select signals based on states
|
||||
assign StartWalk = (WalkerState == IDLE) & TLBMiss;
|
||||
assign HPTWRW[1] = (WalkerState == L3_RD) | (WalkerState == L2_RD) | (WalkerState == L1_RD) | (WalkerState == L0_RD);
|
||||
assign DTLBWriteM = (WalkerState == LEAF & ~HPTWUpdateDA) & DTLBWalk;
|
||||
assign ITLBWriteF = (WalkerState == LEAF & ~HPTWUpdateDA) & ~DTLBWalk;
|
||||
// Enable and select signals based on states
|
||||
assign StartWalk = (WalkerState == IDLE) & TLBMiss;
|
||||
assign HPTWRW[1] = (WalkerState == L3_RD) | (WalkerState == L2_RD) | (WalkerState == L1_RD) | (WalkerState == L0_RD);
|
||||
assign DTLBWriteM = (WalkerState == LEAF & ~HPTWUpdateDA) & DTLBWalk;
|
||||
assign ITLBWriteF = (WalkerState == LEAF & ~HPTWUpdateDA) & ~DTLBWalk;
|
||||
|
||||
// FSM to track PageType based on the levels of the page table traversed
|
||||
flopr #(2) PageTypeReg(clk, reset, NextPageType, PageType);
|
||||
always_comb
|
||||
case (WalkerState)
|
||||
L3_RD: NextPageType = 2'b11; // terapage
|
||||
L2_RD: NextPageType = 2'b10; // gigapage
|
||||
L1_RD: NextPageType = 2'b01; // megapage
|
||||
L0_RD: NextPageType = 2'b00; // kilopage
|
||||
default: NextPageType = PageType;
|
||||
endcase
|
||||
// FSM to track PageType based on the levels of the page table traversed
|
||||
flopr #(2) PageTypeReg(clk, reset, NextPageType, PageType);
|
||||
always_comb
|
||||
case (WalkerState)
|
||||
L3_RD: NextPageType = 2'b11; // terapage
|
||||
L2_RD: NextPageType = 2'b10; // gigapage
|
||||
L1_RD: NextPageType = 2'b01; // megapage
|
||||
L0_RD: NextPageType = 2'b00; // kilopage
|
||||
default: NextPageType = PageType;
|
||||
endcase
|
||||
|
||||
// HPTWAdr muxing
|
||||
if (`XLEN==32) begin // RV32
|
||||
logic [9:0] VPN;
|
||||
logic [`PPN_BITS-1:0] PPN;
|
||||
assign VPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? TranslationVAdr[31:22] : TranslationVAdr[21:12]; // select VPN field based on HPTW state
|
||||
assign PPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? BasePageTablePPN : CurrentPPN;
|
||||
assign HPTWReadAdr = {PPN, VPN, 2'b00};
|
||||
assign HPTWSize = 3'b010;
|
||||
end else begin // RV64
|
||||
logic [8:0] VPN;
|
||||
logic [`PPN_BITS-1:0] PPN;
|
||||
always_comb
|
||||
case (WalkerState) // select VPN field based on HPTW state
|
||||
L3_ADR, L3_RD: VPN = TranslationVAdr[47:39];
|
||||
L2_ADR, L2_RD: VPN = TranslationVAdr[38:30];
|
||||
L1_ADR, L1_RD: VPN = TranslationVAdr[29:21];
|
||||
default: VPN = TranslationVAdr[20:12];
|
||||
endcase
|
||||
assign PPN = ((WalkerState == L3_ADR) | (WalkerState == L3_RD) |
|
||||
(SvMode != `SV48 & ((WalkerState == L2_ADR) | (WalkerState == L2_RD)))) ? BasePageTablePPN : CurrentPPN;
|
||||
assign HPTWReadAdr = {PPN, VPN, 3'b000};
|
||||
assign HPTWSize = 3'b011;
|
||||
end
|
||||
// HPTWAdr muxing
|
||||
if (`XLEN==32) begin // RV32
|
||||
logic [9:0] VPN;
|
||||
logic [`PPN_BITS-1:0] PPN;
|
||||
assign VPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? TranslationVAdr[31:22] : TranslationVAdr[21:12]; // select VPN field based on HPTW state
|
||||
assign PPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? BasePageTablePPN : CurrentPPN;
|
||||
assign HPTWReadAdr = {PPN, VPN, 2'b00};
|
||||
assign HPTWSize = 3'b010;
|
||||
end else begin // RV64
|
||||
logic [8:0] VPN;
|
||||
logic [`PPN_BITS-1:0] PPN;
|
||||
always_comb
|
||||
case (WalkerState) // select VPN field based on HPTW state
|
||||
L3_ADR, L3_RD: VPN = TranslationVAdr[47:39];
|
||||
L2_ADR, L2_RD: VPN = TranslationVAdr[38:30];
|
||||
L1_ADR, L1_RD: VPN = TranslationVAdr[29:21];
|
||||
default: VPN = TranslationVAdr[20:12];
|
||||
endcase
|
||||
assign PPN = ((WalkerState == L3_ADR) | (WalkerState == L3_RD) |
|
||||
(SvMode != `SV48 & ((WalkerState == L2_ADR) | (WalkerState == L2_RD)))) ? BasePageTablePPN : CurrentPPN;
|
||||
assign HPTWReadAdr = {PPN, VPN, 3'b000};
|
||||
assign HPTWSize = 3'b011;
|
||||
end
|
||||
|
||||
// Initial state and misalignment for RV32/64
|
||||
if (`XLEN == 32) begin
|
||||
assign InitialWalkerState = L1_ADR;
|
||||
assign MegapageMisaligned = |(CurrentPPN[9:0]); // must have zero PPN0
|
||||
assign Misaligned = ((WalkerState == L0_ADR) & MegapageMisaligned);
|
||||
end else begin
|
||||
logic GigapageMisaligned, TerapageMisaligned;
|
||||
assign InitialWalkerState = (SvMode == `SV48) ? L3_ADR : L2_ADR;
|
||||
assign TerapageMisaligned = |(CurrentPPN[26:0]); // must have zero PPN2, PPN1, PPN0
|
||||
assign GigapageMisaligned = |(CurrentPPN[17:0]); // must have zero PPN1 and PPN0
|
||||
assign MegapageMisaligned = |(CurrentPPN[8:0]); // must have zero PPN0
|
||||
assign Misaligned = ((WalkerState == L2_ADR) & TerapageMisaligned) | ((WalkerState == L1_ADR) & GigapageMisaligned) | ((WalkerState == L0_ADR) & MegapageMisaligned);
|
||||
end
|
||||
// Initial state and misalignment for RV32/64
|
||||
if (`XLEN == 32) begin
|
||||
assign InitialWalkerState = L1_ADR;
|
||||
assign MegapageMisaligned = |(CurrentPPN[9:0]); // must have zero PPN0
|
||||
assign Misaligned = ((WalkerState == L0_ADR) & MegapageMisaligned);
|
||||
end else begin
|
||||
logic GigapageMisaligned, TerapageMisaligned;
|
||||
assign InitialWalkerState = (SvMode == `SV48) ? L3_ADR : L2_ADR;
|
||||
assign TerapageMisaligned = |(CurrentPPN[26:0]); // must have zero PPN2, PPN1, PPN0
|
||||
assign GigapageMisaligned = |(CurrentPPN[17:0]); // must have zero PPN1 and PPN0
|
||||
assign MegapageMisaligned = |(CurrentPPN[8:0]); // must have zero PPN0
|
||||
assign Misaligned = ((WalkerState == L2_ADR) & TerapageMisaligned) | ((WalkerState == L1_ADR) & GigapageMisaligned) | ((WalkerState == L0_ADR) & MegapageMisaligned);
|
||||
end
|
||||
|
||||
// Page Table Walker FSM
|
||||
// Page Table Walker FSM
|
||||
// there is a bug here. Each memory access needs to be potentially flushed if the PMA/P checkers
|
||||
// generate an access fault. Specially the store on UDPATE_PTE needs to check for access violation.
|
||||
// I think the solution is to do 1 of the following
|
||||
|
@ -244,32 +244,32 @@ module hptw (
|
|||
// 2. If the store would generate an exception don't store to dcache but still write the TLB. When we go back
|
||||
// to LEAF then the PMA/P. Wait this does not work. The PMA/P won't be looking a the address in the table, but
|
||||
// rather than physical address of the translated instruction/data. So we must generate the exception.
|
||||
flopenl #(.TYPE(statetype)) WalkerStateReg(clk, reset | FlushW, 1'b1, NextWalkerState, IDLE, WalkerState);
|
||||
always_comb
|
||||
case (WalkerState)
|
||||
IDLE: if (TLBMiss & ~DCacheStallM) NextWalkerState = InitialWalkerState;
|
||||
else NextWalkerState = IDLE;
|
||||
L3_ADR: NextWalkerState = L3_RD; // first access in SV48
|
||||
L3_RD: if (DCacheStallM) NextWalkerState = L3_RD;
|
||||
else NextWalkerState = L2_ADR;
|
||||
L2_ADR: if (InitialWalkerState == L2_ADR | ValidNonLeafPTE) NextWalkerState = L2_RD; // first access in SV39
|
||||
else NextWalkerState = LEAF;
|
||||
L2_RD: if (DCacheStallM) NextWalkerState = L2_RD;
|
||||
else NextWalkerState = L1_ADR;
|
||||
L1_ADR: if (InitialWalkerState == L1_ADR | ValidNonLeafPTE) NextWalkerState = L1_RD; // first access in SV32
|
||||
else NextWalkerState = LEAF;
|
||||
L1_RD: if (DCacheStallM) NextWalkerState = L1_RD;
|
||||
else NextWalkerState = L0_ADR;
|
||||
L0_ADR: if (ValidNonLeafPTE) NextWalkerState = L0_RD;
|
||||
else NextWalkerState = LEAF;
|
||||
L0_RD: if (DCacheStallM) NextWalkerState = L0_RD;
|
||||
else NextWalkerState = LEAF;
|
||||
LEAF: if (`SVADU_SUPPORTED & HPTWUpdateDA) NextWalkerState = UPDATE_PTE;
|
||||
else NextWalkerState = IDLE;
|
||||
UPDATE_PTE: if(DCacheStallM) NextWalkerState = UPDATE_PTE;
|
||||
else NextWalkerState = LEAF;
|
||||
default: NextWalkerState = IDLE; // should never be reached
|
||||
endcase // case (WalkerState)
|
||||
flopenl #(.TYPE(statetype)) WalkerStateReg(clk, reset | FlushW, 1'b1, NextWalkerState, IDLE, WalkerState);
|
||||
always_comb
|
||||
case (WalkerState)
|
||||
IDLE: if (TLBMiss & ~DCacheStallM) NextWalkerState = InitialWalkerState;
|
||||
else NextWalkerState = IDLE;
|
||||
L3_ADR: NextWalkerState = L3_RD; // first access in SV48
|
||||
L3_RD: if (DCacheStallM) NextWalkerState = L3_RD;
|
||||
else NextWalkerState = L2_ADR;
|
||||
L2_ADR: if (InitialWalkerState == L2_ADR | ValidNonLeafPTE) NextWalkerState = L2_RD; // first access in SV39
|
||||
else NextWalkerState = LEAF;
|
||||
L2_RD: if (DCacheStallM) NextWalkerState = L2_RD;
|
||||
else NextWalkerState = L1_ADR;
|
||||
L1_ADR: if (InitialWalkerState == L1_ADR | ValidNonLeafPTE) NextWalkerState = L1_RD; // first access in SV32
|
||||
else NextWalkerState = LEAF;
|
||||
L1_RD: if (DCacheStallM) NextWalkerState = L1_RD;
|
||||
else NextWalkerState = L0_ADR;
|
||||
L0_ADR: if (ValidNonLeafPTE) NextWalkerState = L0_RD;
|
||||
else NextWalkerState = LEAF;
|
||||
L0_RD: if (DCacheStallM) NextWalkerState = L0_RD;
|
||||
else NextWalkerState = LEAF;
|
||||
LEAF: if (`SVADU_SUPPORTED & HPTWUpdateDA) NextWalkerState = UPDATE_PTE;
|
||||
else NextWalkerState = IDLE;
|
||||
UPDATE_PTE: if(DCacheStallM) NextWalkerState = UPDATE_PTE;
|
||||
else NextWalkerState = LEAF;
|
||||
default: NextWalkerState = IDLE; // should never be reached
|
||||
endcase // case (WalkerState)
|
||||
|
||||
assign IgnoreRequestTLB = WalkerState == IDLE & TLBMiss;
|
||||
assign SelHPTW = WalkerState != IDLE;
|
||||
|
|
|
@ -84,8 +84,8 @@ module tlb #(parameter TLB_ENTRIES = 8, ITLB = 0) (
|
|||
logic [1:0] HitPageType;
|
||||
logic CAMHit;
|
||||
logic SV39Mode;
|
||||
logic Misaligned;
|
||||
logic MegapageMisaligned;
|
||||
logic Misaligned;
|
||||
logic MegapageMisaligned;
|
||||
|
||||
if(`XLEN == 32) begin
|
||||
assign MegapageMisaligned = |(PPN[9:0]); // must have zero PPN0
|
||||
|
@ -94,7 +94,7 @@ module tlb #(parameter TLB_ENTRIES = 8, ITLB = 0) (
|
|||
logic GigapageMisaligned, TerapageMisaligned;
|
||||
assign TerapageMisaligned = |(PPN[26:0]); // must have zero PPN2, PPN1, PPN0
|
||||
assign GigapageMisaligned = |(PPN[17:0]); // must have zero PPN1 and PPN0
|
||||
assign MegapageMisaligned = |(PPN[8:0]); // must have zero PPN0
|
||||
assign MegapageMisaligned = |(PPN[8:0]); // must have zero PPN0
|
||||
assign Misaligned = ((HitPageType == 2'b11) & TerapageMisaligned) |
|
||||
((HitPageType == 2'b10) & GigapageMisaligned) |
|
||||
((HitPageType == 2'b01) & MegapageMisaligned);
|
||||
|
|
|
@ -96,11 +96,11 @@ module csr #(parameter
|
|||
);
|
||||
|
||||
logic [`XLEN-1:0] CSRMReadValM, CSRSReadValM, CSRUReadValM, CSRCReadValM;
|
||||
logic [`XLEN-1:0] CSRReadValM;
|
||||
logic [`XLEN-1:0] CSRSrcM;
|
||||
logic [`XLEN-1:0] CSRRWM, CSRRSM, CSRRCM;
|
||||
logic [`XLEN-1:0] CSRWriteValM;
|
||||
logic [`XLEN-1:0] MSTATUS_REGW, SSTATUS_REGW, MSTATUSH_REGW;
|
||||
logic [`XLEN-1:0] CSRReadValM;
|
||||
logic [`XLEN-1:0] CSRSrcM;
|
||||
logic [`XLEN-1:0] CSRRWM, CSRRSM, CSRRCM;
|
||||
logic [`XLEN-1:0] CSRWriteValM;
|
||||
logic [`XLEN-1:0] MSTATUS_REGW, SSTATUS_REGW, MSTATUSH_REGW;
|
||||
logic [`XLEN-1:0] STVEC_REGW, MTVEC_REGW;
|
||||
logic [`XLEN-1:0] MEPC_REGW, SEPC_REGW;
|
||||
logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW;
|
||||
|
@ -117,7 +117,7 @@ module csr #(parameter
|
|||
logic [`XLEN-1:0] TVecM, TrapVectorM, NextFaultMtvalM;
|
||||
logic MTrapM, STrapM;
|
||||
logic [`XLEN-1:0] EPC;
|
||||
logic RetM;
|
||||
logic RetM;
|
||||
logic SelMtvecM;
|
||||
logic [`XLEN-1:0] TVecAlignedM;
|
||||
logic InstrValidNotFlushedM;
|
||||
|
@ -153,7 +153,7 @@ module csr #(parameter
|
|||
logic VectoredM;
|
||||
logic [`XLEN-1:0] TVecPlusCauseM;
|
||||
assign VectoredM = InterruptM & (TVecM[1:0] == 2'b01);
|
||||
assign TVecPlusCauseM = {TVecAlignedM[`XLEN-1:6], CauseM[3:0], 2'b00}; // 64-byte alignment allows concatenation rather than addition
|
||||
assign TVecPlusCauseM = {TVecAlignedM[`XLEN-1:6], CauseM[3:0], 2'b00}; // 64-byte alignment allows concatenation rather than addition
|
||||
mux2 #(`XLEN) trapvecmux(TVecAlignedM, TVecPlusCauseM, VectoredM, TrapVectorM);
|
||||
end else
|
||||
assign TrapVectorM = TVecAlignedM;
|
||||
|
|
|
@ -40,52 +40,52 @@ module csrc #(parameter
|
|||
TIME = 12'hC01,
|
||||
TIMEH = 12'hC81
|
||||
) (
|
||||
input logic clk, reset,
|
||||
input logic StallE, StallM,
|
||||
input logic FlushM,
|
||||
input logic InstrValidNotFlushedM, LoadStallD, StoreStallD,
|
||||
input logic CSRMWriteM, CSRWriteM,
|
||||
input logic BPDirPredWrongM,
|
||||
input logic BTAWrongM,
|
||||
input logic RASPredPCWrongM,
|
||||
input logic IClassWrongM,
|
||||
input logic BPWrongM, // branch predictor is wrong
|
||||
input logic [3:0] InstrClassM,
|
||||
input logic DCacheMiss,
|
||||
input logic DCacheAccess,
|
||||
input logic ICacheMiss,
|
||||
input logic ICacheAccess,
|
||||
input logic ICacheStallF,
|
||||
input logic DCacheStallM,
|
||||
input logic sfencevmaM,
|
||||
input logic InterruptM,
|
||||
input logic ExceptionM,
|
||||
input logic InvalidateICacheM,
|
||||
input logic DivBusyE, // integer divide busy
|
||||
input logic FDivBusyE, // floating point divide busy
|
||||
input logic [11:0] CSRAdrM,
|
||||
input logic [1:0] PrivilegeModeW,
|
||||
input logic [`XLEN-1:0] CSRWriteValM,
|
||||
input logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW,
|
||||
input logic [63:0] MTIME_CLINT,
|
||||
output logic [`XLEN-1:0] CSRCReadValM,
|
||||
output logic IllegalCSRCAccessM
|
||||
input logic clk, reset,
|
||||
input logic StallE, StallM,
|
||||
input logic FlushM,
|
||||
input logic InstrValidNotFlushedM, LoadStallD, StoreStallD,
|
||||
input logic CSRMWriteM, CSRWriteM,
|
||||
input logic BPDirPredWrongM,
|
||||
input logic BTAWrongM,
|
||||
input logic RASPredPCWrongM,
|
||||
input logic IClassWrongM,
|
||||
input logic BPWrongM, // branch predictor is wrong
|
||||
input logic [3:0] InstrClassM,
|
||||
input logic DCacheMiss,
|
||||
input logic DCacheAccess,
|
||||
input logic ICacheMiss,
|
||||
input logic ICacheAccess,
|
||||
input logic ICacheStallF,
|
||||
input logic DCacheStallM,
|
||||
input logic sfencevmaM,
|
||||
input logic InterruptM,
|
||||
input logic ExceptionM,
|
||||
input logic InvalidateICacheM,
|
||||
input logic DivBusyE, // integer divide busy
|
||||
input logic FDivBusyE, // floating point divide busy
|
||||
input logic [11:0] CSRAdrM,
|
||||
input logic [1:0] PrivilegeModeW,
|
||||
input logic [`XLEN-1:0] CSRWriteValM,
|
||||
input logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW,
|
||||
input logic [63:0] MTIME_CLINT,
|
||||
output logic [`XLEN-1:0] CSRCReadValM,
|
||||
output logic IllegalCSRCAccessM
|
||||
);
|
||||
|
||||
logic [4:0] CounterNumM;
|
||||
logic [`XLEN-1:0] HPMCOUNTER_REGW[`COUNTERS-1:0];
|
||||
logic [`XLEN-1:0] HPMCOUNTERH_REGW[`COUNTERS-1:0];
|
||||
logic LoadStallE, LoadStallM;
|
||||
logic StoreStallE, StoreStallM;
|
||||
logic [`COUNTERS-1:0] WriteHPMCOUNTERM;
|
||||
logic [`COUNTERS-1:0] CounterEvent;
|
||||
logic [63:0] HPMCOUNTERPlusM[`COUNTERS-1:0];
|
||||
logic [`XLEN-1:0] NextHPMCOUNTERM[`COUNTERS-1:0];
|
||||
genvar i;
|
||||
logic [4:0] CounterNumM;
|
||||
logic [`XLEN-1:0] HPMCOUNTER_REGW[`COUNTERS-1:0];
|
||||
logic [`XLEN-1:0] HPMCOUNTERH_REGW[`COUNTERS-1:0];
|
||||
logic LoadStallE, LoadStallM;
|
||||
logic StoreStallE, StoreStallM;
|
||||
logic [`COUNTERS-1:0] WriteHPMCOUNTERM;
|
||||
logic [`COUNTERS-1:0] CounterEvent;
|
||||
logic [63:0] HPMCOUNTERPlusM[`COUNTERS-1:0];
|
||||
logic [`XLEN-1:0] NextHPMCOUNTERM[`COUNTERS-1:0];
|
||||
genvar i;
|
||||
|
||||
// Interface signals
|
||||
flopenrc #(2) LoadStallEReg(.clk, .reset, .clear(1'b0), .en(~StallE), .d({StoreStallD, LoadStallD}), .q({StoreStallE, LoadStallE})); // don't flush the load stall during a load stall.
|
||||
flopenrc #(2) LoadStallMReg(.clk, .reset, .clear(FlushM), .en(~StallM), .d({StoreStallE, LoadStallE}), .q({StoreStallM, LoadStallM}));
|
||||
flopenrc #(2) LoadStallMReg(.clk, .reset, .clear(FlushM), .en(~StallM), .d({StoreStallE, LoadStallE}), .q({StoreStallM, LoadStallM}));
|
||||
|
||||
// Determine when to increment each counter
|
||||
assign CounterEvent[0] = 1'b1; // MCYCLE always increments
|
||||
|
@ -97,11 +97,11 @@ module csrc #(parameter
|
|||
assign CounterEvent[3] = InstrClassM[0] & InstrValidNotFlushedM; // branch instruction
|
||||
assign CounterEvent[4] = InstrClassM[1] & ~InstrClassM[2] & InstrValidNotFlushedM; // jump and not return instructions
|
||||
assign CounterEvent[5] = InstrClassM[2] & InstrValidNotFlushedM; // return instructions
|
||||
assign CounterEvent[6] = BPWrongM & InstrValidNotFlushedM; // branch predictor wrong
|
||||
assign CounterEvent[6] = BPWrongM & InstrValidNotFlushedM; // branch predictor wrong
|
||||
assign CounterEvent[7] = BPDirPredWrongM & InstrValidNotFlushedM; // Branch predictor wrong direction
|
||||
assign CounterEvent[8] = BTAWrongM & InstrValidNotFlushedM; // branch predictor wrong target
|
||||
assign CounterEvent[8] = BTAWrongM & InstrValidNotFlushedM; // branch predictor wrong target
|
||||
assign CounterEvent[9] = RASPredPCWrongM & InstrValidNotFlushedM; // return address stack wrong address
|
||||
assign CounterEvent[10] = IClassWrongM & InstrValidNotFlushedM; // instruction class predictor wrong
|
||||
assign CounterEvent[10] = IClassWrongM & InstrValidNotFlushedM; // instruction class predictor wrong
|
||||
assign CounterEvent[11] = LoadStallM & InstrValidNotFlushedM; // Load Stalls. don't want to suppress on flush as this only happens if flushed.
|
||||
assign CounterEvent[12] = StoreStallM & InstrValidNotFlushedM; // Store Stall
|
||||
assign CounterEvent[13] = DCacheAccess & InstrValidNotFlushedM; // data cache access
|
||||
|
@ -111,7 +111,7 @@ module csrc #(parameter
|
|||
assign CounterEvent[17] = ICacheMiss; // instruction cache miss. Miss asserted 1 cycle at start of cache miss
|
||||
assign CounterEvent[18] = ICacheStallF; // i cache miss cycles
|
||||
assign CounterEvent[19] = CSRWriteM & InstrValidNotFlushedM; // CSR writes
|
||||
assign CounterEvent[20] = InvalidateICacheM & InstrValidNotFlushedM; // fence.i
|
||||
assign CounterEvent[20] = InvalidateICacheM & InstrValidNotFlushedM; // fence.i
|
||||
assign CounterEvent[21] = sfencevmaM & InstrValidNotFlushedM; // sfence.vma
|
||||
assign CounterEvent[22] = InterruptM; // interrupt, InstrValidNotFlushedM will be low
|
||||
assign CounterEvent[23] = ExceptionM; // exceptions, InstrValidNotFlushedM will be low
|
||||
|
|
|
@ -34,14 +34,14 @@ module csri #(parameter
|
|||
MIP = 12'h344,
|
||||
SIE = 12'h104,
|
||||
SIP = 12'h144) (
|
||||
input logic clk, reset,
|
||||
input logic InstrValidNotFlushedM,
|
||||
input logic CSRMWriteM, CSRSWriteM,
|
||||
input logic clk, reset,
|
||||
input logic InstrValidNotFlushedM,
|
||||
input logic CSRMWriteM, CSRSWriteM,
|
||||
input logic [`XLEN-1:0] CSRWriteValM,
|
||||
input logic [11:0] CSRAdrM,
|
||||
input logic [11:0] CSRAdrM,
|
||||
input logic MExtInt, SExtInt, MTimerInt, STimerInt, MSwInt,
|
||||
input logic [11:0] MIDELEG_REGW,
|
||||
output logic [11:0] MIP_REGW, MIE_REGW,
|
||||
output logic [11:0] MIP_REGW, MIE_REGW,
|
||||
output logic [11:0] MIP_REGW_writeable // only SEIP, STIP, SSIP are actually writeable; the rest are hardwired to 0
|
||||
);
|
||||
|
||||
|
|
|
@ -72,30 +72,30 @@ module csrm #(parameter
|
|||
MEDELEG_MASK = ~(ZERO | `XLEN'b1 << 11),
|
||||
MIDELEG_MASK = 12'h222 // we choose to not make machine interrupts delegable
|
||||
) (
|
||||
input logic clk, reset,
|
||||
input logic InstrValidNotFlushedM,
|
||||
input logic CSRMWriteM, MTrapM,
|
||||
input logic [11:0] CSRAdrM,
|
||||
input logic [`XLEN-1:0] NextEPCM, NextCauseM, NextMtvalM, MSTATUS_REGW, MSTATUSH_REGW,
|
||||
input logic [`XLEN-1:0] CSRWriteValM,
|
||||
input logic [11:0] MIP_REGW, MIE_REGW,
|
||||
output logic [`XLEN-1:0] CSRMReadValM, MTVEC_REGW,
|
||||
output logic [`XLEN-1:0] MEPC_REGW,
|
||||
output logic [31:0] MCOUNTEREN_REGW, MCOUNTINHIBIT_REGW,
|
||||
output logic [`XLEN-1:0] MEDELEG_REGW,
|
||||
output logic [11:0] MIDELEG_REGW,
|
||||
output var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0],
|
||||
input logic clk, reset,
|
||||
input logic InstrValidNotFlushedM,
|
||||
input logic CSRMWriteM, MTrapM,
|
||||
input logic [11:0] CSRAdrM,
|
||||
input logic [`XLEN-1:0] NextEPCM, NextCauseM, NextMtvalM, MSTATUS_REGW, MSTATUSH_REGW,
|
||||
input logic [`XLEN-1:0] CSRWriteValM,
|
||||
input logic [11:0] MIP_REGW, MIE_REGW,
|
||||
output logic [`XLEN-1:0] CSRMReadValM, MTVEC_REGW,
|
||||
output logic [`XLEN-1:0] MEPC_REGW,
|
||||
output logic [31:0] MCOUNTEREN_REGW, MCOUNTINHIBIT_REGW,
|
||||
output logic [`XLEN-1:0] MEDELEG_REGW,
|
||||
output logic [11:0] MIDELEG_REGW,
|
||||
output var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0],
|
||||
output var logic [`PA_BITS-3:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0],
|
||||
output logic WriteMSTATUSM, WriteMSTATUSHM,
|
||||
output logic IllegalCSRMAccessM, IllegalCSRMWriteReadonlyM
|
||||
output logic WriteMSTATUSM, WriteMSTATUSHM,
|
||||
output logic IllegalCSRMAccessM, IllegalCSRMWriteReadonlyM
|
||||
);
|
||||
|
||||
logic [`XLEN-1:0] MISA_REGW, MHARTID_REGW;
|
||||
logic [`XLEN-1:0] MSCRATCH_REGW;
|
||||
logic [`XLEN-1:0] MCAUSE_REGW, MTVAL_REGW;
|
||||
logic WriteMTVECM, WriteMEDELEGM, WriteMIDELEGM;
|
||||
logic WriteMSCRATCHM, WriteMEPCM, WriteMCAUSEM, WriteMTVALM;
|
||||
logic WriteMCOUNTERENM, WriteMCOUNTINHIBITM;
|
||||
logic [`XLEN-1:0] MISA_REGW, MHARTID_REGW;
|
||||
logic [`XLEN-1:0] MSCRATCH_REGW;
|
||||
logic [`XLEN-1:0] MCAUSE_REGW, MTVAL_REGW;
|
||||
logic WriteMTVECM, WriteMEDELEGM, WriteMIDELEGM;
|
||||
logic WriteMSCRATCHM, WriteMEPCM, WriteMCAUSEM, WriteMTVALM;
|
||||
logic WriteMCOUNTERENM, WriteMCOUNTINHIBITM;
|
||||
|
||||
// There are PMP_ENTRIES = 0, 16, or 64 PMPADDR registers, each of which has its own flop
|
||||
genvar i;
|
||||
|
|
|
@ -44,23 +44,23 @@ module csrs #(parameter
|
|||
STIMECMP = 12'h14D,
|
||||
STIMECMPH = 12'h15D,
|
||||
SATP = 12'h180) (
|
||||
input logic clk, reset,
|
||||
input logic InstrValidNotFlushedM,
|
||||
input logic CSRSWriteM, STrapM,
|
||||
input logic [11:0] CSRAdrM,
|
||||
input logic clk, reset,
|
||||
input logic InstrValidNotFlushedM,
|
||||
input logic CSRSWriteM, STrapM,
|
||||
input logic [11:0] CSRAdrM,
|
||||
input logic [`XLEN-1:0] NextEPCM, NextCauseM, NextMtvalM, SSTATUS_REGW,
|
||||
input logic STATUS_TVM,
|
||||
input logic STATUS_TVM,
|
||||
input logic MCOUNTEREN_TM, // TM bit (1) of MCOUNTEREN; cause illegal instruction when trying to access STIMECMP if clear
|
||||
input logic [`XLEN-1:0] CSRWriteValM,
|
||||
input logic [1:0] PrivilegeModeW,
|
||||
input logic [1:0] PrivilegeModeW,
|
||||
output logic [`XLEN-1:0] CSRSReadValM, STVEC_REGW,
|
||||
output logic [`XLEN-1:0] SEPC_REGW,
|
||||
output logic [31:0] SCOUNTEREN_REGW,
|
||||
output logic [`XLEN-1:0] SATP_REGW,
|
||||
input logic [11:0] MIP_REGW, MIE_REGW, MIDELEG_REGW,
|
||||
input logic [63:0] MTIME_CLINT,
|
||||
output logic WriteSSTATUSM,
|
||||
output logic IllegalCSRSAccessM,
|
||||
output logic WriteSSTATUSM,
|
||||
output logic IllegalCSRSAccessM,
|
||||
output logic STimerInt
|
||||
);
|
||||
|
||||
|
@ -68,13 +68,13 @@ module csrs #(parameter
|
|||
localparam ZERO = {(`XLEN){1'b0}};
|
||||
localparam SEDELEG_MASK = ~(ZERO | `XLEN'b111 << 9);
|
||||
|
||||
logic WriteSTVECM;
|
||||
logic WriteSSCRATCHM, WriteSEPCM;
|
||||
logic WriteSCAUSEM, WriteSTVALM, WriteSATPM, WriteSCOUNTERENM;
|
||||
logic WriteSTIMECMPM, WriteSTIMECMPHM;
|
||||
logic [`XLEN-1:0] SSCRATCH_REGW, STVAL_REGW;
|
||||
logic [`XLEN-1:0] SCAUSE_REGW;
|
||||
logic [63:0] STIMECMP_REGW;
|
||||
logic WriteSTVECM;
|
||||
logic WriteSSCRATCHM, WriteSEPCM;
|
||||
logic WriteSCAUSEM, WriteSTVALM, WriteSATPM, WriteSCOUNTERENM;
|
||||
logic WriteSTIMECMPM, WriteSTIMECMPHM;
|
||||
logic [`XLEN-1:0] SSCRATCH_REGW, STVAL_REGW;
|
||||
logic [`XLEN-1:0] SCAUSE_REGW;
|
||||
logic [63:0] STIMECMP_REGW;
|
||||
|
||||
// write enables
|
||||
// *** can InstrValidNotFlushed be factored out of all these writes into CSRWriteM?
|
||||
|
@ -100,10 +100,10 @@ module csrs #(parameter
|
|||
else
|
||||
assign SATP_REGW = 0; // hardwire to zero if virtual memory not supported
|
||||
flopenr #(32) SCOUNTERENreg(clk, reset, WriteSCOUNTERENM, CSRWriteValM[31:0], SCOUNTEREN_REGW);
|
||||
if (`SSTC_SUPPORTED) begin
|
||||
if (`XLEN == 64)
|
||||
if (`SSTC_SUPPORTED) begin : sstc
|
||||
if (`XLEN == 64) begin : sstc64
|
||||
flopenl #(`XLEN) STIMECMPreg(clk, reset, WriteSTIMECMPM, CSRWriteValM, 64'hFFFFFFFFFFFFFFFF, STIMECMP_REGW);
|
||||
else begin
|
||||
end else begin : sstc32
|
||||
flopenl #(`XLEN) STIMECMPreg(clk, reset, WriteSTIMECMPM, CSRWriteValM, 32'hFFFFFFFF, STIMECMP_REGW[31:0]);
|
||||
flopenl #(`XLEN) STIMECMPHreg(clk, reset, WriteSTIMECMPHM, CSRWriteValM, 32'hFFFFFFFF, STIMECMP_REGW[63:32]);
|
||||
end
|
||||
|
@ -129,10 +129,10 @@ module csrs #(parameter
|
|||
SCAUSE: CSRSReadValM = SCAUSE_REGW;
|
||||
STVAL: CSRSReadValM = STVAL_REGW;
|
||||
SATP: if (`VIRTMEM_SUPPORTED & (PrivilegeModeW == `M_MODE | ~STATUS_TVM)) CSRSReadValM = SATP_REGW;
|
||||
else begin
|
||||
CSRSReadValM = 0;
|
||||
if (PrivilegeModeW == `S_MODE & STATUS_TVM) IllegalCSRSAccessM = 1;
|
||||
end
|
||||
else begin
|
||||
CSRSReadValM = 0;
|
||||
if (PrivilegeModeW == `S_MODE & STATUS_TVM) IllegalCSRSAccessM = 1;
|
||||
end
|
||||
SCOUNTEREN:CSRSReadValM = {{(`XLEN-32){1'b0}}, SCOUNTEREN_REGW};
|
||||
STIMECMP: if (`SSTC_SUPPORTED & (PrivilegeModeW == `M_MODE | MCOUNTEREN_TM)) CSRSReadValM = STIMECMP_REGW[`XLEN-1:0];
|
||||
else begin
|
||||
|
|
|
@ -70,7 +70,7 @@ module csrsr (
|
|||
STATUS_XS, STATUS_FS, /*STATUS_MPP, 2'b0*/ 4'b0,
|
||||
STATUS_SPP, /*STATUS_MPIE*/ 1'b0, STATUS_UBE, STATUS_SPIE,
|
||||
/*1'b0, STATUS_MIE, 1'b0*/ 3'b0, STATUS_SIE, 1'b0};
|
||||
assign MSTATUSH_REGW = '0; // *** does not exist when XLEN=64, but don't want it to have an undefined value. Spec is not clear what it should be.
|
||||
assign MSTATUSH_REGW = '0; // *** does not exist when XLEN=64, but don't want it to have an undefined value. Spec is not clear what it should be.
|
||||
end else begin: csrsr32 // RV32
|
||||
assign MSTATUS_REGW = {STATUS_SD, 8'b0,
|
||||
STATUS_TSR, STATUS_TW, STATUS_TVM, STATUS_MXR, STATUS_SUM, STATUS_MPRV,
|
||||
|
|
|
@ -48,7 +48,7 @@ module csru #(parameter
|
|||
logic [4:0] FFLAGS_REGW;
|
||||
logic [2:0] NextFRMM;
|
||||
logic [4:0] NextFFLAGSM;
|
||||
logic SetOrWriteFFLAGSM;
|
||||
logic SetOrWriteFFLAGSM;
|
||||
|
||||
// Write enables
|
||||
//assign WriteFCSRM = CSRUWriteM & (CSRAdrM == FCSR) & InstrValidNotFlushedM;
|
||||
|
|
|
@ -34,86 +34,86 @@ module privileged (
|
|||
input logic StallD, StallE, StallM, StallW,
|
||||
input logic FlushD, FlushE, FlushM, FlushW,
|
||||
// CSR Reads and Writes, and values needed for traps
|
||||
input logic CSRReadM, CSRWriteM, // Read or write CSRs
|
||||
input logic [`XLEN-1:0] SrcAM, // GPR register to write
|
||||
input logic [31:0] InstrM, // Instruction
|
||||
input logic [31:0] InstrOrigM, // Original compressed or uncompressed instruction in Memory stage for Illegal Instruction MTVAL
|
||||
input logic [`XLEN-1:0] IEUAdrM, // address from IEU
|
||||
input logic [`XLEN-1:0] PCM, PC2NextF, // program counter, next PC going to trap/return PC logic
|
||||
// control signals
|
||||
input logic InstrValidM, // Current instruction is valid (not flushed)
|
||||
input logic CommittedM, CommittedF, // current instruction is using bus; don't interrupt
|
||||
input logic PrivilegedM, // privileged instruction
|
||||
// processor events for performance counter logging
|
||||
input logic FRegWriteM, // instruction will write floating-point registers
|
||||
input logic LoadStallD, // load instruction is stalling
|
||||
input logic StoreStallD, // store instruction is stalling
|
||||
input logic ICacheStallF, // I cache stalled
|
||||
input logic DCacheStallM, // D cache stalled
|
||||
input logic BPDirPredWrongM, // branch predictor guessed wrong direction
|
||||
input logic BTAWrongM, // branch predictor guessed wrong target
|
||||
input logic RASPredPCWrongM, // return adddress stack guessed wrong target
|
||||
input logic IClassWrongM, // branch predictor guessed wrong instruction class
|
||||
input logic BPWrongM, // branch predictor is wrong
|
||||
input logic [3:0] InstrClassM, // actual instruction class
|
||||
input logic DCacheMiss, // data cache miss
|
||||
input logic DCacheAccess, // data cache accessed (hit or miss)
|
||||
input logic ICacheMiss, // instruction cache miss
|
||||
input logic ICacheAccess, // instruction cache access
|
||||
input logic DivBusyE, // integer divide busy
|
||||
input logic FDivBusyE, // floating point divide busy
|
||||
// fault sources
|
||||
input logic InstrAccessFaultF, // instruction access fault
|
||||
input logic LoadAccessFaultM, StoreAmoAccessFaultM, // load or store access fault
|
||||
input logic HPTWInstrAccessFaultM, // hardware page table access fault while fetching instruction PTE
|
||||
input logic InstrPageFaultF, // page faults
|
||||
input logic LoadPageFaultM, StoreAmoPageFaultM, // page faults
|
||||
input logic InstrMisalignedFaultM, // misaligned instruction fault
|
||||
input logic LoadMisalignedFaultM, StoreAmoMisalignedFaultM, // misaligned data fault
|
||||
input logic IllegalIEUFPUInstrD, // illegal instruction from IEU or FPU
|
||||
input logic MTimerInt, MExtInt, SExtInt, MSwInt, // interrupt sources
|
||||
input logic [63:0] MTIME_CLINT, // timer value from CLINT
|
||||
input logic [4:0] SetFflagsM, // set FCSR flags from FPU
|
||||
input logic SelHPTW, // HPTW in use. Causes system to use S-mode endianness for accesses
|
||||
// CSR outputs
|
||||
output logic [`XLEN-1:0] CSRReadValW, // Value read from CSR
|
||||
output logic [1:0] PrivilegeModeW, // current privilege mode
|
||||
output logic [`XLEN-1:0] SATP_REGW, // supervisor address translation register
|
||||
output logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, // status register bits
|
||||
output logic [1:0] STATUS_MPP, STATUS_FS, // status register bits
|
||||
output var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP configuration entries to MMU
|
||||
output var logic [`PA_BITS-3:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], // PMP address entries to MMU
|
||||
output logic [2:0] FRM_REGW, // FPU rounding mode
|
||||
// PC logic output in privileged unit
|
||||
output logic [`XLEN-1:0] UnalignedPCNextF, // Next PC from trap/return PC logic
|
||||
// control outputs
|
||||
output logic RetM, TrapM, // return instruction, or trap
|
||||
output logic sfencevmaM, // sfence.vma instruction
|
||||
input logic InvalidateICacheM, // fence instruction
|
||||
output logic BigEndianM, // Use big endian in current privilege mode
|
||||
// Fault outputs
|
||||
output logic BreakpointFaultM, EcallFaultM, // breakpoint and Ecall traps should retire
|
||||
output logic WFIStallM // Stall in Memory stage for WFI until interrupt or timeout
|
||||
);
|
||||
|
||||
logic [`LOG_XLEN-1:0] CauseM; // trap cause
|
||||
logic [`XLEN-1:0] MEDELEG_REGW; // exception delegation CSR
|
||||
logic [11:0] MIDELEG_REGW; // interrupt delegation CSR
|
||||
logic sretM, mretM; // supervisor / machine return instruction
|
||||
logic IllegalCSRAccessM; // Illegal access to CSR
|
||||
logic IllegalIEUFPUInstrM; // Illegal IEU or FPU instruction, delayed to Mem stage
|
||||
logic InstrPageFaultM; // Instruction page fault, delayed to Mem stage
|
||||
logic InstrAccessFaultM; // Instruction access fault, delayed to Mem stages
|
||||
logic IllegalInstrFaultM; // Illegal instruction fault
|
||||
logic STATUS_SPP, STATUS_TSR, STATUS_TW, STATUS_TVM; // Status bits needed within privileged unit
|
||||
logic STATUS_MIE, STATUS_SIE; // status bits: interrupt enables
|
||||
logic [11:0] MIP_REGW, MIE_REGW; // interrupt pending and enable bits
|
||||
logic [1:0] NextPrivilegeModeM; // next privilege mode based on trap or return
|
||||
logic DelegateM; // trap should be delegated
|
||||
logic wfiM; // wait for interrupt instruction
|
||||
logic IntPendingM; // interrupt is pending, even if not enabled. ends wfi
|
||||
logic InterruptM; // interrupt occuring
|
||||
logic ExceptionM; // Memory stage instruction caused a fault
|
||||
input logic CSRReadM, CSRWriteM, // Read or write CSRs
|
||||
input logic [`XLEN-1:0] SrcAM, // GPR register to write
|
||||
input logic [31:0] InstrM, // Instruction
|
||||
input logic [31:0] InstrOrigM, // Original compressed or uncompressed instruction in Memory stage for Illegal Instruction MTVAL
|
||||
input logic [`XLEN-1:0] IEUAdrM, // address from IEU
|
||||
input logic [`XLEN-1:0] PCM, PC2NextF, // program counter, next PC going to trap/return PC logic
|
||||
// control signals
|
||||
input logic InstrValidM, // Current instruction is valid (not flushed)
|
||||
input logic CommittedM, CommittedF, // current instruction is using bus; don't interrupt
|
||||
input logic PrivilegedM, // privileged instruction
|
||||
// processor events for performance counter logging
|
||||
input logic FRegWriteM, // instruction will write floating-point registers
|
||||
input logic LoadStallD, // load instruction is stalling
|
||||
input logic StoreStallD, // store instruction is stalling
|
||||
input logic ICacheStallF, // I cache stalled
|
||||
input logic DCacheStallM, // D cache stalled
|
||||
input logic BPDirPredWrongM, // branch predictor guessed wrong direction
|
||||
input logic BTAWrongM, // branch predictor guessed wrong target
|
||||
input logic RASPredPCWrongM, // return adddress stack guessed wrong target
|
||||
input logic IClassWrongM, // branch predictor guessed wrong instruction class
|
||||
input logic BPWrongM, // branch predictor is wrong
|
||||
input logic [3:0] InstrClassM, // actual instruction class
|
||||
input logic DCacheMiss, // data cache miss
|
||||
input logic DCacheAccess, // data cache accessed (hit or miss)
|
||||
input logic ICacheMiss, // instruction cache miss
|
||||
input logic ICacheAccess, // instruction cache access
|
||||
input logic DivBusyE, // integer divide busy
|
||||
input logic FDivBusyE, // floating point divide busy
|
||||
// fault sources
|
||||
input logic InstrAccessFaultF, // instruction access fault
|
||||
input logic LoadAccessFaultM, StoreAmoAccessFaultM, // load or store access fault
|
||||
input logic HPTWInstrAccessFaultM, // hardware page table access fault while fetching instruction PTE
|
||||
input logic InstrPageFaultF, // page faults
|
||||
input logic LoadPageFaultM, StoreAmoPageFaultM, // page faults
|
||||
input logic InstrMisalignedFaultM, // misaligned instruction fault
|
||||
input logic LoadMisalignedFaultM, StoreAmoMisalignedFaultM, // misaligned data fault
|
||||
input logic IllegalIEUFPUInstrD, // illegal instruction from IEU or FPU
|
||||
input logic MTimerInt, MExtInt, SExtInt, MSwInt, // interrupt sources
|
||||
input logic [63:0] MTIME_CLINT, // timer value from CLINT
|
||||
input logic [4:0] SetFflagsM, // set FCSR flags from FPU
|
||||
input logic SelHPTW, // HPTW in use. Causes system to use S-mode endianness for accesses
|
||||
// CSR outputs
|
||||
output logic [`XLEN-1:0] CSRReadValW, // Value read from CSR
|
||||
output logic [1:0] PrivilegeModeW, // current privilege mode
|
||||
output logic [`XLEN-1:0] SATP_REGW, // supervisor address translation register
|
||||
output logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, // status register bits
|
||||
output logic [1:0] STATUS_MPP, STATUS_FS, // status register bits
|
||||
output var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP configuration entries to MMU
|
||||
output var logic [`PA_BITS-3:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], // PMP address entries to MMU
|
||||
output logic [2:0] FRM_REGW, // FPU rounding mode
|
||||
// PC logic output in privileged unit
|
||||
output logic [`XLEN-1:0] UnalignedPCNextF, // Next PC from trap/return PC logic
|
||||
// control outputs
|
||||
output logic RetM, TrapM, // return instruction, or trap
|
||||
output logic sfencevmaM, // sfence.vma instruction
|
||||
input logic InvalidateICacheM, // fence instruction
|
||||
output logic BigEndianM, // Use big endian in current privilege mode
|
||||
// Fault outputs
|
||||
output logic BreakpointFaultM, EcallFaultM, // breakpoint and Ecall traps should retire
|
||||
output logic WFIStallM // Stall in Memory stage for WFI until interrupt or timeout
|
||||
);
|
||||
|
||||
logic [`LOG_XLEN-1:0] CauseM; // trap cause
|
||||
logic [`XLEN-1:0] MEDELEG_REGW; // exception delegation CSR
|
||||
logic [11:0] MIDELEG_REGW; // interrupt delegation CSR
|
||||
logic sretM, mretM; // supervisor / machine return instruction
|
||||
logic IllegalCSRAccessM; // Illegal access to CSR
|
||||
logic IllegalIEUFPUInstrM; // Illegal IEU or FPU instruction, delayed to Mem stage
|
||||
logic InstrPageFaultM; // Instruction page fault, delayed to Mem stage
|
||||
logic InstrAccessFaultM; // Instruction access fault, delayed to Mem stages
|
||||
logic IllegalInstrFaultM; // Illegal instruction fault
|
||||
logic STATUS_SPP, STATUS_TSR, STATUS_TW, STATUS_TVM; // Status bits needed within privileged unit
|
||||
logic STATUS_MIE, STATUS_SIE; // status bits: interrupt enables
|
||||
logic [11:0] MIP_REGW, MIE_REGW; // interrupt pending and enable bits
|
||||
logic [1:0] NextPrivilegeModeM; // next privilege mode based on trap or return
|
||||
logic DelegateM; // trap should be delegated
|
||||
logic wfiM; // wait for interrupt instruction
|
||||
logic IntPendingM; // interrupt is pending, even if not enabled. ends wfi
|
||||
logic InterruptM; // interrupt occuring
|
||||
logic ExceptionM; // Memory stage instruction caused a fault
|
||||
|
||||
// track the current privilege level
|
||||
privmode privmode(.clk, .reset, .StallW, .TrapM, .mretM, .sretM, .DelegateM,
|
||||
|
|
|
@ -29,33 +29,33 @@
|
|||
`include "wally-config.vh"
|
||||
|
||||
module trap (
|
||||
input logic reset,
|
||||
input logic InstrMisalignedFaultM, InstrAccessFaultM, HPTWInstrAccessFaultM, IllegalInstrFaultM,
|
||||
input logic BreakpointFaultM, LoadMisalignedFaultM, StoreAmoMisalignedFaultM,
|
||||
input logic LoadAccessFaultM, StoreAmoAccessFaultM, EcallFaultM, InstrPageFaultM,
|
||||
input logic LoadPageFaultM, StoreAmoPageFaultM, // various trap sources
|
||||
input logic mretM, sretM, // return instructions
|
||||
input logic wfiM, // wait for interrupt instruction
|
||||
input logic [1:0] PrivilegeModeW, // current privilege mode
|
||||
input logic [11:0] MIP_REGW, MIE_REGW, MIDELEG_REGW, // interrupt pending, enabled, and delegate CSRs
|
||||
input logic [`XLEN-1:0] MEDELEG_REGW, // exception delegation SR
|
||||
input logic STATUS_MIE, STATUS_SIE, // machine/supervisor interrupt enables
|
||||
input logic InstrValidM, // current instruction is valid, not flushed
|
||||
input logic CommittedM, CommittedF, // LSU/IFU has committed to a bus operation that can't be interrupted
|
||||
output logic TrapM, // Trap is occurring
|
||||
output logic RetM, // Return instruction being executed
|
||||
output logic InterruptM, // Interrupt is occurring
|
||||
output logic ExceptionM, // exception is occurring
|
||||
output logic IntPendingM, // Interrupt is pending, might occur if enabled
|
||||
output logic DelegateM, // Delegate trap to supervisor handler
|
||||
output logic WFIStallM, // Stall due to WFI instruction
|
||||
output logic [`LOG_XLEN-1:0] CauseM // trap cause
|
||||
input logic reset,
|
||||
input logic InstrMisalignedFaultM, InstrAccessFaultM, HPTWInstrAccessFaultM, IllegalInstrFaultM,
|
||||
input logic BreakpointFaultM, LoadMisalignedFaultM, StoreAmoMisalignedFaultM,
|
||||
input logic LoadAccessFaultM, StoreAmoAccessFaultM, EcallFaultM, InstrPageFaultM,
|
||||
input logic LoadPageFaultM, StoreAmoPageFaultM, // various trap sources
|
||||
input logic mretM, sretM, // return instructions
|
||||
input logic wfiM, // wait for interrupt instruction
|
||||
input logic [1:0] PrivilegeModeW, // current privilege mode
|
||||
input logic [11:0] MIP_REGW, MIE_REGW, MIDELEG_REGW, // interrupt pending, enabled, and delegate CSRs
|
||||
input logic [`XLEN-1:0] MEDELEG_REGW, // exception delegation SR
|
||||
input logic STATUS_MIE, STATUS_SIE, // machine/supervisor interrupt enables
|
||||
input logic InstrValidM, // current instruction is valid, not flushed
|
||||
input logic CommittedM, CommittedF, // LSU/IFU has committed to a bus operation that can't be interrupted
|
||||
output logic TrapM, // Trap is occurring
|
||||
output logic RetM, // Return instruction being executed
|
||||
output logic InterruptM, // Interrupt is occurring
|
||||
output logic ExceptionM, // exception is occurring
|
||||
output logic IntPendingM, // Interrupt is pending, might occur if enabled
|
||||
output logic DelegateM, // Delegate trap to supervisor handler
|
||||
output logic WFIStallM, // Stall due to WFI instruction
|
||||
output logic [`LOG_XLEN-1:0] CauseM // trap cause
|
||||
);
|
||||
|
||||
logic MIntGlobalEnM, SIntGlobalEnM; // Global interupt enables
|
||||
logic Committed; // LSU or IFU has committed to a bus operation that can't be interrupted
|
||||
logic BothInstrAccessFaultM; // instruction or HPTW ITLB fill caused an Instruction Access Fault
|
||||
logic [11:0] PendingIntsM, ValidIntsM, EnabledIntsM; // interrupts are pending, valid, or enabled
|
||||
logic MIntGlobalEnM, SIntGlobalEnM; // Global interupt enables
|
||||
logic Committed; // LSU or IFU has committed to a bus operation that can't be interrupted
|
||||
logic BothInstrAccessFaultM; // instruction or HPTW ITLB fill caused an Instruction Access Fault
|
||||
logic [11:0] PendingIntsM, ValidIntsM, EnabledIntsM; // interrupts are pending, valid, or enabled
|
||||
|
||||
///////////////////////////////////////////
|
||||
// Determine pending enabled interrupts
|
||||
|
|
|
@ -40,7 +40,7 @@ module clint_apb (
|
|||
output logic [`XLEN-1:0] PRDATA,
|
||||
output logic PREADY,
|
||||
output logic [63:0] MTIME,
|
||||
output logic MTimerInt, MSwInt
|
||||
output logic MTimerInt, MSwInt
|
||||
);
|
||||
|
||||
logic MSIP;
|
||||
|
|
|
@ -43,37 +43,37 @@
|
|||
// hardcoded to 2 contexts for now; later upgrade to arbitrary (up to 15872) contexts
|
||||
|
||||
module plic_apb (
|
||||
input logic PCLK, PRESETn,
|
||||
input logic PSEL,
|
||||
input logic [27:0] PADDR,
|
||||
input logic [`XLEN-1:0] PWDATA,
|
||||
input logic PCLK, PRESETn,
|
||||
input logic PSEL,
|
||||
input logic [27:0] PADDR,
|
||||
input logic [`XLEN-1:0] PWDATA,
|
||||
input logic [`XLEN/8-1:0] PSTRB,
|
||||
input logic PWRITE,
|
||||
input logic PENABLE,
|
||||
output logic [`XLEN-1:0] PRDATA,
|
||||
output logic PREADY,
|
||||
input logic UARTIntr,GPIOIntr,
|
||||
output logic MExtInt, SExtInt
|
||||
input logic PWRITE,
|
||||
input logic PENABLE,
|
||||
output logic [`XLEN-1:0] PRDATA,
|
||||
output logic PREADY,
|
||||
input logic UARTIntr,GPIOIntr,
|
||||
output logic MExtInt, SExtInt
|
||||
);
|
||||
|
||||
logic memwrite, memread;
|
||||
logic [23:0] entry;
|
||||
logic [31:0] Din, Dout;
|
||||
logic memwrite, memread;
|
||||
logic [23:0] entry;
|
||||
logic [31:0] Din, Dout;
|
||||
|
||||
// context-independent signals
|
||||
logic [`N:1] requests;
|
||||
logic [`N:1][2:0] intPriority;
|
||||
logic [`N:1] intInProgress, intPending, nextIntPending;
|
||||
logic [`N:1] requests;
|
||||
logic [`N:1][2:0] intPriority;
|
||||
logic [`N:1] intInProgress, intPending, nextIntPending;
|
||||
|
||||
// context-dependent signals
|
||||
logic [`C-1:0][2:0] intThreshold;
|
||||
logic [`C-1:0][`N:1] intEn;
|
||||
logic [`C-1:0][5:0] intClaim; // ID's are 6 bits if we stay within 63 sources
|
||||
logic [`C-1:0][7:1][`N:1] irqMatrix;
|
||||
logic [`C-1:0][7:1] priorities_with_irqs;
|
||||
logic [`C-1:0][7:1] max_priority_with_irqs;
|
||||
logic [`C-1:0][`N:1] irqs_at_max_priority;
|
||||
logic [`C-1:0][7:1] threshMask;
|
||||
logic [`C-1:0][2:0] intThreshold;
|
||||
logic [`C-1:0][`N:1] intEn;
|
||||
logic [`C-1:0][5:0] intClaim; // ID's are 6 bits if we stay within 63 sources
|
||||
logic [`C-1:0][7:1][`N:1] irqMatrix;
|
||||
logic [`C-1:0][7:1] priorities_with_irqs;
|
||||
logic [`C-1:0][7:1] max_priority_with_irqs;
|
||||
logic [`C-1:0][`N:1] irqs_at_max_priority;
|
||||
logic [`C-1:0][7:1] threshMask;
|
||||
|
||||
// =======
|
||||
// AHB I/O
|
||||
|
@ -128,7 +128,7 @@ module plic_apb (
|
|||
if (memread)
|
||||
casez(entry)
|
||||
24'h000000: Dout <= #1 32'b0; // there is no intPriority[0]
|
||||
24'h0000??: Dout <= #1 {29'b0,intPriority[entry[7:2]]};
|
||||
24'h0000??: Dout <= #1 {29'b0,intPriority[entry[7:2]]};
|
||||
`ifdef PLIC_NUM_SRC_LT_32
|
||||
24'h001000: Dout <= #1 {{(31-`N){1'b0}},intPending,1'b0};
|
||||
24'h002000: Dout <= #1 {{(31-`N){1'b0}},intEn[0],1'b0};
|
||||
|
|
|
@ -30,27 +30,27 @@
|
|||
`define RAM_LATENCY 0
|
||||
|
||||
module ram_ahb #(parameter BASE=0, RANGE = 65535) (
|
||||
input logic HCLK, HRESETn,
|
||||
input logic HSELRam,
|
||||
input logic [`PA_BITS-1:0] HADDR,
|
||||
input logic HWRITE,
|
||||
input logic HREADY,
|
||||
input logic [1:0] HTRANS,
|
||||
input logic [`XLEN-1:0] HWDATA,
|
||||
input logic [`XLEN/8-1:0] HWSTRB,
|
||||
output logic [`XLEN-1:0] HREADRam,
|
||||
output logic HRESPRam, HREADYRam
|
||||
input logic HCLK, HRESETn,
|
||||
input logic HSELRam,
|
||||
input logic [`PA_BITS-1:0] HADDR,
|
||||
input logic HWRITE,
|
||||
input logic HREADY,
|
||||
input logic [1:0] HTRANS,
|
||||
input logic [`XLEN-1:0] HWDATA,
|
||||
input logic [`XLEN/8-1:0] HWSTRB,
|
||||
output logic [`XLEN-1:0] HREADRam,
|
||||
output logic HRESPRam, HREADYRam
|
||||
);
|
||||
|
||||
localparam ADDR_WIDTH = $clog2(RANGE/8);
|
||||
localparam OFFSET = $clog2(`XLEN/8);
|
||||
|
||||
logic [`XLEN/8-1:0] ByteMask;
|
||||
logic [`PA_BITS-1:0] HADDRD, RamAddr;
|
||||
logic initTrans;
|
||||
logic memwrite, memwriteD, memread;
|
||||
logic nextHREADYRam;
|
||||
logic DelayReady;
|
||||
logic [`XLEN/8-1:0] ByteMask;
|
||||
logic [`PA_BITS-1:0] HADDRD, RamAddr;
|
||||
logic initTrans;
|
||||
logic memwrite, memwriteD, memread;
|
||||
logic nextHREADYRam;
|
||||
logic DelayReady;
|
||||
|
||||
// a new AHB transactions starts when HTRANS requests a transaction,
|
||||
// the peripheral is selected, and the previous transaction is completing
|
||||
|
@ -92,13 +92,13 @@ module ram_ahb #(parameter BASE=0, RANGE = 65535) (
|
|||
else CurrState <= #1 NextState;
|
||||
|
||||
always_comb begin
|
||||
case(CurrState)
|
||||
READY: if(initTrans & ~CycleFlag) NextState = DELAY;
|
||||
case(CurrState)
|
||||
READY: if(initTrans & ~CycleFlag) NextState = DELAY;
|
||||
else NextState = READY;
|
||||
DELAY: if(CycleFlag) NextState = READY;
|
||||
else NextState = DELAY;
|
||||
default: NextState = READY;
|
||||
endcase
|
||||
else NextState = DELAY;
|
||||
default: NextState = READY;
|
||||
endcase
|
||||
end
|
||||
|
||||
assign CycleFlag = Cycle == `RAM_LATENCY;
|
||||
|
@ -108,7 +108,6 @@ module ram_ahb #(parameter BASE=0, RANGE = 65535) (
|
|||
end else begin
|
||||
assign DelayReady = 0;
|
||||
end
|
||||
|
||||
|
||||
|
||||
endmodule
|
||||
|
||||
|
|
|
@ -37,19 +37,19 @@
|
|||
/* verilator lint_off UNOPTFLAT */
|
||||
|
||||
module uartPC16550D(
|
||||
// Processor Interface
|
||||
input logic PCLK, PRESETn, // UART clock and active low reset
|
||||
input logic [2:0] A, // address input (8 registers)
|
||||
input logic [7:0] Din, // 8-bit WriteData
|
||||
output logic [7:0] Dout, // 8-bit ReadData
|
||||
input logic MEMRb, MEMWb, // Active low memory read/write
|
||||
output logic INTR, TXRDYb, RXRDYb, // interrupt and ready lines
|
||||
// Clocks
|
||||
output logic BAUDOUTb, // active low baud clock
|
||||
input logic RCLK, // usually BAUDOUTb tied to RCLK externally
|
||||
// E1A Driver
|
||||
input logic SIN, DSRb, DCDb, CTSb, RIb, // UART external serial and flow-control inputs
|
||||
output logic SOUT, RTSb, DTRb, OUT1b, OUT2b // UART external serial and flow-control outputs
|
||||
// Processor Interface
|
||||
input logic PCLK, PRESETn, // UART clock and active low reset
|
||||
input logic [2:0] A, // address input (8 registers)
|
||||
input logic [7:0] Din, // 8-bit WriteData
|
||||
output logic [7:0] Dout, // 8-bit ReadData
|
||||
input logic MEMRb, MEMWb, // Active low memory read/write
|
||||
output logic INTR, TXRDYb, RXRDYb, // interrupt and ready lines
|
||||
// Clocks
|
||||
output logic BAUDOUTb, // active low baud clock
|
||||
input logic RCLK, // usually BAUDOUTb tied to RCLK externally
|
||||
// E1A Driver
|
||||
input logic SIN, DSRb, DCDb, CTSb, RIb, // UART external serial and flow-control inputs
|
||||
output logic SOUT, RTSb, DTRb, OUT1b, OUT2b // UART external serial and flow-control outputs
|
||||
);
|
||||
|
||||
// transmit and receive states
|
||||
|
@ -62,63 +62,63 @@ module uartPC16550D(
|
|||
logic [4:0] MCR;
|
||||
|
||||
// Syncrhonized and delayed UART signals
|
||||
logic SINd, DSRbd, DCDbd, CTSbd, RIbd;
|
||||
logic SINsync, DSRbsync, DCDbsync, CTSbsync, RIbsync;
|
||||
logic DSRb2, DCDb2, CTSb2, RIb2;
|
||||
logic SOUTbit;
|
||||
logic SINd, DSRbd, DCDbd, CTSbd, RIbd;
|
||||
logic SINsync, DSRbsync, DCDbsync, CTSbsync, RIbsync;
|
||||
logic DSRb2, DCDb2, CTSb2, RIb2;
|
||||
logic SOUTbit;
|
||||
|
||||
// Control signals
|
||||
logic loop; // loopback mode
|
||||
logic DLAB; // Divisor Latch Access Bit (LCR bit 7)
|
||||
logic loop; // loopback mode
|
||||
logic DLAB; // Divisor Latch Access Bit (LCR bit 7)
|
||||
|
||||
// Baud and rx/tx timing
|
||||
logic baudpulse, txbaudpulse, rxbaudpulse; // high one system clk cycle each baud/16 period
|
||||
logic baudpulse, txbaudpulse, rxbaudpulse; // high one system clk cycle each baud/16 period
|
||||
logic [16+`UART_PRESCALE-1:0] baudcount;
|
||||
logic [3:0] rxoversampledcnt, txoversampledcnt; // count oversampled-by-16
|
||||
logic [3:0] rxbitsreceived, txbitssent;
|
||||
statetype rxstate, txstate;
|
||||
logic [3:0] rxoversampledcnt, txoversampledcnt; // count oversampled-by-16
|
||||
logic [3:0] rxbitsreceived, txbitssent;
|
||||
statetype rxstate, txstate;
|
||||
|
||||
// shift registrs and FIFOs
|
||||
logic [9:0] rxshiftreg;
|
||||
logic [10:0] rxfifo[15:0];
|
||||
logic [7:0] txfifo[15:0];
|
||||
logic [4:0] rxfifotailunwrapped;
|
||||
logic [3:0] rxfifohead, rxfifotail, txfifohead, txfifotail, rxfifotriggerlevel;
|
||||
logic [3:0] rxfifoentries, txfifoentries;
|
||||
logic [3:0] rxbitsexpected, txbitsexpected;
|
||||
logic [9:0] rxshiftreg;
|
||||
logic [10:0] rxfifo[15:0];
|
||||
logic [7:0] txfifo[15:0];
|
||||
logic [4:0] rxfifotailunwrapped;
|
||||
logic [3:0] rxfifohead, rxfifotail, txfifohead, txfifotail, rxfifotriggerlevel;
|
||||
logic [3:0] rxfifoentries, txfifoentries;
|
||||
logic [3:0] rxbitsexpected, txbitsexpected;
|
||||
|
||||
// receive data
|
||||
logic [10:0] RXBR;
|
||||
logic [9:0] rxtimeoutcnt;
|
||||
logic rxcentered;
|
||||
logic rxparity, rxparitybit, rxstopbit;
|
||||
logic rxparityerr, rxoverrunerr, rxframingerr, rxbreak, rxfifohaserr;
|
||||
logic rxdataready;
|
||||
logic rxfifoempty, rxfifotriggered, rxfifotimeout;
|
||||
logic rxfifodmaready;
|
||||
logic [8:0] rxdata9;
|
||||
logic [7:0] rxdata;
|
||||
logic [15:0] RXerrbit, rxfullbit;
|
||||
logic [31:0] rxfullbitunwrapped;
|
||||
logic [10:0] RXBR;
|
||||
logic [9:0] rxtimeoutcnt;
|
||||
logic rxcentered;
|
||||
logic rxparity, rxparitybit, rxstopbit;
|
||||
logic rxparityerr, rxoverrunerr, rxframingerr, rxbreak, rxfifohaserr;
|
||||
logic rxdataready;
|
||||
logic rxfifoempty, rxfifotriggered, rxfifotimeout;
|
||||
logic rxfifodmaready;
|
||||
logic [8:0] rxdata9;
|
||||
logic [7:0] rxdata;
|
||||
logic [15:0] RXerrbit, rxfullbit;
|
||||
logic [31:0] rxfullbitunwrapped;
|
||||
|
||||
// transmit data
|
||||
logic [7:0] TXHR, nexttxdata;
|
||||
logic [11:0] txdata, txsr;
|
||||
logic txnextbit, txhrfull, txsrfull;
|
||||
logic txparity;
|
||||
logic txfifoempty, txfifofull, txfifodmaready;
|
||||
logic [7:0] TXHR, nexttxdata;
|
||||
logic [11:0] txdata, txsr;
|
||||
logic txnextbit, txhrfull, txsrfull;
|
||||
logic txparity;
|
||||
logic txfifoempty, txfifofull, txfifodmaready;
|
||||
|
||||
// control signals
|
||||
logic fifoenabled, fifodmamodesel, evenparitysel;
|
||||
logic fifoenabled, fifodmamodesel, evenparitysel;
|
||||
|
||||
// interrupts
|
||||
logic RXerr, RXerrIP, squashRXerrIP, prevSquashRXerrIP, setSquashRXerrIP, resetSquashRXerrIP;
|
||||
logic THRE, THRE_IP, squashTHRE_IP, prevSquashTHRE_IP, setSquashTHRE_IP, resetSquashTHRE_IP;
|
||||
logic rxdataavailintr, modemstatusintr, intrpending;
|
||||
logic [2:0] intrID;
|
||||
logic RXerr, RXerrIP, squashRXerrIP, prevSquashRXerrIP, setSquashRXerrIP, resetSquashRXerrIP;
|
||||
logic THRE, THRE_IP, squashTHRE_IP, prevSquashTHRE_IP, setSquashTHRE_IP, resetSquashTHRE_IP;
|
||||
logic rxdataavailintr, modemstatusintr, intrpending;
|
||||
logic [2:0] intrID;
|
||||
|
||||
logic baudpulseComb;
|
||||
logic HeadPointerLastMove;
|
||||
logic baudpulseComb;
|
||||
logic HeadPointerLastMove;
|
||||
|
||||
///////////////////////////////////////////
|
||||
// Input synchronization: 2-stage synchronizer
|
||||
|
@ -126,7 +126,7 @@ module uartPC16550D(
|
|||
always_ff @(posedge PCLK) begin
|
||||
{SINd, DSRbd, DCDbd, CTSbd, RIbd} <= #1 {SIN, DSRb, DCDb, CTSb, RIb};
|
||||
{SINsync, DSRbsync, DCDbsync, CTSbsync, RIbsync} <= #1 loop ? {SOUTbit, ~MCR[0], ~MCR[3], ~MCR[1], ~MCR[2]} :
|
||||
{SINd, DSRbd, DCDbd, CTSbd, RIbd}; // syncrhonized signals, handle loopback testing
|
||||
{SINd, DSRbd, DCDbd, CTSbd, RIbd}; // syncrhonized signals, handle loopback testing
|
||||
{DSRb2, DCDb2, CTSb2, RIb2} <= #1 {DSRbsync, DCDbsync, CTSbsync, RIbsync}; // for detecting state changes
|
||||
end
|
||||
|
||||
|
@ -141,8 +141,8 @@ module uartPC16550D(
|
|||
MCR <= #1 5'b0;
|
||||
LSR <= #1 8'b01100000;
|
||||
MSR <= #1 4'b0;
|
||||
DLL <= #1 8'd1; // this cannot be zero with DLM also zer0.
|
||||
DLM <= #1 8'b0;
|
||||
DLL <= #1 8'd1; // this cannot be zero with DLM also zer0.
|
||||
DLM <= #1 8'b0;
|
||||
SCR <= #1 8'b0; // not strictly necessary to reset
|
||||
end else begin
|
||||
if (~MEMWb) begin
|
||||
|
@ -367,7 +367,7 @@ module uartPC16550D(
|
|||
end
|
||||
|
||||
///////////////////////////////////////////
|
||||
// transmit timing and control
|
||||
// transmit timing and control
|
||||
///////////////////////////////////////////
|
||||
always_ff @(posedge PCLK, negedge PRESETn)
|
||||
if (~PRESETn) begin
|
||||
|
@ -455,20 +455,20 @@ module uartPC16550D(
|
|||
end
|
||||
|
||||
always_ff @(posedge PCLK, negedge PRESETn) begin
|
||||
// special condition to check if the fifo is empty or full. Because the head
|
||||
// pointer indicates where the next write goes and not the location of the
|
||||
// current head, the head and tail pointer being equal imply two different
|
||||
// things. First it could mean the fifo is empty and second it could mean
|
||||
// the fifo is full. To differenciate we need to know which pointer moved
|
||||
// to cause them to be equal. If the head pointer moved then it is full.
|
||||
// If the tail pointer moved then it is empty. it resets to empty so
|
||||
// if reset with the tail pointer indicating the last update.
|
||||
if(~PRESETn)
|
||||
HeadPointerLastMove <= 1'b0;
|
||||
else if(fifoenabled & ~MEMWb & A == 3'b000 & ~DLAB)
|
||||
HeadPointerLastMove <= 1'b1;
|
||||
else if(fifoenabled & ~txfifoempty & ~txsrfull & txstate == UART_IDLE)
|
||||
HeadPointerLastMove <= 1'b0;
|
||||
// special condition to check if the fifo is empty or full. Because the head
|
||||
// pointer indicates where the next write goes and not the location of the
|
||||
// current head, the head and tail pointer being equal imply two different
|
||||
// things. First it could mean the fifo is empty and second it could mean
|
||||
// the fifo is full. To differenciate we need to know which pointer moved
|
||||
// to cause them to be equal. If the head pointer moved then it is full.
|
||||
// If the tail pointer moved then it is empty. it resets to empty so
|
||||
// if reset with the tail pointer indicating the last update.
|
||||
if(~PRESETn)
|
||||
HeadPointerLastMove <= 1'b0;
|
||||
else if(fifoenabled & ~MEMWb & A == 3'b000 & ~DLAB)
|
||||
HeadPointerLastMove <= 1'b1;
|
||||
else if(fifoenabled & ~txfifoempty & ~txsrfull & txstate == UART_IDLE)
|
||||
HeadPointerLastMove <= 1'b0;
|
||||
end
|
||||
|
||||
assign txfifoempty = (txfifohead == txfifotail) & ~HeadPointerLastMove;
|
||||
|
@ -477,7 +477,7 @@ module uartPC16550D(
|
|||
(txfifohead + 16 - txfifotail);
|
||||
// verilator lint_on WIDTH
|
||||
//assign txfifofull = (txfifoentries == 4'b1111);
|
||||
assign txfifofull = (txfifohead == txfifotail) & HeadPointerLastMove;
|
||||
assign txfifofull = (txfifohead == txfifotail) & HeadPointerLastMove;
|
||||
|
||||
// transmit buffer ready bit
|
||||
always_ff @(posedge PCLK, negedge PRESETn) // track txrdy for DMA mode (FCR3 = FCR0 = 1)
|
||||
|
|
|
@ -31,58 +31,58 @@
|
|||
|
||||
module uncore (
|
||||
// AHB Bus Interface
|
||||
input logic HCLK, HRESETn,
|
||||
input logic TIMECLK,
|
||||
input logic HCLK, HRESETn,
|
||||
input logic TIMECLK,
|
||||
input logic [`PA_BITS-1:0] HADDR,
|
||||
input logic [`AHBW-1:0] HWDATA,
|
||||
input logic [`XLEN/8-1:0] HWSTRB,
|
||||
input logic HWRITE,
|
||||
input logic [2:0] HSIZE,
|
||||
input logic [2:0] HBURST,
|
||||
input logic [3:0] HPROT,
|
||||
input logic [1:0] HTRANS,
|
||||
input logic HMASTLOCK,
|
||||
input logic [`AHBW-1:0] HRDATAEXT,
|
||||
input logic HREADYEXT, HRESPEXT,
|
||||
output logic [`AHBW-1:0] HRDATA,
|
||||
output logic HREADY, HRESP,
|
||||
output logic HSELEXT,
|
||||
// peripheral pins
|
||||
output logic MTimerInt, MSwInt, // Timer and software interrupts from CLINT
|
||||
output logic MExtInt, SExtInt, // External interrupts from PLIC
|
||||
output logic [63:0] MTIME_CLINT, // MTIME, from CLINT
|
||||
input logic [31:0] GPIOPinsIn, // GPIO pin input value
|
||||
output logic [31:0] GPIOPinsOut, GPIOPinsEn, // GPIO pin output value and enable
|
||||
input logic UARTSin, // UART serial input
|
||||
output logic UARTSout, // UART serial output
|
||||
output logic SDCCmdOut, // SD Card command output
|
||||
output logic SDCCmdOE, // SD Card command output enable
|
||||
input logic SDCCmdIn, // SD Card command input
|
||||
input logic [3:0] SDCDatIn, // SD Card data input
|
||||
output logic SDCCLK // SD Card clock
|
||||
input logic [`AHBW-1:0] HWDATA,
|
||||
input logic [`XLEN/8-1:0] HWSTRB,
|
||||
input logic HWRITE,
|
||||
input logic [2:0] HSIZE,
|
||||
input logic [2:0] HBURST,
|
||||
input logic [3:0] HPROT,
|
||||
input logic [1:0] HTRANS,
|
||||
input logic HMASTLOCK,
|
||||
input logic [`AHBW-1:0] HRDATAEXT,
|
||||
input logic HREADYEXT, HRESPEXT,
|
||||
output logic [`AHBW-1:0] HRDATA,
|
||||
output logic HREADY, HRESP,
|
||||
output logic HSELEXT,
|
||||
// peripheral pins
|
||||
output logic MTimerInt, MSwInt, // Timer and software interrupts from CLINT
|
||||
output logic MExtInt, SExtInt, // External interrupts from PLIC
|
||||
output logic [63:0] MTIME_CLINT, // MTIME, from CLINT
|
||||
input logic [31:0] GPIOPinsIn, // GPIO pin input value
|
||||
output logic [31:0] GPIOPinsOut, GPIOPinsEn, // GPIO pin output value and enable
|
||||
input logic UARTSin, // UART serial input
|
||||
output logic UARTSout, // UART serial output
|
||||
output logic SDCCmdOut, // SD Card command output
|
||||
output logic SDCCmdOE, // SD Card command output enable
|
||||
input logic SDCCmdIn, // SD Card command input
|
||||
input logic [3:0] SDCDatIn, // SD Card data input
|
||||
output logic SDCCLK // SD Card clock
|
||||
);
|
||||
|
||||
logic [`XLEN-1:0] HREADRam, HREADSDC;
|
||||
logic [`XLEN-1:0] HREADRam, HREADSDC;
|
||||
|
||||
logic [10:0] HSELRegions;
|
||||
logic HSELDTIM, HSELIROM, HSELRam, HSELCLINT, HSELPLIC, HSELGPIO, HSELUART, HSELSDC;
|
||||
logic HSELDTIMD, HSELIROMD, HSELEXTD, HSELRamD, HSELCLINTD, HSELPLICD, HSELGPIOD, HSELUARTD, HSELSDCD;
|
||||
logic HRESPRam, HRESPSDC;
|
||||
logic HREADYRam, HRESPSDCD;
|
||||
logic [`XLEN-1:0] HREADBootRom;
|
||||
logic HSELBootRom, HSELBootRomD, HRESPBootRom, HREADYBootRom, HREADYSDC;
|
||||
logic HSELNoneD;
|
||||
logic UARTIntr,GPIOIntr;
|
||||
logic SDCIntM;
|
||||
logic [10:0] HSELRegions;
|
||||
logic HSELDTIM, HSELIROM, HSELRam, HSELCLINT, HSELPLIC, HSELGPIO, HSELUART, HSELSDC;
|
||||
logic HSELDTIMD, HSELIROMD, HSELEXTD, HSELRamD, HSELCLINTD, HSELPLICD, HSELGPIOD, HSELUARTD, HSELSDCD;
|
||||
logic HRESPRam, HRESPSDC;
|
||||
logic HREADYRam, HRESPSDCD;
|
||||
logic [`XLEN-1:0] HREADBootRom;
|
||||
logic HSELBootRom, HSELBootRomD, HRESPBootRom, HREADYBootRom, HREADYSDC;
|
||||
logic HSELNoneD;
|
||||
logic UARTIntr,GPIOIntr;
|
||||
logic SDCIntM;
|
||||
|
||||
logic PCLK, PRESETn, PWRITE, PENABLE;
|
||||
logic [3:0] PSEL, PREADY;
|
||||
logic [31:0] PADDR;
|
||||
logic [`XLEN-1:0] PWDATA;
|
||||
logic [`XLEN/8-1:0] PSTRB;
|
||||
logic [3:0][`XLEN-1:0] PRDATA;
|
||||
logic [`XLEN-1:0] HREADBRIDGE;
|
||||
logic HRESPBRIDGE, HREADYBRIDGE, HSELBRIDGE, HSELBRIDGED;
|
||||
logic PCLK, PRESETn, PWRITE, PENABLE;
|
||||
logic [3:0] PSEL, PREADY;
|
||||
logic [31:0] PADDR;
|
||||
logic [`XLEN-1:0] PWDATA;
|
||||
logic [`XLEN/8-1:0] PSTRB;
|
||||
logic [3:0][`XLEN-1:0] PRDATA;
|
||||
logic [`XLEN-1:0] HREADBRIDGE;
|
||||
logic HRESPBRIDGE, HREADYBRIDGE, HSELBRIDGE, HSELBRIDGED;
|
||||
|
||||
// Determine which region of physical memory (if any) is being accessed
|
||||
// Use a trimmed down portion of the PMA checker - only the address decoders
|
||||
|
@ -153,7 +153,7 @@ module uncore (
|
|||
// sdc interface
|
||||
.SDCCmdOut, .SDCCmdIn, .SDCCmdOE, .SDCDatIn, .SDCCLK,
|
||||
// interrupt to PLIC
|
||||
.SDCIntM
|
||||
.SDCIntM
|
||||
);
|
||||
end else begin : sdc
|
||||
assign SDCCLK = 0;
|
||||
|
@ -163,22 +163,22 @@ module uncore (
|
|||
|
||||
// AHB Read Multiplexer
|
||||
assign HRDATA = ({`XLEN{HSELRamD}} & HREADRam) |
|
||||
({`XLEN{HSELEXTD}} & HRDATAEXT) |
|
||||
({`XLEN{HSELEXTD}} & HRDATAEXT) |
|
||||
({`XLEN{HSELBRIDGED}} & HREADBRIDGE) |
|
||||
({`XLEN{HSELBootRomD}} & HREADBootRom) |
|
||||
({`XLEN{HSELSDCD}} & HREADSDC);
|
||||
|
||||
assign HRESP = HSELRamD & HRESPRam |
|
||||
HSELEXTD & HRESPEXT |
|
||||
HSELEXTD & HRESPEXT |
|
||||
HSELBRIDGE & HRESPBRIDGE |
|
||||
HSELBootRomD & HRESPBootRom |
|
||||
HSELSDC & HRESPSDC;
|
||||
HSELSDC & HRESPSDC;
|
||||
|
||||
assign HREADY = HSELRamD & HREADYRam |
|
||||
HSELEXTD & HREADYEXT |
|
||||
HSELEXTD & HREADYEXT |
|
||||
HSELBRIDGED & HREADYBRIDGE |
|
||||
HSELBootRomD & HREADYBootRom |
|
||||
HSELSDCD & HREADYSDC |
|
||||
HSELSDCD & HREADYSDC |
|
||||
HSELNoneD; // don't lock up the bus if no region is being accessed
|
||||
|
||||
// Address Decoder Delay (figure 4-2 in spec)
|
||||
|
|
|
@ -35,12 +35,12 @@ module wallypipelinedcore (
|
|||
input logic MTimerInt, MExtInt, SExtInt, MSwInt,
|
||||
input logic [63:0] MTIME_CLINT,
|
||||
// Bus Interface
|
||||
input logic [`AHBW-1:0] HRDATA,
|
||||
input logic [`AHBW-1:0] HRDATA,
|
||||
input logic HREADY, HRESP,
|
||||
output logic HCLK, HRESETn,
|
||||
output logic [`PA_BITS-1:0] HADDR,
|
||||
output logic [`AHBW-1:0] HWDATA,
|
||||
output logic [`XLEN/8-1:0] HWSTRB,
|
||||
output logic [`PA_BITS-1:0] HADDR,
|
||||
output logic [`AHBW-1:0] HWDATA,
|
||||
output logic [`XLEN/8-1:0] HWSTRB,
|
||||
output logic HWRITE,
|
||||
output logic [2:0] HSIZE,
|
||||
output logic [2:0] HBURST,
|
||||
|
@ -58,17 +58,17 @@ module wallypipelinedcore (
|
|||
logic IntDivE, W64E;
|
||||
logic CSRReadM, CSRWriteM, PrivilegedM;
|
||||
logic [1:0] AtomicM;
|
||||
logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE;
|
||||
logic [`XLEN-1:0] SrcAM;
|
||||
logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE;
|
||||
logic [`XLEN-1:0] SrcAM;
|
||||
logic [2:0] Funct3E;
|
||||
logic [31:0] InstrD;
|
||||
logic [31:0] InstrM, InstrOrigM;
|
||||
logic [`XLEN-1:0] PCSpillF, PCE, PCLinkE;
|
||||
logic [`XLEN-1:0] PCM;
|
||||
logic [`XLEN-1:0] CSRReadValW, MDUResultW;
|
||||
logic [`XLEN-1:0] UnalignedPCNextF, PC2NextF;
|
||||
logic [1:0] MemRWM;
|
||||
logic InstrValidD, InstrValidE, InstrValidM;
|
||||
logic [31:0] InstrM, InstrOrigM;
|
||||
logic [`XLEN-1:0] PCSpillF, PCE, PCLinkE;
|
||||
logic [`XLEN-1:0] PCM;
|
||||
logic [`XLEN-1:0] CSRReadValW, MDUResultW;
|
||||
logic [`XLEN-1:0] UnalignedPCNextF, PC2NextF;
|
||||
logic [1:0] MemRWM;
|
||||
logic InstrValidD, InstrValidE, InstrValidM;
|
||||
logic InstrMisalignedFaultM;
|
||||
logic IllegalBaseInstrD, IllegalFPUInstrD, IllegalIEUFPUInstrD;
|
||||
logic InstrPageFaultF, LoadPageFaultM, StoreAmoPageFaultM;
|
||||
|
@ -86,8 +86,8 @@ module wallypipelinedcore (
|
|||
logic [4:0] RdE, RdM, RdW;
|
||||
logic FPUStallD;
|
||||
logic FWriteIntE;
|
||||
logic [`FLEN-1:0] FWriteDataM;
|
||||
logic [`XLEN-1:0] FIntResM;
|
||||
logic [`FLEN-1:0] FWriteDataM;
|
||||
logic [`XLEN-1:0] FIntResM;
|
||||
logic [`XLEN-1:0] FCvtIntResW;
|
||||
logic FCvtIntW;
|
||||
logic FDivBusyE;
|
||||
|
@ -95,22 +95,22 @@ module wallypipelinedcore (
|
|||
logic FCvtIntStallD;
|
||||
logic FpLoadStoreM;
|
||||
logic [4:0] SetFflagsM;
|
||||
logic [`XLEN-1:0] FIntDivResultW;
|
||||
logic [`XLEN-1:0] FIntDivResultW;
|
||||
|
||||
// memory management unit signals
|
||||
logic ITLBWriteF;
|
||||
logic ITLBMissF;
|
||||
logic [`XLEN-1:0] SATP_REGW;
|
||||
logic [`XLEN-1:0] SATP_REGW;
|
||||
logic STATUS_MXR, STATUS_SUM, STATUS_MPRV;
|
||||
logic [1:0] STATUS_MPP, STATUS_FS;
|
||||
logic [1:0] STATUS_MPP, STATUS_FS;
|
||||
logic [1:0] PrivilegeModeW;
|
||||
logic [`XLEN-1:0] PTE;
|
||||
logic [`XLEN-1:0] PTE;
|
||||
logic [1:0] PageType;
|
||||
logic sfencevmaM, WFIStallM;
|
||||
logic SelHPTW;
|
||||
|
||||
// PMA checker signals
|
||||
var logic [`PA_BITS-3:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0];
|
||||
var logic [`PA_BITS-3:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0];
|
||||
var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0];
|
||||
|
||||
// IMem stalls
|
||||
|
@ -119,14 +119,14 @@ module wallypipelinedcore (
|
|||
|
||||
// cpu lsu interface
|
||||
logic [2:0] Funct3M;
|
||||
logic [`XLEN-1:0] IEUAdrE;
|
||||
logic [`XLEN-1:0] WriteDataM;
|
||||
logic [`XLEN-1:0] IEUAdrM;
|
||||
logic [`LLEN-1:0] ReadDataW;
|
||||
logic [`XLEN-1:0] IEUAdrE;
|
||||
logic [`XLEN-1:0] WriteDataM;
|
||||
logic [`XLEN-1:0] IEUAdrM;
|
||||
logic [`LLEN-1:0] ReadDataW;
|
||||
logic CommittedM;
|
||||
|
||||
// AHB ifu interface
|
||||
logic [`PA_BITS-1:0] IFUHADDR;
|
||||
logic [`PA_BITS-1:0] IFUHADDR;
|
||||
logic [2:0] IFUHBURST;
|
||||
logic [1:0] IFUHTRANS;
|
||||
logic [2:0] IFUHSIZE;
|
||||
|
@ -134,9 +134,9 @@ module wallypipelinedcore (
|
|||
logic IFUHREADY;
|
||||
|
||||
// AHB LSU interface
|
||||
logic [`PA_BITS-1:0] LSUHADDR;
|
||||
logic [`XLEN-1:0] LSUHWDATA;
|
||||
logic [`XLEN/8-1:0] LSUHWSTRB;
|
||||
logic [`PA_BITS-1:0] LSUHADDR;
|
||||
logic [`XLEN-1:0] LSUHWDATA;
|
||||
logic [`XLEN/8-1:0] LSUHWSTRB;
|
||||
logic LSUHWRITE;
|
||||
logic LSUHREADY;
|
||||
|
||||
|
@ -160,8 +160,8 @@ module wallypipelinedcore (
|
|||
logic BigEndianM;
|
||||
logic FCvtIntE;
|
||||
logic CommittedF;
|
||||
logic BranchD, BranchE, JumpD, JumpE;
|
||||
logic DCacheStallM, ICacheStallF;
|
||||
logic BranchD, BranchE, JumpD, JumpE;
|
||||
logic DCacheStallM, ICacheStallF;
|
||||
|
||||
// instruction fetch unit: PC, branch prediction, instruction cache
|
||||
ifu ifu(.clk, .reset,
|
||||
|
@ -247,20 +247,10 @@ module wallypipelinedcore (
|
|||
ebu ebu(// IFU connections
|
||||
.clk, .reset,
|
||||
// IFU interface
|
||||
.IFUHADDR,
|
||||
.IFUHBURST,
|
||||
.IFUHTRANS,
|
||||
.IFUHREADY,
|
||||
.IFUHSIZE,
|
||||
.IFUHADDR, .IFUHBURST, .IFUHTRANS, .IFUHREADY, .IFUHSIZE,
|
||||
// LSU interface
|
||||
.LSUHADDR,
|
||||
.LSUHWDATA,
|
||||
.LSUHWSTRB,
|
||||
.LSUHSIZE,
|
||||
.LSUHBURST,
|
||||
.LSUHTRANS,
|
||||
.LSUHWRITE,
|
||||
.LSUHREADY,
|
||||
.LSUHADDR, .LSUHWDATA, .LSUHWSTRB, .LSUHSIZE, .LSUHBURST,
|
||||
.LSUHTRANS, .LSUHWRITE, .LSUHREADY,
|
||||
// BUS interface
|
||||
.HREADY, .HRESP, .HCLK, .HRESETn,
|
||||
.HADDR, .HWDATA, .HWSTRB, .HWRITE, .HSIZE, .HBURST,
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue