Switched from the dedicated benchmark testbench to the simple testbench. Added generics to the simple testbench so that performance-related features can be controlled. Updated the GHDL scripts to accept more than one command-line parameter. Updated the performance test makefiles to call the simple testbench with the appropriate generics and GHDL settings.

This commit is contained in:
Mikael Mortensen 2024-03-17 14:11:09 +00:00
parent 4e49b898e1
commit 64ce75aba8
15 changed files with 10301 additions and 912 deletions

File diff suppressed because it is too large Load diff

View file

@ -1,43 +0,0 @@
#!/usr/bin/env bash
# Elaborate and run the NEORV32 benchmark testbench (neorv32_tb_benchmark) with GHDL.
#
# Usage: ghdl.run.sh [GHDL run option]...
#   Every argument is forwarded to the GHDL simulation run. If no (non-empty)
#   argument is given, "--stop-time=10ms" is used instead.
#
# Environment:
#   GHDL          GHDL command to use (default: ghdl)
#   GHDL_DEVNULL  if non-empty, the simulator's stdout is discarded

set -e

# Always operate from the directory that contains this script.
cd "$(dirname "$0")"

echo "Tip: Compile application with USER_FLAGS+=-DUART[0/1]_SIM_MODE to auto-enable UART[0/1]'s simulation mode (redirect UART output to simulator console)."

# Prepare simulation output files for UART0 and UART1
# - Testbench receiver log file (neorv32.testbench_uart?.out)
# - Direct simulation output (neorv32.uart?.sim_mode.text.out)
for uart in 0 1; do
  for item in \
    testbench_uart"$uart" \
    uart"$uart".sim_mode.text; do
    touch neorv32."$item".out
    chmod 777 neorv32."$item".out
  done
done

GHDL="${GHDL:-ghdl}"

# $GHDL is deliberately left unquoted so that it may carry extra words.
$GHDL -m --std=08 --work=neorv32 --workdir=build neorv32_tb_benchmark

# Forward ALL command-line parameters (previously only the first one was used);
# fall back to the default stop time when nothing was passed.
GHDL_RUN_ARGS="${*:---stop-time=10ms}"

echo "Using simulation run arguments: $GHDL_RUN_ARGS";

runcmd="$GHDL -r --std=08 --work=neorv32 --workdir=build neorv32_tb_benchmark \
  --max-stack-alloc=0 \
  --ieee-asserts=disable \
  --assert-level=error $GHDL_RUN_ARGS"

# $runcmd is expanded unquoted on purpose: it has to be split into words again.
if [ -n "$GHDL_DEVNULL" ]; then
  $runcmd >> /dev/null
else
  $runcmd
fi

View file

@ -1,18 +0,0 @@
#!/usr/bin/env bash
# Import the NEORV32 RTL sources and the benchmark testbench into the GHDL
# library "neorv32" (work directory: ./build).
#
# Environment:
#   NEORV32_LOCAL_RTL  path to the RTL folder (default: ../../rtl)
#   GHDL               GHDL command to use (default: ghdl)

set -e

# Always operate from the directory that contains this script; quoted so that
# paths containing spaces work.
cd "$(dirname "$0")"

NEORV32_LOCAL_RTL=${NEORV32_LOCAL_RTL:-../../rtl}

# Honour the same GHDL override as the run script does.
GHDL="${GHDL:-ghdl}"

mkdir -p build

# $GHDL is deliberately left unquoted so that it may carry extra words.
$GHDL -i --std=08 --work=neorv32 --workdir=build \
  "$NEORV32_LOCAL_RTL"/core/*.vhd \
  "$NEORV32_LOCAL_RTL"/core/mem/*.vhd \
  "$NEORV32_LOCAL_RTL"/processor_templates/*.vhd \
  "$NEORV32_LOCAL_RTL"/system_integration/*.vhd \
  "$NEORV32_LOCAL_RTL"/test_setups/*.vhd \
  neorv32_tb.benchmark.vhd \
  uart_rx.simple.vhd

View file

@ -1,9 +0,0 @@
#!/usr/bin/env bash
# Convenience wrapper: import the sources (ghdl.setup.sh), then elaborate and
# run the simulation (ghdl.run.sh).
#
# Usage: ghdl.sh [GHDL run option]...

# Abort if any command returns != 0
set -e

# Always operate from the directory that contains this script.
cd "$(dirname "$0")"

./ghdl.setup.sh

# Hand over every argument unchanged; an unquoted $1 would be word-split and
# everything after the first argument would be lost.
./ghdl.run.sh "$@"

View file

@ -1,612 +0,0 @@
-- #################################################################################################
-- # << NEORV32 - Default Processor Testbench >> #
-- # ********************************************************************************************* #
-- # The processor is configured to use a maximum of functional units (for testing purpose). #
-- # Use the "User Configuration" section to configure the testbench according to your needs. #
-- # See NEORV32 data sheet for more information. #
-- # ********************************************************************************************* #
-- # BSD 3-Clause License #
-- # #
-- # Copyright (c) 2024, Stephan Nolting. All rights reserved. #
-- # #
-- # Redistribution and use in source and binary forms, with or without modification, are #
-- # permitted provided that the following conditions are met: #
-- # #
-- # 1. Redistributions of source code must retain the above copyright notice, this list of #
-- # conditions and the following disclaimer. #
-- # #
-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of #
-- # conditions and the following disclaimer in the documentation and/or other materials #
-- # provided with the distribution. #
-- # #
-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to #
-- # endorse or promote products derived from this software without specific prior written #
-- # permission. #
-- # #
-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS #
-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF #
-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE #
-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, #
-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE #
-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED #
-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING #
-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED #
-- # OF THE POSSIBILITY OF SUCH DAMAGE. #
-- # ********************************************************************************************* #
-- # The NEORV32 Processor - https://github.com/stnolting/neorv32 (c) Stephan Nolting #
-- #################################################################################################
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_real.all;
library ieee;
use std.env.finish;
library neorv32;
use neorv32.neorv32_package.all;
use neorv32.neorv32_application_image.all; -- this file is generated by the image generator
use std.textio.all;
entity neorv32_tb_benchmark is
end neorv32_tb_benchmark;
architecture neorv32_tb_benchmark_rtl of neorv32_tb_benchmark is
-- User Configuration ---------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
-- general --
constant int_imem_c : boolean := true; -- true: use proc-internal IMEM, false: use external simulated IMEM (ext. mem A)
constant int_dmem_c : boolean := true; -- true: use proc-internal DMEM, false: use external simulated DMEM (ext. mem B)
constant imem_size_c : natural := 128*1024; -- size in bytes of processor-internal IMEM / external mem A
constant dmem_size_c : natural := 8*1024; -- size in bytes of processor-internal DMEM / external mem B
constant f_clock_c : natural := 100000000; -- main clock in Hz
constant baud0_rate_c : natural := 19200; -- simulation UART0 (primary UART) baud rate
constant baud1_rate_c : natural := 19200; -- simulation UART1 (secondary UART) baud rate
constant icache_en_c : boolean := false; -- implement i-cache
constant icache_block_size_c : natural := 64; -- i-cache block size in bytes
-- simulated external Wishbone memory A (can be used as external IMEM) --
constant ext_mem_a_base_addr_c : std_ulogic_vector(31 downto 0) := x"00000000"; -- wishbone memory base address (external IMEM base)
constant ext_mem_a_size_c : natural := imem_size_c; -- wishbone memory size in bytes
constant ext_mem_a_latency_c : natural := 8; -- latency in clock cycles (min 1, max 255), plus 1 cycle initial delay
-- simulated external Wishbone memory B (can be used as external DMEM) --
constant ext_mem_b_base_addr_c : std_ulogic_vector(31 downto 0) := x"80000000"; -- wishbone memory base address (external DMEM base)
constant ext_mem_b_size_c : natural := dmem_size_c; -- wishbone memory size in bytes
constant ext_mem_b_latency_c : natural := 8; -- latency in clock cycles (min 1, max 255), plus 1 cycle initial delay
-- simulated external Wishbone memory C (can be used to simulate external IO access) --
constant ext_mem_c_base_addr_c : std_ulogic_vector(31 downto 0) := x"F0000000"; -- wishbone memory base address (default begin of EXTERNAL IO area)
constant ext_mem_c_size_c : natural := icache_block_size_c/2; -- wishbone memory size in bytes, should be smaller than an iCACHE block
constant ext_mem_c_latency_c : natural := 128; -- latency in clock cycles (min 1, max 255), plus 1 cycle initial delay
-- simulation interrupt trigger --
constant irq_trigger_base_addr_c : std_ulogic_vector(31 downto 0) := x"FF000000";
-- -------------------------------------------------------------------------------------------
-- internals - hands off! --
constant uart0_baud_val_c : real := real(f_clock_c) / real(baud0_rate_c);
constant uart1_baud_val_c : real := real(f_clock_c) / real(baud1_rate_c);
constant t_clock_c : time := (1 sec) / f_clock_c;
-- generators --
signal clk_gen, rst_gen : std_ulogic := '0';
-- text.io --
file file_uart0_tx_out : text open write_mode is "neorv32.testbench_uart0.out";
-- uart --
signal uart0_txd, uart1_txd : std_ulogic;
signal uart0_cts, uart1_cts : std_ulogic;
-- gpio --
signal gpio : std_ulogic_vector(63 downto 0);
-- twi --
signal twi_scl, twi_sda : std_logic;
signal twi_scl_i, twi_scl_o, twi_sda_i, twi_sda_o : std_ulogic;
-- 1-wire --
signal onewire : std_logic;
signal onewire_i, onewire_o : std_ulogic;
-- spi & sdi --
signal spi_csn: std_ulogic_vector(7 downto 0);
signal spi_di, spi_do, spi_clk : std_ulogic;
signal sdi_di, sdi_do, sdi_clk, sdi_csn : std_ulogic;
-- irq --
signal msi_ring, mei_ring : std_ulogic;
-- SLINK echo --
signal slink_dat : std_ulogic_vector(31 downto 0);
signal slink_val : std_ulogic;
signal slink_lst : std_ulogic;
signal slink_rdy : std_ulogic;
-- Wishbone bus --
type wishbone_t is record
addr : std_ulogic_vector(31 downto 0); -- address
wdata : std_ulogic_vector(31 downto 0); -- master write data
rdata : std_ulogic_vector(31 downto 0); -- master read data
we : std_ulogic; -- write enable
sel : std_ulogic_vector(03 downto 0); -- byte enable
stb : std_ulogic; -- strobe
cyc : std_ulogic; -- valid cycle
ack : std_ulogic; -- transfer acknowledge
err : std_ulogic; -- transfer error
tag : std_ulogic_vector(02 downto 0); -- request tag
end record;
signal wb_cpu, wb_mem_a, wb_mem_b, wb_mem_c, wb_irq : wishbone_t;
-- Wishbone access latency type --
type ext_mem_read_latency_t is array (0 to 255) of std_ulogic_vector(31 downto 0);
-- simulated external memory c (IO) --
signal ext_ram_c : mem32_t(0 to ext_mem_c_size_c/4-1); -- uninitialized, used to simulate external IO
-- simulated external memory bus feedback type --
type ext_mem_t is record
rdata : ext_mem_read_latency_t;
acc_en : std_ulogic;
ack : std_ulogic_vector(255 downto 0);
end record;
signal ext_mem_a, ext_mem_b, ext_mem_c : ext_mem_t;
begin
-- Clock/Reset Generator ------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
clk_gen <= not clk_gen after (t_clock_c/2);
rst_gen <= '0', '1' after 60*(t_clock_c/2);
-- The Core of the Problem ----------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
-- Device under test: the NEORV32 processor top entity.
-- Memory sizes, clock frequency and i-cache settings are taken from the testbench
-- constants; all remaining generics are fixed literals.
-- Several interfaces are looped back onto themselves in the port map:
-- SLINK TX -> SLINK RX, GPIO out -> GPIO in, UARTx TXD -> UARTx RXD.
-- NOTE(review): MEM_EXT_EN is false while the Wishbone port is still wired to wb_cpu -
-- confirm that the simulated external memories / IRQ trigger are meant to be inactive.
neorv32_top_inst: neorv32_top
generic map (
-- General --
CLOCK_FREQUENCY => f_clock_c, -- clock frequency of clk_i in Hz
CLOCK_GATING_EN => true, -- enable clock gating when in sleep mode
HART_ID => x"00000000", -- hardware thread ID
VENDOR_ID => x"00000000", -- vendor's JEDEC ID
INT_BOOTLOADER_EN => false, -- boot configuration: true = boot explicit bootloader; false = boot from int/ext (I)MEM
-- On-Chip Debugger (OCD) --
ON_CHIP_DEBUGGER_EN => true, -- implement on-chip debugger
-- RISC-V CPU Extensions --
CPU_EXTENSION_RISCV_A => true, -- implement atomic memory operations extension?
CPU_EXTENSION_RISCV_B => true, -- implement bit-manipulation extension?
CPU_EXTENSION_RISCV_C => false, -- implement compressed extension?
CPU_EXTENSION_RISCV_E => false, -- implement embedded RF extension?
CPU_EXTENSION_RISCV_M => true, -- implement mul/div extension?
CPU_EXTENSION_RISCV_U => false, -- implement user mode extension?
CPU_EXTENSION_RISCV_Zfinx => true, -- implement 32-bit floating-point extension (using INT reg!)
CPU_EXTENSION_RISCV_Zicntr => true, -- implement base counters?
CPU_EXTENSION_RISCV_Zicond => true, -- implement integer conditional operations?
CPU_EXTENSION_RISCV_Zihpm => true, -- implement hardware performance monitors?
CPU_EXTENSION_RISCV_Zmmul => false, -- implement multiply-only M sub-extension?
CPU_EXTENSION_RISCV_Zxcfu => true, -- implement custom (instr.) functions unit?
-- Extension Options --
FAST_MUL_EN => true, -- use DSPs for M extension's multiplier
FAST_SHIFT_EN => true, -- use barrel shifter for shift operations
REGFILE_HW_RST => false, -- no hardware reset
-- Physical Memory Protection (PMP) --
PMP_NUM_REGIONS => 0, -- number of regions (0..16)
PMP_MIN_GRANULARITY => 4, -- minimal region granularity in bytes, has to be a power of 2, min 4 bytes
PMP_TOR_MODE_EN => true, -- implement TOR mode
PMP_NAP_MODE_EN => true, -- implement NAPOT/NA4 mode
-- Hardware Performance Monitors (HPM) --
HPM_NUM_CNTS => 12, -- number of implemented HPM counters (0..29)
HPM_CNT_WIDTH => 40, -- total size of HPM counters (0..64)
-- Atomic Memory Access - Reservation Set Granularity --
AMO_RVS_GRANULARITY => 4, -- size in bytes, has to be a power of 2, min 4
-- Internal Instruction memory --
MEM_INT_IMEM_EN => int_imem_c , -- implement processor-internal instruction memory
MEM_INT_IMEM_SIZE => imem_size_c, -- size of processor-internal instruction memory in bytes
-- Internal Data memory --
MEM_INT_DMEM_EN => int_dmem_c, -- implement processor-internal data memory
MEM_INT_DMEM_SIZE => dmem_size_c, -- size of processor-internal data memory in bytes
-- Internal Instruction Cache (iCACHE) --
ICACHE_EN => icache_en_c, -- implement instruction cache
ICACHE_NUM_BLOCKS => 8, -- i-cache: number of blocks (min 2), has to be a power of 2
ICACHE_BLOCK_SIZE => icache_block_size_c, -- i-cache: block size in bytes (min 4), has to be a power of 2
ICACHE_ASSOCIATIVITY => 2, -- i-cache: associativity / number of sets (1=direct_mapped), has to be a power of 2
-- Internal Data Cache (dCACHE) --
DCACHE_EN => false, -- implement data cache
DCACHE_NUM_BLOCKS => 8, -- d-cache: number of blocks (min 1), has to be a power of 2
DCACHE_BLOCK_SIZE => 64, -- d-cache: block size in bytes (min 4), has to be a power of 2
-- External memory interface --
MEM_EXT_EN => false, -- implement external memory bus interface?
MEM_EXT_TIMEOUT => 256, -- cycles after a pending bus access auto-terminates (0 = disabled)
MEM_EXT_PIPE_MODE => false, -- protocol: false=classic/standard wishbone mode, true=pipelined wishbone mode
MEM_EXT_BIG_ENDIAN => false, -- byte order: true=big-endian, false=little-endian
MEM_EXT_ASYNC_RX => false, -- use register buffer for RX data when false
MEM_EXT_ASYNC_TX => false, -- use register buffer for TX data when false
-- Execute in-place module (XIP) --
XIP_EN => false, -- implement execute in place module (XIP)?
XIP_CACHE_EN => false, -- implement XIP cache?
XIP_CACHE_NUM_BLOCKS => 4, -- number of blocks (min 1), has to be a power of 2
XIP_CACHE_BLOCK_SIZE => 256, -- block size in bytes (min 4), has to be a power of 2
-- External Interrupts Controller (XIRQ) --
XIRQ_NUM_CH => 32, -- number of external IRQ channels (0..32)
XIRQ_TRIGGER_TYPE => (others => '1'), -- trigger type: 0=level, 1=edge
XIRQ_TRIGGER_POLARITY => (others => '1'), -- trigger polarity: 0=low-level/falling-edge, 1=high-level/rising-edge
-- Processor peripherals --
IO_GPIO_NUM => 64, -- number of GPIO input/output pairs (0..64)
IO_MTIME_EN => true, -- implement machine system timer (MTIME)?
IO_UART0_EN => true, -- implement primary universal asynchronous receiver/transmitter (UART0)?
IO_UART0_RX_FIFO => 32, -- RX fifo depth, has to be a power of two, min 1
IO_UART0_TX_FIFO => 32, -- TX fifo depth, has to be a power of two, min 1
IO_UART1_EN => true, -- implement secondary universal asynchronous receiver/transmitter (UART1)?
IO_UART1_RX_FIFO => 1, -- RX fifo depth, has to be a power of two, min 1
IO_UART1_TX_FIFO => 1, -- TX fifo depth, has to be a power of two, min 1
IO_SPI_EN => false, -- implement serial peripheral interface (SPI)?
IO_SPI_FIFO => 4, -- SPI RTX fifo depth, has to be zero or a power of two
IO_SDI_EN => false, -- implement serial data interface (SDI)?
IO_SDI_FIFO => 4, -- SDI RTX fifo depth, has to be zero or a power of two
IO_TWI_EN => false, -- implement two-wire interface (TWI)?
IO_PWM_NUM_CH => 12, -- number of PWM channels to implement (0..12); 0 = disabled
IO_WDT_EN => false, -- implement watch dog timer (WDT)?
IO_TRNG_EN => false, -- implement true random number generator (TRNG)?
IO_TRNG_FIFO => 4, -- TRNG fifo depth, has to be a power of two, min 1
IO_CFS_EN => false, -- implement custom functions subsystem (CFS)?
IO_CFS_CONFIG => (others => '0'), -- custom CFS configuration generic
IO_CFS_IN_SIZE => 32, -- size of CFS input conduit in bits
IO_CFS_OUT_SIZE => 32, -- size of CFS output conduit in bits
IO_NEOLED_EN => false, -- implement NeoPixel-compatible smart LED interface (NEOLED)?
IO_NEOLED_TX_FIFO => 8, -- NEOLED TX FIFO depth, 1..32k, has to be a power of two
IO_GPTMR_EN => false, -- implement general purpose timer (GPTMR)?
IO_ONEWIRE_EN => false, -- implement 1-wire interface (ONEWIRE)?
IO_DMA_EN => false, -- implement direct memory access controller (DMA)?
IO_SLINK_EN => false, -- implement stream link interface (SLINK)?
IO_SLINK_RX_FIFO => 2, -- RX fifo depth, has to be a power of two, min 1
IO_SLINK_TX_FIFO => 2, -- TX fifo depth, has to be a power of two, min 1
IO_CRC_EN => false -- implement cyclic redundancy check unit (CRC)?
)
port map (
-- Global control --
clk_i => clk_gen, -- global clock, rising edge
rstn_i => rst_gen, -- global reset, low-active, async
-- JTAG on-chip debugger interface (available if ON_CHIP_DEBUGGER_EN = true) --
jtag_trst_i => '1', -- low-active TAP reset (optional)
jtag_tck_i => '0', -- serial clock
jtag_tdi_i => '0', -- serial data input
jtag_tdo_o => open, -- serial data output
jtag_tms_i => '0', -- mode select
-- Wishbone bus interface (available if MEM_EXT_EN = true) --
wb_tag_o => wb_cpu.tag, -- request tag
wb_adr_o => wb_cpu.addr, -- address
wb_dat_i => wb_cpu.rdata, -- read data
wb_dat_o => wb_cpu.wdata, -- write data
wb_we_o => wb_cpu.we, -- read/write
wb_sel_o => wb_cpu.sel, -- byte enable
wb_stb_o => wb_cpu.stb, -- strobe
wb_cyc_o => wb_cpu.cyc, -- valid cycle
wb_ack_i => wb_cpu.ack, -- transfer acknowledge
wb_err_i => wb_cpu.err, -- transfer error
-- Stream Link Interface (available if IO_SLINK_EN = true); TX is looped back to RX --
slink_rx_dat_i => slink_dat, -- RX input data
slink_rx_val_i => slink_val, -- RX valid input
slink_rx_lst_i => slink_lst, -- last element of stream
slink_rx_rdy_o => slink_rdy, -- RX ready to receive
slink_tx_dat_o => slink_dat, -- TX output data
slink_tx_val_o => slink_val, -- TX valid output
slink_tx_lst_o => slink_lst, -- last element of stream
slink_tx_rdy_i => slink_rdy, -- TX ready to send
-- XIP (execute in place via SPI) signals (available if XIP_EN = true) --
xip_csn_o => open, -- chip-select, low-active
xip_clk_o => open, -- serial clock
xip_dat_i => '0', -- device data input
xip_dat_o => open, -- controller data output
-- GPIO (available if IO_GPIO_NUM > 0); outputs are looped back to the inputs --
gpio_o => gpio, -- parallel output
gpio_i => gpio, -- parallel input
-- primary UART0 (available if IO_UART0_EN = true); TXD is looped back to RXD --
uart0_txd_o => uart0_txd, -- UART0 send data
uart0_rxd_i => uart0_txd, -- UART0 receive data
uart0_rts_o => uart1_cts, -- HW flow control: UART0.RX ready to receive ("RTR"), low-active, optional; drives UART1's CTS input
uart0_cts_i => uart0_cts, -- HW flow control: UART0.TX allowed to transmit, low-active, optional
-- secondary UART1 (available if IO_UART1_EN = true); TXD is looped back to RXD --
uart1_txd_o => uart1_txd, -- UART1 send data
uart1_rxd_i => uart1_txd, -- UART1 receive data
uart1_rts_o => uart0_cts, -- HW flow control: UART1.RX ready to receive ("RTR"), low-active, optional; drives UART0's CTS input
uart1_cts_i => uart1_cts, -- HW flow control: UART1.TX allowed to transmit, low-active, optional
-- SPI (available if IO_SPI_EN = true) --
spi_clk_o => spi_clk, -- SPI serial clock
spi_dat_o => spi_do, -- controller data out, peripheral data in
spi_dat_i => spi_di, -- controller data in, peripheral data out
spi_csn_o => spi_csn, -- SPI CS
-- SDI (available if IO_SDI_EN = true) --
sdi_clk_i => sdi_clk, -- SDI serial clock
sdi_dat_o => sdi_do, -- controller data out, peripheral data in
sdi_dat_i => sdi_di, -- controller data in, peripheral data out
sdi_csn_i => sdi_csn, -- chip-select
-- TWI (available if IO_TWI_EN = true) --
twi_sda_i => twi_sda_i, -- serial data line sense input
twi_sda_o => twi_sda_o, -- serial data line output (pull low only)
twi_scl_i => twi_scl_i, -- serial clock line sense input
twi_scl_o => twi_scl_o, -- serial clock line output (pull low only)
-- 1-Wire Interface (available if IO_ONEWIRE_EN = true) --
onewire_i => onewire_i, -- 1-wire bus sense input
onewire_o => onewire_o, -- 1-wire bus output (pull low only)
-- PWM (available if IO_PWM_NUM_CH > 0) --
pwm_o => open, -- pwm channels
-- Custom Functions Subsystem IO --
cfs_in_i => (others => '0'), -- custom CFS inputs
cfs_out_o => open, -- custom CFS outputs
-- NeoPixel-compatible smart LED interface (available if IO_NEOLED_EN = true) --
neoled_o => open, -- async serial data line
-- Machine timer system time (available if IO_MTIME_EN = true) --
mtime_time_o => open,
-- GPTMR timer capture (available if IO_GPTMR_EN = true) --
gptmr_trig_i => gpio(63), -- capture trigger
-- External platform interrupts (available if XIRQ_NUM_CH > 0) --
xirq_i => gpio(31 downto 0), -- IRQ channels
-- CPU Interrupts --
mtime_irq_i => '0', -- machine timer interrupt, available if IO_MTIME_EN = false
msw_irq_i => msi_ring, -- machine software interrupt
mext_irq_i => mei_ring -- machine external interrupt
);
-- TWI tri-state driver --
twi_sda <= '0' when (twi_sda_o = '0') else 'Z'; -- module can only pull the line low actively
twi_scl <= '0' when (twi_scl_o = '0') else 'Z';
twi_sda_i <= std_ulogic(twi_sda);
twi_scl_i <= std_ulogic(twi_scl);
-- 1-Wire tri-state driver --
onewire <= '0' when (onewire_o = '0') else 'Z'; -- module can only pull the line low actively
onewire_i <= std_ulogic(onewire);
-- TWI termination (pull-ups) --
twi_scl <= 'H';
twi_sda <= 'H';
-- 1-Wire termination (pull-up) --
onewire <= 'H';
-- SPI/SDI echo --
sdi_clk <= spi_clk;
sdi_csn <= spi_csn(7);
sdi_di <= spi_do;
spi_di <= sdi_do when (spi_csn(7) = '0') else spi_do;
-- UART Simulation Receiver ---------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
uart0_checker: entity work.uart_rx_simple
generic map (
name => "uart0",
uart_baud_val_c => uart0_baud_val_c
)
port map (
clk => clk_gen,
uart_txd => uart0_txd
);
uart1_checker: entity work.uart_rx_simple
generic map (
name => "uart1",
uart_baud_val_c => uart1_baud_val_c
)
port map (
clk => clk_gen,
uart_txd => uart1_txd
);
-- Wishbone Fabric ------------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
-- CPU broadcast signals --
wb_mem_a.addr <= wb_cpu.addr;
wb_mem_a.wdata <= wb_cpu.wdata;
wb_mem_a.we <= wb_cpu.we;
wb_mem_a.sel <= wb_cpu.sel;
wb_mem_a.tag <= wb_cpu.tag;
wb_mem_a.cyc <= wb_cpu.cyc;
wb_mem_b.addr <= wb_cpu.addr;
wb_mem_b.wdata <= wb_cpu.wdata;
wb_mem_b.we <= wb_cpu.we;
wb_mem_b.sel <= wb_cpu.sel;
wb_mem_b.tag <= wb_cpu.tag;
wb_mem_b.cyc <= wb_cpu.cyc;
wb_mem_c.addr <= wb_cpu.addr;
wb_mem_c.wdata <= wb_cpu.wdata;
wb_mem_c.we <= wb_cpu.we;
wb_mem_c.sel <= wb_cpu.sel;
wb_mem_c.tag <= wb_cpu.tag;
wb_mem_c.cyc <= wb_cpu.cyc;
wb_irq.addr <= wb_cpu.addr;
wb_irq.wdata <= wb_cpu.wdata;
wb_irq.we <= wb_cpu.we;
wb_irq.sel <= wb_cpu.sel;
wb_irq.tag <= wb_cpu.tag;
wb_irq.cyc <= wb_cpu.cyc;
-- CPU read-back signals (no mux here since peripherals have "output gates") --
wb_cpu.rdata <= wb_mem_a.rdata or wb_mem_b.rdata or wb_mem_c.rdata or wb_irq.rdata;
wb_cpu.ack <= wb_mem_a.ack or wb_mem_b.ack or wb_mem_c.ack or wb_irq.ack;
wb_cpu.err <= wb_mem_a.err or wb_mem_b.err or wb_mem_c.err or wb_irq.err;
-- peripheral select via STROBE signal --
wb_mem_a.stb <= wb_cpu.stb when (wb_cpu.addr >= ext_mem_a_base_addr_c) and (wb_cpu.addr < std_ulogic_vector(unsigned(ext_mem_a_base_addr_c) + ext_mem_a_size_c)) else '0';
wb_mem_b.stb <= wb_cpu.stb when (wb_cpu.addr >= ext_mem_b_base_addr_c) and (wb_cpu.addr < std_ulogic_vector(unsigned(ext_mem_b_base_addr_c) + ext_mem_b_size_c)) else '0';
wb_mem_c.stb <= wb_cpu.stb when (wb_cpu.addr >= ext_mem_c_base_addr_c) and (wb_cpu.addr < std_ulogic_vector(unsigned(ext_mem_c_base_addr_c) + ext_mem_c_size_c)) else '0';
wb_irq.stb <= wb_cpu.stb when (wb_cpu.addr = irq_trigger_base_addr_c) else '0';
-- Wishbone Memory A (simulated external IMEM) --------------------------------------------
-- -------------------------------------------------------------------------------------------
-- Simulated external instruction memory (Wishbone memory A); only generated when
-- the processor-internal IMEM is not used. The storage is pre-loaded with the
-- application image and answers after ext_mem_a_latency_c pipeline stages.
generate_ext_imem:
if not int_imem_c generate
  ext_mem_a_access: process(clk_gen)
    -- initialized storage, used to simulate external IMEM
    variable ext_ram_a : mem32_t(0 to ext_mem_a_size_c/4-1) := mem32_init_f(application_init_image, ext_mem_a_size_c/4);
  begin
    if rising_edge(clk_gen) then

      -- first stage of the acknowledge pipeline --
      ext_mem_a.ack(0) <= wb_mem_a.cyc and wb_mem_a.stb;

      -- byte-wise write access (storage is a variable: the read below sees the new data) --
      if ((wb_mem_a.cyc and wb_mem_a.stb and wb_mem_a.we) = '1') then
        for lane in 0 to 3 loop
          if (wb_mem_a.sel(lane) = '1') then
            ext_ram_a(to_integer(unsigned(wb_mem_a.addr(index_size_f(ext_mem_a_size_c/4)+1 downto 2))))(8*lane+7 downto 8*lane) := wb_mem_a.wdata(8*lane+7 downto 8*lane);
          end if;
        end loop; -- lane
      end if;

      -- word-aligned read access feeds the first stage of the data pipeline --
      ext_mem_a.rdata(0) <= ext_ram_a(to_integer(unsigned(wb_mem_a.addr(index_size_f(ext_mem_a_size_c/4)+1 downto 2))));

      -- virtual read/ack latency; the loop range is empty for a latency of 1 --
      for stage in 1 to ext_mem_a_latency_c-1 loop
        ext_mem_a.rdata(stage) <= ext_mem_a.rdata(stage-1);
        ext_mem_a.ack(stage)   <= ext_mem_a.ack(stage-1) and wb_mem_a.cyc;
      end loop; -- stage

      -- bus output register: idle defaults, overridden when the pipeline delivers --
      wb_mem_a.err   <= '0';
      wb_mem_a.rdata <= (others => '0');
      wb_mem_a.ack   <= '0';
      if (ext_mem_a.ack(ext_mem_a_latency_c-1) = '1') and (wb_mem_a.cyc = '1') then
        wb_mem_a.rdata <= ext_mem_a.rdata(ext_mem_a_latency_c-1);
        wb_mem_a.ack   <= '1';
      end if;

    end if;
  end process ext_mem_a_access;
end generate;
-- Processor-internal IMEM is used: external memory A does not exist, so its
-- bus response signals are tied to their idle values.
generate_ext_imem_false:
if int_imem_c generate
  wb_mem_a.err   <= '0';
  wb_mem_a.ack   <= '0';
  wb_mem_a.rdata <= (others => '0');
end generate;
-- Wishbone Memory B (simulated external DMEM) --------------------------------------------
-- -------------------------------------------------------------------------------------------
-- Simulated external data memory (Wishbone memory B); only generated when the
-- processor-internal DMEM is not used. The storage starts out all-zero and
-- answers after ext_mem_b_latency_c pipeline stages.
generate_ext_dmem:
if not int_dmem_c generate
  ext_mem_b_access: process(clk_gen)
    -- zero-initialized storage, used to simulate external DMEM
    variable ext_ram_b : mem32_t(0 to ext_mem_b_size_c/4-1) := (others => (others => '0'));
  begin
    if rising_edge(clk_gen) then

      -- first stage of the acknowledge pipeline --
      ext_mem_b.ack(0) <= wb_mem_b.cyc and wb_mem_b.stb;

      -- byte-wise write access (storage is a variable: the read below sees the new data) --
      if ((wb_mem_b.cyc and wb_mem_b.stb and wb_mem_b.we) = '1') then
        for lane in 0 to 3 loop
          if (wb_mem_b.sel(lane) = '1') then
            ext_ram_b(to_integer(unsigned(wb_mem_b.addr(index_size_f(ext_mem_b_size_c/4)+1 downto 2))))(8*lane+7 downto 8*lane) := wb_mem_b.wdata(8*lane+7 downto 8*lane);
          end if;
        end loop; -- lane
      end if;

      -- word-aligned read access feeds the first stage of the data pipeline --
      ext_mem_b.rdata(0) <= ext_ram_b(to_integer(unsigned(wb_mem_b.addr(index_size_f(ext_mem_b_size_c/4)+1 downto 2))));

      -- virtual read/ack latency; the loop range is empty for a latency of 1 --
      for stage in 1 to ext_mem_b_latency_c-1 loop
        ext_mem_b.rdata(stage) <= ext_mem_b.rdata(stage-1);
        ext_mem_b.ack(stage)   <= ext_mem_b.ack(stage-1) and wb_mem_b.cyc;
      end loop; -- stage

      -- bus output register: idle defaults, overridden when the pipeline delivers --
      wb_mem_b.err   <= '0';
      wb_mem_b.rdata <= (others => '0');
      wb_mem_b.ack   <= '0';
      if (ext_mem_b.ack(ext_mem_b_latency_c-1) = '1') and (wb_mem_b.cyc = '1') then
        wb_mem_b.rdata <= ext_mem_b.rdata(ext_mem_b_latency_c-1);
        wb_mem_b.ack   <= '1';
      end if;

    end if;
  end process ext_mem_b_access;
end generate;
-- Processor-internal DMEM is used: external memory B does not exist, so its
-- bus response signals are tied to their idle values.
generate_ext_dmem_false:
if int_dmem_c generate
  wb_mem_b.err   <= '0';
  wb_mem_b.ack   <= '0';
  wb_mem_b.rdata <= (others => '0');
end generate;
-- Wishbone Memory C (simulated external IO) ----------------------------------------------
-- -------------------------------------------------------------------------------------------
-- Simulated external IO memory (Wishbone memory C). The storage is the
-- uninitialized signal ext_ram_c; responses are delayed by ext_mem_c_latency_c
-- pipeline stages.
ext_mem_c_access: process(clk_gen)
begin
  if rising_edge(clk_gen) then

    -- first stage of the acknowledge pipeline --
    ext_mem_c.ack(0) <= wb_mem_c.cyc and wb_mem_c.stb;

    -- byte-wise write access --
    if ((wb_mem_c.cyc and wb_mem_c.stb and wb_mem_c.we) = '1') then
      for lane in 0 to 3 loop
        if (wb_mem_c.sel(lane) = '1') then
          ext_ram_c(to_integer(unsigned(wb_mem_c.addr(index_size_f(ext_mem_c_size_c/4)+1 downto 2))))(8*lane+7 downto 8*lane) <= wb_mem_c.wdata(8*lane+7 downto 8*lane);
        end if;
      end loop; -- lane
    end if;

    -- word-aligned read access feeds the first stage of the data pipeline --
    ext_mem_c.rdata(0) <= ext_ram_c(to_integer(unsigned(wb_mem_c.addr(index_size_f(ext_mem_c_size_c/4)+1 downto 2))));

    -- virtual read/ack latency; the loop range is empty for a latency of 1 --
    for stage in 1 to ext_mem_c_latency_c-1 loop
      ext_mem_c.rdata(stage) <= ext_mem_c.rdata(stage-1);
      ext_mem_c.ack(stage)   <= ext_mem_c.ack(stage-1) and wb_mem_c.cyc;
    end loop; -- stage

    -- bus output register: idle defaults, overridden when the pipeline delivers --
    wb_mem_c.err   <= '0';
    wb_mem_c.rdata <= (others => '0');
    wb_mem_c.ack   <= '0';
    if (ext_mem_c.ack(ext_mem_c_latency_c-1) = '1') and (wb_mem_c.cyc = '1') then
      wb_mem_c.rdata <= ext_mem_c.rdata(ext_mem_c_latency_c-1);
      wb_mem_c.ack   <= '1';
    end if;

  end if;
end process ext_mem_c_access;
-- Wishbone IRQ Triggers ------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
-- Wishbone-accessible interrupt trigger: a full-word write to the trigger
-- address sets the machine software / machine external interrupt lines from
-- bits 3 and 11 of the written data. Reads always return zero.
irq_trigger: process(rst_gen, clk_gen)
  variable wr_hit_v : std_ulogic; -- '1' for a valid full-word write access
begin
  if (rst_gen = '0') then
    -- also reset the bus response so that no 'U' reaches the OR-combined
    -- CPU read-back signals while reset is active
    wb_irq.rdata <= (others => '0');
    wb_irq.ack   <= '0';
    wb_irq.err   <= '0';
    msi_ring     <= '0';
    mei_ring     <= '0';
  elsif rising_edge(clk_gen) then
    -- access decode (all four byte enables have to be set) --
    wr_hit_v := wb_irq.cyc and wb_irq.stb and wb_irq.we and and_reduce_f(wb_irq.sel);
    -- bus interface --
    wb_irq.rdata <= (others => '0');
    wb_irq.ack   <= wr_hit_v;
    wb_irq.err   <= '0';
    -- trigger RISC-V platform IRQs --
    if (wr_hit_v = '1') then
      msi_ring <= wb_irq.wdata(03); -- machine software interrupt
      mei_ring <= wb_irq.wdata(11); -- machine external interrupt
    end if;
  end if;
end process irq_trigger;
-- Exit simulation ------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
-- Ends the simulation as soon as GPIO bit 32 is seen high.
-- Written with an explicit wait statement at the end of the process, which is
-- the equivalent form of a sensitivity list on gpio.
wait_for_sim_exit: process
begin
  if (gpio(32) = '1') then
    finish;
  end if;
  wait on gpio;
end process wait_for_sim_exit;
end neorv32_tb_benchmark_rtl;

View file

@ -1,77 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_real.all;
use std.textio.all;
-- Simulation-only UART receiver. Samples uart_txd with clk, reports every
-- received byte via "report" and logs the characters line by line to the
-- text file "neorv32.testbench_<name>.out".
entity uart_rx_simple is
  generic (
    name            : string; -- tag used in the report messages and in the log file name
    uart_baud_val_c : real    -- baud counter reload value, in clk cycles per UART bit
  );
  port (
    clk      : in std_ulogic; -- sampling clock, rising edge
    uart_txd : in std_ulogic  -- serial line to be monitored
  );
end entity;

architecture a of uart_rx_simple is

  signal uart_rx_sync     : std_ulogic_vector(04 downto 0) := (others => '1'); -- input sample history
  signal uart_rx_busy     : std_ulogic := '0'; -- '1' while a frame is being received
  signal uart_rx_sreg     : std_ulogic_vector(08 downto 0) := (others => '0'); -- receive shift register, bits 8..1 form the data byte
  signal uart_rx_baud_cnt : real; -- down-counter until the next sample point
  signal uart_rx_bitcnt   : natural; -- remaining samples of the current frame

  file file_uart_tx_out : text open write_mode is "neorv32.testbench_" & name & ".out";

begin

  uart_rx_console : process(clk)
    variable i : integer; -- code of the received character
    variable l : line;    -- collects the characters of the current log line
  begin
    -- "UART" --
    if rising_edge(clk) then
      -- synchronizer --
      uart_rx_sync <= uart_rx_sync(3 downto 0) & uart_txd;
      -- arbiter --
      if (uart_rx_busy = '0') then -- idle: keep counters armed and wait for a start bit
        uart_rx_baud_cnt <= round(0.5 * uart_baud_val_c); -- first sample after half a bit time
        uart_rx_bitcnt   <= 9;
        if (uart_rx_sync(4 downto 1) = "1100") then -- start bit? (falling edge)
          uart_rx_busy <= '1';
        end if;
      else
        if (uart_rx_baud_cnt <= 0.0) then -- sample point reached
          if (uart_rx_bitcnt = 1) then
            uart_rx_baud_cnt <= round(0.5 * uart_baud_val_c);
          else
            uart_rx_baud_cnt <= round(uart_baud_val_c);
          end if;
          if (uart_rx_bitcnt = 0) then
            uart_rx_busy <= '0'; -- done
            i := to_integer(unsigned(uart_rx_sreg(8 downto 1)));
            -- printable ASCII is 32..126; 127 (DEL) is a control character
            if (i < 32) or (i > 126) then -- non-printable char?
              report name & ".tx: (" & integer'image(i) & ")"; -- print code
            else
              report name & ".tx: " & character'val(i); -- print ASCII
            end if;
            if (i = 10) then -- Linux line break
              writeline(file_uart_tx_out, l);
            elsif (i /= 13) then -- Remove additional carriage return
              write(l, character'val(i));
            end if;
          else
            uart_rx_sreg   <= uart_rx_sync(4) & uart_rx_sreg(8 downto 1);
            uart_rx_bitcnt <= uart_rx_bitcnt - 1;
          end if;
        else
          uart_rx_baud_cnt <= uart_rx_baud_cnt - 1.0;
        end if;
      end if;
    end if;
  end process uart_rx_console;

end architecture;

View file

@ -26,7 +26,8 @@ if [ -z "$1" ]
then
GHDL_RUN_ARGS="${@:---stop-time=10ms}"
else
GHDL_RUN_ARGS=$1
# Pass all command-line parameters down to GHDL instead of just the first one
GHDL_RUN_ARGS=$@
fi
echo "Using simulation run arguments: $GHDL_RUN_ARGS";

View file

@ -6,4 +6,5 @@ set -e
cd $(dirname "$0")
./ghdl.setup.sh
./ghdl.run.sh $1
# We want to be able to pass down more than 1 parameter to GHDL
./ghdl.run.sh $@

View file

@ -48,6 +48,13 @@ use neorv32.neorv32_application_image.all; -- this file is generated by the imag
use std.textio.all;
-- Testbench entity; the generics below allow performance-related features to be
-- selected per simulation run (e.g. via GHDL "-g<NAME>=<value>" run arguments).
entity neorv32_tb_simple is
generic (
FAST_MUL_ENABLE : boolean := true; -- control fast (single-cycle) multiplier enablement, default is true
FAST_SHIFT_ENBABLE : boolean := true; -- control fast (single-cycle) shifter enablement, default is true; NOTE(review): name is misspelled ("ENBABLE") but is part of the interface
DCACHE_ENABLE : boolean := true; -- control D$ enablement, default is true
IMEM_SIZE : natural := 32*1024; -- set the size of IMEM in bytes, default is 32kB
ICACHE_ENABLE : boolean := true -- control I$ enablement, default is true
);
end neorv32_tb_simple;
architecture neorv32_tb_simple_rtl of neorv32_tb_simple is
@ -57,13 +64,18 @@ architecture neorv32_tb_simple_rtl of neorv32_tb_simple is
-- general --
constant int_imem_c : boolean := true; -- true: use proc-internal IMEM, false: use external simulated IMEM (ext. mem A)
constant int_dmem_c : boolean := true; -- true: use proc-internal DMEM, false: use external simulated DMEM (ext. mem B)
constant imem_size_c : natural := 32*1024; -- size in bytes of processor-internal IMEM / external mem A
constant imem_size_c : natural := IMEM_SIZE; -- size in bytes of processor-internal IMEM / external mem A
constant dmem_size_c : natural := 8*1024; -- size in bytes of processor-internal DMEM / external mem B
constant f_clock_c : natural := 100000000; -- main clock in Hz
constant baud0_rate_c : natural := 19200; -- simulation UART0 (primary UART) baud rate
constant baud1_rate_c : natural := 19200; -- simulation UART1 (secondary UART) baud rate
constant icache_en_c : boolean := true; -- implement i-cache
constant dcache_en_c : boolean := DCACHE_ENABLE; -- implement d-cache
constant dcache_block_size_c : natural := 64; -- d-cache block size in bytes
constant icache_en_c : boolean := ICACHE_ENABLE; -- implement i-cache
constant icache_block_size_c : natural := 64; -- i-cache block size in bytes
-- performance --
constant fast_mul_en_c : boolean := FAST_MUL_ENABLE; -- true: enable single cycle integer multiplication
constant fast_shift_en_c : boolean := FAST_SHIFT_ENBABLE; -- true: enable single cycle integer shift
-- simulated external Wishbone memory A (can be used as external IMEM) --
constant ext_mem_a_base_addr_c : std_ulogic_vector(31 downto 0) := x"00000000"; -- wishbone memory base address (external IMEM base)
constant ext_mem_a_size_c : natural := imem_size_c; -- wishbone memory size in bytes
@ -182,8 +194,8 @@ begin
CPU_EXTENSION_RISCV_Zmmul => false, -- implement multiply-only M sub-extension?
CPU_EXTENSION_RISCV_Zxcfu => true, -- implement custom (instr.) functions unit?
-- Extension Options --
FAST_MUL_EN => true, -- use DSPs for M extension's multiplier
FAST_SHIFT_EN => true, -- use barrel shifter for shift operations
FAST_MUL_EN => fast_mul_en_c, -- use DSPs for M extension's multiplier
FAST_SHIFT_EN => fast_shift_en_c, -- use barrel shifter for shift operations
REGFILE_HW_RST => false, -- no hardware reset
-- Physical Memory Protection (PMP) --
PMP_NUM_REGIONS => 5, -- number of regions (0..16)
@ -207,9 +219,9 @@ begin
ICACHE_BLOCK_SIZE => icache_block_size_c, -- i-cache: block size in bytes (min 4), has to be a power of 2
ICACHE_ASSOCIATIVITY => 2, -- i-cache: associativity / number of sets (1=direct_mapped), has to be a power of 2
-- Internal Data Cache (dCACHE) --
DCACHE_EN => true, -- implement data cache
DCACHE_EN => dcache_en_c, -- implement data cache
DCACHE_NUM_BLOCKS => 8, -- d-cache: number of blocks (min 1), has to be a power of 2
DCACHE_BLOCK_SIZE => 64, -- d-cache: block size in bytes (min 4), has to be a power of 2
DCACHE_BLOCK_SIZE => dcache_block_size_c, -- d-cache: block size in bytes (min 4), has to be a power of 2
-- External bus interface --
XBUS_EN => true, -- implement external memory bus interface?
XBUS_TIMEOUT => 256, -- cycles after a pending bus access auto-terminates (0 = disabled)

View file

@ -81,9 +81,7 @@ NEORV32_EXG_PATH = $(NEORV32_HOME)/sw/image_gen
# Path to NEORV32 core rtl folder
NEORV32_RTL_PATH = $(NEORV32_LOCAL_RTL)/core
# Path to NEORV32 sim folder
NEORV32_SIM_PATH = $(NEORV32_HOME)/sim
# Path to NEORV32 test folder
NEORV32_SIM_FOLDER ?= $(NEORV32_SIM_PATH)/simple
NEORV32_SIM_PATH = $(NEORV32_HOME)/sim/simple
# Marker file to check for NEORV32 home folder
NEORV32_HOME_MARKER = $(NEORV32_INC_PATH)/neorv32.h
@ -300,7 +298,7 @@ endif
# -----------------------------------------------------------------------------
sim: $(APP_IMG) install
@echo "Simulating processor using simple testbench..."
@sh $(NEORV32_SIM_FOLDER)/ghdl.sh $(GHDL_RUN_FLAGS)
@sh $(NEORV32_SIM_PATH)/ghdl.sh $(GHDL_RUN_FLAGS)
# -----------------------------------------------------------------------------

View file

@ -1742,7 +1742,7 @@ int main() {
neorv32_uart0_printf("\nbge - branch backward - tot. %d cyc\n", stopTime - startTime);
neorv32_uart0_printf("\ntotal %d cyc\n", totalTime);
#endif
neorv32_uart0_printf("\nbge rs1,rs2,imm inst %d cyc\n", (stopTime - startTime)/(instLoop * instCalls));
neorv32_uart0_printf("\nbge rs1,rs2,imm branch backward inst %d cyc\n", (stopTime - startTime)/(instLoop * instCalls));
#endif
#if rv32I_branch_bltu == 1

View file

@ -1,6 +1,5 @@
# Modify this variable to fit your NEORV32 setup (neorv32 home folder)
NEORV32_HOME ?= ../../../..
NEORV32_SIM_FOLDER = $(NEORV32_SIM_PATH)/benchmark
# Default simulator run arguments; "?=" keeps any value already set on the command line or in the environment.
# -g<NAME>=<value> overrides a testbench generic, --stop-time limits the simulated time.
# NOTE(review): 132072 is not a power of two (128 KiB would be 131072) - confirm the intended IMEM size.
GHDL_RUN_FLAGS ?= -gICACHE_ENABLE=false -gDCACHE_ENABLE=false -gIMEM_SIZE=132072 --stop-time=4500us
include $(NEORV32_HOME)/sw/common/common.mk

View file

@ -1,6 +1,6 @@
# Modify this variable to fit your NEORV32 setup (neorv32 home folder)
NEORV32_HOME ?= ../../../..
# Default target architecture string; "?=" allows it to be overridden by the caller.
MARCH ?= rv32im_zicsr_zifencei
NEORV32_SIM_FOLDER = $(NEORV32_SIM_PATH)/benchmark
# Default simulator run arguments; "?=" keeps any value already set on the command line or in the environment.
# -g<NAME>=<value> overrides a testbench generic, --stop-time limits the simulated time.
# NOTE(review): 132072 is not a power of two (128 KiB would be 131072) - confirm the intended IMEM size.
GHDL_RUN_FLAGS ?= -gICACHE_ENABLE=false -gDCACHE_ENABLE=false -gIMEM_SIZE=132072 --stop-time=1500us
include $(NEORV32_HOME)/sw/common/common.mk

View file

@ -1,6 +1,6 @@
# Modify this variable to fit your NEORV32 setup (neorv32 home folder)
NEORV32_HOME ?= ../../../..
# Default target architecture string; "?=" allows it to be overridden by the caller.
MARCH ?= rv32i_zicsr_zifencei_zfinx
NEORV32_SIM_FOLDER = $(NEORV32_SIM_PATH)/benchmark
# Default simulator run arguments; "?=" keeps any value already set on the command line or in the environment.
# -g<NAME>=<value> overrides a testbench generic, --stop-time limits the simulated time.
# NOTE(review): 132072 is not a power of two (128 KiB would be 131072) - confirm the intended IMEM size.
GHDL_RUN_FLAGS ?= -gICACHE_ENABLE=false -gDCACHE_ENABLE=false -gIMEM_SIZE=132072 --stop-time=4500us
include $(NEORV32_HOME)/sw/common/common.mk

View file

@ -1,14 +1,14 @@
#!/bin/bash
# Builds and simulates the performance tests in the I, M and Zfinx sub-directories, one after another.
# Each build passes a set of -D defines through USER_FLAGS and invokes the clean_all and exe targets;
# "make sim" is then run in the same directory.
# NOTE(review): the result of "cd" is not checked, so if a directory is missing the following
# commands run in the wrong place - consider "cd I || exit 1" or "set -e".

# --- I test ---
cd I
make USER_FLAGS+=-DRUN_CHECK USER_FLAGS+=-DUART0_SIM_MODE USER_FLAGS+=-DSILENT_MODE USER_FLAGS+=-Drv32I_all clean_all exe
make USER_FLAGS+=-DRUN_CHECK USER_FLAGS+=-DUART0_SIM_MODE USER_FLAGS+=-DSILENT_MODE USER_FLAGS+=-Drv32_all clean_all exe
make sim
cd ..
# --- M test ---
cd M
make USER_FLAGS+=-DRUN_CHECK USER_FLAGS+=-DUART0_SIM_MODE USER_FLAGS+=-DSILENT_MODE USER_FLAGS+=-Drv32M_all clean_all exe
make USER_FLAGS+=-DRUN_CHECK USER_FLAGS+=-DUART0_SIM_MODE USER_FLAGS+=-DSILENT_MODE USER_FLAGS+=-Drv32_all clean_all exe
make sim
cd ..
# --- Zfinx test ---
cd Zfinx
make USER_FLAGS+=-DRUN_CHECK USER_FLAGS+=-DUART0_SIM_MODE USER_FLAGS+=-DSILENT_MODE USER_FLAGS+=-Drv32Zfinx_all clean_all exe
make USER_FLAGS+=-DRUN_CHECK USER_FLAGS+=-DUART0_SIM_MODE USER_FLAGS+=-DSILENT_MODE USER_FLAGS+=-Drv32_all clean_all exe
make sim
cd ..