diff --git a/CHANGELOG.md b/CHANGELOG.md index 5305234c..435d1beb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ defined by the `hw_version_c` constant in the main VHDL package file [`rtl/core/ | Date (*dd.mm.yyyy*) | Version | Comment | |:----------:|:-------:|:--------| +| 08.06.2021 | 1.5.6.7 | clean-up of Wishbone interface module (dead code removal); added new package constant `wb_rx_buffer_c` to configure SYNC (default) or ASYNC Wishbone RX path (allows trade-off between performance/latency and timing closure); :warning: renamed package constant `xbus_big_endian_c` to `wb_big_endian_c` | | 06.06.2021 | 1.5.6.6 | :bug: fixed bug in PWM base address configuration; :warning: removed user-access HPM counter access via `hpmcounter3[h]`:`hpmcounter3[h]` CSRs, hardwaired according `mcounteren` bits to zero: HPM can only be used in machine mode; reworded 64-bit counters (`cycle`, `instret`, `hpmcounter` + `mtime`) overflow logic: now using dedicated CARRY chain instead of overflow detector (can improve timing); | | 05.06.2021 | 1.5.6.5 | removed debug mode's `stepie` flag (used to allow interrupts during single-stepping) as the debugger can emulate interrupts | | 04.06.2021 | 1.5.6.4 | :warning: removed `IO_PWM_EN` generic, replaced by `IO_PWM_NUM_CH` generic - PWM ontroller now supports implementation of up to 60 channels via `IO_PWM_NUM_CH` (`IO_PWM_NUM_CH` = 0 will omit the PWM controller); :bug: fixed minor bug in `minstreth` counter logic | diff --git a/docs/datasheet/soc_sysinfo.adoc b/docs/datasheet/soc_sysinfo.adoc index 797c0465..06759629 100644 --- a/docs/datasheet/soc_sysinfo.adoc +++ b/docs/datasheet/soc_sysinfo.adoc @@ -47,7 +47,7 @@ and default clock speed) for correct operation. 
| `2` | _SYSINFO_FEATURES_MEM_INT_IMEM_ | set if the processor-internal DMEM implemented (via top's _MEM_INT_DMEM_EN_ generic) | `3` | _SYSINFO_FEATURES_MEM_INT_IMEM_ROM_ | set if the processor-internal IMEM is read-only (via top's _MEM_INT_IMEM_ROM_ generic) | `4` | _SYSINFO_FEATURES_MEM_INT_DMEM_ | set if the processor-internal IMEM is implemented (via top's _MEM_INT_IMEM_EN_ generic) -| `5` | _SYSINFO_FEATURES_MEM_EXT_ENDIAN_ | set if external bus interface uses BIG-endian byte-order (via package's `xbus_big_endian_c` constant) +| `5` | _SYSINFO_FEATURES_MEM_EXT_ENDIAN_ | set if external bus interface uses BIG-endian byte-order (via package's `wb_big_endian_c` constant) | `6` | _SYSINFO_FEATURES_ICACHE_ | set if processor-internal instruction cache is implemented (via _ICACHE_EN_ generic) | `14` | _SYSINFO_FEATURES_HW_RESET_ | set if on-chip debugger implemented (via _ON_CHIP_DEBUGGER_EN_ generic) | `15` | _SYSINFO_FEATURES_HW_RST_ | set if a dedicated hardware reset of all core registers is implemented (via package's _dedicated_reset_c_ constant) diff --git a/docs/datasheet/soc_wishbone.adoc b/docs/datasheet/soc_wishbone.adoc index c62f39f4..6f21659e 100644 --- a/docs/datasheet/soc_wishbone.adoc +++ b/docs/datasheet/soc_wishbone.adoc @@ -23,7 +23,8 @@ | Configuration generics: | _MEM_EXT_EN_ | enable external memory interface when _true_ | | _MEM_EXT_TIMEOUT_ | number of clock cycles after which an unacknowledged external bus access will auto-terminate (0 = disabled) | Configuration constants in VHDL package file `neorv32_package.vhd`: | `wb_pipe_mode_c` | when _false_ (default): classic/standard Wishbone protocol; when _true_: pipelined Wishbone protocol -| | `xbus_big_endian_c` | byte-order (Endianness) of external memory interface; true=BIG, false=little (default) +| | `wb_big_endian_c` | byte-order (Endianness) of external memory interface; true=BIG, false=little (default) +| | `wb_rx_buffer_c` | enable register buffer for RX path (default) | CPU 
interrupts: | none | |======================= @@ -49,7 +50,7 @@ in the in the main VHDL package file (`rtl/neorv32_package.vhd`): [source,vhdl] ---- --- (external) bus interface -- +-- external bus interface -- constant wb_pipe_mode_c : boolean := false; ---- @@ -76,8 +77,19 @@ project. **Interface Latency** -The Wishbone gateway introduces two additional latency cycles: Processor-outgoing and -incoming signals -are fully registered. Thus, any access from the CPU to a processor-external devices requires +2 clock cycles. +By default, the Wishbone gateway introduces two additional latency cycles: processor-outgoing ("TX") and +processor-incoming ("RX") signals are fully registered. Thus, any access from the CPU to processor-external devices +via Wishbone requires 2 additional clock cycles (at least; depending on device's latency). + +If the attached Wishbone network / peripheral already provides output registers or if the Wishbone network is not relevant +for timing closure, the default buffering of incoming ("RX") data within the gateway can be disabled. +The configuration is done via the `wb_rx_buffer_c` constant in the main VHDL package file (`rtl/neorv32_package.vhd`): + +[source,vhdl] +---- +-- external bus interface -- +constant wb_rx_buffer_c : boolean := false; -- false to implement "async" RX (non-default) +---- **Bus Access Timeout** @@ -125,14 +137,14 @@ See section <<_bus_interface>> for the CPU bus interface protocol. The NEORV32 CPU and the Processor setup are *little-endian* architectures. To allow direct connection to a big-endian memory system the external bus interface provides an _Endianness configuration_. The -Endianness (of the external memory interface) can be configured via the global `xbus_big_endian_c` +Endianness (of the external memory interface) can be configured via the global `wb_big_endian_c` constant in the main VHDL package file (`rtl/neorv32_package.vhd`). 
By default, the external memory interface uses little-endian byte-order. [source,vhdl] ---- --- (external) bus interface -- -constant xbus_big_endian_c : boolean := true; +-- external bus interface -- +constant wb_big_endian_c : boolean := true; ---- Application software can check the Endianness configuration of the external bus interface via the diff --git a/rtl/core/neorv32_package.vhd b/rtl/core/neorv32_package.vhd index f5940c0d..94345c84 100644 --- a/rtl/core/neorv32_package.vhd +++ b/rtl/core/neorv32_package.vhd @@ -45,8 +45,9 @@ package neorv32_package is constant dspace_base_c : std_ulogic_vector(31 downto 0) := x"80000000"; -- default data memory address space base address -- external bus interface -- - constant wb_pipe_mode_c : boolean := false; -- external bus protocol: false=classic/standard wishbone mode (default), true=pipelined wishbone mode - constant xbus_big_endian_c : boolean := false; -- external memory access byte order: true=big-endian, false=little-endian (default) + constant wb_pipe_mode_c : boolean := false; -- protocol: false=classic/standard wishbone mode (default), true=pipelined wishbone mode + constant wb_big_endian_c : boolean := false; -- byte order: true=big-endian, false=little-endian (default) + constant wb_rx_buffer_c : boolean := true; -- use register buffer for RX data when true (default) -- CPU core -- constant ipb_entries_c : natural := 4; -- entries in CPU instruction prefetch buffer, has to be a power of 2, default=2 @@ -87,7 +88,7 @@ package neorv32_package is -- Architecture Constants (do not modify!) ------------------------------------------------ -- ------------------------------------------------------------------------------------------- constant data_width_c : natural := 32; -- native data path width - do not change! - constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01050606"; -- no touchy! + constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01050607"; -- no touchy! 
constant archid_c : natural := 19; -- official NEORV32 architecture ID - hands off! constant rf_r0_is_reg_c : boolean := true; -- x0 is a *physical register* that has to be initialized to zero by the CPU constant def_rst_val_c : std_ulogic := cond_sel_stdulogic_f(dedicated_reset_c, '0', '-'); @@ -1648,7 +1649,6 @@ package neorv32_package is -- ------------------------------------------------------------------------------------------- component neorv32_wishbone generic ( - WB_PIPELINED_MODE : boolean := false; -- false: classic/standard wishbone mode, true: pipelined wishbone mode -- Internal instruction memory -- MEM_INT_IMEM_EN : boolean := true; -- implement processor-internal instruction memory MEM_INT_IMEM_SIZE : natural := 8*1024; -- size of processor-internal instruction memory in bytes diff --git a/rtl/core/neorv32_sysinfo.vhd b/rtl/core/neorv32_sysinfo.vhd index 7342dccb..f56f2438 100644 --- a/rtl/core/neorv32_sysinfo.vhd +++ b/rtl/core/neorv32_sysinfo.vhd @@ -130,7 +130,7 @@ begin sysinfo_mem(2)(02) <= bool_to_ulogic_f(MEM_INT_IMEM_EN); -- processor-internal instruction memory implemented? sysinfo_mem(2)(03) <= bool_to_ulogic_f(MEM_INT_IMEM_ROM); -- processor-internal instruction memory implemented as ROM? sysinfo_mem(2)(04) <= bool_to_ulogic_f(MEM_INT_DMEM_EN); -- processor-internal data memory implemented? - sysinfo_mem(2)(05) <= bool_to_ulogic_f(xbus_big_endian_c); -- is external memory bus interface using BIG-endian byte-order? + sysinfo_mem(2)(05) <= bool_to_ulogic_f(wb_big_endian_c); -- is external memory bus interface using BIG-endian byte-order? sysinfo_mem(2)(06) <= bool_to_ulogic_f(ICACHE_EN); -- processor-internal instruction cache implemented? 
-- sysinfo_mem(2)(13 downto 07) <= (others => '0'); -- reserved diff --git a/rtl/core/neorv32_top.vhd b/rtl/core/neorv32_top.vhd index 9eb1f112..68c80954 100644 --- a/rtl/core/neorv32_top.vhd +++ b/rtl/core/neorv32_top.vhd @@ -739,7 +739,6 @@ begin if (MEM_EXT_EN = true) generate neorv32_wishbone_inst: neorv32_wishbone generic map ( - WB_PIPELINED_MODE => wb_pipe_mode_c, -- false: classic/standard wishbone mode, true: pipelined wishbone mode -- Internal instruction memory -- MEM_INT_IMEM_EN => MEM_INT_IMEM_EN, -- implement processor-internal instruction memory MEM_INT_IMEM_SIZE => MEM_INT_IMEM_SIZE, -- size of processor-internal instruction memory in bytes diff --git a/rtl/core/neorv32_wishbone.vhd b/rtl/core/neorv32_wishbone.vhd index f379cd94..34ab9352 100644 --- a/rtl/core/neorv32_wishbone.vhd +++ b/rtl/core/neorv32_wishbone.vhd @@ -1,20 +1,19 @@ -- ################################################################################################# -- # << NEORV32 - External Bus Interface (WISHBONE) >> # -- # ********************************************************************************************* # --- # The interface provides registers for all outgoing and for all incoming signals. If the host # --- # cancels an active transfer, the Wishbone arbiter still waits some time for the bus system to # --- # ACK/ERR the transfer before the arbiter forces termination. # +-- # All bus accesses from the CPU, which do not target the internal IO region / the internal # +-- # bootloader / the internal instruction or data memories (if implemented), are delegated via # +-- # this Wishbone gateway to the external bus interface. Accessed peripherals can have a response # +-- # latency of up to BUS_TIMEOUT - 1 cycles. # -- # # -- # Even when all processor-internal memories and IO devices are disabled, the EXTERNAL address # -- # space ENDS at address 0xffff0000 (begin of internal BOOTROM address space). 
# -- # # --- # All bus accesses from the CPU, which do not target the internal IO region / the internal # --- # bootloader / the internal instruction or data memories (if implemented), are delegated via # --- # this Wishbone gateway to the external bus interface. Accessed peripherals can have a response # --- # latency of up to BUS_TIMEOUT - 2 cycles. # +-- # By default (pkg.wb_rx_buffer_c = true) ALL OUTGOING AND ALL INCOMING signals are registered, # +-- # adding (at least) 2 latency cycles. If false, read data and ACK bypass the RX registers. # -- # # --- # This interface supports classic/standard Wishbone transactions (WB_PIPELINED_MODE = false) # --- # and also pipelined transactions (WB_PIPELINED_MODE = true). # +-- # This interface supports classic/standard Wishbone transactions (pkg.wb_pipe_mode_c = false) # +-- # and also pipelined transactions (pkg.wb_pipe_mode_c = true). # -- # ********************************************************************************************* # -- # BSD 3-Clause License # -- # # @@ -56,7 +55,6 @@ use neorv32.neorv32_package.all; entity neorv32_wishbone is generic ( - WB_PIPELINED_MODE : boolean := false; -- false: classic/standard wishbone mode, true: pipelined wishbone mode -- Internal instruction memory -- MEM_INT_IMEM_EN : boolean := true; -- implement processor-internal instruction memory MEM_INT_IMEM_SIZE : natural := 8*1024; -- size of processor-internal instruction memory in bytes @@ -109,42 +107,48 @@ architecture neorv32_wishbone_rtl of neorv32_wishbone is signal xbus_access : std_ulogic; -- bus arbiter - type ctrl_state_t is (IDLE, BUSY, RESYNC); + type ctrl_state_t is (IDLE, BUSY); type ctrl_t is record state : ctrl_state_t; we : std_ulogic; - rd_req : std_ulogic; - wr_req : std_ulogic; adr : std_ulogic_vector(31 downto 0); wdat : std_ulogic_vector(31 downto 0); rdat : std_ulogic_vector(31 downto 0); - sel : std_ulogic_vector(3 downto 0); + sel : std_ulogic_vector(03 downto 0); ack : std_ulogic; err : 
std_ulogic; timeout : 
std_ulogic_vector(index_size_f(BUS_TIMEOUT)-1 downto 0); src : std_ulogic; lock : std_ulogic; - priv : std_ulogic_vector(1 downto 0); + priv : std_ulogic_vector(01 downto 0); end record; signal ctrl : ctrl_t; signal stb_int : std_ulogic; signal cyc_int : std_ulogic; + signal rdata : std_ulogic_vector(31 downto 0); + + -- async RX mode -- + signal ack_gated : std_ulogic; + signal rdata_gated : std_ulogic_vector(31 downto 0); begin -- Sanity Checks -------------------------------------------------------------------------- -- ------------------------------------------------------------------------------------------- + -- protocol -- + assert not (wb_pipe_mode_c = false) report "NEORV32 PROCESSOR CONFIG NOTE: External Bus Interface - Implementing STANDARD Wishbone protocol." severity note; + assert not (wb_pipe_mode_c = true) report "NEORV32 PROCESSOR CONFIG NOTE: External Bus Interface - Implementing PIEPLINED Wishbone protocol." severity note; + -- bus timeout -- assert not (BUS_TIMEOUT /= 0) report "NEORV32 PROCESSOR CONFIG NOTE: External Bus Interface - Implementing auto-timeout (" & integer'image(BUS_TIMEOUT) & " cycles)." severity note; assert not (BUS_TIMEOUT = 0) report "NEORV32 PROCESSOR CONFIG NOTE: External Bus Interface - Implementing no auto-timeout (can cause permanent CPU stall!)." severity note; - -- external memory interface protocol -- - assert not (wb_pipe_mode_c = false) report "NEORV32 PROCESSOR CONFIG NOTE: External Bus Interface - Implementing STANDARD Wishbone protocol." severity note; - assert not (wb_pipe_mode_c = true) report "NEORV32 PROCESSOR CONFIG NOTE: External Bus Interface - Implementing PIEPLINED Wishbone protocol." severity note; - -- endianness -- - assert not (xbus_big_endian_c = false) report "NEORV32 PROCESSOR CONFIG NOTE: External Bus Interface - Implementing LITTLE-ENDIAN byte order." 
severity note; - assert not (xbus_big_endian_c = true) report "NEORV32 PROCESSOR CONFIG NOTE: External Bus Interface - Implementing BIG-ENDIAN byte." severity note; + assert not (wb_big_endian_c = false) report "NEORV32 PROCESSOR CONFIG NOTE: External Bus Interface - Implementing LITTLE-endian byte order." severity note; + assert not (wb_big_endian_c = true) report "NEORV32 PROCESSOR CONFIG NOTE: External Bus Interface - Implementing BIG-endian byte." severity note; + + -- async RX -- + assert not (wb_rx_buffer_c = false) report "NEORV32 PROCESSOR CONFIG NOTE: External Bus Interface - Implementing ASYNC RX path." severity note; -- Access Control ------------------------------------------------------------------------- @@ -157,6 +161,7 @@ begin -- actual external bus access? -- xbus_access <= (not int_imem_acc) and (not int_dmem_acc) and (not int_boot_acc); + -- Bus Arbiter ----------------------------------------------------------------------------- -- ------------------------------------------------------------------------------------------- bus_arbiter: process(rstn_i, clk_i) @@ -164,8 +169,6 @@ begin if (rstn_i = '0') then ctrl.state <= IDLE; ctrl.we <= def_rst_val_c; - ctrl.rd_req <= '0'; - ctrl.wr_req <= '0'; ctrl.adr <= (others => def_rst_val_c); ctrl.wdat <= (others => def_rst_val_c); ctrl.rdat <= (others => def_rst_val_c); @@ -178,7 +181,7 @@ begin ctrl.priv <= (others => def_rst_val_c); elsif rising_edge(clk_i) then -- defaults -- - ctrl.rdat <= (others => '0'); + ctrl.rdat <= (others => '0'); -- required for internal output gating ctrl.ack <= '0'; ctrl.err <= '0'; ctrl.timeout <= std_ulogic_vector(to_unsigned(BUS_TIMEOUT, index_size_f(BUS_TIMEOUT))); @@ -188,12 +191,10 @@ begin when IDLE => -- waiting for host request -- ------------------------------------------------------------ - ctrl.rd_req <= '0'; - ctrl.wr_req <= '0'; -- buffer all outgoing signals -- - ctrl.we <= wren_i or ctrl.wr_req; + ctrl.we <= wren_i; ctrl.adr <= addr_i; - if 
(xbus_big_endian_c = true) then -- big-endian + if (wb_big_endian_c = true) then -- big-endian ctrl.wdat <= bswap32_f(data_i); ctrl.sel <= bit_rev_f(ben_i); else -- little-endian @@ -204,36 +205,26 @@ begin ctrl.lock <= lock_i; ctrl.priv <= priv_i; -- valid new or buffered read/write request -- - if ((xbus_access and (wren_i or ctrl.wr_req or rden_i or ctrl.rd_req)) = '1') then + if ((xbus_access and (wren_i or rden_i)) = '1') then ctrl.state <= BUSY; end if; when BUSY => -- transfer in progress -- ------------------------------------------------------------ ctrl.rdat <= wb_dat_i; - if (wb_err_i = '1') then -- abnormal bus termination + if (wb_err_i = '1') or -- abnormal bus termination + ((timeout_en_c = true) and (or_reduce_f(ctrl.timeout) = '0')) then -- valid timeout ctrl.err <= '1'; ctrl.state <= IDLE; elsif (wb_ack_i = '1') then -- normal bus termination ctrl.ack <= '1'; ctrl.state <= IDLE; - elsif (timeout_en_c = true) and (or_reduce_f(ctrl.timeout) = '0') then -- valid timeout - ctrl.err <= '1'; - ctrl.state <= IDLE; end if; -- timeout counter -- if (timeout_en_c = true) then ctrl.timeout <= std_ulogic_vector(unsigned(ctrl.timeout) - 1); -- timeout counter end if; - when RESYNC => -- make sure transfer is done! 
- -- ------------------------------------------------------------ - ctrl.wr_req <= ctrl.wr_req or wren_i; -- buffer new request - ctrl.rd_req <= ctrl.rd_req or rden_i; -- buffer new request - if (wb_ack_i = '0') then - ctrl.state <= IDLE; - end if; - when others => -- undefined -- ------------------------------------------------------------ ctrl.state <= IDLE; @@ -243,8 +234,12 @@ begin end process bus_arbiter; -- host access -- - data_o <= ctrl.rdat when (xbus_big_endian_c = false) else bswap32_f(ctrl.rdat); -- endianness conversion - ack_o <= ctrl.ack; + ack_gated <= wb_ack_i when (ctrl.state = BUSY) else '0'; -- CPU ack gate for "async" RX + rdata_gated <= wb_dat_i when (ctrl.state = BUSY) else (others => '0'); -- CPU read data gate for "async" RX + rdata <= ctrl.rdat when (wb_rx_buffer_c = true) else rdata_gated; + + data_o <= rdata when (wb_big_endian_c = false) else bswap32_f(rdata); -- endianness conversion + ack_o <= ctrl.ack when (wb_rx_buffer_c = true) else ack_gated; err_o <= ctrl.err; -- wishbone interface -- @@ -254,15 +249,15 @@ begin wb_lock_o <= ctrl.lock; -- 1 = exclusive access request - wb_adr_o <= ctrl.adr; - wb_dat_o <= ctrl.wdat; - wb_we_o <= ctrl.we; - wb_sel_o <= ctrl.sel; - wb_stb_o <= stb_int when (WB_PIPELINED_MODE = true) else cyc_int; - wb_cyc_o <= cyc_int; + wb_adr_o <= ctrl.adr; + wb_dat_o <= ctrl.wdat; + wb_we_o <= ctrl.we; + wb_sel_o <= ctrl.sel; + wb_stb_o <= stb_int when (wb_pipe_mode_c = true) else cyc_int; + wb_cyc_o <= cyc_int; stb_int <= '1' when (ctrl.state = BUSY) else '0'; - cyc_int <= '0' when (ctrl.state = IDLE) or (ctrl.state = RESYNC) else '1'; + cyc_int <= '1' when (ctrl.state = BUSY) else '0'; end neorv32_wishbone_rtl; diff --git a/sim/neorv32_tb.vhd b/sim/neorv32_tb.vhd index 7e4a88c5..ce047cc6 100644 --- a/sim/neorv32_tb.vhd +++ b/sim/neorv32_tb.vhd @@ -56,7 +56,6 @@ architecture neorv32_tb_rtl of neorv32_tb is -- general -- constant ext_imem_c : boolean := false; -- false: use and boot from proc-internal IMEM, 
true: use and boot from external (initialized) simulated IMEM (ext. mem A) constant ext_dmem_c : boolean := false; -- false: use proc-internal DMEM, true: use external simulated DMEM (ext. mem B) - constant icache_en_c : boolean := true; -- set true to use processor-internal instruction cache constant imem_size_c : natural := 16*1024; -- size in bytes of processor-internal IMEM / external mem A constant dmem_size_c : natural := 8*1024; -- size in bytes of processor-internal DMEM / external mem B constant f_clock_c : natural := 100000000; -- main clock in Hz @@ -155,7 +154,7 @@ architecture neorv32_tb_rtl of neorv32_tb is begin mem_v := (others => (others => '0')); for i in 0 to init'length-1 loop -- init only in range of source data array - if (xbus_big_endian_c = false) then + if (wb_big_endian_c = false) then mem_v(i) := init(i); else mem_v(i) := bswap32_f(init(i)); @@ -223,7 +222,7 @@ begin MEM_INT_DMEM_EN => int_dmem_c, -- implement processor-internal data memory MEM_INT_DMEM_SIZE => dmem_size_c, -- size of processor-internal data memory in bytes -- Internal Cache memory -- - ICACHE_EN => icache_en_c, -- implement instruction cache + ICACHE_EN => true, -- implement instruction cache ICACHE_NUM_BLOCKS => 8, -- i-cache: number of blocks (min 2), has to be a power of 2 ICACHE_BLOCK_SIZE => 64, -- i-cache: block size in bytes (min 4), has to be a power of 2 ICACHE_ASSOCIATIVITY => 2, -- i-cache: associativity / number of sets (1=direct_mapped), has to be a power of 2