Add new option ONLY_ALIGNED and new prefetch buffer

Compressed instructions can now only occur in pairs, and jumps have to be aligned
2025-04-22 04:57:25 -04:00 · 2017-01-02 12:30:28 +01:00 · 2017-01-02 12:30:28 +01:00 · c727adf719
commit c727adf719
parent f21769f93a
4 changed files with 495 additions and 7 deletions
--- a/if_stage.sv
+++ b/if_stage.sv
@ -156,8 +156,11 @@ module riscv_if_stage #(

        // CONFIG_REGION: SMALL_IF
        `ifdef SMALL_IF
+
+        // CONFIG_REGION: ONLY_ALIGNED
+        `ifdef ONLY_ALIGNED
          // prefetch buffer, caches a fixed number of instructions
-            riscv_prefetch_buffer_small prefetch_buffer_i
+            riscv_prefetch_buffer_only_aligned prefetch_buffer_i
              (
                .clk               ( clk                         ),
                .rst_n             ( rst_n                       ),
@ -167,12 +170,6 @@ module riscv_if_stage #(
                .branch_i          ( branch_req                  ),
                .addr_i            ( {fetch_addr_n[31:1], 1'b0}  ),

-                // CONFIG_REGION: HWLP_SUPPORT
-                `ifdef HWLP_SUPPORT
-                .hwloop_i          ( hwlp_jump                   ),
-                .hwloop_target_i   ( hwlp_target                 ),
-                `endif // HWLP_SUPPORT
-
                .ready_i           ( fetch_ready                 ),
                .valid_o           ( fetch_valid                 ),
                .rdata_o           ( fetch_rdata                 ),
@ -188,6 +185,35 @@ module riscv_if_stage #(
                // Prefetch Buffer Status
                .busy_o            ( prefetch_busy               )
              );
+        `else 
+            // prefetch buffer, caches a fixed number of instructions
+            riscv_prefetch_buffer_small prefetch_buffer_i
+              (
+                .clk               ( clk                         ),
+                .rst_n             ( rst_n                       ),
+
+                .req_i             ( req_i                       ),
+
+                .branch_i          ( branch_req                  ),
+                .addr_i            ( {fetch_addr_n[31:1], 1'b0}  ),
+
+                .ready_i           ( fetch_ready                 ),
+                .valid_o           ( fetch_valid                 ),
+                .rdata_o           ( fetch_rdata                 ),
+                .addr_o            ( fetch_addr                  ),
+
+                // goes to instruction memory / instruction cache
+                .instr_req_o       ( instr_req_o                 ),
+                .instr_addr_o      ( instr_addr_o                ),
+                .instr_gnt_i       ( instr_gnt_i                 ),
+                .instr_rvalid_i    ( instr_rvalid_i              ),
+                .instr_rdata_i     ( instr_rdata_i               ),
+
+                // Prefetch Buffer Status
+                .busy_o            ( prefetch_busy               )
+              );
+        `endif // ONLY_ALIGNED
+
        `else // SMALL_IF
        generate
          if (RDATA_WIDTH == 32) begin : prefetch_32
--- a/prefetch_buffer_only_aligned.sv
+++ b/prefetch_buffer_only_aligned.sv
@ -0,0 +1,348 @@
+// Copyright 2015 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+////////////////////////////////////////////////////////////////////////////////
+// Engineer:       Markus Wegmann - markus.wegmann@technokrat.ch              //
+//                                                                            //
+// Design Name:    Prefetcher Buffer for 32 bit memory interface              //
+// Project Name:   littleRISCV                                                //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    Prefetch buffer to only handle full or pairs of            //
+//                 misaligned instructions to reduce area.                    //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+
+
+`include "riscv_config.sv"
+
+
+module riscv_prefetch_buffer_only_aligned
+(
+  input  logic        clk,
+  input  logic        rst_n,
+
+  // ID interface
+  input  logic        req_i,
+
+  input  logic        branch_i,
+  input  logic [31:0] addr_i,
+
+  input  logic        ready_i,
+  output logic        valid_o,
+  output logic [31:0] rdata_o,
+  output logic [31:0] addr_o,
+
+  // goes to instruction memory / instruction cache
+  output logic        instr_req_o,
+  input  logic        instr_gnt_i,
+  output logic [31:0] instr_addr_o,
+  input  logic [31:0] instr_rdata_i,
+  input  logic        instr_rvalid_i,
+
+  // Prefetch Buffer Status
+  output logic        busy_o
+);
+  
+
+  /// Regs
+  enum logic [1:0] {IDLE, WAIT_GNT, WAIT_RVALID, WAIT_ABORTED } CS, NS; // FSM state (current/next) sequencing the instruction-memory interface
+
+
+  logic [31:0]  fetch_addr_Q, fetch_addr_n; // The address of the current fetch
+  logic [31:0]  fetch_rdata_Q, fetch_rdata_n; // A 32 bit register to store current instruction if stalled
+  logic         fetch_valid_Q, fetch_valid_n; // Set when fetch_rdata_Q holds a word that is selected instead of instr_rdata_i
+
+  /// Combinational signals
+  logic [31:0]  addr_pc_next; // Next sequential address (this adder acts as the program counter)
+  logic [31:0]  addr_mux; // The next address mux to be used
+  logic [31:0]  instr_mux; // Instruction word source: buffered word or word coming from memory
+
+  logic addr_is_misaligned;
+  logic instr_is_in_regs;
+  logic instr_in_regs_is_compressed; // NOTE(review): declared but never referenced in this module
+
+  enum logic [1:0] {FULL_INSTR_ALIGNED, C_INSTR_ALIGNED, C_INSTR_MISALIGNED} instruction_format;
+
+
+  assign busy_o = (CS != IDLE) || instr_req_o; // Busy while the FSM is not idle or a request is being issued
+  assign addr_is_misaligned = (fetch_addr_Q[1] == 1'b1); // Check if address is misaligned
+
+  assign instr_is_in_regs = (fetch_valid_Q && addr_is_misaligned); // Buffered word is valid and the fetch address points at its upper half
+
+  assign instr_mux = fetch_valid_Q ? fetch_rdata_Q : instr_rdata_i; // Prefer the buffered word when it is valid
+  assign fetch_rdata_n = instr_mux; // The buffer register input follows instr_mux
+
+  // Next-address adder (the actual PC of littleRISCV): a compressed instruction advances the fetch address by 2, anything else by 4.
+  always_comb
+  begin
+    unique case (instruction_format)
+      C_INSTR_ALIGNED,
+      C_INSTR_MISALIGNED:             addr_pc_next = fetch_addr_Q + 32'h2;
+      // FULL_INSTR_ALIGNED and the unused fourth encoding both take the 4-byte step
+      default:                        addr_pc_next = fetch_addr_Q + 32'h4;
+    endcase
+  end
+
+  // Outgoing instruction word: the full 32-bit word, or the selected 16-bit half with don't-care upper bits.
+  always_comb
+  begin
+    rdata_o = instr_mux; // FULL_INSTR_ALIGNED and the unused fourth encoding pass the word through
+    unique case (instruction_format)
+      C_INSTR_ALIGNED:                rdata_o = {16'hxxxx, instr_mux[15:0]};
+      C_INSTR_MISALIGNED:             rdata_o = {16'hxxxx, instr_mux[31:16]};
+      default: ;
+    endcase
+  end
+
+
+
+  always_comb
+  begin // Fetch FSM: sequences instruction-memory requests and decides what is presented to ID
+    // Defaults: hold every register and issue no request unless a state below overrides them.
+    NS = CS;
+    fetch_addr_n = fetch_addr_Q;
+    // fetch_rdata_n is NOT defaulted here: its only driver is the continuous assignment from instr_mux.
+    fetch_valid_n = fetch_valid_Q;
+
+    valid_o = 1'b0;
+    instr_req_o = 1'b0;
+    instr_addr_o = {fetch_addr_Q[31:2], 2'b00};
+    addr_mux = fetch_addr_Q;
+    addr_o = fetch_addr_Q;
+
+    instruction_format = FULL_INSTR_ALIGNED;
+
+    unique case (CS)
+      IDLE: begin
+
+        if (branch_i) begin // If we have a branch condition, fetch from the new address
+          fetch_valid_n = 1'b0;
+          addr_mux = addr_i;
+        end
+
+        if (req_i) begin // Only proceed if ID wants to fetch new instructions
+          // Check if we already buffered in cache
+          if (~branch_i && instr_is_in_regs) begin
+            // Assume it has to be a compressed instruction, as we only allow pairs of compressed instructions
+            instruction_format = C_INSTR_MISALIGNED;
+            addr_o = fetch_addr_Q;
+            addr_mux = addr_pc_next;
+            valid_o = 1'b1;
+
+            if (ready_i) begin // Do not change state if ID is not ready
+              fetch_addr_n = addr_mux;
+              fetch_valid_n = 1'b0;
+              NS = IDLE;
+            end
+          end
+
+          // Else we have to fetch all instruction parts (aligned or misaligned in case of branch)
+          else begin
+            fetch_addr_n = addr_mux;
+            fetch_valid_n = 1'b0;
+
+            instr_req_o = 1'b1;
+            instr_addr_o = {addr_mux[31:2], 2'b00};
+
+            if (instr_gnt_i)
+              NS = WAIT_RVALID;
+            else
+              NS = WAIT_GNT;
+          end
+        end
+      end
+
+
+      // Wait for grant of instruction memory
+      WAIT_GNT: begin
+        instr_req_o = 1'b1;
+        instr_addr_o = {fetch_addr_Q[31:2], 2'b00};
+        // NOTE(review): a branch without rvalid goes to WAIT_ABORTED even if this request is not granted; confirm an rvalid always follows.
+        if (~branch_i) begin
+          if (instr_gnt_i)
+              NS = WAIT_RVALID;
+          else
+              NS = WAIT_GNT;
+        end
+        else begin // if branch_i
+          fetch_valid_n = 1'b0;
+          if (instr_rvalid_i) begin
+            if (req_i) begin
+
+              addr_mux = addr_i;
+              fetch_addr_n = addr_mux;
+
+              instr_req_o = 1'b1;
+              instr_addr_o = {addr_mux[31:2], 2'b00};
+
+              if (instr_gnt_i)
+                NS = WAIT_RVALID;
+              else
+                NS = WAIT_GNT;
+            end
+            else
+              NS = IDLE;
+          end
+          else
+            NS = WAIT_ABORTED;
+        end
+      end
+
+
+      WAIT_RVALID: begin
+        if (~branch_i) begin
+
+          NS = WAIT_RVALID;
+
+          // Wait for valid data from instruction memory and proceed only if a new instruction is wanted OR if we were stalled.
+          if (instr_rvalid_i | fetch_valid_Q) begin
+
+            if (ready_i) begin
+              fetch_valid_n = 1'b0;
+            end
+            else
+              fetch_valid_n = 1'b1; // "Stall" fetch
+
+            addr_mux = addr_pc_next;
+
+
+            // If our wanted instruction address is aligned, we have fetched all parts needed.
+            if (fetch_addr_Q[1] == 1'b0) begin
+              if (instr_mux[1:0] != 2'b11) begin // If compressed instruction
+                instruction_format = C_INSTR_ALIGNED;
+                addr_o = fetch_addr_Q;
+                valid_o = 1'b1;
+
+                if (ready_i) begin // Do not change state if ID is not ready
+                  NS = IDLE; // Can go to IDLE as there is still a part of an instruction left to process in cache
+                             // (and we do not want an unnecessary access if next instruction should be compressed as well)
+                  fetch_addr_n = addr_mux;
+                  fetch_valid_n = 1'b1;
+                end
+              end
+
+              else begin // If full instruction
+                instruction_format = FULL_INSTR_ALIGNED;
+                addr_o = fetch_addr_Q;
+                valid_o = 1'b1;
+
+                instr_addr_o = {addr_mux[31:2], 2'b00};
+
+                if (ready_i) begin // Do not change state if ID is not ready
+                  instr_req_o = 1'b1;
+                  fetch_addr_n = addr_mux;
+
+                  if (instr_gnt_i)
+                    NS = WAIT_RVALID;
+                  else
+                    NS = WAIT_GNT;
+                end
+              end
+            end
+
+            else begin // If wanted instruction address is misaligned
+                instruction_format = C_INSTR_MISALIGNED; // Upper half-word of the word holds the instruction; PC steps by 2
+                addr_o = fetch_addr_Q;
+                valid_o = 1'b1;
+
+                if (ready_i) begin // Do not change state if ID is not ready
+                  instr_req_o = 1'b1;
+                  fetch_addr_n = addr_mux;
+                  instr_addr_o = {addr_mux[31:2], 2'b00};
+
+                  if (instr_gnt_i)
+                    NS = WAIT_RVALID;
+                  else
+                    NS = WAIT_GNT;
+                end
+            end
+          end
+        end
+
+        else begin // if branch_i
+          fetch_valid_n = 1'b0;
+
+          if (instr_rvalid_i) begin
+            if (req_i) begin
+
+              addr_mux = addr_i;
+              fetch_addr_n = addr_mux;
+
+              instr_req_o = 1'b1;
+              instr_addr_o = {addr_mux[31:2], 2'b00};
+
+              if (instr_gnt_i)
+                NS = WAIT_RVALID;
+              else
+                NS = WAIT_GNT;
+            end
+            else
+              NS = IDLE;
+          end
+          else
+            NS = WAIT_ABORTED;
+        end
+      end
+
+
+      // Wait for rvalid to finish latest access accordingly
+      WAIT_ABORTED: begin
+        if (instr_rvalid_i) begin
+          if (req_i) begin
+
+            addr_mux = addr_i;
+
+            fetch_addr_n = addr_mux;
+
+            instr_req_o = 1'b1;
+            instr_addr_o = {addr_mux[31:2], 2'b00};
+
+            if (instr_gnt_i)
+              NS = WAIT_RVALID;
+            else
+              NS = WAIT_GNT;
+          end
+          else
+            NS = IDLE;
+        end
+        else begin
+          NS = WAIT_ABORTED;
+        end
+      end
+
+      default: NS = IDLE;
+    endcase
+  end
+
+
+
+  //////////////////////////////////////////////////////////////////////////////
+  // registers                                                                //
+  //////////////////////////////////////////////////////////////////////////////
+
+  always_ff @(posedge clk, negedge rst_n)
+  begin
+    if (!rst_n)
+    begin // asynchronous, active-low reset: FSM idle, address and buffer cleared, buffer marked empty
+      fetch_valid_Q           <= 1'b0;
+      fetch_rdata_Q           <= '0;
+      fetch_addr_Q            <= '0;
+      CS                      <= IDLE;
+    end
+    else begin
+      fetch_valid_Q           <= fetch_valid_n;
+      fetch_rdata_Q           <= fetch_rdata_n;
+      fetch_addr_Q            <= fetch_addr_n;
+      CS                      <= NS;
+    end
+  end
+
+endmodule
--- a/scripts/example_configs/11_very_small_only_aligned.sv
+++ b/scripts/example_configs/11_very_small_only_aligned.sv
@ -0,0 +1,113 @@
+// Copyright 2015 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+////////////////////////////////////////////////////////////////////////////////
+// Engineer:       Michael Gautschi - gautschi@iis.ee.ethz.ch                 //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                 Markus Wegmann - markus.wegmann@technokrat.ch              //
+//                                                                            //
+// Design Name:    RISC-V config file                                         //
+// Project Name:   RI5CY                                                      //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    Configure optional simulation modules                      //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+
+// no traces for synthesis, they are not synthesizable
+`ifndef SYNTHESIS
+`ifndef PULP_FPGA_EMUL
+`define TRACE_EXECUTION
+`endif
+//`define SIMCHECKER
+`endif
+
+
+// littleRISCV configuration. 
+
+// Uncomment to enable.
+
+// The format should be strictly followed so the ri5cly-manage tool can parse the configuration
+// A CONFIG section declares a config definition, a CONFIG_REGION enables the tool to remove disabled code
+// for export. See the ri5cly-manage.py tool help and source code in the /scripts folder for more information.
+
+
+
+// CONFIG: MUL_SUPPORT
+// will enable RISCV32M support for multiplication, division, MAC operations. Uses a lot of multiplications
+//`define MUL_SUPPORT
+
+// CONFIG: VEC_SUPPORT
+// will enable RISCV32V support for vector operations.
+//`define VEC_SUPPORT
+
+// CONFIG: HWLP_SUPPORT
+// will enable hardware loop support.
+//`define HWLP_SUPPORT
+
+// CONFIG: BIT_SUPPORT
+// will enable bit manipulation and counting support.
+//`define BIT_SUPPORT
+
+// CONFIG: LSU_ADDER_SUPPORT
+// will enable an additional adder in the LSU for better timings.
+//`define LSU_ADDER_SUPPORT
+
+`ifdef LSU_ADDER_SUPPORT
+
+// CONFIG: PREPOST_SUPPORT
+// will enable pre/post increment load/store support.
+//`define PREPOST_SUPPORT
+
+`endif // LSU_ADDER_SUPPORT
+
+// CONFIG: MATH_SPECIAL_SUPPORT
+// will enable clip, min and max operations support.
+//`define MATH_SPECIAL_SUPPORT
+
+
+// Dependent definitions
+
+// CONFIG: THREE_PORT_REG_FILE
+// enables 3r2w reg file (rather than 2r1w)
+//`define THREE_PORT_REG_FILE
+
+
+`ifndef MUL_SUPPORT
+`ifndef VEC_SUPPORT
+`ifndef BIT_SUPPORT
+`ifndef LSU_ADDER_SUPPORT
+`ifndef PREPOST_SUPPORT
+`ifndef MATH_SPECIAL_SUPPORT
+
+// CONFIG: SIMPLE_ALU
+// will enable simplified ALU for less gates. It does not support vectors, shuffling, nor bit operations.
+`define SIMPLE_ALU
+
+// CONFIG: SMALL_IF
+// will disable large FIFO in IF stage and use a more simple one.
+`define SMALL_IF
+
+// CONFIG: RV32E
+// will reduce the register file to 16 words
+`define RV32E
+
+// CONFIG: ONLY_ALIGNED
+// will only allow aligned instruction fetches: compressed instructions must come in pairs, so an instruction must never overlap a 32-bit word boundary
+`define ONLY_ALIGNED
+
+`endif
+`endif
+`endif
+`endif
+`endif
+`endif
--- a/src_files.yml
+++ b/src_files.yml
@ -23,6 +23,7 @@ riscv:
    mult.sv,
    prefetch_buffer.sv,
    prefetch_buffer_small.sv,
+    prefetch_buffer_only_aligned.sv,
    prefetch_L0_buffer.sv,
    riscv_core.sv,
  ]