diff --git a/if_stage.sv b/if_stage.sv
index 5d84ed91..d69a8546 100644
--- a/if_stage.sv
+++ b/if_stage.sv
@@ -34,6 +34,9 @@
 `include "defines.sv"
 
 module if_stage
+#(
+  parameter RDATA_WIDTH = 32 // width of instr_rdata_i in bits, 32 or 128
+)
 (
     input  logic        clk,
     input  logic        rst_n,
@@ -46,11 +49,11 @@ module if_stage
     output logic        valid_o,
 
     // instruction cache interface
-    output logic        instr_req_o,
-    output logic [31:0] instr_addr_o,
-    input  logic        instr_gnt_i,
-    input  logic        instr_rvalid_i,
-    input  logic [31:0] instr_rdata_i,
+    output logic                   instr_req_o,
+    output logic            [31:0] instr_addr_o,
+    input  logic                   instr_gnt_i,
+    input  logic                   instr_rvalid_i,
+    input  logic [RDATA_WIDTH-1:0] instr_rdata_i,
 
     // Output of IF Pipeline stage
     output logic [31:0] instr_rdata_id_o,      // read instruction is sampled and sent to ID stage for decoding
@@ -181,34 +184,70 @@ module if_stage
   end
 
 
-  // prefetch buffer, caches a fixed number of instructions
-  prefetch_buffer prefetch_buffer_i
-  (
-    .clk               ( clk                   ),
-    .rst_n             ( rst_n                 ),
+  generate
+    if (RDATA_WIDTH == 32) begin : prefetch_32
+      // prefetch buffer, caches a fixed number of instructions
+      prefetch_buffer prefetch_buffer_i
+      (
+        .clk               ( clk                   ),
+        .rst_n             ( rst_n                 ),
 
-    .req_i             ( 1'b1                  ), // TODO: FETCH_ENABLE!
-    .branch_i          ( branch_req            ),
-    .addr_i            ( fetch_addr_n          ),
+        .req_i             ( 1'b1                  ), // TODO: FETCH_ENABLE!
+        .branch_i          ( branch_req            ),
+        .addr_i            ( fetch_addr_n          ),
 
-    .ready_i           ( fetch_ready           ),
-    .valid_o           ( fetch_valid           ),
-    .rdata_o           ( fetch_rdata           ),
-    .addr_o            ( fetch_addr            ),
+        .ready_i           ( fetch_ready           ),
+        .valid_o           ( fetch_valid           ),
+        .rdata_o           ( fetch_rdata           ),
+        .addr_o            ( fetch_addr            ),
 
-    .unaligned_valid_o ( fetch_unaligned_valid ),
-    .unaligned_rdata_o ( fetch_unaligned_rdata ),
+        .unaligned_valid_o ( fetch_unaligned_valid ),
+        .unaligned_rdata_o ( fetch_unaligned_rdata ),
 
-    // goes to instruction memory / instruction cache
-    .instr_req_o       ( instr_req_o           ),
-    .instr_addr_o      ( instr_addr_o          ),
-    .instr_gnt_i       ( instr_gnt_i           ),
-    .instr_rvalid_i    ( instr_rvalid_i        ),
-    .instr_rdata_i     ( instr_rdata_i         ),
+        // goes to instruction memory / instruction cache
+        .instr_req_o       ( instr_req_o           ),
+        .instr_addr_o      ( instr_addr_o          ),
+        .instr_gnt_i       ( instr_gnt_i           ),
+        .instr_rvalid_i    ( instr_rvalid_i        ),
+        .instr_rdata_i     ( instr_rdata_i         ),
 
-    // Prefetch Buffer Status
-    .busy_o            ( prefetch_busy         )
-  );
+        // Prefetch Buffer Status
+        .busy_o            ( prefetch_busy         )
+      );
+    end else if (RDATA_WIDTH == 128) begin : prefetch_128
+      // L0 prefetch buffer, receives one 128 bit line per request and serves it word by word
+      prefetch_L0_buffer #( .RDATA_IN_WIDTH ( RDATA_WIDTH ) ) prefetch_buffer_i
+      (
+        .clk               ( clk                   ),
+        .rst_n             ( rst_n                 ),
+
+        .req_i             ( 1'b1                  ), // TODO: FETCH_ENABLE!
+        .branch_i          ( branch_req            ),
+        .addr_i            ( fetch_addr_n          ),
+
+        .ready_i           ( fetch_ready           ),
+        .valid_o           ( fetch_valid           ),
+        .rdata_o           ( fetch_rdata           ),
+        .addr_o            ( fetch_addr            ),
+
+        .unaligned_valid_o ( fetch_unaligned_valid ),
+        .unaligned_rdata_o ( fetch_unaligned_rdata ),
+
+        // goes to instruction memory / instruction cache
+        .instr_req_o       ( instr_req_o           ),
+        .instr_addr_o      ( instr_addr_o          ),
+        .instr_gnt_i       ( instr_gnt_i           ),
+        .instr_rvalid_i    ( instr_rvalid_i        ),
+        .instr_rdata_i     ( instr_rdata_i         ),
+
+        // Prefetch Buffer Status
+        .busy_o            ( prefetch_busy         )
+      );
+    end else begin : prefetch_unsupported
+      // any other width would instantiate no prefetch buffer at all, so stop at elaboration
+      $error("if_stage: unsupported RDATA_WIDTH %0d, expected 32 or 128", RDATA_WIDTH);
+    end
+  endgenerate
 
 
   // offset FSM state
diff --git a/prefetch_L0_buffer.sv b/prefetch_L0_buffer.sv
new file mode 100644
index 00000000..d7a156da
--- /dev/null
+++ b/prefetch_L0_buffer.sv
@@ -0,0 +1,452 @@
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                 DEI @ UNIBO - University of Bologna                        //
+//                                                                            //
+// Engineer:       Andreas Traber - atraber@iis.ee.ethz.ch                    //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+// Create Date:    06/08/2014                                                 //
+// Design Name:    RISC-V processor core                                      //
+// Module Name:    prefetch_L0_buffer.sv                                      //
+// Project Name:   RI5CY                                                      //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    Prefetch buffer that caches one 128 bit instruction line   //
+//                 (L0 buffer) and serves it word by word. This cuts overly   //
+//                 long critical paths to the instruction cache               //
+//                                                                            //
+// Revision:                                                                  //
+// Revision v0.1 - File Created                                               //
+//                                                                            //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+
+module prefetch_L0_buffer
+#(
+  parameter RDATA_IN_WIDTH = 128 // only 128 fits: L0_buffer and the word pointer are sized for four 32 bit words
+)
+(
+  input  logic        clk,
+  input  logic        rst_n,               // asynchronous reset, active low
+
+  input  logic        req_i,               // not used inside this module
+  input  logic        branch_i,            // redirects the fetch to addr_i
+  input  logic        ready_i,             // the word on rdata_o is taken, move on to the next one
+  input  logic [31:0] addr_i,              // branch target
+
+  output logic        valid_o,             // rdata_o carries a valid word
+  output logic [31:0] rdata_o,             // word selected by the word pointer
+  output logic [31:0] addr_o,              // line address plus word offset of rdata_o
+
+  output logic        unaligned_valid_o,
+  output logic [31:0] unaligned_rdata_o,   // lower half of the selected word on top of the upper half of the word before it
+
+  // goes to instruction memory / instruction cache
+  output logic        instr_req_o,
+  output logic [31:0] instr_addr_o,
+  input  logic        instr_gnt_i,
+  input  logic        instr_rvalid_i,
+  input  logic [RDATA_IN_WIDTH/32-1:0][31:0] instr_rdata_i,
+
+  // Prefetch Buffer Status
+  output logic        busy_o
+);
+
+
+  enum logic [2:0] {EMPTY, VALID_L0, WAIT_GNT, WAIT_RVALID, WAIT_ABORTED } CS, NS;
+  logic [31:0]      current_address;        // line address, bits [3:0] are always zero
+  logic [1:0]       pointer_cs, pointer_ns; // index of the 32 bit word inside the line
+  logic             update_current_address; // advance current_address by one line (16 bytes)
+
+  logic [3:0][31:0] L0_buffer;              // last line captured from instr_rdata_i
+  logic             valid_L0_buffer;
+  logic [15:0]      previous_chunk;         // upper half of word 3 of the line that was in L0_buffer before
+  logic             valid_previous_chunk;
+  logic             clear_buffer;           // invalidates L0_buffer and previous_chunk
+
+
+  // FSM states:
+  //   EMPTY        - nothing requested, only a branch starts a fetch
+  //   WAIT_GNT     - request is driven, waiting for instr_gnt_i
+  //   WAIT_RVALID  - request was granted, waiting for instr_rvalid_i
+  //   VALID_L0     - words are served out of L0_buffer
+  //   WAIT_ABORTED - a branch arrived while waiting for instr_rvalid_i, that response is dropped
+
+  // busy as soon as the FSM has left EMPTY
+  assign busy_o = (CS != EMPTY);
+
+  // state, line address and word pointer registers
+  always_ff @(posedge clk or negedge rst_n)
+  begin
+    if(~rst_n)
+    begin
+      CS              <= EMPTY;
+      current_address <= '0;
+      pointer_cs      <= '0;
+    end
+    else
+    begin
+      CS <= NS;
+
+      if(branch_i)
+      begin
+        current_address <= {addr_i[31:4],4'b0000}; // line that contains the branch target
+        pointer_cs      <= addr_i[3:2];            // word of the branch target inside that line
+      end
+      else
+      begin
+        if(update_current_address)
+          current_address <= current_address + 5'h10; // jump to the next cache line
+
+        pointer_cs <= pointer_ns;
+      end
+    end
+  end
+
+
+  // next state and output logic
+  always_comb
+  begin
+    // default assignments
+    valid_o = 1'b0;
+    addr_o  = current_address + (pointer_cs<<2); // assigned first: the case below reads it
+
+    // NOTE(review): for word 0 unaligned_valid_o follows valid_previous_chunk in every state
+    // that does not override it below, also while valid_o is low - confirm this is intended
+    case(addr_o[3:2])
+       2'b00: begin unaligned_rdata_o = {L0_buffer[0][15:0], previous_chunk      }; unaligned_valid_o = valid_previous_chunk; end
+       2'b01: begin unaligned_rdata_o = {L0_buffer[1][15:0], L0_buffer[0][31:16] }; unaligned_valid_o = valid_o;              end
+       2'b10: begin unaligned_rdata_o = {L0_buffer[2][15:0], L0_buffer[1][31:16] }; unaligned_valid_o = valid_o;              end
+       2'b11: begin unaligned_rdata_o = {L0_buffer[3][15:0], L0_buffer[2][31:16] }; unaligned_valid_o = valid_o;              end
+    endcase // addr_o
+
+    pointer_ns             = pointer_cs;
+    instr_req_o            = 1'b0;
+    instr_addr_o           = (branch_i) ? {addr_i[31:4],4'b0000} : current_address + 5'h10; // always line aligned
+    update_current_address = 1'b0;
+    rdata_o                = instr_rdata_i[pointer_cs];
+    clear_buffer           = 1'b0;
+
+
+
+    case(CS)
+
+      EMPTY:
+      begin
+        instr_req_o = branch_i;
+        if(branch_i) // make the request to icache
+        begin
+          // the default instr_addr_o already holds the line address of the branch target
+          if(instr_gnt_i)
+          begin
+            NS = WAIT_RVALID;
+          end
+          else
+          begin
+            NS = WAIT_GNT;
+          end
+        end
+        else
+        begin
+          NS = EMPTY;
+        end
+      end //~EMPTY
+
+
+
+
+
+      WAIT_RVALID:
+      begin
+        if(branch_i) // there is a pending branch
+        begin
+          if(instr_rvalid_i)
+          begin
+            instr_req_o  = 1'b1;
+            instr_addr_o = {addr_i[31:4],4'b0000};
+
+            if(instr_gnt_i)
+            begin
+              NS = WAIT_RVALID;
+            end
+            else
+            begin
+              NS = WAIT_GNT;
+            end
+          end
+          else
+          begin
+            NS = WAIT_ABORTED;
+          end
+
+        end
+        else // else (branch_i)
+        begin
+          valid_o = instr_rvalid_i;
+
+          // the line is not in L0_buffer yet, so take the words straight from instr_rdata_i
+          case(pointer_cs)
+            2'b00:
+            begin
+              unaligned_rdata_o = { instr_rdata_i[0][15:0], L0_buffer[3][31:16] };
+              if(valid_L0_buffer)
+              begin
+                unaligned_valid_o = instr_rvalid_i;
+              end
+              else
+              begin
+                unaligned_valid_o = 1'b0;
+              end
+            end
+
+            2'b01: begin unaligned_rdata_o = {instr_rdata_i[1][15:0], instr_rdata_i[0][31:16] }; unaligned_valid_o = instr_rvalid_i; end
+            2'b10: begin unaligned_rdata_o = {instr_rdata_i[2][15:0], instr_rdata_i[1][31:16] }; unaligned_valid_o = instr_rvalid_i; end
+            2'b11: begin unaligned_rdata_o = {instr_rdata_i[3][15:0], instr_rdata_i[2][31:16] }; unaligned_valid_o = instr_rvalid_i; end
+          endcase // pointer_cs
+
+          if(instr_rvalid_i)
+          begin
+
+            if(&pointer_cs) // we are receiving the last packet, then prefetch the next one
+            begin
+
+              if(ready_i)
+              begin
+                instr_req_o  = 1'b1; //if the cpu is ready to sample the instruction, then ask for a new instruction
+                instr_addr_o = current_address + 5'h10;
+                // the last word is taken in this cycle, so move to the next line even without a grant
+                pointer_ns             = '0;
+                update_current_address = 1'b1; // WAIT_GNT keeps requesting current_address
+                if(instr_gnt_i)
+                begin
+                  NS = WAIT_RVALID;
+                end
+                else
+                begin
+                  NS = WAIT_GNT;
+                end
+              end
+              else
+              begin
+                NS = VALID_L0;
+              end
+            end
+            else // not the last chunk
+            begin
+              NS = VALID_L0;
+              if(ready_i)
+              begin
+                pointer_ns = pointer_cs + 1'b1;
+              end
+              else
+              begin
+                pointer_ns = pointer_cs;
+              end
+            end
+
+          end
+          else // still wait instr_rvalid_i
+          begin
+            NS = WAIT_RVALID;
+          end
+        end
+
+      end //~WAIT_RVALID
+
+
+
+
+      VALID_L0:
+      begin
+        valid_o = 1'b1;
+        rdata_o = L0_buffer[pointer_cs];
+        case(pointer_cs)
+          2'b00: begin unaligned_rdata_o = {L0_buffer[0][15:0], previous_chunk      }; unaligned_valid_o = valid_previous_chunk; end
+          2'b01: begin unaligned_rdata_o = {L0_buffer[1][15:0], L0_buffer[0][31:16] }; unaligned_valid_o = 1'b1;                 end
+          2'b10: begin unaligned_rdata_o = {L0_buffer[2][15:0], L0_buffer[1][31:16] }; unaligned_valid_o = 1'b1;                 end
+          2'b11: begin unaligned_rdata_o = {L0_buffer[3][15:0], L0_buffer[2][31:16] }; unaligned_valid_o = 1'b1;                 end
+        endcase // pointer_cs
+
+
+        if(branch_i)
+        begin
+          instr_req_o  = 1'b1;
+          instr_addr_o = {addr_i[31:4],4'b0000};
+          if(instr_gnt_i)
+          begin
+            NS = WAIT_RVALID;
+          end
+          else
+          begin
+            NS = WAIT_GNT;
+          end
+        end
+        else
+        begin
+          if(ready_i)
+          begin
+            if( &pointer_cs ) // we are dispatching the last packet, therefore prefetch the next cache line
+            begin
+              // address and pointer move to the next line whether or not the request
+              // is granted in this cycle; WAIT_GNT keeps requesting current_address
+              instr_req_o            = 1'b1;
+              instr_addr_o           = current_address + 5'h10;
+              update_current_address = 1'b1;
+              pointer_ns             = '0;
+
+              if(instr_gnt_i)
+              begin
+                NS = WAIT_RVALID;
+              end
+              else
+              begin
+                NS = WAIT_GNT;
+              end
+            end
+            else
+            begin
+              pointer_ns = pointer_cs + 1'b1;
+              NS         = VALID_L0;
+            end
+          end
+          else // not ready, stay here!!!!
+          begin
+            NS = VALID_L0;
+          end
+        end
+
+
+
+      end //~VALID_L0
+
+
+
+
+
+      WAIT_GNT:
+      begin
+        if(branch_i)
+        begin
+          instr_req_o  = 1'b1;
+          instr_addr_o = {addr_i[31:4],4'b0000};
+
+          if(instr_gnt_i)
+          begin
+            NS = WAIT_RVALID;
+          end
+          else
+          begin
+            NS = WAIT_GNT;
+          end
+        end
+        else
+        begin
+          instr_req_o  = 1'b1;
+          instr_addr_o = current_address; // has been previously updated
+
+          if(instr_gnt_i)
+          begin
+            NS = WAIT_RVALID;
+          end
+          else
+          begin
+            NS = WAIT_GNT;
+          end
+        end
+
+      end //~WAIT_GNT
+
+
+      WAIT_ABORTED:
+      begin
+        clear_buffer = 1'b1; // keeps the response of the aborted request out of L0_buffer
+        if(instr_rvalid_i)
+        begin
+          instr_req_o  = 1'b1;
+          instr_addr_o = current_address; // was set to the branch target line when the branch arrived
+
+          if(instr_gnt_i)
+          begin
+            NS = WAIT_RVALID;
+          end
+          else
+          begin
+            NS = WAIT_GNT;
+          end
+        end
+        else
+        begin
+          NS = WAIT_ABORTED;
+        end
+      end //~WAIT_ABORTED
+
+
+      default:
+      begin
+        NS           = EMPTY;
+        clear_buffer = 1'b1;
+      end
+
+
+
+    endcase //~CS
+  end
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+  // L0 line buffer: captures instr_rdata_i on every instr_rvalid_i unless a branch or
+  // clear_buffer invalidates it; previous_chunk keeps the upper half of the last word
+  // of the line that is being replaced
+  always_ff @(posedge clk or negedge rst_n)
+  begin
+    if(rst_n == 1'b0)
+    begin
+      valid_L0_buffer      <= 1'b0;
+      L0_buffer            <= '0;
+      previous_chunk       <= '0;
+      valid_previous_chunk <= 1'b0;
+    end
+    else
+    begin
+      if(branch_i || clear_buffer)
+      begin
+        valid_L0_buffer      <= 1'b0;
+        valid_previous_chunk <= 1'b0;
+      end
+      else
+      begin
+        if(instr_rvalid_i)
+        begin
+          L0_buffer       <= instr_rdata_i;
+          valid_L0_buffer <= 1'b1;
+
+          if(valid_L0_buffer )
+          begin
+            valid_previous_chunk <= 1'b1;
+            previous_chunk       <= L0_buffer[3][31:16];
+          end
+        end
+      end
+    end
+  end
+
+
+
+
+endmodule // prefetch_L0_buffer
diff --git a/riscv_core.sv b/riscv_core.sv
index 7315e421..cddebf31 100644
--- a/riscv_core.sv
+++ b/riscv_core.sv
@@ -30,7 +30,8 @@
 
 module riscv_core
 #(
-  parameter N_EXT_PERF_COUNTERS = 0
+  parameter N_EXT_PERF_COUNTERS = 0,
+  parameter INSTR_RDATA_WIDTH   = 32 // width of instr_rdata_i in bits, forwarded to if_stage
 )
 (
   // Clock and Reset
@@ -43,16 +44,16 @@ module riscv_core
   input  logic [4:0]  cluster_id_i,
 
   // Instruction memory interface
-  output logic        instr_req_o,
-  input  logic        instr_grant_i, // TODO: rename to instr_gnt_i
-  input  logic        instr_rvalid_i,
-  output logic [31:0] instr_addr_o,
-  input  logic [31:0] instr_rdata_i,
+  output logic                         instr_req_o,
+  input  logic                         instr_gnt_i,
+  input  logic                         instr_rvalid_i,
+  output logic                  [31:0] instr_addr_o,
+  input  logic [INSTR_RDATA_WIDTH-1:0] instr_rdata_i,
 
   // Data memory interface
   output logic        data_req_o,
   input  logic        data_gnt_i,
-  input  logic        data_r_valid_i, // TODO: rename to data_rvalid_i
+  input  logic        data_rvalid_i,
   output logic        data_we_o,
   output logic [3:0]  data_be_o,
   output logic [31:0] data_addr_o,
@@ -96,7 +97,7 @@ module riscv_core
 
 
   logic        useincr_addr_ex;   // Active when post increment
-  logic        data_misaligned;   // Active when post increment
+  logic        data_misaligned;
 
   // Jump and branch target and decision (EX->IF)
   logic [31:0] jump_target_id, jump_target_ex;
@@ -227,7 +228,11 @@ module riscv_core
   //    |___|_|     |____/ |_/_/   \_\____|_____|   //
   //                                                //
   //////////////////////////////////////////////////
-  if_stage if_stage_i
+  if_stage
+  #(
+    .RDATA_WIDTH         ( INSTR_RDATA_WIDTH )
+  )
+  if_stage_i
   (
     .clk                 ( clk               ),
     .rst_n               ( rst_n             ),
@@ -242,7 +247,7 @@ module riscv_core
     // instruction cache interface
     .instr_req_o         ( instr_req_o       ),
     .instr_addr_o        ( instr_addr_o      ),
-    .instr_gnt_i         ( instr_grant_i     ),
+    .instr_gnt_i         ( instr_gnt_i       ),
     .instr_rvalid_i      ( instr_rvalid_i    ),
     .instr_rdata_i       ( instr_rdata_i     ),
 
@@ -510,7 +515,7 @@ module riscv_core
     //output to data memory
     .data_req_o            ( data_req_o         ),
     .data_gnt_i            ( data_gnt_i         ),
-    .data_rvalid_i         ( data_r_valid_i     ),
+    .data_rvalid_i         ( data_rvalid_i      ),
     .data_addr_o           ( data_addr_o        ),
     .data_we_o             ( data_we_o          ),
 