diff --git a/src/ariane.sv b/src/ariane.sv index 4d45b65e4..b71bf58b9 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -91,6 +91,8 @@ module ariane logic [63:0] pc_pcgen_if; logic set_pc_pcgen_if; logic is_branch_pcgen_if; + logic if_ready_if_pcgen; + logic pc_valid_pcgen_if; // -------------- // PCGEN <-> EX // -------------- @@ -108,7 +110,6 @@ module ariane logic instr_valid_if_id; logic [31:0] instr_rdata_if_id; logic decode_ack_id_if; - logic illegal_c_insn_if_id; logic is_compressed_if_id; logic illegal_c_insn_id_if; logic [63:0] pc_id_if_id; @@ -218,11 +219,12 @@ module ariane // NPC Generation // -------------- pcgen pcgen_i ( + .fetch_enable_i ( fetch_enable ), .flush_i ( flush ), - .pc_if_i ( pc_if ), + .if_ready_i ( ~if_ready_if_pcgen ), .resolved_branch_i ( resolved_branch ), .pc_if_o ( pc_pcgen_if ), - .set_pc_o ( set_pc_pcgen_if ), + .pc_if_valid_o ( pc_valid_pcgen_if ), .is_branch_o ( is_branch_pcgen_if ), .boot_addr_i ( boot_addr_i ), .epc_i ( epc_commit_pcgen ), @@ -235,11 +237,9 @@ module ariane // --------- if_stage if_stage_i ( .flush_i ( flush_ctrl_if ), - .req_i ( fetch_enable ), - .if_busy_o ( ), // ? 
+ .pc_if_valid_i ( pc_valid_pcgen_if ), + .if_busy_o ( if_ready_if_pcgen ), .id_ready_i ( ready_id_if ), - .halt_if_i ( halt_if ), - .set_pc_i ( set_pc_pcgen_if ), .is_branch_i ( is_branch_pcgen_if ), .branch_predict_o ( branch_predict_if_id ), .fetch_addr_i ( pc_pcgen_if ), @@ -252,9 +252,6 @@ module ariane .instr_valid_id_o ( instr_valid_if_id ), .instr_rdata_id_o ( instr_rdata_if_id ), - .is_compressed_id_o ( is_compressed_if_id ), - .illegal_c_insn_id_o ( illegal_c_insn_if_id ), - .pc_if_o ( pc_if ), .pc_id_o ( pc_id_if_id ), .ex_o ( exception_if_id ), .* @@ -275,7 +272,6 @@ module ariane .instruction_i ( instr_rdata_if_id ), .instruction_valid_i ( instr_valid_if_id ), .decoded_instr_ack_o ( decode_ack_id_if ), - .is_compressed_i ( is_compressed_if_id ), .pc_if_i ( pc_id_if_id ), // PC from if .ex_if_i ( exception_if_id ), // exception from if .ready_o ( ready_id_if ), diff --git a/src/branch_engine.sv b/src/branch_engine.sv index 2549f4568..0c6c350c2 100644 --- a/src/branch_engine.sv +++ b/src/branch_engine.sv @@ -67,11 +67,19 @@ module branch_engine ( resolved_branch_o.valid = valid_i; resolved_branch_o.is_mispredict = 1'b0; // calculate next PC, depending on whether the instruction is compressed or not this may be different - next_pc = pc_i + ((is_compressed_instr_i) ? 64'h2 : 64'h4); + next_pc = pc_i + ((is_compressed_instr_i) ? 
64'h2 : 64'h4);
         // calculate target address simple 64 bit addition
-        target_address = $signed(operand_c_i) + $signed(imm_i);
-        // save pc
-        resolved_branch_o.pc = pc_i;
+        target_address                   = $signed(operand_c_i) + $signed(imm_i);
+        // save PC - we need this to get the target row in the branch target buffer
+        // we play this trick with a 32 bit branch instruction which straddles a 32 bit word boundary:
+        //                                      |---------- Place the prediction on this PC
+        //                                      \/
+        //   ____________________________________________________
+        //   |branch [15:0] | branch[31:16] | compressed 1[15:0] |
+        //   |____________________________________________________
+        // This relieves the prefetcher from re-fetching partially fetched unaligned branch instructions, as
+        // we don't have a back arc between prefetcher and decoder/instruction FIFO.
+        resolved_branch_o.pc             = (is_compressed_instr_i || pc_i[1] == 1'b0) ? pc_i : ({pc_i[63:2], 2'b0} + 64'h4); // word-align the byte address first, then step to the next word
         // write target address which goes to pc gen
         resolved_branch_o.target_address = (comparison_result) ? 
target_address : next_pc; resolved_branch_o.is_taken = comparison_result; @@ -81,7 +89,7 @@ module branch_engine ( if (target_address[0] == 1'b0) begin // TODO in case of branch which is not taken it is not necessary to check for the address if ( target_address != branch_predict_i.predict_address_i // we mis-predicted the address of the branch - || branch_predict_i.predict_taken_i != comparison_result // we mis-predicted the outcome of the branch + || branch_predict_i.predict_taken_i != comparison_result // we mis-predicted the outcome of the branch || branch_predict_i.valid == 1'b0 // this means branch-prediction thought it was no branch but in reality it was one ) begin resolved_branch_o.is_mispredict = 1'b1; diff --git a/src/btb.sv b/src/btb.sv index 0f669c611..78ef5e603 100644 --- a/src/btb.sv +++ b/src/btb.sv @@ -59,7 +59,7 @@ module btb #( assign predict_taken_o = btb_q[$unsigned(index)].saturation_counter[BITS_SATURATION_COUNTER-1]; assign branch_target_address_o = btb_q[$unsigned(index)].target_address; - // update on a miss-predict + // update on a mis-predict always_comb begin : update_branchpredict btb_n = btb_q; saturation_counter = btb_q[$unsigned(update_pc)].saturation_counter; diff --git a/src/fetch_fifo.sv b/src/fetch_fifo.sv index 911059d93..f8e5d44f7 100644 --- a/src/fetch_fifo.sv +++ b/src/fetch_fifo.sv @@ -26,175 +26,233 @@ import ariane_pkg::*; // clear_i clears the FIFO for the following cycle. 
module fetch_fifo (
-    input  logic        clk,
-    input  logic        rst_n,
+    input  logic                   clk_i,
+    input  logic                   rst_ni,
     // control signals
-    input  logic        clear_i,          // clears the contents of the fifo
+    input  logic                   clear_i,          // clears the contents of the fifo
     // input port
-    input  logic [63:0] in_addr_i,
-    input  logic [31:0] in_rdata_i,
-    input  logic        in_valid_i,
-    output logic        in_ready_o,
+    // branch prediction at in_addr_i address, as this is an address and not PC it can be the case
+    // that we have two compressed instruction (or one compressed instruction and one unaligned instruction) so we need
+    // keep two prediction inputs: [c1|c0] <- prediction for c1 and c0
+    input  branchpredict_sbe       branch_predict_i,
+    input  logic [63:0]            in_addr_i,
+    input  logic [31:0]            in_rdata_i,
+    input  logic                   in_valid_i,
+    output logic                   in_ready_o,
     // output port
-    output logic [63:0] out_addr_o,
-    output logic [31:0] out_rdata_o,
-    output logic        out_valid_o,
-    input  logic        out_ready_i
+    output branchpredict_sbe [1:0] branch_predict_o,
+    output logic [63:0]            out_addr_o,
+    output logic [31:0]            out_rdata_o,
+    output logic                   out_valid_o,
+    input  logic                   out_ready_i
+
 );
     localparam DEPTH = 4; // must be 3 or greater
-    /* verilator lint_off LITENDIAN */
-    // index 0 is used for output
-    logic [0:DEPTH-1] [63:0] addr_n, addr_int, addr_Q;
-    logic [0:DEPTH-1] [31:0] rdata_n, rdata_int, rdata_Q;
-    logic [0:DEPTH-1] valid_n, valid_int, valid_Q;
+    typedef struct packed {
+        branchpredict_sbe branch_predict;
+        logic [63:0]      address;
+        logic [31:0]      instruction;
+    } fetch_entry;
+    // input registers - bounding the path from memory
+    branchpredict_sbe branch_predict_n, branch_predict_q;
+    logic [63:0]      in_addr_n,        in_addr_q;
+    logic [31:0]      in_rdata_n,       in_rdata_q;
+    logic             in_valid_n,       in_valid_q;
-    logic [63:0] addr_next;
-    logic [31:0] rdata, rdata_unaligned;
-    logic valid, valid_unaligned;
+    fetch_entry mem_n[DEPTH-1:0], mem_q[DEPTH-1:0];
+    logic [$clog2(DEPTH)-1:0] read_pointer_n, read_pointer_q;
+    logic [$clog2(DEPTH)-1:0] write_pointer_n, write_pointer_q;
+    int unsigned status_cnt_n, status_cnt_q; // this integer will be truncated by the synthesis tool
-    logic aligned_is_compressed, unaligned_is_compressed;
-    logic aligned_is_compressed_st, unaligned_is_compressed_st;
-    /* lint_on */
+    // status signals
+    logic full, empty;
+    // the last instruction was unaligned
+    logic unaligned_n, unaligned_q;
+    // save the unaligned part of the instruction to this ff
+    logic [15:0] unaligned_instr_n, unaligned_instr_q;
+    // save the address of the unaligned instruction
+    logic [63:0] unaligned_address_n, unaligned_address_q;
-    //----------------------------------------------------------------------------
-    // output port
-    //----------------------------------------------------------------------------
-    assign rdata = (valid_Q[0]) ? rdata_Q[0] : in_rdata_i;
-    assign valid = valid_Q[0] || in_valid_i;
+    // we always need two empty places
+    // as it could happen that we get two compressed instructions/cycle
+    assign full        = (status_cnt_q >= DEPTH - 2); // '>=': the count can step by two and would jump over an '==' compare
+    assign empty       = (status_cnt_q == 0);
+    assign out_valid_o = ~empty;
+    assign in_ready_o  = ~full;
-    assign rdata_unaligned = (valid_Q[1]) ? {rdata_Q[1][15:0], rdata[31:16]} : {in_rdata_i[15:0], rdata[31:16]};
-    // it is implied that rdata_valid_Q[0] is set
-    assign valid_unaligned = (valid_Q[1] || (valid_Q[0] && in_valid_i));
+    // Output assignments
+    assign branch_predict_o = mem_q[read_pointer_q].branch_predict; // NOTE(review): port is branchpredict_sbe [1:0] but a single entry is driven - confirm intended width
+    assign out_addr_o       = mem_q[read_pointer_q].address;
+    assign out_rdata_o      = mem_q[read_pointer_q].instruction;
-    assign unaligned_is_compressed = rdata[17:16] != 2'b11;
-    assign aligned_is_compressed = rdata[1:0] != 2'b11;
-    assign unaligned_is_compressed_st = rdata_Q[0][17:16] != 2'b11;
-    assign aligned_is_compressed_st = rdata_Q[0][1:0] != 2'b11;
-
-    //----------------------------------------------------------------------------
-    // instruction aligner (if unaligned)
-    //----------------------------------------------------------------------------
+    // ----------------
+    // Input Registers
+    // ----------------
     always_comb begin
-        // serve the aligned case even though the output address is unaligned when
-        // the next instruction will be from a hardware loop target
-        // in this case the current instruction is already prealigned in element 0
-        if (out_addr_o[1]) begin
-            // unaligned case
-            out_rdata_o = rdata_unaligned;
-
-            if (unaligned_is_compressed)
-                out_valid_o = valid;
-            else
-                out_valid_o = valid_unaligned;
-        end else begin
-            // aligned case
-            out_rdata_o = rdata;
-            out_valid_o = valid;
-        end
-    end
-
-    assign out_addr_o = (valid_Q[0]) ? addr_Q[0] : in_addr_i;
-
-    //----------------------------------------------------------------------------
-    // input port
-    //----------------------------------------------------------------------------
-    // we accept data as long as our fifo is not full
-    // we don't care about clear here as the data will be received one cycle
-    // later anyway
-    assign in_ready_o = ~valid_Q[DEPTH-2];
-
-    //----------------------------------------------------------------------------
-    // FIFO management
-    //----------------------------------------------------------------------------
-    always_comb begin
-        addr_int = addr_Q;
-        rdata_int = rdata_Q;
-        valid_int = valid_Q;
-
-        if (in_valid_i) begin
-            for (int j = 0; j < DEPTH; j++) begin
-                if (~valid_Q[j]) begin
-                    addr_int[j] = in_addr_i;
-                    rdata_int[j] = in_rdata_i;
-                    valid_int[j] = 1'b1;
-                    break;
-                end
-            end
+        // if we are not ready latch the values
+        in_addr_n        = in_addr_q;
+        in_rdata_n       = in_rdata_q;
+        in_valid_n       = in_valid_q; // hold the valid flag itself (was loaded from the 32 bit data register)
+        branch_predict_n = branch_predict_q;
+        // if we are ready to accept new data - do so!
+        if (in_ready_o) begin // gate on "not full"; out_valid_o (= not empty) is low after reset and would never load the registers
+            in_addr_n        = in_addr_i;
+            in_rdata_n       = in_rdata_i;
+            in_valid_n       = in_valid_i;
+            branch_predict_n = branch_predict_i;
+        end
+        // flush the input registers
+        if (clear_i) begin
+            in_valid_n = 1'b0;
         end
     end
-    assign addr_next = {addr_int[0][63:2], 2'b00} + 64'h4;
+    // --------------
+    // FIFO Management
+    // --------------
+    always_comb begin : output_port
+        // counter
+        automatic int status_cnt    = status_cnt_q;
+        automatic int write_pointer = write_pointer_q;
-    // move everything by one step
-    always_comb begin
-        addr_n = addr_int;
-        rdata_n = rdata_int;
-        valid_n = valid_int;
+        write_pointer_n     = write_pointer_q;
+        read_pointer_n      = read_pointer_q;
+        mem_n               = mem_q;
+        unaligned_n         = unaligned_q;
+        unaligned_instr_n   = unaligned_instr_q;
+        unaligned_address_n = unaligned_address_q;
+        // ---------------------------------
+        // Input port & Instruction Aligner
+        // ---------------------------------
+        if (in_valid_i && !unaligned_q) begin // NOTE(review): decisions use unregistered in_valid_i/in_rdata_i while the stored data is the registered *_q copy - confirm intended timing
+            // we got a valid instruction so we can satisfy the unaligned instruction
+            unaligned_n = 1'b0;
+            // check if the instruction is compressed
+            if(in_rdata_i[1:0] != 2'b11) begin
+                // it is compressed
+                mem_n[write_pointer_q].branch_predict = branch_predict_q;
+                mem_n[write_pointer_q].address        = in_addr_q;
+                mem_n[write_pointer_q].instruction    = in_rdata_q[15:0];
-        if (out_ready_i && out_valid_o) begin
-            if (addr_int[0][1]) begin
-                // unaligned case
-                if (unaligned_is_compressed) begin
-                    addr_n[0] = {addr_next[63:2], 2'b00};
+                status_cnt++;
+                write_pointer++;
+                // is the second instruction also compressed, like:
+                // _____________________________________________
+                // | compressed 2 [31:16] | compressed 1[15:0] |
+                // |____________________________________________
+                if (in_rdata_i[17:16] != 2'b11) begin
+                    mem_n[write_pointer_q + 1'b1].branch_predict = branch_predict_q; // 1'b1 keeps the index pointer-wide so it wraps at DEPTH
+                    mem_n[write_pointer_q + 1'b1].address        = {in_addr_q[63:2], 2'b10};
+                    mem_n[write_pointer_q + 1'b1].instruction    = in_rdata_q[31:16];
+                    status_cnt++;
+                    write_pointer++;
+                // or is it an unaligned 32 bit instruction like
+                // ____________________________________________________
+                // |instr [15:0] | instr [31:16] | compressed 1[15:0] |
+                // |____________________________________________________
                 end else begin
-                    addr_n[0] = {addr_next[63:2], 2'b10};
+                    // we've got an unaligned 32 bit instruction
+                    // save the lower 16 bit
+                    unaligned_instr_n = in_rdata_q[31:16];
+                    // and that it was unaligned
+                    unaligned_n = 1'b1;
+                    // save the address as well
+                    unaligned_address_n = {in_addr_q[63:2], 2'b10};
+                    // this does not consume space in the FIFO
                 end
-
-                // shift everything on ene step
-                for (int i = 0; i < DEPTH - 1; i++)
-                    rdata_n[i] = rdata_int[i + 1];
-
-                rdata_n[DEPTH - 1] = 32'b0;
-
-                valid_n = {valid_int[1:DEPTH-1], 1'b0};
             end else begin
-                if (aligned_is_compressed) begin
-                    // just increase address, do not move to next entry in FIFO
-                    addr_n[0] = {addr_int[0][63:2], 2'b10};
-                end else begin
-                    // move to next entry in FIFO
-                    addr_n[0] = {addr_next[63:2], 2'b00};
-                    // shift entry
-                    for (int i = 0; i < DEPTH - 1; i++)
-                        rdata_n[i] = rdata_int[i + 1];
-
-                    rdata_n[DEPTH - 1] = 32'b0;
-                    valid_n = {valid_int[1:DEPTH-1], 1'b0};
-                end
+                // this is a full 32 bit instruction like
+                // _______________________
+                // | instruction [31:0]  |
+                // |______________________
+                mem_n[write_pointer_q].branch_predict = branch_predict_q;
+                mem_n[write_pointer_q].address        = in_addr_q;
+                mem_n[write_pointer_q].instruction    = in_rdata_q;
+                status_cnt++;
+                write_pointer++;
             end
         end
-        // on a clear signal from outside we invalidate the content of the FIFO
-        // completely and start from an empty state
+        // we have an outstanding unaligned instruction
+        if (in_valid_i && unaligned_q) begin
+            mem_n[write_pointer_q].branch_predict = branch_predict_q;
+            mem_n[write_pointer_q].address        = unaligned_address_q;
+            mem_n[write_pointer_q].instruction    = {in_rdata_q[15:0], unaligned_instr_q};
+            status_cnt++;
+            write_pointer++;
+            // whats up with the other upper 16 bit of this instruction
+            // is the second instruction also compressed, like:
+            // _____________________________________________
+            // | compressed 2 [31:16] | compressed 1[15:0] |
+            // |____________________________________________
+            if (in_rdata_i[17:16] != 2'b11) begin
+                mem_n[write_pointer_q + 1'b1].branch_predict = branch_predict_q; // 1'b1 keeps the index pointer-wide so it wraps at DEPTH
+                mem_n[write_pointer_q + 1'b1].address        = {in_addr_q[63:2], 2'b10};
+                mem_n[write_pointer_q + 1'b1].instruction    = in_rdata_q[31:16];
+                status_cnt++;
+                write_pointer++;
+                // unaligned access served
+                unaligned_n = 1'b0;
+            // or is it an unaligned 32 bit instruction like
+            // ____________________________________________________
+            // |instr [15:0] | instr [31:16] | compressed 1[15:0] |
+            // |____________________________________________________
+            end else begin
+                // we've got an unaligned 32 bit instruction
+                // save the lower 16 bit
+                unaligned_instr_n = in_rdata_q[31:16];
+                // and that it was unaligned
+                unaligned_n = 1'b1;
+                // save the address as well
+                unaligned_address_n = {in_addr_q[63:2], 2'b10};
+                // this does not consume space in the FIFO
+            end
+        end
+
+        // -------------
+        // Output port
+        // -------------
+        // pop the head entry when the consumer takes it; a pop on an empty FIFO is ignored
+        if (out_ready_i && !empty) begin
+            read_pointer_n = read_pointer_q + 1;
+            status_cnt--;
+        end
+        write_pointer_n = write_pointer;
+        status_cnt_n    = status_cnt;
+
         if (clear_i)
-            valid_n = '0;
+            begin status_cnt_n = '0; read_pointer_n = '0; write_pointer_n = '0; unaligned_n = 1'b0; end // flush: drop all entries, realign both pointers, forget a half-fetched instruction
+
     end
-    //----------------------------------------------------------------------------
-    // registers
-    //----------------------------------------------------------------------------
-
-    always_ff @(posedge clk, negedge rst_n) begin
-        if(rst_n == 1'b0) begin
-            addr_Q <= '{default: '0};
-            rdata_Q <= '{default: '0};
-            valid_Q <= '0;
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (~rst_ni) begin
+            status_cnt_q        <= '0; // scalar/vector registers take '0; assignment patterns are for aggregates
+            mem_q               <= '{default: '0};
+            read_pointer_q      <= '0;
+            write_pointer_q     <= '0;
+            unaligned_q         <= 1'b0;
+            unaligned_instr_q   <= 16'b0;
+            unaligned_address_q <= 64'b0;
+            // input registers
+            in_addr_q           <= 64'b0;
+            in_rdata_q          <= 32'b0;
+            in_valid_q          <= 1'b0;
+            branch_predict_q    <= '{default: 0};
         end else begin
-
-            addr_Q <= addr_n;
-            rdata_Q <= rdata_n;
-            valid_Q <= valid_n;
-        end
+            status_cnt_q        <= status_cnt_n;
+            mem_q               <= mem_n;
+            read_pointer_q      <= read_pointer_n;
+            write_pointer_q     <= write_pointer_n;
+            unaligned_q         <= unaligned_n;
+            unaligned_instr_q   <= unaligned_instr_n;
+            unaligned_address_q <= unaligned_address_n;
+            // input registers
+            in_addr_q           <= in_addr_n;
+            in_rdata_q          <= in_rdata_n;
+            in_valid_q          <= in_valid_n; // register the valid flag (was loaded from in_rdata_n)
+            branch_predict_q    <= branch_predict_n;
         end
     end
-
-    //----------------------------------------------------------------------------
-    // Assertions
-    //----------------------------------------------------------------------------
-    `ifndef SYNTHESIS
-    `ifndef VERILATOR
-    assert property (
-        @(posedge clk) (in_valid_i) |-> ((valid_Q[DEPTH-1] == 1'b0) || (clear_i == 1'b1)) );
-    `endif
-    `endif
 endmodule
\ No newline at end of file
diff --git a/src/fifo.sv b/src/fifo.sv
index 7810465c4..2f70d0566 100644
--- a/src/fifo.sv
+++ b/src/fifo.sv
@@ -21,19 +21,19 @@ module fifo #(
     parameter type dtype = logic[63:0],
     parameter int unsigned DEPTH = 4
 )(
-    input  logic clk_i,            // Clock
-    input  logic rst_ni,           // Asynchronous reset active low
-    input  logic flush_i,          // flush the queue
+    input  logic clk_i,            // Clock
+    input  logic rst_ni,           // Asynchronous reset active low
+    input  logic flush_i,          // flush the queue
     // status flags
-    output logic full_o,           // queue is full
-    output logic empty_o,          // queue is empty
+    output logic full_o,           // queue is full
+    output logic empty_o,          // queue is empty
     output logic single_element_o, // there is just a single element in the queue
     // as long as the queue is not full we can push new data
-    input  dtype data_i,           // data to push into the queue
-    input  logic push_i,           // data is valid and can be pushed to the queue
+    input  dtype data_i,           // data to push into 
the queue
+    input  logic push_i,           // data is valid and can be pushed to the queue
     // as long as the queue is not empty we can pop new elements
-    output dtype data_o,           // output data
-    input  logic pop_i             // pop head from queue
+    output dtype data_o,           // output data
+    input  logic pop_i             // pop head from queue
 );
     // pointer to the read and write section of the queue
     logic [$clog2(DEPTH) - 1:0] read_pointer_n, read_pointer_q, write_pointer_n, write_pointer_q;
@@ -44,7 +44,7 @@ module fifo #(
     assign full_o = (status_cnt_q == DEPTH);
     assign empty_o = (status_cnt_q == 0);
-    assign single_element_o = (status_cnt_q == 1);
+    assign single_element_o = (status_cnt_q == 1);
     // read and write queue logic
     always_comb begin : read_write_comb
         // default assignment
diff --git a/src/id_stage.sv b/src/id_stage.sv
index a2c251a43..f7498ff97 100644
--- a/src/id_stage.sv
+++ b/src/id_stage.sv
@@ -34,7 +34,6 @@ module id_stage #(
     input  logic [31:0] instruction_i,
     input  logic instruction_valid_i,
     output logic decoded_instr_ack_o,
-    input  logic is_compressed_i,
     input  logic [63:0] pc_if_i,
     input  exception ex_if_i, // we already got an exception in IF
@@ -97,6 +96,11 @@ module id_stage #(
     logic issue_instr_valid_sb_iro;
     logic issue_ack_iro_sb;
     // ---------------------------------------------------
+    // Compressed Decoder <-> Decoder
+    // ---------------------------------------------------
+    logic [31:0] instruction_decompressed;
+    logic instr_compressed; // name must match the compressed_decoder_i / decoder_i port connections
+    // ---------------------------------------------------
     // Decoder (DC) <-> Scoreboard (SB)
     // ---------------------------------------------------
     scoreboard_entry decoded_instr_dc_sb;
@@ -129,10 +133,22 @@ module id_stage #(
     // the case that we have an unresolved branch which is cleared in that cycle (resolved_branch_i.valid == 1)
     assign ready_o = ~full && (~unresolved_branch_q || resolved_branch_i.valid);
+    // compressed instruction decoding, or more precisely compressed instruction
+    // expander
+    //
+    // since it does not matter where we 
decompress instructions, we do it here + // to ease timing closure + compressed_decoder compressed_decoder_i ( + .instr_i ( instruction_i ), + .instr_o ( instruction_decompressed ), + .is_compressed_o ( instr_compressed ), + .illegal_instr_o ( ) // TODO + ); + decoder decoder_i ( .pc_i ( pc_if_i ), - .is_compressed_i ( is_compressed_i ), - .instruction_i ( instruction_i ), + .is_compressed_i ( instr_compressed ), + .instruction_i ( instruction_decompressed ), .ex_i ( ex_if_i ), .instruction_o ( decoded_instr_dc_sb ), .is_control_flow_instr_o ( is_control_flow_instr ), diff --git a/src/if_stage.sv b/src/if_stage.sv index 2303db867..c66368970 100644 --- a/src/if_stage.sv +++ b/src/if_stage.sv @@ -32,13 +32,11 @@ module if_stage ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low input logic flush_i, - input logic req_i, // request new instructions output logic if_busy_o, // is the IF stage busy fetching instructions? input logic id_ready_i, - input logic halt_if_i, // pipeline stall // ctrl flow instruction in input logic [63:0] fetch_addr_i, - input logic set_pc_i, // set new PC + input logic pc_if_valid_i, input logic is_branch_i, // the new PC was a branch e.g.: branch or jump // branchpredict out output branchpredict_sbe branch_predict_o, @@ -52,60 +50,30 @@ module if_stage ( output logic instr_valid_id_o, // instruction in IF/ID pipeline is valid output logic [31:0] instr_rdata_id_o, // read instruction is sampled and sent to ID stage for decoding input logic instr_ack_i, - output logic is_compressed_id_o, // compressed decoder thinks this is a compressed instruction - output logic illegal_c_insn_id_o, // compressed decoder thinks this is an invalid instruction - output logic [63:0] pc_if_o, output logic [63:0] pc_id_o, output exception ex_o ); - logic if_ready, if_valid; - logic branch_req; - logic valid; - logic prefetch_busy; - logic fetch_valid; - logic fetch_ready; - logic [31:0] fetch_rdata; - logic [63:0] fetch_addr; // 
branch predict registers logic branch_valid_n, branch_valid_q; logic [63:0] predict_address_n, predict_address_q; logic predict_taken_n, predict_taken_q; - // offset FSM - enum logic[1:0] {WAIT, IDLE, WAIT_BRANCHED} offset_fsm_cs, offset_fsm_ns; - logic [31:0] instr_decompressed; - logic illegal_c_insn; - logic instr_compressed_int; - - // compressed instruction decoding, or more precisely compressed instruction - // expander - // - // since it does not matter where we decompress instructions, we do it here - // to ease timing closure - compressed_decoder compressed_decoder_i ( - .instr_i ( fetch_rdata ), - .instr_o ( instr_decompressed ), - .is_compressed_o ( instr_compressed_int ), - .illegal_instr_o ( illegal_c_insn ) - ); - // Pre-fetch buffer, caches a fixed number of instructions prefetch_buffer prefetch_buffer_i ( .clk ( clk_i ), .rst_n ( rst_ni ), .flush_i ( flush_i ), - .req_i ( req_i ), - .branch_i ( branch_req ), // kill everything - .addr_i ( {fetch_addr_i[63:1], 1'b0} ), + .fetch_addr_i ( {fetch_addr_i[63:1], 1'b0} ), + .fetch_valid_i ( pc_if_valid_i ), - .ready_i ( fetch_ready ), + .ready_i ( instr_ack_i ), .valid_o ( fetch_valid ), - .rdata_o ( fetch_rdata ), - .addr_o ( fetch_addr ), + .rdata_o ( instr_rdata_id_o ), + .addr_o ( pc_id_o ), // goes to instruction memory / instruction cache .instr_req_o ( instr_req_o ), @@ -118,136 +86,36 @@ module if_stage ( .busy_o ( prefetch_busy ) ); - // offset FSM state transition logic + assign instr_valid_id_o = fetch_valid & id_ready_i; + assign if_busy_o = prefetch_busy; + always_comb begin - offset_fsm_ns = offset_fsm_cs; - fetch_ready = 1'b0; - branch_req = 1'b0; - valid = 1'b0; + // if (flush_i) begin - unique case (offset_fsm_cs) - // no valid instruction data for ID stage - // assume aligned - IDLE: begin - if (req_i) begin - branch_req = 1'b1; - offset_fsm_ns = WAIT; - end - - // take care of control flow changes - if (set_pc_i) begin - valid = 1'b0; - // switch to new PC from ID stage - branch_req = 
1'b1; - offset_fsm_ns = WAIT; - end - end - - // serving aligned 32 bit or 16 bit instruction, we don't know yet - WAIT: begin - if (fetch_valid) begin - valid = 1'b1; // an instruction is ready for ID stage - - if (req_i && if_valid) begin - fetch_ready = 1'b1; - offset_fsm_ns = WAIT; - end - end - - end - - default: begin - offset_fsm_ns = IDLE; - end - endcase - - // take care of control flow changes - if (set_pc_i) begin - valid = 1'b0; - // switch to new PC from PCGEN stage - branch_req = 1'b1; - offset_fsm_ns = WAIT; - end + // end end - - // ------------- - // Branch Logic - // ------------- - // We need to pass those registers on to ID in the case we've set - // a new branch target (or jump) and we got a valid instruction - always_comb begin - // this is the latch case we keep the values - predict_address_n = predict_address_q; - predict_taken_n = predict_taken_q; - branch_valid_n = branch_valid_q; - // a new branch target has been set by PCGEN - // save this in the register stage - if (set_pc_i && is_branch_i) begin - predict_address_n = fetch_addr_i; - // whether we took the branch or not can be seen from the set PC - // nevertheless we also need to keep branches not taken - predict_taken_n = set_pc_i; - branch_valid_n = is_branch_i; - end - - if (if_valid) begin - branch_valid_n = is_branch_i; - end - end - // -------------------------------------------------------------- // IF-ID pipeline registers, frozen when the ID stage is stalled // -------------------------------------------------------------- always_ff @(posedge clk_i, negedge rst_ni) begin : IF_ID_PIPE_REGISTERS if (~rst_ni) begin - // offset FSM state - offset_fsm_cs <= IDLE; - instr_valid_id_o <= 1'b0; - instr_rdata_id_o <= '0; - illegal_c_insn_id_o <= 1'b0; - is_compressed_id_o <= 1'b0; - pc_id_o <= '0; ex_o <= '{default: 0}; branch_valid_q <= 1'b0; predict_address_q <= 64'b0; predict_taken_q <= 1'b0; end - else - begin - offset_fsm_cs <= offset_fsm_ns; - predict_address_q <= 
predict_address_n; - predict_taken_q <= predict_taken_n; - branch_valid_q <= branch_valid_n; - - if (if_valid) begin - // in case of a flush simply say that the next instruction - // is not valid anymore - if (flush_i) begin - instr_valid_id_o <= 1'b0; - end else - instr_valid_id_o <= 1'b1; - instr_rdata_id_o <= instr_decompressed; - illegal_c_insn_id_o <= illegal_c_insn; - is_compressed_id_o <= instr_compressed_int; - pc_id_o <= pc_if_o; - ex_o.cause <= 64'b0; // TODO: Output exception - ex_o.tval <= 64'b0; // TODO: Output exception - ex_o.valid <= 1'b0; // TODO: Output exception - // id stage acknowledged - end else if (instr_ack_i) begin - instr_valid_id_o <= 1'b0; - end + else begin + predict_address_q <= predict_address_n; + predict_taken_q <= predict_taken_n; + branch_valid_q <= branch_valid_n; + ex_o.cause <= 64'b0; // TODO: Output exception + ex_o.tval <= 64'b0; // TODO: Output exception + ex_o.valid <= 1'b0; // TODO: Output exception end end - // Assignments - assign pc_if_o = fetch_addr; - - assign if_ready = valid & id_ready_i; - assign if_valid = (~halt_if_i) & if_ready; - assign if_busy_o = prefetch_busy; assign branch_predict_o = {predict_address_q, predict_taken_q, branch_valid_q}; //------------- // Assertions diff --git a/src/pcgen.sv b/src/pcgen.sv index ac7450ac7..0cb51ce70 100644 --- a/src/pcgen.sv +++ b/src/pcgen.sv @@ -23,13 +23,14 @@ module pcgen ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low + input logic fetch_enable_i, input logic flush_i, - input logic [63:0] pc_if_i, - input branchpredict resolved_branch_i, // from controller signaling a branchpredict -> update BTB + input logic if_ready_i, + input branchpredict resolved_branch_i, // from controller signaling a branchpredict -> update BTB // to IF output logic [63:0] pc_if_o, // new PC - output logic set_pc_o, // request the PC to be set to pc_if_o - output logic is_branch_o, // to check if we branchpredicted we need to save whether this was a branch 
or not <- LOL
+    output logic pc_if_valid_o, // the PC is valid
+    output logic is_branch_o,
     // global input
     input  logic [63:0] boot_addr_i,
     // CSR input
@@ -43,44 +44,25 @@ module pcgen (
     logic [63:0] npc_n, npc_q;
     logic is_branch;
     logic is_branch_n, is_branch_q;
-    logic set_pc_n, set_pc_q;
-    // pc which is used to look up the prediction in the BTB
-    logic [63:0] predict_pc;
+
     assign pc_if_o = npc_q;
-    assign set_pc_o = set_pc_q;
     assign is_branch_o = is_branch_q;
-    // Predict PC source select
-    // the PC which we use for lookup in the BTB can come from two sources:
-    // 1. PC from if stage plus + 4
-    // 2. or PC which we just predicted + 4
-    always_comb begin : pc_btb_lookup
-        // Ad 2: From PC of previous cycle (which is now in IF)
-        // predict_pc = npc_q;
-        // // Ad 1:
-        // // in the previous cycle we set the PC to npc_q
-        // // calculate the plus one version
-        // end else begin
-        predict_pc = {pc_if_i[62:2], 2'b0} + 64'h4;
-        // end
-    end
-
     btb #(
         .NR_ENTRIES(64),
         .BITS_SATURATION_COUNTER(2)
     )
     btb_i
     (
-        .vpc_i ( predict_pc ),
+        // Use the PC from last cycle to perform branch lookup
+        .vpc_i ( npc_q ),
         .branchpredict_i ( resolved_branch_i ),
         .is_branch_o ( is_branch ),
         .predict_taken_o ( predict_taken ),
         .branch_target_address_o ( branch_target_address ),
         .*
     );
-
-    // TODO: on flush output exception or other things but do not take branch
     // -------------------
     // Next PC
     // -------------------
@@ -92,23 +74,23 @@ module pcgen (
     // 5. Boot address
     always_comb begin : npc_select
         // default assignment
-        npc_n = npc_q;
-        set_pc_n = 1'b0;
-        is_branch_n = is_branch;
+        // default is a consecutive PC
+        if (if_ready_i && fetch_enable_i)
+            npc_n = {npc_q[63:2], 2'b0} + 64'h4; // keep all upper PC bits: [62:2] would silently clear bit 63
+        else // or keep the PC stable if IF is not ready
+            npc_n = npc_q;
+
+        pc_if_valid_o = 1'b0;
+        is_branch_n = is_branch;
-        // we already set the PC a cycle earlier
-        if (set_pc_q)
-            is_branch_n = 1'b0;
         // 4. 
Predict taken - if (is_branch && predict_taken && ~set_pc_q) begin - set_pc_n = 1'b1; - npc_n = branch_target_address; + if (is_branch && predict_taken) begin + npc_n = branch_target_address; end // 1.Debug // 3. Control flow change request if (resolved_branch_i.is_mispredict) begin - set_pc_n = 1'b1; // we already got the correct target address npc_n = resolved_branch_i.target_address; end @@ -120,7 +102,10 @@ module pcgen ( // 3. Return from exception - + // fetch enable + if (fetch_enable_i) begin + pc_if_valid_o = 1'b1; + end end // ------------------- // Sequential Process @@ -128,12 +113,10 @@ module pcgen ( // PCGEN -> IF Register always_ff @(posedge clk_i or negedge rst_ni) begin if(~rst_ni) begin - npc_q <= 64'b0; - set_pc_q <= 1'b0; + npc_q <= boot_addr_i; is_branch_q <= 1'b0; end else begin npc_q <= npc_n; - set_pc_q <= set_pc_n; is_branch_q <= is_branch_n; end end diff --git a/src/prefetch_buffer.sv b/src/prefetch_buffer.sv index 5dd07ead8..324770286 100644 --- a/src/prefetch_buffer.sv +++ b/src/prefetch_buffer.sv @@ -28,10 +28,8 @@ module prefetch_buffer input logic rst_n, input logic flush_i, - input logic req_i, - - input logic branch_i, - input logic [63:0] addr_i, + input logic [63:0] fetch_addr_i, + input logic fetch_valid_i, input logic ready_i, output logic valid_o, @@ -51,9 +49,8 @@ module prefetch_buffer enum logic [1:0] {IDLE, WAIT_GNT, WAIT_RVALID, WAIT_ABORTED } CS, NS; - logic [63:0] instr_addr_q, fetch_addr; logic addr_valid; - + logic [63:0] instr_addr_q; logic fifo_valid; logic fifo_ready; logic fifo_clear; @@ -61,64 +58,50 @@ module prefetch_buffer //--------------------------------- // Prefetch buffer status //--------------------------------- - - assign busy_o = (CS != IDLE) || instr_req_o; + // we are busy if we are either waiting for a grant + // or if the fifo is full + assign busy_o = (CS inside {WAIT_GNT, WAIT_ABORTED}) && fifo_ready; //--------------------------------- // Fetch FIFO // consumes addresses and rdata 
//--------------------------------- fetch_fifo fifo_i ( - .clk ( clk ), - .rst_n ( rst_n ), + .clk_i ( clk ), + .rst_ni ( rst_n ), - .clear_i ( fifo_clear ), + .clear_i ( flush_i ), .in_addr_i ( instr_addr_q ), .in_rdata_i ( instr_rdata_i ), .in_valid_i ( fifo_valid ), .in_ready_o ( fifo_ready ), - .out_valid_o ( valid_o ), .out_ready_i ( ready_i ), .out_rdata_o ( rdata_o ), .out_addr_o ( addr_o ) ); - - //--------------- - // Fetch address - //--------------- - - assign fetch_addr = {instr_addr_q[63:2], 2'b00} + 64'd4; - assign fifo_clear = branch_i || flush_i; - - - //------------------------- + //-------------------------------------------------- // Instruction fetch FSM // deals with instruction memory / instruction cache - //------------------------- + //-------------------------------------------------- always_comb begin instr_req_o = 1'b0; - instr_addr_o = fetch_addr; + instr_addr_o = fetch_addr_i; fifo_valid = 1'b0; - addr_valid = 1'b0; NS = CS; unique case(CS) // default state, not waiting for requested data - IDLE: - begin - instr_addr_o = fetch_addr; + IDLE: begin + instr_addr_o = fetch_addr_i; instr_req_o = 1'b0; - if (branch_i) - instr_addr_o = addr_i; - - if (req_i & (fifo_ready | branch_i )) begin + if (fifo_ready && fetch_valid_i) begin instr_req_o = 1'b1; addr_valid = 1'b1; @@ -132,16 +115,10 @@ module prefetch_buffer end // case: IDLE // we sent a request but did not yet get a grant - WAIT_GNT: - begin + WAIT_GNT: begin instr_addr_o = instr_addr_q; instr_req_o = 1'b1; - if (branch_i) begin - instr_addr_o = addr_i; - addr_valid = 1'b1; - end - if(instr_gnt_i) NS = WAIT_RVALID; else @@ -150,16 +127,12 @@ module prefetch_buffer // we wait for rvalid, after that we are ready to serve a new request WAIT_RVALID: begin - instr_addr_o = fetch_addr; + instr_addr_o = fetch_addr_i; - if (branch_i) - instr_addr_o = addr_i; - - - if (req_i & (fifo_ready | branch_i)) begin + if (fifo_ready) begin // prepare for next request - if (instr_rvalid_i) begin + if 
(fifo_ready && fetch_valid_i) begin instr_req_o = 1'b1; fifo_valid = 1'b1; addr_valid = 1'b1; @@ -173,14 +146,12 @@ module prefetch_buffer end else begin // we are requested to abort our current request // we didn't get an rvalid yet, so wait for it - if (branch_i) begin - addr_valid = 1'b1; - NS = WAIT_ABORTED; + if (flush_i) begin + NS = WAIT_ABORTED; end end end else begin // just wait for rvalid and go back to IDLE, no new request - if (instr_rvalid_i) begin fifo_valid = 1'b1; NS = IDLE; @@ -194,11 +165,6 @@ module prefetch_buffer WAIT_ABORTED: begin instr_addr_o = instr_addr_q; - if (branch_i) begin - instr_addr_o = addr_i; - addr_valid = 1'b1; - end - if (instr_rvalid_i) begin instr_req_o = 1'b1; // no need to send address, already done in WAIT_RVALID