diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 534cda5d0..745fbc021 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -105,8 +105,8 @@ pages: script: - mkdir public - mkdocs build -d public - - mv covhtmlreport/ public/ - - lftp -e "mirror -R public . ; quit;" -u $FTP_USER,$FTP_PASSWORD $FTP_HOST + # - mv covhtmlreport/ public/ + # - lftp -e "mirror -R public . ; quit;" -u $FTP_USER,$FTP_PASSWORD $FTP_HOST artifacts: paths: - site diff --git a/CHANGELOG b/CHANGELOG index eab8fed0c..74a7a42b1 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,5 @@ +v 0.3.0 + - New fetch interface, smaller and ready for macro-op fusion and dual-issue v 0.2.1 - Add support for Torture test framework v 0.2.0 diff --git a/Makefile b/Makefile index 919aa64a1..df3a0eccc 100644 --- a/Makefile +++ b/Makefile @@ -43,14 +43,14 @@ riscv-tests = rv64ui-p-add rv64ui-p-addi rv64ui-p-slli rv64ui-p-addiw rv64ui-p- rv64mi-p-csr rv64mi-p-mcsr rv64mi-p-illegal \ rv64mi-p-ma_addr rv64mi-p-ma_fetch rv64mi-p-sbreak rv64mi-p-scall \ rv64si-p-csr rv64si-p-ma_fetch rv64si-p-scall rv64si-p-wfi rv64si-p-sbreak \ - rv64si-p-dirty rv64uc-p-rvc - # rv64ui-v-add rv64ui-v-addi rv64ui-p-slli rv64ui-v-addiw rv64ui-v-addw rv64ui-v-and rv64ui-v-auipc \ - # rv64ui-v-beq rv64ui-v-bge rv64ui-v-bgeu rv64ui-v-andi rv64ui-v-blt rv64ui-v-bltu rv64ui-v-bne \ - # rv64ui-v-simple rv64ui-v-jal rv64ui-v-jalr rv64ui-v-or rv64ui-v-ori rv64ui-v-sub rv64ui-v-subw \ - # rv64ui-v-xor rv64ui-v-xori rv64ui-v-slliw rv64ui-v-sll rv64ui-v-slli rv64ui-v-sllw \ - # rv64ui-v-slt rv64ui-v-slti rv64ui-v-sltiu rv64ui-v-sltu rv64ui-v-sra rv64ui-v-srai \ - # rv64ui-v-sraiw rv64ui-v-sraw rv64ui-v-srl rv64ui-v-srli rv64ui-v-srliw rv64ui-v-srlw \ - # rv64ui-v-lb rv64ui-v-lbu rv64ui-v-ld rv64ui-v-lh rv64ui-v-lhu rv64ui-v-lui + rv64si-p-dirty rv64uc-p-rvc \ + rv64ui-v-add rv64ui-v-addi rv64ui-p-slli rv64ui-v-addiw rv64ui-v-addw rv64ui-v-and rv64ui-v-auipc \ + rv64ui-v-beq rv64ui-v-bge rv64ui-v-bgeu rv64ui-v-andi rv64ui-v-blt rv64ui-v-bltu 
rv64ui-v-bne \ + rv64ui-v-simple rv64ui-v-jal rv64ui-v-jalr rv64ui-v-or rv64ui-v-ori rv64ui-v-sub rv64ui-v-subw \ + rv64ui-v-xor rv64ui-v-xori rv64ui-v-slliw rv64ui-v-sll rv64ui-v-slli rv64ui-v-sllw \ + rv64ui-v-slt rv64ui-v-slti rv64ui-v-sltiu rv64ui-v-sltu rv64ui-v-sra rv64ui-v-srai \ + rv64ui-v-sraiw rv64ui-v-sraw rv64ui-v-srl rv64ui-v-srli rv64ui-v-srliw rv64ui-v-srlw \ + rv64ui-v-lb rv64ui-v-lbu rv64ui-v-ld rv64ui-v-lh rv64ui-v-lhu rv64ui-v-lui riscv-test = rv64ui-p-add diff --git a/include/ariane_pkg.svh b/include/ariane_pkg.svh index 27ca6f38e..2c953b388 100644 --- a/include/ariane_pkg.svh +++ b/include/ariane_pkg.svh @@ -110,12 +110,10 @@ package ariane_pkg; // --------------- // store the decompressed instruction typedef struct packed { - branchpredict_sbe branch_predict; // this field contains branch prediction information regarding the forward branch path - exception ex; // this field contains exceptions which might have happened earlier, e.g.: fetch exceptions logic [63:0] address; // the address of the instructions from below logic [31:0] instruction; // instruction word - logic is_compressed; // bit indicating whether this instruction was previously compressed (e.g.: 16 bit) - logic is_illegal; // bit indicating whether the instruction was an illegal compressed instructions + branchpredict_sbe branch_predict; // this field contains branch prediction information regarding the forward branch path + exception ex; // this field contains exceptions which might have happened earlier, e.g.: fetch exceptions } fetch_entry; // --------------- diff --git a/src/ariane.sv b/src/ariane.sv index 3d0af9ad1..8beda455a 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -248,7 +248,7 @@ module ariane .boot_addr_i ( boot_addr_i ), .pc_commit_i ( pc_commit ), .epc_i ( epc_commit_pcgen ), - .eret_i ( eret ), + .eret_i ( eret ), .trap_vector_base_i ( trap_vector_base_commit_pcgen ), .ex_i ( ex_commit ), .* @@ -269,9 +269,14 @@ module ariane .instr_rdata_i ( 
fetch_rdata_ex_if ), .instr_ex_i ( fetch_ex_ex_if ), // fetch exception - .fetch_entry_o ( fetch_entry_if_id ), - .fetch_entry_valid_i ( fetch_valid_if_id ), - .instr_ack_i ( decode_ack_id_if ), + .fetch_entry_0_o ( fetch_entry_if_id ), + .fetch_entry_valid_0_o ( fetch_valid_if_id ), + .fetch_ack_0_i ( decode_ack_id_if ), + + // Reserved for future use (second fetch port): outputs left open, ack input tied low so it never floats + .fetch_entry_1_o ( ), + .fetch_entry_valid_1_o ( ), + .fetch_ack_1_i ( 1'b0 ), .* ); @@ -280,6 +285,7 @@ module ariane // --------- id_stage id_stage_i ( .flush_i ( flush_ctrl_if ), + .fetch_entry_i ( fetch_entry_if_id ), .fetch_entry_valid_i ( fetch_valid_if_id ), .decoded_instr_ack_o ( decode_ack_id_if ), @@ -519,9 +525,9 @@ module ariane assign tracer_if.flush_unissued = flush_unissued_instr_ctrl_id; assign tracer_if.flush = flush_ctrl_ex; // fetch - assign tracer_if.fetch = fetch_entry_if_id; - assign tracer_if.fetch_valid = fetch_valid_if_id; - assign tracer_if.fetch_ack = decode_ack_id_if; + assign tracer_if.instruction = id_stage_i.compressed_decoder_i.instr_o; + assign tracer_if.fetch_valid = id_stage_i.instr_realigner_i.fetch_entry_valid_o; + assign tracer_if.fetch_ack = id_stage_i.instr_realigner_i.fetch_ack_i; // Issue assign tracer_if.issue_ack = issue_stage_i.scoreboard_i.issue_ack_i; assign tracer_if.issue_sbe = issue_stage_i.scoreboard_i.issue_instr_o; diff --git a/src/compressed_decoder.sv b/src/compressed_decoder.sv index ff459d182..25c12be4e 100644 --- a/src/compressed_decoder.sv +++ b/src/compressed_decoder.sv @@ -27,9 +27,10 @@ import ariane_pkg::*; module compressed_decoder ( - input logic [15:0] instr_i, + input logic [31:0] instr_i, output logic [31:0] instr_o, - output logic illegal_instr_o + output logic illegal_instr_o, + output logic is_compressed_o ); // ------------------- @@ -38,6 +39,8 @@ module compressed_decoder always_comb begin illegal_instr_o = 1'b0; instr_o = '0; + is_compressed_o = 1'b1; + instr_o = instr_i; unique case (instr_i[1:0]) // C0 @@ -247,7 +250,8 @@ module 
compressed_decoder endcase end - default: ; + // normal instruction + default: is_compressed_o = 1'b0; endcase end endmodule \ No newline at end of file diff --git a/src/dcache_arbiter.sv b/src/dcache_arbiter.sv index 7cf80b01b..5526b8fb6 100644 --- a/src/dcache_arbiter.sv +++ b/src/dcache_arbiter.sv @@ -177,7 +177,7 @@ module dcache_arbiter #( else begin $error("There was a grant without a request."); $stop(); end // assert that the address does not contain X when request is sent assert property ( @(posedge clk_i) (data_req_o) |-> (!$isunknown(address_index_o)) ) - else begin $info("address contains X when request is set"); end + else begin $error("address contains X when request is set"); $stop(); end // there should be no rvalid when we are in IDLE // assert property ( diff --git a/src/fetch_fifo.sv b/src/fetch_fifo.sv index 582102aed..7095a4494 100644 --- a/src/fetch_fifo.sv +++ b/src/fetch_fifo.sv @@ -28,257 +28,87 @@ module fetch_fifo // branch prediction at in_addr_i address, as this is an address and not PC it can be the case // that we have two compressed instruction (or one compressed instruction and one unaligned instruction) so we // only predict on one entry and discard (or keep) the other depending on its position and prediction. 
+ // input port input branchpredict_sbe branch_predict_i, input exception ex_i, // fetch exception in input logic [63:0] in_addr_i, input logic [31:0] in_rdata_i, input logic in_valid_i, output logic in_ready_o, - // output port - output fetch_entry fetch_entry_o, - output logic out_valid_o, - input logic out_ready_i + // Dual Port Fetch FIFO + // output port 0 + output fetch_entry fetch_entry_0_o, + output logic fetch_entry_valid_0_o, + input logic fetch_ack_0_i, + // output port 1 + output fetch_entry fetch_entry_1_o, + output logic fetch_entry_valid_1_o, + input logic fetch_ack_1_i ); - localparam int unsigned DEPTH = 8; // must be a power of two - - // input registers - bounding the path from memory - branchpredict_sbe branch_predict_n, branch_predict_q; - exception ex_n, ex_q; - logic [63:0] in_addr_n, in_addr_q; - logic [31:0] in_rdata_n, in_rdata_q; - logic in_valid_n, in_valid_q; - // compressed to decompressed instruction - logic [31:0] decompressed_instruction [2]; - logic is_illegal [2]; + localparam int unsigned DEPTH = 4; // must be a power of two + // status signals + logic full, empty; fetch_entry mem_n[DEPTH-1:0], mem_q[DEPTH-1:0]; logic [$clog2(DEPTH)-1:0] read_pointer_n, read_pointer_q; logic [$clog2(DEPTH)-1:0] write_pointer_n, write_pointer_q; - logic [$clog2(DEPTH)-1:0] status_cnt_n, status_cnt_q; // this integer will be truncated by the synthesis tool + logic [$clog2(DEPTH):0] status_cnt_n, status_cnt_q; // occupancy counter: one bit wider than the pointers so it can hold the value DEPTH (needed by 'full') - // status signals - logic full, empty; - // the last instruction was unaligned - logic unaligned_n, unaligned_q; - // save the unaligned part of the instruction to this ff - logic [15:0] unaligned_instr_n, unaligned_instr_q; - // save the address of the unaligned instruction - logic [63:0] unaligned_address_n, unaligned_address_q; - // we always need two empty places - // as it could happen that we get two compressed instructions/cycle - /* verilator lint_off WIDTH */ - assign full = (status_cnt_q == DEPTH - 1); - assign empty = (status_cnt_q == 0); - /* verilator lint_on WIDTH */ - // the output 
is valid if we are are not empty - assign out_valid_o = !empty; - // we need space for at least 4 instructions: (as two fetch requests can be in-flight) - assign in_ready_o = !(status_cnt_q >= DEPTH - 4); + assign in_ready_o = (status_cnt_q < DEPTH-2); + assign full = (status_cnt_q == DEPTH); + assign empty = (status_cnt_q == '0); - // ---------------- - // Input Registers - // ---------------- - always_comb begin - // if we are ready to accept new data - do so! - if (!full) begin - in_addr_n = in_addr_i; - in_rdata_n = in_rdata_i; - in_valid_n = in_valid_i; - branch_predict_n = branch_predict_i; - ex_n = ex_i; - // otherwise stall - end else begin - in_addr_n = in_addr_q; - in_rdata_n = in_rdata_q; - in_valid_n = in_valid_q; - branch_predict_n = branch_predict_q; - ex_n = ex_q; - end - // flush the input registers - if (flush_i) begin - in_valid_n = 1'b0; - end - end - - // -------------------- - // Compressed Decoders - // -------------------- - // compressed instruction decoding, or more precisely compressed instruction expander - // since it does not matter where we decompress instructions, we do it here to ease timing closure - genvar i; - generate - for (i = 0; i < 2; i++) begin - compressed_decoder compressed_decoder_i ( - .instr_i ( in_rdata_q[(16*(i+1)-1):(i*16)] ), - .instr_o ( decompressed_instruction[i] ), - .illegal_instr_o ( is_illegal[i] ) - ); - end - endgenerate - - // -------------------------------------------- - // FIFO Management + Instruction (re)-aligner - // -------------------------------------------- - always_comb begin : output_port + always_comb begin : fetch_fifo_logic // counter - automatic logic [$clog2(DEPTH)-1:0] status_cnt = status_cnt_q; + automatic logic [$clog2(DEPTH):0] status_cnt = status_cnt_q; automatic logic [$clog2(DEPTH)-1:0] write_pointer = write_pointer_q; + automatic logic [$clog2(DEPTH)-1:0] read_pointer = read_pointer_q; - write_pointer_n = write_pointer_q; - read_pointer_n = read_pointer_q; - mem_n = mem_q; - unaligned_n = unaligned_q; - unaligned_instr_n = unaligned_instr_q; - 
unaligned_address_n = unaligned_address_q; - // --------------------------------- - // Input port & Instruction Aligner - // --------------------------------- - if (in_valid_q && !full) begin - if (in_addr_q[1] == 1'b0) begin - // do we actually want the first instruction or was the address a half word access? - if (!unaligned_q) begin - // we got a valid instruction so we can satisfy the unaligned instruction - unaligned_n = 1'b0; - // check if the instruction is compressed - if (in_rdata_q[1:0] != 2'b11) begin - // it is compressed - mem_n[write_pointer_q] = { - branch_predict_q, ex_q, in_addr_q, decompressed_instruction[0], 1'b1, is_illegal[0] - }; + mem_n = mem_q; - status_cnt++; - write_pointer++; - - // is the second instruction also compressed, like: - // _____________________________________________ - // | compressed 2 [31:16] | compressed 1[15:0] | - // |____________________________________________ - // check if the lower compressed instruction was no branch otherwise we will need to squash this instruction - // but only if we predicted it to be taken, the predict was on the lower 16 bit compressed instruction - if (in_rdata_q[17:16] != 2'b11 && !(branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16)) begin - - mem_n[write_pointer_q + 1'b1] = { - branch_predict_q, ex_q, {in_addr_q[63:2], 2'b10}, decompressed_instruction[1], 1'b1, is_illegal[1] - }; - - status_cnt++; - write_pointer++; - // or is it an unaligned 32 bit instruction like - // ____________________________________________________ - // |instr [15:0] | instr [31:16] | compressed 1[15:0] | - // |____________________________________________________ - end else if (!(branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16)) begin - // save the lower 16 bit - unaligned_instr_n = in_rdata_q[31:16]; - // and that it was unaligned - unaligned_n = 1'b1; - // save the address as well - unaligned_address_n = {in_addr_q[63:2], 2'b10}; - // 
this does not consume space in the FIFO - end - end else begin - // this is a full 32 bit instruction like - // _______________________ - // | instruction [31:0] | - // |______________________ - mem_n[write_pointer_q] = { - branch_predict_q, ex_q, in_addr_q, in_rdata_q, 1'b0, 1'b0 - }; - status_cnt++; - write_pointer++; - end - end - // we have an outstanding unaligned instruction - if (in_valid_q && unaligned_q) begin - - mem_n[write_pointer_q] = { - branch_predict_q, ex_q, unaligned_address_q, {in_rdata_q[15:0], unaligned_instr_q}, 1'b0, 1'b0 - }; - - status_cnt++; - write_pointer++; - // whats up with the other upper 16 bit of this instruction - // is the second instruction also compressed, like: - // _____________________________________________ - // | compressed 2 [31:16] | unaligned[31:16] | - // |____________________________________________ - // check if the lower compressed instruction was no branch otherwise we will need to squash this instruction - // but only if we predicted it to be taken, the predict was on the lower 16 bit compressed instruction - if (in_rdata_q[17:16] != 2'b11 && !(branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16)) begin - mem_n[write_pointer_q + 1'b1] = { - branch_predict_q, ex_q, {in_addr_q[63:2], 2'b10}, decompressed_instruction[1], 1'b1, is_illegal[1] - }; - - status_cnt++; - write_pointer++; - // unaligned access served - unaligned_n = 1'b0; - // or is it an unaligned 32 bit instruction like - // ____________________________________________________ - // |instr [15:0] | instr [31:16] | compressed 1[15:0] | - // |____________________________________________________ - end else if (!(branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16)) begin - // save the lower 16 bit - unaligned_instr_n = in_rdata_q[31:16]; - // and that it was unaligned - unaligned_n = 1'b1; - // save the address as well - unaligned_address_n = {in_addr_q[63:2], 2'b10}; - // this does 
not consume space in the FIFO - // we've got a predicted taken branch we need to clear the unaligned flag if it was decoded as a lower 16 instruction - end else if (branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16) begin - // the next fetch will start from a 4 byte boundary again - unaligned_n = 1'b0; - end - end - end else if (in_addr_q[1] == 1'b1) begin // address was a half word access - // reset the unaligned flag as this is a completely new fetch (because consecutive fetches only happen on a word basis) - unaligned_n = 1'b0; - // this is a compressed instruction - if (in_rdata_q[17:16] != 2'b11) begin - // it is compressed - mem_n[write_pointer_q] = { - branch_predict_q, ex_q, in_addr_q, decompressed_instruction[1], 1'b1, is_illegal[1] - }; - - status_cnt++; - write_pointer++; - end else begin // this is the first part of a 32 bit unaligned instruction - // save the lower 16 bit - unaligned_instr_n = in_rdata_q[31:16]; - // and that it was unaligned - unaligned_n = 1'b1; - // save the address as well - unaligned_address_n = {in_addr_q[63:2], 2'b10}; - // this does not consume space in the FIFO - end - // there can never be a whole 32 bit instruction on a half word access - end + // ------------- + // Input Port + // ------------- + if (in_valid_i) begin + status_cnt++; + // new input data + mem_n[write_pointer_q] = {in_addr_i, in_rdata_i, branch_predict_i, ex_i}; + write_pointer++; end - // ------------- - // Output port - // ------------- - // we are ready to accept a new request if we still have two places in the queue - // Output assignments - fetch_entry_o = mem_q[read_pointer_q]; + // ------------- + // Fetch Port 0 + // ------------- + fetch_entry_valid_0_o = (status_cnt_q >= 1); + fetch_entry_0_o = mem_q[read_pointer_q]; - if (out_ready_i) begin - read_pointer_n = read_pointer_q + 1; + if (fetch_ack_0_i) begin + read_pointer++; + status_cnt--; + end + + // ------------- + // Fetch Port 1 + // ------------- + 
fetch_entry_valid_1_o = (status_cnt_q >= 2); + fetch_entry_1_o = mem_q[read_pointer_q + 1'b1]; + + if (fetch_ack_1_i) begin + read_pointer++; status_cnt--; end write_pointer_n = write_pointer; status_cnt_n = status_cnt; + read_pointer_n = read_pointer; if (flush_i) begin status_cnt_n = '0; write_pointer_n = 'b0; read_pointer_n = 'b0; - // clear the unaligned instruction - unaligned_n = 1'b0; end + end always_ff @(posedge clk_i or negedge rst_ni) begin @@ -287,32 +117,13 @@ module fetch_fifo mem_q <= '{default: 0}; read_pointer_q <= '{default: 0}; write_pointer_q <= '{default: 0}; - unaligned_q <= 1'b0; - unaligned_instr_q <= 16'b0; - unaligned_address_q <= 64'b0; - // input registers - in_addr_q <= 64'b0; - in_rdata_q <= 32'b0; - in_valid_q <= 1'b0; - branch_predict_q <= '{default: 0}; - ex_q <= '{default: 0}; end else begin status_cnt_q <= status_cnt_n; mem_q <= mem_n; read_pointer_q <= read_pointer_n; write_pointer_q <= write_pointer_n; - unaligned_q <= unaligned_n; - unaligned_instr_q <= unaligned_instr_n; - unaligned_address_q <= unaligned_address_n; - // input registers - in_addr_q <= in_addr_n; - in_rdata_q <= in_rdata_n; - in_valid_q <= in_valid_n; - branch_predict_q <= branch_predict_n; - ex_q <= ex_n; end end - //------------- // Assertions //------------- diff --git a/src/id_stage.sv b/src/id_stage.sv index f815ed9a1..f0cda51a3 100644 --- a/src/id_stage.sv +++ b/src/id_stage.sv @@ -48,18 +48,44 @@ module id_stage ( } issue_n, issue_q; - logic is_control_flow_instr; + logic is_control_flow_instr; scoreboard_entry decoded_instruction; + fetch_entry fetch_entry; + logic is_illegal; + logic [31:0] instruction; + logic is_compressed; + logic fetch_ack_i; + logic fetch_entry_valid; + + instr_realigner instr_realigner_i ( + .fetch_entry_0_i ( fetch_entry_i ), + .fetch_entry_valid_0_i ( fetch_entry_valid_i ), + .fetch_ack_0_o ( decoded_instr_ack_o ), + + .fetch_entry_o ( fetch_entry ), + .fetch_entry_valid_o ( fetch_entry_valid ), + .fetch_ack_i ( fetch_ack_i ), 
+ .* + ); + + compressed_decoder compressed_decoder_i ( + .instr_i ( fetch_entry.instruction ), + .instr_o ( instruction ), + .illegal_instr_o ( is_illegal ), + .is_compressed_o ( is_compressed ) + + ); + decoder decoder_i ( - .pc_i ( fetch_entry_i.address ), - .is_compressed_i ( fetch_entry_i.is_compressed ), - .instruction_i ( fetch_entry_i.instruction ), - .branch_predict_i ( fetch_entry_i.branch_predict ), - .is_illegal_i ( fetch_entry_i.is_illegal ), - .ex_i ( fetch_entry_i.ex ), - .instruction_o ( decoded_instruction ), - .is_control_flow_instr_o ( is_control_flow_instr ), + .pc_i ( fetch_entry.address ), + .is_compressed_i ( is_compressed ), + .instruction_i ( instruction ), + .branch_predict_i ( fetch_entry.branch_predict ), + .is_illegal_i ( is_illegal ), + .ex_i ( fetch_entry.ex ), + .instruction_o ( decoded_instruction ), + .is_control_flow_instr_o ( is_control_flow_instr ), .* ); @@ -72,7 +98,7 @@ module id_stage ( always_comb begin issue_n = issue_q; - decoded_instr_ack_o = 1'b0; + fetch_ack_i = 1'b0; if (issue_instr_ack_i) issue_n.valid = 1'b0; @@ -80,8 +106,8 @@ module id_stage ( // if we have a space in the register and the fetch is valid, go get it // or the issue stage is currently acknowledging an instruction, which means that we will have space // for a new instruction - if ((!issue_q.valid || issue_instr_ack_i) && fetch_entry_valid_i) begin - decoded_instr_ack_o = 1'b1; + if ((!issue_q.valid || issue_instr_ack_i) && fetch_entry_valid) begin + fetch_ack_i = 1'b1; issue_n = { 1'b1, decoded_instruction, is_control_flow_instr}; end diff --git a/src/if_stage.sv b/src/if_stage.sv index ada31a255..cbc626d9b 100644 --- a/src/if_stage.sv +++ b/src/if_stage.sv @@ -29,17 +29,22 @@ module if_stage ( input logic fetch_valid_i, // the fetch address is valid input branchpredict_sbe branch_predict_i, // branch prediction structure we get from the PC Gen stage and we // we need to pass it on to all the further stages (until ex) - // instruction cache interface 
+ // I$ Interface output logic instr_req_o, output logic [63:0] instr_addr_o, input logic instr_gnt_i, input logic instr_rvalid_i, input logic [31:0] instr_rdata_i, input exception instr_ex_i, // Instruction fetch exception, valid if rvalid is one - // Output of IF Pipeline stage - output fetch_entry fetch_entry_o, // fetch entry containing all relevant data for the ID stage - output logic fetch_entry_valid_i, // instruction in IF is valid - input logic instr_ack_i // ID acknowledged this instruction + // Output of IF Pipeline stage -> Dual Port Fetch FIFO + // output port 0 + output fetch_entry fetch_entry_0_o, // fetch entry containing all relevant data for the ID stage + output logic fetch_entry_valid_0_o, // instruction in IF is valid + input logic fetch_ack_0_i, // ID acknowledged this instruction + // output port 1 + output fetch_entry fetch_entry_1_o, // fetch entry containing all relevant data for the ID stage + output logic fetch_entry_valid_1_o, // instruction in IF is valid + input logic fetch_ack_1_i // ID acknowledged this instruction ); enum logic [1:0] {IDLE, WAIT_GNT, WAIT_RVALID, WAIT_ABORTED } CS, NS; @@ -70,9 +75,6 @@ module if_stage ( .in_rdata_i ( instr_rdata_i ), .in_valid_i ( fifo_valid ), .in_ready_o ( fifo_ready ), - - .out_valid_o ( fetch_entry_valid_i ), - .out_ready_i ( instr_ack_i ), .* ); diff --git a/src/instr_realigner.sv b/src/instr_realigner.sv new file mode 100755 index 000000000..417482f53 --- /dev/null +++ b/src/instr_realigner.sv @@ -0,0 +1,239 @@ +// Author: Florian Zaruba, ETH Zurich +// Date: 14.05.2017 +// Description: Re-aligns compressed instruction +// +// Copyright (C) 2017 ETH Zurich, University of Bologna +// All rights reserved. +// +// This code is under development and not yet released to the public. +// Until it is released, the code is under the copyright of ETH Zurich and +// the University of Bologna, and may contain confidential and/or unpublished +// work. 
Any reuse/redistribution is strictly forbidden without written +// permission from ETH Zurich. +// +// Bug fixes and contributions will eventually be released under the +// SolderPad open hardware license in the context of the PULP platform +// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the +// University of Bologna. +// +import ariane_pkg::*; + +module instr_realigner ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + // control signals + input logic flush_i, + + input fetch_entry fetch_entry_0_i, + input logic fetch_entry_valid_0_i, + output logic fetch_ack_0_o, + + output fetch_entry fetch_entry_o, + output logic fetch_entry_valid_o, + input logic fetch_ack_i +); + // ---------- + // Registers + // ---------- + // the last instruction was unaligned + logic unaligned_n, unaligned_q; + // save the unaligned part of the instruction to this ff + logic [15:0] unaligned_instr_n, unaligned_instr_q; + // the previous instruction was compressed + logic compressed_n, compressed_q; + // register to save the unaligned address + logic [63:0] unaligned_address_n, unaligned_address_q; + // get the next instruction, needed on a unaligned access + logic jump_unaligned_half_word; + + // check if the lower compressed instruction was no branch otherwise we will need to squash this instruction + // but only if we predicted it to be taken, the predict was on the lower 16 bit compressed instruction + logic kill_upper_16_bit; + assign kill_upper_16_bit = fetch_entry_0_i.branch_predict.valid && + fetch_entry_0_i.branch_predict.predict_taken && + fetch_entry_0_i.branch_predict.is_lower_16; + // ---------- + // Registers + // ---------- + always_comb begin : realign_instr + + unaligned_n = unaligned_q; + unaligned_instr_n = unaligned_instr_q; + compressed_n = compressed_q; + unaligned_address_n = unaligned_address_q; + + // directly output this instruction. 
adaptations are made throughout the process + fetch_entry_o = fetch_entry_0_i; + fetch_entry_valid_o = fetch_entry_valid_0_i; + fetch_ack_0_o = fetch_ack_i; + // we just jumped to a half word and encountered an unaligned 32-bit instruction + jump_unaligned_half_word = 1'b0; + // --------------------------------- + // Input port & Instruction Aligner + // --------------------------------- + // check if the entry of the fetch FIFO is valid and if we are currently not serving the second part + // of a compressed instruction + if (fetch_entry_valid_0_i && !compressed_q) begin + // ------------------------ + // Access on Word Boundary + // ------------------------ + if (fetch_entry_0_i.address[1] == 1'b0) begin + // do we actually want the first instruction or was the address a half word access? + if (!unaligned_q) begin + // we got a valid instruction so we can satisfy the unaligned instruction + unaligned_n = 1'b0; + // check if the instruction is compressed + if (fetch_entry_0_i.instruction[1:0] != 2'b11) begin + // it is compressed: forward only the lower half-word, zero-extended to 32 bit + fetch_entry_o.instruction = {16'b0, fetch_entry_0_i.instruction[15:0]}; + + // should we even look at the upper instruction bits? + if (!kill_upper_16_bit) begin + // Yes, so... + // 1. Is the second instruction also compressed, like: + // _____________________________________________ + // | compressed 2 [31:16] | compressed 1[15:0] | + // |____________________________________________ + if (fetch_entry_0_i.instruction[17:16] != 2'b11) begin + // yes, this was a compressed instruction + compressed_n = 1'b1; + // do not advance the queue pointer + fetch_ack_0_o = 1'b0; + // 2. 
or is it an unaligned 32 bit instruction like + // ____________________________________________________ + // |instr [15:0] | instr [31:16] | compressed 1[15:0] | + // |____________________________________________________ + end else begin + // save the lower 16 bit + unaligned_instr_n = fetch_entry_0_i.instruction[31:16]; + // save the address + unaligned_address_n = {fetch_entry_0_i.address[63:2], 2'b10}; + // and that it was unaligned + unaligned_n = 1'b1; + // this does not consume space in the FIFO + end + end + end + end + // this is a full 32 bit instruction like + // _______________________ + // | instruction [31:0] | + // |______________________ + + // we have an outstanding unaligned instruction + else if (unaligned_q) begin + + + fetch_entry_o.address = unaligned_address_q; + fetch_entry_o.instruction = {fetch_entry_0_i.instruction[15:0], unaligned_instr_q}; + + // again should we look at the upper bits? + if (!kill_upper_16_bit) begin + // whats up with the other upper 16 bit of this instruction + // is the second instruction also compressed, like: + // _____________________________________________ + // | compressed 2 [31:16] | unaligned[31:16] | + // |____________________________________________ + // check if the lower compressed instruction was no branch otherwise we will need to squash this instruction + // but only if we predicted it to be taken, the predict was on the lower 16 bit compressed instruction + if (fetch_entry_0_i.instruction[17:16] != 2'b11) begin + // this was a compressed instruction + compressed_n = 1'b1; + // do not advance the queue pointer + fetch_ack_0_o = 1'b0; + // unaligned access served + unaligned_n = 1'b0; + // or is it an unaligned 32 bit instruction like + // ____________________________________________________ + // |instr [15:0] | instr [31:16] | compressed 1[15:0] | + // |____________________________________________________ + end else begin + // save the lower 16 bit + unaligned_instr_n = 
fetch_entry_0_i.instruction[31:16]; + // save the address + unaligned_address_n = {fetch_entry_0_i.address[63:2], 2'b10}; + // and that it was unaligned + unaligned_n = 1'b1; + end + end + // we've got a predicted taken branch we need to clear the unaligned flag if it was decoded as a lower 16 instruction + else if (fetch_entry_0_i.branch_predict.valid) begin + // the next fetch will start from a 4 byte boundary again + unaligned_n = 1'b0; + end + end + end + // ---------------------------- + // Access on half-Word Boundary + // ---------------------------- + else if (fetch_entry_0_i.address[1] == 1'b1) begin // address was a half word access + // reset the unaligned flag as this is a completely new fetch (because consecutive fetches only happen on a word basis) + unaligned_n = 1'b0; + // this is a compressed instruction + if (fetch_entry_0_i.instruction[17:16] != 2'b11) begin + // it is compressed: on a half word access the instruction sits in the upper 16 bit of the fetched word + fetch_entry_o.instruction = {16'b0, fetch_entry_0_i.instruction[31:16]}; + + // this is the first part of a 32 bit unaligned instruction + end else begin + // save the lower 16 bit + unaligned_instr_n = fetch_entry_0_i.instruction[31:16]; + // and that it was unaligned + unaligned_n = 1'b1; + // save the address + unaligned_address_n = {fetch_entry_0_i.address[63:2], 2'b10}; + // we need to wait for the second instruction + fetch_entry_valid_o = 1'b0; + // so get it by acknowledging this instruction + fetch_ack_0_o = 1'b1; + // we got to an unaligned instruction -> get the next entry to fulfill the need + jump_unaligned_half_word = 1'b1; + end + // there can never be a whole 32 bit instruction on a half word access + end + end + // ---------------------------- + // Next compressed instruction + // ---------------------------- + // we are serving the second part of an instruction which was also compressed + if (compressed_q) begin + fetch_ack_0_o = fetch_ack_i; + compressed_n = 1'b0; + fetch_entry_o.instruction = {16'b0, fetch_entry_0_i.instruction[31:16]}; + 
fetch_entry_o.address = {fetch_entry_0_i.address[63:2], 2'b10}; + fetch_entry_valid_o = 1'b1; + end + + // if we didn't get an acknowledge keep the registers stable + if (!fetch_ack_i && !jump_unaligned_half_word) begin + unaligned_n = unaligned_q; + unaligned_instr_n = unaligned_instr_q; + compressed_n = compressed_q; + unaligned_address_n = unaligned_address_q; + end + + if (flush_i) begin + // clear the unaligned and compressed instruction + unaligned_n = 1'b0; + compressed_n = 1'b0; + end + end + + // --------- + // Registers + // --------- + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + unaligned_q <= 1'b0; + unaligned_instr_q <= 16'b0; + unaligned_address_q <= 64'b0; + compressed_q <= 1'b0; + end else begin + unaligned_q <= unaligned_n; + unaligned_instr_q <= unaligned_instr_n; + unaligned_address_q <= unaligned_address_n; + compressed_q <= compressed_n; + end + end + +endmodule \ No newline at end of file diff --git a/src/util/instruction_tracer.svh b/src/util/instruction_tracer.svh index 798665557..a42202cc8 100755 --- a/src/util/instruction_tracer.svh +++ b/src/util/instruction_tracer.svh @@ -22,9 +22,9 @@ class instruction_tracer; // interface to the core virtual instruction_tracer_if tracer_if; // keep the decoded instructions in a queue - fetch_entry decode_queue [$]; + logic [31:0] decode_queue [$]; // keep the issued instructions in a queue - fetch_entry issue_queue [$]; + logic [31:0] issue_queue [$]; // issue scoreboard entries scoreboard_entry issue_sbe_queue [$]; scoreboard_entry issue_sbe; @@ -52,7 +52,7 @@ class instruction_tracer; endfunction : create_file task trace(); - fetch_entry decode_instruction, issue_instruction, issue_commit_instruction; + logic [31:0] decode_instruction, issue_instruction, issue_commit_instruction; scoreboard_entry commit_instruction; // initialize register 0 @@ -69,7 +69,7 @@ class instruction_tracer; // ------------------- // we are decoding an instruction if (tracer_if.pck.fetch_valid 
&& tracer_if.pck.fetch_ack) begin - decode_instruction = fetch_entry'(tracer_if.pck.fetch); + decode_instruction = tracer_if.pck.instruction; decode_queue.push_back(decode_instruction); end // ------------------- @@ -111,9 +111,9 @@ class instruction_tracer; // check if the write back is valid, if not we need to source the result from the register file // as the most recent version of this register will be there. if (tracer_if.pck.we) begin - printInstr(issue_sbe, issue_commit_instruction.instruction, tracer_if.pck.wdata, address_mapping); + printInstr(issue_sbe, issue_commit_instruction, tracer_if.pck.wdata, address_mapping); end else - printInstr(issue_sbe, issue_commit_instruction.instruction, reg_file[commit_instruction.rd], address_mapping); + printInstr(issue_sbe, issue_commit_instruction, reg_file[commit_instruction.rd], address_mapping); end // -------------- @@ -162,7 +162,7 @@ class instruction_tracer; load_mapping = {}; endfunction; - function void printInstr(scoreboard_entry sbe, logic [63:0] instr, logic [63:0] result, logic [63:0] paddr); + function void printInstr(scoreboard_entry sbe, logic [31:0] instr, logic [63:0] result, logic [63:0] paddr); instruction_trace_item iti = new ($time, clk_ticks, sbe, instr, this.reg_file, result, paddr); // print instruction to console string print_instr = iti.printInstr(); diff --git a/src/util/instruction_tracer_if.sv b/src/util/instruction_tracer_if.sv index b42d8ac3a..3b0209301 100755 --- a/src/util/instruction_tracer_if.sv +++ b/src/util/instruction_tracer_if.sv @@ -26,7 +26,7 @@ interface instruction_tracer_if ( logic flush_unissued; logic flush; // Decode - fetch_entry fetch; + logic [31:0] instruction; logic fetch_valid; logic fetch_ack; // Issue stage @@ -53,7 +53,7 @@ interface instruction_tracer_if ( exception exception; // the tracer just has a passive interface we do not drive anything with it clocking pck @(posedge clk); - input rstn, flush_unissued, flush, fetch, fetch_valid, fetch_ack, issue_ack, 
issue_sbe, waddr, + input rstn, flush_unissued, flush, instruction, fetch_valid, fetch_ack, issue_ack, issue_sbe, waddr, st_valid, st_paddr, ld_valid, ld_kill, ld_paddr, wdata, we, commit_instr, commit_ack, exception; endclocking diff --git a/tb/wave/wave_core.do b/tb/wave/wave_core.do index d46236495..bb34380c4 100644 --- a/tb/wave/wave_core.do +++ b/tb/wave/wave_core.do @@ -7,6 +7,8 @@ add wave -noupdate -group if_stage -group fetch_fifo /core_tb/dut/if_stage_i/fet add wave -noupdate -group if_stage /core_tb/dut/if_stage_i/* add wave -noupdate -group id_stage -group decoder /core_tb/dut/id_stage_i/decoder_i/* +add wave -noupdate -group id_stage -group compressed_decoder /core_tb/dut/id_stage_i/compressed_decoder_i/* +add wave -noupdate -group id_stage -group instr_realigner /core_tb/dut/id_stage_i/instr_realigner_i/* add wave -noupdate -group id_stage /core_tb/dut/id_stage_i/* add wave -noupdate -group issue_stage -group scoreboard /core_tb/dut/issue_stage_i/scoreboard_i/*