From 1fa1e8c109d477e7405edbfd99e2b483ee4c9963 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Tue, 9 May 2017 10:58:03 +0200 Subject: [PATCH 01/43] Add branch decode logic in decoder --- src/ariane.sv | 2 +- src/decoder.sv | 45 +++- src/ex_stage.sv | 8 + src/id_stage.sv | 15 +- tb/wave/wave_core.do | 486 ++----------------------------------------- 5 files changed, 70 insertions(+), 486 deletions(-) diff --git a/src/ariane.sv b/src/ariane.sv index fd1ce73a9..c4f0fab93 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -210,7 +210,7 @@ module ariane .set_pc_o ( set_pc_pcgen_if ), .is_branch_o ( is_branch_o ), .boot_addr_i ( boot_addr_i ), - .epc_i ( epc_i ), + .epc_i ( epc_commit_pcgen ), .trap_vector_base_i ( trap_vector_base_commit_pcgen ), .ex_i ( ex_commit ), .* diff --git a/src/decoder.sv b/src/decoder.sv index f36736972..e56f4c9a1 100644 --- a/src/decoder.sv +++ b/src/decoder.sv @@ -11,13 +11,14 @@ import ariane_pkg::*; module decoder ( - input logic clk_i, // Clock - input logic rst_ni, // Asynchronous reset active low - input logic [63:0] pc_i, // PC from IF - input logic is_compressed_i, // is a compressed instruction - input logic [31:0] instruction_i, // instruction from IF - input exception ex_i, // if an exception occured in if - output scoreboard_entry instruction_o // scoreboard entry to scoreboard + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic [63:0] pc_i, // PC from IF + input logic is_compressed_i, // is a compressed instruction + input logic [31:0] instruction_i, // instruction from IF + input exception ex_i, // if an exception occured in if + output scoreboard_entry instruction_o, // scoreboard entry to scoreboard + output logic is_control_flow_instr_o // this instruction will change the control flow ); logic illegal_instr; instruction instr; @@ -45,6 +46,7 @@ module decoder ( always_comb begin : decoder imm_select = NOIMM; + is_control_flow_instr_o = 1'b0; illegal_instr = 1'b0; 
instruction_o.pc = pc_i; instruction_o.fu = NONE; @@ -294,17 +296,40 @@ module decoder ( OPCODE_BRANCH: begin // TODO: Implement - imm_select = BIMM; + imm_select = BIMM; + instruction_o.fu = ALU; + is_control_flow_instr_o = 1'b1; + + case (instr.stype.funct3) + 3'b000: instruction_o.op = EQ; + 3'b001: instruction_o.op = NE; + 3'b100: instruction_o.op = LTS; + 3'b101: instruction_o.op = GES; + 3'b110: instruction_o.op = LTU; + 3'b111: instruction_o.op = GEU; + default: begin + is_control_flow_instr_o = 1'b0; + illegal_instr = 1'b1; + end + endcase end OPCODE_JALR: begin // TODO: Implement - imm_select = UIMM; + instruction_o.fu = ALU; + imm_select = UIMM; + instruction_o.use_pc = 1'b1; + instruction_o.rd = instr.itype.rd; + is_control_flow_instr_o = 1'b1; end OPCODE_JAL: begin // TODO: Implement - imm_select = JIMM; + instruction_o.fu = ALU; + imm_select = JIMM; + instruction_o.use_pc = 1'b1; + instruction_o.rd = instr.utype.rd; + is_control_flow_instr_o = 1'b1; end OPCODE_AUIPC: begin diff --git a/src/ex_stage.sv b/src/ex_stage.sv index 0a40f0d4f..f70e9a180 100644 --- a/src/ex_stage.sv +++ b/src/ex_stage.sv @@ -107,10 +107,17 @@ module ex_stage #( .is_equal_result_o ( ), .* ); + // -------------------- + // Control Flow Change + // ------------------- + // use ALU exception signal for storing instruction fetch exceptions if + // the target address is not aligned to a 4 byte boundary + // ---------------- // Multiplication // ---------------- // TODO + // ---------------- // Load-Store Unit // ---------------- @@ -127,4 +134,5 @@ module ex_stage #( .* ); + endmodule \ No newline at end of file diff --git a/src/id_stage.sv b/src/id_stage.sv index 9fb3adaec..58408a52d 100644 --- a/src/id_stage.sv +++ b/src/id_stage.sv @@ -92,13 +92,14 @@ module id_stage #( assign ready_o = ~full; decoder decoder_i ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .pc_i ( pc_if_i ), - .is_compressed_i ( is_compressed_i ), - .instruction_i ( instruction_i ), - .ex_i ( ex_if_i ), - 
.instruction_o ( decoded_instr_dc_sb ) + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .pc_i ( pc_if_i ), + .is_compressed_i ( is_compressed_i ), + .instruction_i ( instruction_i ), + .ex_i ( ex_if_i ), + .instruction_o ( decoded_instr_dc_sb ), + .is_control_flow_instr_o ( ) ); scoreboard #( diff --git a/tb/wave/wave_core.do b/tb/wave/wave_core.do index 948cd5a08..f916e25f9 100644 --- a/tb/wave/wave_core.do +++ b/tb/wave/wave_core.do @@ -1,14 +1,5 @@ -onerror {resume} -quietly WaveActivateNextPane {} 0 -add wave -noupdate -group instr_if /core_tb/instr_if/clk -add wave -noupdate -group instr_if /core_tb/instr_if/address -add wave -noupdate -group instr_if /core_tb/instr_if/data_wdata -add wave -noupdate -group instr_if /core_tb/instr_if/data_req -add wave -noupdate -group instr_if /core_tb/instr_if/data_gnt -add wave -noupdate -group instr_if /core_tb/instr_if/data_rvalid -add wave -noupdate -group instr_if /core_tb/instr_if/data_rdata -add wave -noupdate -group instr_if /core_tb/instr_if/data_we -add wave -noupdate -group instr_if /core_tb/instr_if/data_be +add wave -noupdate -group instr_if /core_tb/instr_if/* + add wave -noupdate -group Core /core_tb/dut/clk_i add wave -noupdate -group Core /core_tb/dut/clock_en_i add wave -noupdate -group Core /core_tb/dut/test_en_i @@ -23,461 +14,20 @@ add wave -noupdate -group Core /core_tb/dut/irq_id_i add wave -noupdate -group Core /core_tb/dut/irq_ack_o add wave -noupdate -group Core /core_tb/dut/irq_sec_i add wave -noupdate -group Core /core_tb/dut/sec_lvl_o -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/clk -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/rst_n -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/clear_i -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group 
fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/in_addr_i -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/in_rdata_i -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/in_valid_i -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/in_ready_o -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/out_addr_o -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/out_rdata_o -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/out_valid_o -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/out_ready_i -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/out_valid_stored_o -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/addr_n -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/addr_int -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/addr_Q -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/rdata_n -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/rdata_int -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo 
/core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/rdata_Q -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/valid_n -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/valid_int -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/valid_Q -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/addr_next -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/rdata -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/rdata_unaligned -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/valid -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/valid_unaligned -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/aligned_is_compressed -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/unaligned_is_compressed -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/aligned_is_compressed_st -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/unaligned_is_compressed_st -add wave -noupdate -expand -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/j -add wave -noupdate -expand -group if_stage -group prefetch_buffer 
/core_tb/dut/if_stage_i/prefetch_buffer_i/clk -add wave -noupdate -expand -group if_stage -group prefetch_buffer /core_tb/dut/if_stage_i/prefetch_buffer_i/rst_n -add wave -noupdate -expand -group if_stage -group prefetch_buffer /core_tb/dut/if_stage_i/prefetch_buffer_i/req_i -add wave -noupdate -expand -group if_stage -group prefetch_buffer /core_tb/dut/if_stage_i/prefetch_buffer_i/branch_i -add wave -noupdate -expand -group if_stage -group prefetch_buffer /core_tb/dut/if_stage_i/prefetch_buffer_i/addr_i -add wave -noupdate -expand -group if_stage -group prefetch_buffer /core_tb/dut/if_stage_i/prefetch_buffer_i/ready_i -add wave -noupdate -expand -group if_stage -group prefetch_buffer /core_tb/dut/if_stage_i/prefetch_buffer_i/valid_o -add wave -noupdate -expand -group if_stage -group prefetch_buffer /core_tb/dut/if_stage_i/prefetch_buffer_i/addr_o -add wave -noupdate -expand -group if_stage -group prefetch_buffer /core_tb/dut/if_stage_i/prefetch_buffer_i/rdata_o -add wave -noupdate -expand -group if_stage -group prefetch_buffer /core_tb/dut/if_stage_i/prefetch_buffer_i/instr_req_o -add wave -noupdate -expand -group if_stage -group prefetch_buffer /core_tb/dut/if_stage_i/prefetch_buffer_i/instr_gnt_i -add wave -noupdate -expand -group if_stage -group prefetch_buffer /core_tb/dut/if_stage_i/prefetch_buffer_i/instr_addr_o -add wave -noupdate -expand -group if_stage -group prefetch_buffer /core_tb/dut/if_stage_i/prefetch_buffer_i/instr_rdata_i -add wave -noupdate -expand -group if_stage -group prefetch_buffer /core_tb/dut/if_stage_i/prefetch_buffer_i/instr_rvalid_i -add wave -noupdate -expand -group if_stage -group prefetch_buffer /core_tb/dut/if_stage_i/prefetch_buffer_i/busy_o -add wave -noupdate -expand -group if_stage -group prefetch_buffer /core_tb/dut/if_stage_i/prefetch_buffer_i/CS -add wave -noupdate -expand -group if_stage -group prefetch_buffer /core_tb/dut/if_stage_i/prefetch_buffer_i/NS -add wave -noupdate -expand -group if_stage -group prefetch_buffer 
/core_tb/dut/if_stage_i/prefetch_buffer_i/instr_addr_q -add wave -noupdate -expand -group if_stage -group prefetch_buffer /core_tb/dut/if_stage_i/prefetch_buffer_i/fetch_addr -add wave -noupdate -expand -group if_stage -group prefetch_buffer /core_tb/dut/if_stage_i/prefetch_buffer_i/addr_valid -add wave -noupdate -expand -group if_stage -group prefetch_buffer /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_valid -add wave -noupdate -expand -group if_stage -group prefetch_buffer /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_ready -add wave -noupdate -expand -group if_stage -group prefetch_buffer /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_clear -add wave -noupdate -expand -group if_stage -group prefetch_buffer /core_tb/dut/if_stage_i/prefetch_buffer_i/valid_stored -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/clk_i -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/rst_ni -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/flush_i -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/req_i -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/if_busy_o -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/id_ready_i -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/halt_if_i -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/instr_req_o -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/instr_addr_o -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/instr_gnt_i -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/instr_rvalid_i -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/instr_rdata_i -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/instr_valid_id_o -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/instr_rdata_id_o -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/is_compressed_id_o -add wave -noupdate 
-expand -group if_stage /core_tb/dut/if_stage_i/illegal_c_insn_id_o -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/pc_if_o -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/pc_id_o -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/ex_o -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/boot_addr_i -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/if_ready -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/if_valid -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/branch_req -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/valid -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/prefetch_busy -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/fetch_addr_n -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/fetch_valid -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/fetch_ready -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/fetch_rdata -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/fetch_addr -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/offset_fsm_cs -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/offset_fsm_ns -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/instr_decompressed -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/illegal_c_insn -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/instr_compressed_int -add wave -noupdate -expand -group if_stage /core_tb/dut/if_stage_i/clear_instr_valid_i -add wave -noupdate -expand -group id_stage /core_tb/dut/id_stage_i/clk_i -add wave -noupdate -expand -group id_stage /core_tb/dut/id_stage_i/rst_ni -add wave -noupdate -expand -group id_stage /core_tb/dut/id_stage_i/test_en_i -add wave -noupdate -expand -group id_stage /core_tb/dut/id_stage_i/flush_i -add wave -noupdate 
-expand -group id_stage /core_tb/dut/id_stage_i/instruction_i -add wave -noupdate -expand -group id_stage /core_tb/dut/id_stage_i/instruction_valid_i -add wave -noupdate -expand -group id_stage /core_tb/dut/id_stage_i/pc_if_i -add wave -noupdate -expand -group id_stage /core_tb/dut/id_stage_i/ready_o -add wave -noupdate -expand -group id_stage /core_tb/dut/id_stage_i/operator_o -add wave -noupdate -expand -group id_stage /core_tb/dut/id_stage_i/operand_a_o -add wave -noupdate -expand -group id_stage /core_tb/dut/id_stage_i/operand_b_o -add wave -noupdate -expand -group id_stage /core_tb/dut/id_stage_i/trans_id_o -add wave -noupdate -expand -group id_stage /core_tb/dut/id_stage_i/alu_ready_i -add wave -noupdate -expand -group id_stage /core_tb/dut/id_stage_i/alu_valid_o -add wave -noupdate -expand -group id_stage /core_tb/dut/id_stage_i/lsu_ready_i -add wave -noupdate -expand -group id_stage /core_tb/dut/id_stage_i/lsu_valid_o -add wave -noupdate -expand -group id_stage /core_tb/dut/id_stage_i/mult_ready_i -add wave -noupdate -expand -group id_stage /core_tb/dut/id_stage_i/mult_valid_o -add wave -noupdate -expand -group id_stage /core_tb/dut/id_stage_i/trans_id_i -add wave -noupdate -expand -group id_stage /core_tb/dut/id_stage_i/wdata_i -add wave -noupdate -expand -group id_stage /core_tb/dut/id_stage_i/wb_valid_i -add wave -noupdate -expand -group id_stage /core_tb/dut/id_stage_i/waddr_a_i -add wave -noupdate -expand -group id_stage /core_tb/dut/id_stage_i/wdata_a_i -add wave -noupdate -expand -group id_stage /core_tb/dut/id_stage_i/we_a_i -add wave -noupdate -expand -group id_stage /core_tb/dut/id_stage_i/commit_instr_o -add wave -noupdate -expand -group id_stage /core_tb/dut/id_stage_i/commit_ack_i -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/clk_i -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/rst_ni -add wave -noupdate -expand -group 
id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/full_o -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/flush_i -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/rd_clobber_o -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/rs1_i -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/rs1_o -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/rs1_valid_o -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/rs2_i -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/rs2_o -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/rs2_valid_o -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/commit_instr_o -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/commit_ack_i -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/decoded_instr_i -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/decoded_instr_valid_i -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/issue_instr_o -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/issue_instr_valid_o -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/issue_ack_i -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/trans_id_i -add wave 
-noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/wdata_i -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/ex_i -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/wb_valid_i -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/mem_q -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/mem_n -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/issue_pointer_n -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/issue_pointer_q -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/commit_pointer_n -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/commit_pointer_q -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/top_pointer_n -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/top_pointer_q -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/top_pointer_qq -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/pointer_overflow -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/empty -add wave -noupdate -expand -group id_stage -expand -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/reset_condition -add wave -noupdate -expand -group id_stage -group decoder /core_tb/dut/id_stage_i/decoder_i/clk_i -add wave -noupdate -expand -group id_stage -group decoder /core_tb/dut/id_stage_i/decoder_i/rst_ni -add wave -noupdate 
-expand -group id_stage -group decoder /core_tb/dut/id_stage_i/decoder_i/pc_i -add wave -noupdate -expand -group id_stage -group decoder /core_tb/dut/id_stage_i/decoder_i/instruction_i -add wave -noupdate -expand -group id_stage -group decoder /core_tb/dut/id_stage_i/decoder_i/ex_i -add wave -noupdate -expand -group id_stage -group decoder /core_tb/dut/id_stage_i/decoder_i/instruction_o -add wave -noupdate -expand -group id_stage -group decoder /core_tb/dut/id_stage_i/decoder_i/instr -add wave -noupdate -expand -group id_stage -group decoder /core_tb/dut/id_stage_i/decoder_i/imm_select -add wave -noupdate -expand -group id_stage -group decoder /core_tb/dut/id_stage_i/decoder_i/imm_i_type -add wave -noupdate -expand -group id_stage -group decoder /core_tb/dut/id_stage_i/decoder_i/imm_iz_type -add wave -noupdate -expand -group id_stage -group decoder /core_tb/dut/id_stage_i/decoder_i/imm_s_type -add wave -noupdate -expand -group id_stage -group decoder /core_tb/dut/id_stage_i/decoder_i/imm_sb_type -add wave -noupdate -expand -group id_stage -group decoder /core_tb/dut/id_stage_i/decoder_i/imm_u_type -add wave -noupdate -expand -group id_stage -group decoder /core_tb/dut/id_stage_i/decoder_i/imm_uj_type -add wave -noupdate -expand -group id_stage -group decoder /core_tb/dut/id_stage_i/decoder_i/imm_z_type -add wave -noupdate -expand -group id_stage -group decoder /core_tb/dut/id_stage_i/decoder_i/imm_s2_type -add wave -noupdate -expand -group id_stage -group decoder /core_tb/dut/id_stage_i/decoder_i/imm_bi_type -add wave -noupdate -expand -group id_stage -group decoder /core_tb/dut/id_stage_i/decoder_i/imm_s3_type -add wave -noupdate -expand -group id_stage -group decoder /core_tb/dut/id_stage_i/decoder_i/imm_vs_type -add wave -noupdate -expand -group id_stage -group decoder /core_tb/dut/id_stage_i/decoder_i/imm_vu_type -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/clk_i -add wave -noupdate -expand 
-group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/rst_ni -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/test_en_i -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/flush_i -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/issue_instr_i -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/issue_instr_valid_i -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/issue_ack_o -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/rs1_o -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/rs1_i -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/rs1_valid_i -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/rs2_o -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/rs2_i -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/rs2_valid_i -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/rd_clobber_i -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/operator_o -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/operand_a_o -add wave -noupdate -expand -group id_stage -group issue_read_operands 
/core_tb/dut/id_stage_i/issue_read_operands_i/operand_b_o -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/trans_id_o -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/alu_ready_i -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/alu_valid_o -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/lsu_ready_i -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/lsu_valid_o -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/mult_ready_i -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/mult_valid_o -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/waddr_a_i -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/wdata_a_i -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/we_a_i -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/stall -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/fu_busy -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/operand_a_regfile -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/operand_b_regfile -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/operand_a_n -add wave 
-noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/operand_a_q -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/operand_b_n -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/operand_b_q -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/alu_valid_n -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/alu_valid_q -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/trans_id_n -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/trans_id_q -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/operator_n -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/operator_q -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/forward_rs1 -add wave -noupdate -expand -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/forward_rs2 -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/operator_i -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/operand_a_i -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/operand_b_i -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/adder_result_o -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/adder_result_ext_o -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/result_o -add wave -noupdate -group ex_stage -group ALU 
/core_tb/dut/ex_stage_i/alu_i/comparison_result_o -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/is_equal_result_o -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/operand_a_rev -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/operand_a_rev32 -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/operand_b_neg -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/adder_op_b_negate -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/adder_in_a -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/adder_in_b -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/adder_result -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/shift_left -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/shift_arithmetic -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/shift_amt -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/shift_op_a -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/shift_op_a32 -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/shift_result -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/shift_result32 -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/shift_right_result -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/shift_right_result32 -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/shift_left_result -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/shift_left_result32 -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/shift_op_a_64 -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/shift_op_a_32 -add wave -noupdate 
-group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/is_equal -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/is_greater_equal -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/cmp_signed -add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/cmp_result -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/clk_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/rst_ni -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/flush_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/operator_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/operand_a_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/operand_b_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/imm_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/lsu_ready_o -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/lsu_valid_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/trans_id_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/lsu_trans_id_o -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/lsu_result_o -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/lsu_valid_o -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/commit_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/enable_translation_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/fetch_req_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/fetch_gnt_o -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/fetch_valid_o -add wave -noupdate -group ex_stage -group lsu 
/core_tb/dut/ex_stage_i/lsu_i/fetch_err_o -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/fetch_vaddr_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/fetch_rdata_o -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/priv_lvl_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/flag_pum_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/flag_mxr_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/pd_ppn_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/asid_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/flush_tlb_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/instr_if_address_o -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/instr_if_data_req_o -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/instr_if_data_be_o -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/instr_if_data_gnt_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/instr_if_data_rvalid_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/instr_if_data_rdata_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/data_if_address_o -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/data_if_data_wdata_o -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/data_if_data_req_o -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/data_if_data_we_o -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/data_if_data_be_o -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/data_if_data_gnt_i -add wave -noupdate -group ex_stage -group lsu 
/core_tb/dut/ex_stage_i/lsu_i/data_if_data_rvalid_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/data_if_data_rdata_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/lsu_exception_o -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/data_misaligned -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/CS -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/NS -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/vaddr_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/stall -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/get_from_register -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/vaddr -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/data -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/be -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/operator -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/trans_id -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/vaddr_q -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/data_q -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/operator_q -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/trans_id_q -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/st_buffer_paddr -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/st_buffer_data -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/st_buffer_be -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/st_buffer_valid -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/st_ready -add wave -noupdate 
-group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/st_valid -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/translation_req -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/translation_valid -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/paddr_o -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/address_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/data_wdata_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/data_req_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/data_we_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/data_be_i -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/data_gnt_o -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/data_rvalid_o -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/data_rdata_o -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/rdata -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/address_match -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/op -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/rdata_d_ext -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/rdata_w_ext -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/rdata_h_ext -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/rdata_b_ext -add wave -noupdate -group ex_stage -group csr_buffer /core_tb/dut/ex_stage_i/csr_buffer_i/clk_i -add wave -noupdate -group ex_stage -group csr_buffer /core_tb/dut/ex_stage_i/csr_buffer_i/rst_ni -add wave -noupdate -group ex_stage -group csr_buffer /core_tb/dut/ex_stage_i/csr_buffer_i/flush_i -add wave -noupdate -group ex_stage -group 
csr_buffer /core_tb/dut/ex_stage_i/csr_buffer_i/operator_i -add wave -noupdate -group ex_stage -group csr_buffer /core_tb/dut/ex_stage_i/csr_buffer_i/operand_a_i -add wave -noupdate -group ex_stage -group csr_buffer /core_tb/dut/ex_stage_i/csr_buffer_i/operand_b_i -add wave -noupdate -group ex_stage -group csr_buffer /core_tb/dut/ex_stage_i/csr_buffer_i/trans_id_i -add wave -noupdate -group ex_stage -group csr_buffer /core_tb/dut/ex_stage_i/csr_buffer_i/csr_ready_o -add wave -noupdate -group ex_stage -group csr_buffer /core_tb/dut/ex_stage_i/csr_buffer_i/csr_valid_i -add wave -noupdate -group ex_stage -group csr_buffer /core_tb/dut/ex_stage_i/csr_buffer_i/csr_trans_id_o -add wave -noupdate -group ex_stage -group csr_buffer /core_tb/dut/ex_stage_i/csr_buffer_i/csr_result_o -add wave -noupdate -group ex_stage -group csr_buffer /core_tb/dut/ex_stage_i/csr_buffer_i/csr_valid_o -add wave -noupdate -group ex_stage -group csr_buffer /core_tb/dut/ex_stage_i/csr_buffer_i/commit_i -add wave -noupdate -group ex_stage -group csr_buffer /core_tb/dut/ex_stage_i/csr_buffer_i/csr_addr_o -add wave -noupdate -group ex_stage -group csr_buffer /core_tb/dut/ex_stage_i/csr_buffer_i/csr_reg_n -add wave -noupdate -group ex_stage -group csr_buffer /core_tb/dut/ex_stage_i/csr_buffer_i/csr_reg_q -add wave -noupdate -group ex_stage /core_tb/dut/ex_stage_i/clk_i -add wave -noupdate -group ex_stage /core_tb/dut/ex_stage_i/rst_ni -add wave -noupdate -group ex_stage /core_tb/dut/ex_stage_i/operator_i -add wave -noupdate -group ex_stage /core_tb/dut/ex_stage_i/operand_a_i -add wave -noupdate -group ex_stage /core_tb/dut/ex_stage_i/operand_b_i -add wave -noupdate -group ex_stage /core_tb/dut/ex_stage_i/trans_id_i -add wave -noupdate -group ex_stage /core_tb/dut/ex_stage_i/alu_ready_o -add wave -noupdate -group ex_stage /core_tb/dut/ex_stage_i/alu_valid_i -add wave -noupdate -group ex_stage /core_tb/dut/ex_stage_i/alu_valid_o -add wave -noupdate -group ex_stage /core_tb/dut/ex_stage_i/alu_result_o 
-add wave -noupdate -group ex_stage /core_tb/dut/ex_stage_i/alu_trans_id_o -add wave -noupdate -group ex_stage /core_tb/dut/ex_stage_i/comparison_result_o -add wave -noupdate -group ex_stage /core_tb/dut/ex_stage_i/lsu_ready_o -add wave -noupdate -group ex_stage /core_tb/dut/ex_stage_i/lsu_valid_i -add wave -noupdate -group ex_stage /core_tb/dut/ex_stage_i/mult_ready_o -add wave -noupdate -group ex_stage /core_tb/dut/ex_stage_i/mult_valid_i -add wave -noupdate -group commit_stage /core_tb/dut/commit_stage_i/clk_i -add wave -noupdate -group commit_stage /core_tb/dut/commit_stage_i/rst_ni -add wave -noupdate -group commit_stage /core_tb/dut/commit_stage_i/exception_o -add wave -noupdate -group commit_stage /core_tb/dut/commit_stage_i/commit_instr_i -add wave -noupdate -group commit_stage /core_tb/dut/commit_stage_i/commit_ack_o -add wave -noupdate -group commit_stage /core_tb/dut/commit_stage_i/waddr_a_o -add wave -noupdate -group commit_stage /core_tb/dut/commit_stage_i/wdata_a_o -add wave -noupdate -group commit_stage /core_tb/dut/commit_stage_i/we_a_o -add wave -noupdate -group commit_stage /core_tb/dut/commit_stage_i/pc_o -add wave -noupdate -group commit_stage /core_tb/dut/commit_stage_i/csr_op_o -add wave -noupdate -group commit_stage /core_tb/dut/commit_stage_i/csr_wdata_o -add wave -noupdate -group commit_stage /core_tb/dut/commit_stage_i/csr_rdata_i -add wave -noupdate -group commit_stage /core_tb/dut/commit_stage_i/csr_exception_i -add wave -noupdate -group commit_stage /core_tb/dut/commit_stage_i/commit_lsu_o -add wave -noupdate -group commit_stage /core_tb/dut/commit_stage_i/commit_csr_o -add wave -noupdate -group commit_stage /core_tb/dut/commit_stage_i/irq_enable_i -add wave -noupdate -group commit_stage /core_tb/dut/commit_stage_i/exception -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/clk_i -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/rst_ni -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/flush_o -add 
wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/core_id_i -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/cluster_id_i -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/boot_addr_i -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/ex_i -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/csr_op_i -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/csr_addr_i -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/csr_wdata_i -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/csr_rdata_o -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/pc_i -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/csr_exception_o -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/irq_enable_o -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/epc_o -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/trap_vector_base_o -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/priv_lvl_o -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/enable_translation_o -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/flag_pum_o -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/flag_mxr_o -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/pd_ppn_o -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/asid_o -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/csr_addr -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/read_access_exception -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/update_access_exception -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/csr_we -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/csr_wdata -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/csr_rdata -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/priv_lvl_n -add wave -noupdate -group csr_file 
/core_tb/dut/csr_regfile_i/priv_lvl_q -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/prev_priv_lvl_n -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/prev_priv_lvl_q -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/mstatus_q -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/mstatus_n -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/mtvec_q -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/mtvec_n -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/medeleg_q -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/medeleg_n -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/mideleg_q -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/mideleg_n -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/mip_q -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/mip_n -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/mie_q -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/mie_n -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/mscratch_q -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/mscratch_n -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/mepc_q -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/mepc_n -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/mcause_q -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/mcause_n -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/mtval_q -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/mtval_n -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/stvec_q -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/stvec_n -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/sscratch_q -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/sscratch_n -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/sepc_q -add wave 
-noupdate -group csr_file /core_tb/dut/csr_regfile_i/sepc_n -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/scause_q -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/scause_n -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/stval_q -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/stval_n -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/satp_q -add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/satp_n + +add wave -noupdate -group pcgen_stage -group btb /core_tb/dut/pcgen_i/btb_i/* +add wave -noupdate -group pcgen_stage /core_tb/dut/pcgen_i/* +add wave -noupdate -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/* +add wave -noupdate -group if_stage -group prefetch_buffer /core_tb/dut/if_stage_i/prefetch_buffer_i/* +add wave -noupdate -group if_stage /core_tb/dut/if_stage_i/* +add wave -noupdate -group id_stage -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/* +add wave -noupdate -group id_stage -group decoder /core_tb/dut/id_stage_i/decoder_i/* +add wave -noupdate -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/* +add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/* +add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/* +add wave -noupdate -group ex_stage -expand -group csr_buffer /core_tb/dut/ex_stage_i/csr_buffer_i/* +add wave -noupdate -group ex_stage /core_tb/dut/ex_stage_i/* +add wave -noupdate -group commit_stage /core_tb/dut/commit_stage_i/* +add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/* + TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 1} {207 ns} 0} {{Cursor 2} {278 ns} 1} -quietly wave cursor active 1 -configure wave -namecolwidth 241 -configure wave -valuecolwidth 258 -configure wave -justifyvalue left -configure wave -signalnamewidth 1 -configure wave -snapdistance 10 -configure wave -datasetprefix 0 
-configure wave -rowmargin 4 -configure wave -childrowmargin 2 -configure wave -gridoffset 0 -configure wave -gridperiod 1 -configure wave -griddelta 40 -configure wave -timeline 0 -configure wave -timelineunits ns -update -WaveRestoreZoom {0 ns} {1580 ns} From 5a3c0e3fe2d113abe939acd4e64c10afbf8cd16a Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Tue, 9 May 2017 16:09:05 +0200 Subject: [PATCH 02/43] Implement branching logic and address calc --- src/alu.sv | 5 +++- src/branch_engine.sv | 66 ++++++++++++++++++++++++++++++++++++++++++++ src/ex_stage.sv | 3 +- 3 files changed, 71 insertions(+), 3 deletions(-) create mode 100644 src/branch_engine.sv diff --git a/src/alu.sv b/src/alu.sv index af23f5fb9..c22f048e1 100644 --- a/src/alu.sv +++ b/src/alu.sv @@ -181,7 +181,10 @@ module alu always_comb begin - cmp_result = is_equal; + // this is used only for branches + // as we are also using it for unconditional branches + // set it to 1 as a default + cmp_result = 1'b1; unique case (operator_i) EQ: cmp_result = is_equal; diff --git a/src/branch_engine.sv b/src/branch_engine.sv new file mode 100644 index 000000000..b7847c5f6 --- /dev/null +++ b/src/branch_engine.sv @@ -0,0 +1,66 @@ +// Author: Florian Zaruba, ETH Zurich +// Date: 09.05.2017 +// Description: Branch target calculation and comparison +// +// +// Copyright (C) 2017 ETH Zurich, University of Bologna +// All rights reserved. +// +// This code is under development and not yet released to the public. +// Until it is released, the code is under the copyright of ETH Zurich and +// the University of Bologna, and may contain confidential and/or unpublished +// work. Any reuse/redistribution is strictly forbidden without written +// permission from ETH Zurich. +// +// Bug fixes and contributions will eventually be released under the +// SolderPad open hardware license in the context of the PULP platform +// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the +// University of Bologna. 
+// +import ariane_pkg::*; + +module branch_engine ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + + input logic [63:0] operand_a, + input logic [63:0] operand_b, + input logic valid_i, + + input logic comparison_result_i, // result of comparison + input logic [63:0] predict_address_i, // this is the address we predicted + output mispredict mispredict_o, // this is the actual address we are targeting + output exception branch_ex_o // branch exception out +); + logic [63:0] target_address; + + always_comb begin : target_address_calc + target_address = 64'b0; + mispredict_o.pc = 64'b0; + mispredict_o.target_address = 64'b0; + mispredict_o.is_taken = 1'b0; + mispredict_o.valid = 1'b0; + + if (valid_i) begin + // calculate target address simple 64 bit addition + target_address = $signed(operand_a) + $signed(operand_b); + // we mis-predicted e.g.: the predicted address is unequal to the actual address + if (target_address != predict_address_i && target_address[1:0] == 2'b0) begin + // write target address + mispredict_o.target_address = target_address; + mispredict_o.is_taken = comparison_result_i; + mispredict_o.valid = 1'b1; + end + end + end + // use ALU exception signal for storing instruction fetch exceptions if + // the target address is not aligned to a 4 byte boundary + always_comb begin : exception_handling + branch_ex_o.cause = INSTR_ADDR_MISALIGNED; + branch_ex_o.tval = 64'b0; // TODO + branch_ex_o.valid = 1'b0; + + if (target_address[1:0] != 2'b0) + branch_ex_o.valid = 1'b1; + end +endmodule \ No newline at end of file diff --git a/src/ex_stage.sv b/src/ex_stage.sv index f70e9a180..b69707fb3 100644 --- a/src/ex_stage.sv +++ b/src/ex_stage.sv @@ -110,8 +110,7 @@ module ex_stage #( // -------------------- // Control Flow Change // ------------------- - // use ALU exception signal for storing instruction fetch exceptions if - // the target address is not aligned to a 4 byte boundary + // ---------------- // 
Multiplication From a07a8f0a0057ac9560032947aa2a4188dd43cb2d Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Tue, 9 May 2017 17:16:11 +0200 Subject: [PATCH 03/43] Integrate branch prediction, fix #2 --- include/ariane_pkg.svh | 11 +++++---- src/ariane.sv | 52 +++++++++++++++++++++++++++--------------- src/branch_engine.sv | 39 ++++++++++++++++--------------- src/btb.sv | 16 ++++++------- src/controller.sv | 10 ++++---- src/ex_stage.sv | 39 ++++++++++++++++++++++--------- src/id_stage.sv | 5 ++++ src/pcgen.sv | 26 ++++++++++----------- 8 files changed, 118 insertions(+), 80 deletions(-) diff --git a/include/ariane_pkg.svh b/include/ariane_pkg.svh index 44ef6442b..5df63e111 100644 --- a/include/ariane_pkg.svh +++ b/include/ariane_pkg.svh @@ -40,11 +40,12 @@ package ariane_pkg; // miss-predict typedef struct packed { - logic [63:0] pc; - logic [63:0] target_address; - logic is_taken; - logic valid; // is miss-predict - } mispredict; + logic [63:0] pc; // pc of predict or mis-predict + logic [63:0] target_address; // target address at which to jump, or not + logic is_mispredict; // set if this was a mis-predict + logic is_taken; // branch is taken + logic valid; // prediction with all its values is valid + } branchpredict; typedef enum logic[3:0] { NONE, LSU, ALU, MULT, CSR diff --git a/src/ariane.sv b/src/ariane.sv index c4f0fab93..afd763f20 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -93,7 +93,7 @@ module ariane // -------------- // PCGEN <-> EX // -------------- - mispredict mispredict_ex_pcgen; + branchpredict branchpredict_ex_pcgen; // -------------- // PCGEN <-> CSR // -------------- @@ -121,12 +121,18 @@ module ariane fu_op operator_id_ex; logic [63:0] operand_a_id_ex; logic [63:0] operand_b_id_ex; + logic [63:0] operand_c_id_ex; // ALU logic alu_ready_ex_id; logic alu_valid_id_ex; logic [TRANS_ID_BITS-1:0] alu_trans_id_ex_id; logic alu_valid_ex_id; logic [63:0] alu_result_ex_id; + exception alu_exception_ex_id; + // Branches and Jumps + logic 
branch_valid_id_ex; + logic [63:0] predict_address_id_ex; + logic predict_taken_id_ex; // LSU logic [TRANS_ID_BITS-1:0] lsu_trans_id_ex_id; logic lsu_valid_id_ex; @@ -205,7 +211,7 @@ module ariane pcgen pcgen_i ( .flush_i ( flush ), .pc_if_i ( pc_if ), - .mispredict_i ( mispredict_ex_pcgen ), + .branchpredict_i ( branchpredict_ex_pcgen ), .pc_if_o ( pc_pcgen_if ), .set_pc_o ( set_pc_pcgen_if ), .is_branch_o ( is_branch_o ), @@ -262,25 +268,30 @@ module ariane .operator_o ( operator_id_ex ), .operand_a_o ( operand_a_id_ex ), .operand_b_o ( operand_b_id_ex ), + .operand_c_o ( operand_c_id_ex ), .imm_o ( imm_id_ex ), .trans_id_o ( trans_id_id_ex ), - + // ALU .alu_ready_i ( alu_ready_ex_id ), .alu_valid_o ( alu_valid_id_ex ), - + // Branches and Jumps + .branch_valid_o ( branch_valid_id_ex ), + .predict_address_o ( predict_address_id_ex ), + .predict_taken_o ( predict_taken_id_ex ), + // LSU .lsu_ready_i ( lsu_ready_ex_id ), .lsu_valid_o ( lsu_valid_id_ex ), - + // Multiplier .mult_ready_i ( mult_ready_ex_id ), .mult_valid_o ( mult_valid_id_ex ), - + // CSR .csr_ready_i ( csr_ready_ex_id ), .csr_valid_o ( csr_valid_id_ex ), - .trans_id_i ( {alu_trans_id_ex_id, lsu_trans_id_ex_id , csr_trans_id_ex_id} ), - .wdata_i ( {alu_result_ex_id, lsu_result_ex_id, csr_result_ex_id} ), - .ex_ex_i ( {{$bits(exception){1'b0}}, lsu_exception_ex_id, {$bits(exception){1'b0}} } ), - .wb_valid_i ( {alu_valid_ex_id, lsu_valid_ex_id, csr_valid_ex_id} ), + .trans_id_i ( {alu_trans_id_ex_id, lsu_trans_id_ex_id, csr_trans_id_ex_id }), + .wdata_i ( {alu_result_ex_id, lsu_result_ex_id, csr_result_ex_id }), + .ex_ex_i ( {alu_exception_ex_id, lsu_exception_ex_id, {$bits(exception){1'b0}} }), + .wb_valid_i ( {alu_valid_ex_id, lsu_valid_ex_id, csr_valid_ex_id }), .waddr_a_i ( waddr_a_commit_id ), .wdata_a_i ( wdata_a_commit_id ), @@ -298,15 +309,21 @@ module ariane .operator_i ( operator_id_ex ), .operand_a_i ( operand_a_id_ex ), .operand_b_i ( operand_b_id_ex ), + .operand_c_i ( operand_c_id_ex ), 
.imm_i ( imm_id_ex ), .trans_id_i ( trans_id_id_ex ), - .comparison_result_o ( ), // ALU .alu_ready_o ( alu_ready_ex_id ), .alu_valid_i ( alu_valid_id_ex ), .alu_result_o ( alu_result_ex_id ), .alu_trans_id_o ( alu_trans_id_ex_id ), .alu_valid_o ( alu_valid_ex_id ), + .alu_exception_o ( alu_exception_ex_id ), + // Branches and Jumps + .branch_valid_i ( branch_valid_id_ex ), + .predict_address_i ( predict_address_id_ex ), + .predict_taken_i ( predict_taken_id_ex ), + .branchpredict_o ( branchpredict_ex_pcgen ), // LSU .lsu_ready_o ( lsu_ready_ex_id ), .lsu_valid_i ( lsu_valid_id_ex ), @@ -393,14 +410,13 @@ module ariane // Controller // ------------ logic flush_commit_i; - logic mispredict_i; - mispredict mispredict_o; + logic branchpredict_i; + controller i_controller ( - .clk_i (clk_i ), - .rst_ni (rst_ni ), - .flush_commit_i(flush_commit_i), - .mispredict_i (mispredict_i ), - .mispredict_o (mispredict_o ) + .clk_i (clk_i ), + .rst_ni (rst_ni ), + .flush_commit_i (flush_commit_i), + .branchpredict_i (branchpredict_i ) ); diff --git a/src/branch_engine.sv b/src/branch_engine.sv index b7847c5f6..b9dbe81f9 100644 --- a/src/branch_engine.sv +++ b/src/branch_engine.sv @@ -20,36 +20,37 @@ import ariane_pkg::*; module branch_engine ( - input logic clk_i, // Clock - input logic rst_ni, // Asynchronous reset active low + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low - input logic [63:0] operand_a, - input logic [63:0] operand_b, - input logic valid_i, + input logic [63:0] operand_a_i, + input logic [63:0] operand_b_i, + input logic valid_i, - input logic comparison_result_i, // result of comparison - input logic [63:0] predict_address_i, // this is the address we predicted - output mispredict mispredict_o, // this is the actual address we are targeting - output exception branch_ex_o // branch exception out + input logic comparison_result_i, // result of comparison + input logic [63:0] predict_address_i, // this is the address we 
predicted + output branchpredict branchpredict_o, // this is the actual address we are targeting + output exception branch_ex_o // branch exception out ); logic [63:0] target_address; always_comb begin : target_address_calc - target_address = 64'b0; - mispredict_o.pc = 64'b0; - mispredict_o.target_address = 64'b0; - mispredict_o.is_taken = 1'b0; - mispredict_o.valid = 1'b0; + target_address = 64'b0; + branchpredict_o.pc = 64'b0; + branchpredict_o.target_address = 64'b0; + branchpredict_o.is_taken = 1'b0; + branchpredict_o.valid = valid_i; + branchpredict_o.is_mispredict = 1'b0; if (valid_i) begin // calculate target address simple 64 bit addition - target_address = $signed(operand_a) + $signed(operand_b); + target_address = $signed(operand_a_i) + $signed(operand_b_i); + // write target address + branchpredict_o.target_address = target_address; + branchpredict_o.is_taken = comparison_result_i; // we mis-predicted e.g.: the predicted address is unequal to the actual address if (target_address != predict_address_i && target_address[1:0] == 2'b0) begin - // write target address - mispredict_o.target_address = target_address; - mispredict_o.is_taken = comparison_result_i; - mispredict_o.valid = 1'b1; + branchpredict_o.is_mispredict = 1'b1; end end end diff --git a/src/btb.sv b/src/btb.sv index 58b71b9cb..8df46cb06 100644 --- a/src/btb.sv +++ b/src/btb.sv @@ -28,7 +28,7 @@ module btb #( input logic flush_i, // flush the btb input logic [63:0] vpc_i, // virtual PC from IF stage - input mispredict mispredict_i, // a miss-predict happened -> update data structure + input branchpredict branchpredict_i, // a miss-predict happened -> update data structure output logic is_branch_o, // instruction at vpc_i is a branch output logic predict_taken_o, // the branch is taken @@ -51,7 +51,7 @@ module btb #( // get actual index positions // we ignore the 0th bit since all instructions are aligned on // a half word boundary - assign update_pc = mispredict_i.pc[$clog2(NR_ENTRIES) +
OFFSET - 1:OFFSET]; + assign update_pc = branchpredict_i.pc[$clog2(NR_ENTRIES) + OFFSET - 1:OFFSET]; assign index = vpc_i[$clog2(NR_ENTRIES) + OFFSET - 1:OFFSET]; // we combinatorially predict the branch and the target address @@ -60,29 +60,29 @@ module btb #( assign branch_target_address_o = btb_q[$unsigned(index)].target_address; // update on a miss-predict - always_comb begin : update_mispredict + always_comb begin : update_branchpredict btb_n = btb_q; saturation_counter = btb_q[$unsigned(update_pc)].saturation_counter; - if (mispredict_i.valid) begin + if (branchpredict_i.valid) begin btb_n[$unsigned(update_pc)].valid = 1'b1; // update saturation counter // first check if counter is already saturated in the positive regime e.g.: branch taken - if (saturation_counter == {BITS_SATURATION_COUNTER{1'b1}} && ~mispredict_i.is_taken) begin + if (saturation_counter == {BITS_SATURATION_COUNTER{1'b1}} && ~branchpredict_i.is_taken) begin // we can safely decrease it btb_n[$unsigned(update_pc)].saturation_counter = saturation_counter - 1; // then check if it saturated in the negative regime e.g.: branch not taken - end else if (saturation_counter == {BITS_SATURATION_COUNTER{1'b0}} && mispredict_i.is_taken) begin + end else if (saturation_counter == {BITS_SATURATION_COUNTER{1'b0}} && branchpredict_i.is_taken) begin // we can safely increase it btb_n[$unsigned(update_pc)].saturation_counter = saturation_counter + 1; end else begin // otherwise we are not in any boundaries and can decrease or increase it - if (mispredict_i.is_taken) + if (branchpredict_i.is_taken) btb_n[$unsigned(update_pc)].saturation_counter = saturation_counter + 1; else btb_n[$unsigned(update_pc)].saturation_counter = saturation_counter - 1; end // the target address is simply updated - btb_n[$unsigned(update_pc)].target_address = mispredict_i.target_address; + btb_n[$unsigned(update_pc)].target_address = branchpredict_i.target_address; end end diff --git a/src/controller.sv b/src/controller.sv index 
3f489f4f2..3db649da1 100644 --- a/src/controller.sv +++ b/src/controller.sv @@ -20,13 +20,11 @@ import ariane_pkg::*; module controller ( - input logic clk_i, // Clock - input logic rst_ni, // Asynchronous reset active low + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low - input logic flush_commit_i, // flush request from commit stage in - input logic mispredict_i, - - output mispredict mispredict_o // to pcgen update branch history table + input logic flush_commit_i, // flush request from commit stage in + input logic branchpredict_i ); // flush on mispredict diff --git a/src/ex_stage.sv b/src/ex_stage.sv index b69707fb3..5650b662d 100644 --- a/src/ex_stage.sv +++ b/src/ex_stage.sv @@ -29,6 +29,7 @@ module ex_stage #( input fu_op operator_i, input logic [63:0] operand_a_i, input logic [63:0] operand_b_i, + input logic [63:0] operand_c_i, input logic [63:0] imm_i, input logic [TRANS_ID_BITS-1:0] trans_id_i, @@ -37,8 +38,13 @@ module ex_stage #( input logic alu_valid_i, // Output is valid output logic alu_valid_o, // ALU result is valid output logic [63:0] alu_result_o, - output logic [TRANS_ID_BITS-1:0] alu_trans_id_o, // ID of scoreboard entry at which to write back - output logic comparison_result_o, + output logic [TRANS_ID_BITS-1:0] alu_trans_id_o, // ID of scoreboard entry at which to write back + output exception alu_exception_o, + // Branches and Jumps + input logic branch_valid_i, + input logic [63:0] predict_address_i, + output branchpredict branchpredict_o, + input logic predict_taken_i, // LSU output logic lsu_ready_o, // FU is ready input logic lsu_valid_i, // Input is valid @@ -90,7 +96,8 @@ module ex_stage #( output logic mult_ready_o, // FU is ready input logic mult_valid_i // Output is valid ); - + // Wires + logic comparison_result_alu_branch; // ALU is a single cycle instructions, hence it is always ready assign alu_ready_o = 1'b1; @@ -100,17 +107,27 @@ module ex_stage #( // ALU // ----- alu alu_i ( - 
.adder_result_o ( ), - .adder_result_ext_o ( ), - .result_o ( alu_result_o ), - .comparison_result_o ( comparison_result_o ), - .is_equal_result_o ( ), + .adder_result_o ( ), + .adder_result_ext_o ( ), + .result_o ( alu_result_o ), + .comparison_result_o ( comparison_result_alu_branch ), + .is_equal_result_o ( ), .* ); - // -------------------- - // Control Flow Change - // ------------------- + // -------------------- + // Branch Engine + // -------------------- + branch_engine branch_engine_i ( + .operand_a_i ( operand_c_i ), + .operand_b_i ( imm_i ), + .valid_i ( branch_valid_i ), + .comparison_result_i ( comparison_result_alu_branch ), + .predict_address_i ( predict_address_i ), + .branchpredict_o ( branchpredict_o ), + .branch_ex_o ( alu_exception_o ), + .* + ); // ---------------- // Multiplication diff --git a/src/id_stage.sv b/src/id_stage.sv index 58408a52d..bbf454805 100644 --- a/src/id_stage.sv +++ b/src/id_stage.sv @@ -39,12 +39,17 @@ module id_stage #( output fu_op operator_o, output logic [63:0] operand_a_o, output logic [63:0] operand_b_o, + output logic [63:0] operand_c_o, output logic [63:0] imm_o, output logic [TRANS_ID_BITS-1:0] trans_id_o, input logic alu_ready_i, output logic alu_valid_o, + output logic branch_valid_o, + output logic [63:0] predict_address_o, + output logic predict_taken_o, + input logic lsu_ready_i, output logic lsu_valid_o, diff --git a/src/pcgen.sv b/src/pcgen.sv index cce5fd650..7a4d1e91c 100644 --- a/src/pcgen.sv +++ b/src/pcgen.sv @@ -20,22 +20,22 @@ import ariane_pkg::*; module pcgen ( - input logic clk_i, // Clock - input logic rst_ni, // Asynchronous reset active low + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low - input logic flush_i, - input logic [63:0] pc_if_i, - input mispredict mispredict_i, // from controller signaling a mispredict -> update BTB + input logic flush_i, + input logic [63:0] pc_if_i, + input branchpredict branchpredict_i, // from controller signaling a 
branchpredict -> update BTB // to IF - output logic [63:0] pc_if_o, // new PC - output logic set_pc_o, // request the PC to be set to pc_if_o - output logic is_branch_o, // to check if we mispredicted we need to save whether this was a branch or not + output logic [63:0] pc_if_o, // new PC + output logic set_pc_o, // request the PC to be set to pc_if_o + output logic is_branch_o, // to check if we branchpredicted we need to save whether this was a branch or not // global input - input logic [63:0] boot_addr_i, + input logic [63:0] boot_addr_i, // CSR input - input logic [63:0] epc_i, // return from exception - input logic [63:0] trap_vector_base_i, // base of trap vector - input exception ex_i // exception in - from commit + input logic [63:0] epc_i, // return from exception + input logic [63:0] trap_vector_base_i, // base of trap vector + input exception ex_i // exception in - from commit ); logic [63:0] branch_target_address; @@ -56,7 +56,7 @@ module pcgen ( btb_i ( .vpc_i ( pc_if_i ), - .mispredict_i ( mispredict_i ), + .branchpredict_i ( branchpredict_i ), .is_branch_o ( is_branch ), .predict_taken_o ( predict_taken ), .branch_target_address_o ( branch_target_address ), From 1e76a021ce80944473d540752d0186964c6dcb05 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Tue, 9 May 2017 18:06:09 +0200 Subject: [PATCH 04/43] Rework decode logic for new branch engine --- include/ariane_pkg.svh | 4 +- src/ariane.sv | 17 +++---- src/branch_engine.sv | 3 -- src/controller.sv | 9 ++-- src/decoder.sv | 14 +++--- src/issue_read_operands.sv | 95 ++++++++++++++++++++++++++++---------- 6 files changed, 94 insertions(+), 48 deletions(-) diff --git a/include/ariane_pkg.svh b/include/ariane_pkg.svh index 5df63e111..8e892d4d6 100644 --- a/include/ariane_pkg.svh +++ b/include/ariane_pkg.svh @@ -48,7 +48,7 @@ package ariane_pkg; } branchpredict; typedef enum logic[3:0] { - NONE, LSU, ALU, MULT, CSR + NONE, LSU, ALU, CTRL_FLOW, MULT, CSR } fu_t; localparam EXC_OFF_RST = 8'h80; @@ 
-64,6 +64,8 @@ package ariane_pkg; SRA, SRL, SLL, SRLW, SLLW, SRAW, // comparisons LTS, LTU, LES, LEU, GTS, GTU, GES, GEU, EQ, NE, + // jumps + JAL, JALR, // set lower than operations SLTS, SLTU, SLETS, SLETU, // CSR functions diff --git a/src/ariane.sv b/src/ariane.sv index afd763f20..93681f8e6 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -84,6 +84,7 @@ module ariane logic halt_if; logic [63:0] pc_if; exception ex_commit; // exception from commit stage + branchpredict branchpredict; // -------------- // PCGEN <-> IF // -------------- @@ -93,7 +94,6 @@ module ariane // -------------- // PCGEN <-> EX // -------------- - branchpredict branchpredict_ex_pcgen; // -------------- // PCGEN <-> CSR // -------------- @@ -211,7 +211,7 @@ module ariane pcgen pcgen_i ( .flush_i ( flush ), .pc_if_i ( pc_if ), - .branchpredict_i ( branchpredict_ex_pcgen ), + .branchpredict_i ( branchpredict ), .pc_if_o ( pc_pcgen_if ), .set_pc_o ( set_pc_pcgen_if ), .is_branch_o ( is_branch_o ), @@ -323,7 +323,7 @@ module ariane .branch_valid_i ( branch_valid_id_ex ), .predict_address_i ( predict_address_id_ex ), .predict_taken_i ( predict_taken_id_ex ), - .branchpredict_o ( branchpredict_ex_pcgen ), + .branchpredict_o ( branchpredict ), // LSU .lsu_ready_o ( lsu_ready_ex_id ), .lsu_valid_i ( lsu_valid_id_ex ), @@ -386,7 +386,7 @@ module ariane .ASID_WIDTH ( ASID_WIDTH ) ) csr_regfile_i ( - .flush_o ( ), + .flush_o ( flus_csr_ctrl ), .ex_i ( ex_commit ), .csr_op_i ( csr_op_commit_csr ), .csr_addr_i ( csr_addr_ex_csr ), @@ -413,10 +413,11 @@ module ariane logic branchpredict_i; controller i_controller ( - .clk_i (clk_i ), - .rst_ni (rst_ni ), - .flush_commit_i (flush_commit_i), - .branchpredict_i (branchpredict_i ) + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_commit_i ( flush_commit_i ), + .flush_csr_i ( flus_csr_ctrl ), + .branchpredict_i ( branchpredict ) ); diff --git a/src/branch_engine.sv b/src/branch_engine.sv index b9dbe81f9..238ea84d1 100644 --- a/src/branch_engine.sv +++ 
b/src/branch_engine.sv @@ -20,9 +20,6 @@ import ariane_pkg::*; module branch_engine ( - input logic clk_i, // Clock - input logic rst_ni, // Asynchronous reset active low - input logic [63:0] operand_a_i, input logic [63:0] operand_b_i, input logic valid_i, diff --git a/src/controller.sv b/src/controller.sv index 3db649da1..2510caf7e 100644 --- a/src/controller.sv +++ b/src/controller.sv @@ -20,11 +20,12 @@ import ariane_pkg::*; module controller ( - input logic clk_i, // Clock - input logic rst_ni, // Asynchronous reset active low + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low - input logic flush_commit_i, // flush request from commit stage in - input logic branchpredict_i + input logic flush_commit_i, // flush request from commit stage in + input logic flush_csr_i, + input branchpredict branchpredict_i ); // flush on mispredict diff --git a/src/decoder.sv b/src/decoder.sv index e56f4c9a1..55d6fdd9b 100644 --- a/src/decoder.sv +++ b/src/decoder.sv @@ -297,7 +297,7 @@ module decoder ( OPCODE_BRANCH: begin // TODO: Implement imm_select = BIMM; - instruction_o.fu = ALU; + instruction_o.fu = CTRL_FLOW; is_control_flow_instr_o = 1'b1; case (instr.stype.funct3) @@ -313,19 +313,19 @@ module decoder ( end endcase end - + // Jump and link register OPCODE_JALR: begin - // TODO: Implement - instruction_o.fu = ALU; + instruction_o.fu = CTRL_FLOW; + instruction_o.op = JALR; imm_select = UIMM; instruction_o.use_pc = 1'b1; instruction_o.rd = instr.itype.rd; is_control_flow_instr_o = 1'b1; end - + // Jump and link OPCODE_JAL: begin - // TODO: Implement - instruction_o.fu = ALU; + instruction_o.fu = CTRL_FLOW; + instruction_o.op = JAL; imm_select = JIMM; instruction_o.use_pc = 1'b1; instruction_o.rd = instr.utype.rd; diff --git a/src/issue_read_operands.sv b/src/issue_read_operands.sv index 232b63ebe..cbfbff651 100644 --- a/src/issue_read_operands.sv +++ b/src/issue_read_operands.sv @@ -42,11 +42,14 @@ module issue_read_operands ( output fu_op 
operator_o, output logic [63:0] operand_a_o, output logic [63:0] operand_b_o, + output logic [63:0] operand_c_o, output logic [63:0] imm_o, // output immediate for the LSU output logic [TRANS_ID_BITS-1:0] trans_id_o, // ALU 1 input logic alu_ready_i, // FU is ready output logic alu_valid_o, // Output is valid + // Branches and Jumps + output logic branch_valid_o, // this is a valid branch instruction // LSU input logic lsu_ready_i, // FU is ready output logic lsu_valid_o, // Output is valid @@ -66,27 +69,33 @@ module issue_read_operands ( logic [63:0] operand_a_regfile, operand_b_regfile; // operands coming from regfile // output flipflop (ID <-> EX) - logic [63:0] operand_a_n, operand_a_q, operand_b_n, operand_b_q, imm_n, imm_q; - logic alu_valid_n, alu_valid_q; - logic mult_valid_n, mult_valid_q; - logic lsu_valid_n, lsu_valid_q; - logic csr_valid_n, csr_valid_q; + logic [63:0] operand_a_n, operand_a_q, + operand_b_n, operand_b_q, + operand_c_n, operand_c_q, + imm_n, imm_q; + logic alu_valid_n, alu_valid_q; + logic mult_valid_n, mult_valid_q; + logic lsu_valid_n, lsu_valid_q; + logic csr_valid_n, csr_valid_q; + logic branch_valid_n, branch_valid_q; logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q; fu_op operator_n, operator_q; // forwarding signals logic forward_rs1, forward_rs2; - - assign operand_a_o = operand_a_q; - assign operand_b_o = operand_b_q; - assign operator_o = operator_q; - assign alu_valid_o = alu_valid_q; - assign lsu_valid_o = lsu_valid_q; - assign csr_valid_o = csr_valid_q; - assign mult_valid_o = mult_valid_q; - assign trans_id_o = trans_id_q; - assign imm_o = imm_q; + // ID <-> EX registers + assign operand_a_o = operand_a_q; + assign operand_b_o = operand_b_q; + assign operand_c_o = operand_c_q; + assign operator_o = operator_q; + assign alu_valid_o = alu_valid_q; + assign branch_valid_o = branch_valid_q; + assign lsu_valid_o = lsu_valid_q; + assign csr_valid_o = csr_valid_q; + assign mult_valid_o = mult_valid_q; + assign trans_id_o = 
trans_id_q; + assign imm_o = imm_q; // --------------- // Issue Stage // --------------- @@ -127,7 +136,8 @@ module issue_read_operands ( unique case (issue_instr_i.fu) NONE: fu_busy = 1'b0; - ALU: + ALU, CTRL_FLOW: // control flow instruction also need the ALU + // and they are always ready if the ALU is ready fu_busy = ~alu_ready_i; MULT: fu_busy = ~mult_ready_i; @@ -175,10 +185,16 @@ module issue_read_operands ( end end // Forwarding/Output MUX - always_comb begin : forwarding + always_comb begin : forwarding_operand_select // default is regfile operand_a_n = operand_a_regfile; operand_b_n = operand_b_regfile; + // set PC as default operand c + operand_c_n = issue_instr_i.pc; + // immediates are the third operands in the store case + imm_n = issue_instr_i.result; + trans_id_n = issue_instr_i.trans_id; + operator_n = issue_instr_i.op; // or should we forward if (forward_rs1) begin @@ -203,17 +219,38 @@ module issue_read_operands ( if (issue_instr_i.use_imm && ~(issue_instr_i.op inside {SD, SW, SH, SB})) begin operand_b_n = issue_instr_i.result; end - // immediates are the third operands in the store case - imm_n = issue_instr_i.result; - trans_id_n = issue_instr_i.trans_id; - operator_n = issue_instr_i.op; + // special assignments in the JAL and JALR case + case (issue_instr_i.op) + // re-write the operator since + // we need the ALU for addition + JAL: begin + operator_n = ADD; + // output 4 as operand b as we + // need to save PC + 4 + operand_b_n = 64'h4; + end + + JALR: begin + operator_n = ADD; + // output 4 as operand b as we + // need to save PC + 4 + operand_b_n = 64'h4; + // get RS1 as operand C + operand_c_n = operand_a_regfile; + // forward rs1 + if (forward_rs1) begin + operand_c_n = rs1_i; + end + end + endcase end // FU select always_comb begin : unit_valid - alu_valid_n = 1'b0; - lsu_valid_n = 1'b0; - mult_valid_n = 1'b0; - csr_valid_n = 1'b0; + alu_valid_n = 1'b0; + lsu_valid_n = 1'b0; + mult_valid_n = 1'b0; + csr_valid_n = 1'b0; + branch_valid_n 
= 1'b0; // Exception pass through // if an exception has occurred simply pass it through // we do not want to issue this instruction @@ -227,6 +264,10 @@ module issue_read_operands ( lsu_valid_n = 1'b1; CSR: csr_valid_n = 1'b1; + CTRL_FLOW: begin + alu_valid_n = 1'b1; + branch_valid_n = 1'b1; + end default: begin end @@ -259,7 +300,9 @@ module issue_read_operands ( if(~rst_ni) begin operand_a_q <= '{default: 0}; operand_b_q <= '{default: 0}; + operand_c_q <= '{default: 0}; alu_valid_q <= 1'b0; + branch_valid_q <= 1'b0; mult_valid_q <= 1'b0; lsu_valid_q <= 1'b0; csr_valid_q <= 1'b0; @@ -268,7 +311,9 @@ module issue_read_operands ( end else begin operand_a_q <= operand_a_n; operand_b_q <= operand_b_n; + operand_c_q <= operand_c_n; alu_valid_q <= alu_valid_n; + branch_valid_q <= branch_valid_n; mult_valid_q <= mult_valid_n; lsu_valid_q <= lsu_valid_n; csr_valid_q <= csr_valid_n; From fac352baafe6b4b23fd5f81a9183d108e528b822 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Tue, 9 May 2017 18:44:09 +0200 Subject: [PATCH 05/43] Add unresolved branch handling --- src/ariane.sv | 1 + src/id_stage.sv | 36 +++++++++++++++++++++++++++++++++--- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/src/ariane.sv b/src/ariane.sv index 93681f8e6..47b13ab26 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -278,6 +278,7 @@ module ariane .branch_valid_o ( branch_valid_id_ex ), .predict_address_o ( predict_address_id_ex ), .predict_taken_o ( predict_taken_id_ex ), + .branchpredict_i ( branchpredict ), // in order to resolve the branch // LSU .lsu_ready_i ( lsu_ready_ex_id ), .lsu_valid_o ( lsu_valid_id_ex ), diff --git a/src/id_stage.sv b/src/id_stage.sv index bbf454805..3983b569a 100644 --- a/src/id_stage.sv +++ b/src/id_stage.sv @@ -49,6 +49,8 @@ module id_stage #( output logic branch_valid_o, output logic [63:0] predict_address_o, output logic predict_taken_o, + // ex just resolved our predicted branch, we are ready to accept new requests + input branchpredict 
branchpredict_i, input logic lsu_ready_i, output logic lsu_valid_o, @@ -92,9 +94,29 @@ module id_stage #( // Decoder (DC) <-> Scoreboard (SB) // --------------------------------------------------- scoreboard_entry decoded_instr_dc_sb; + // --------------------------------------------------- + // Decoder (DC) <-> Branch Logic + // --------------------------------------------------- + logic is_control_flow_instr; - // TODO: Branching logic - assign ready_o = ~full; + // ----------------- + // Branch logic + // ----------------- + logic unresolved_branch_n, unresolved_branch_q; + + always_comb begin : unresolved_branch + unresolved_branch_n = unresolved_branch_q; + // we just resolved the branch + if (branchpredict_i.valid) begin + unresolved_branch_n = 1'b0; + end + // if the instruction is valid and it is a control flow instruction + if (instruction_valid_i && is_control_flow_instr) begin + unresolved_branch_n = 1'b1; + end + end + // we are ready if we are not full and don't have any unresolved branches + assign ready_o = ~full & ~unresolved_branch_q; decoder decoder_i ( .clk_i ( clk_i ), @@ -104,7 +126,7 @@ module id_stage #( .instruction_i ( instruction_i ), .ex_i ( ex_if_i ), .instruction_o ( decoded_instr_dc_sb ), - .is_control_flow_instr_o ( ) + .is_control_flow_instr_o ( is_control_flow_instr ) ); scoreboard #( @@ -150,4 +172,12 @@ module id_stage #( .* ); + always_ff @(posedge clk_i or negedge rst_ni) begin + if(~rst_ni) begin + unresolved_branch_q <= 1'b0; + end else begin + unresolved_branch_q <= unresolved_branch_n; + end + end + endmodule \ No newline at end of file From 6b9ba5b3144a103494f30fa60aac3331ea43c9bf Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Tue, 9 May 2017 18:49:34 +0200 Subject: [PATCH 06/43] :memo: Add some further comments on resolved branches --- src/id_stage.sv | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/id_stage.sv b/src/id_stage.sv index 3983b569a..b7f9109b9 100644 --- a/src/id_stage.sv +++ 
b/src/id_stage.sv @@ -74,8 +74,9 @@ module id_stage #( output scoreboard_entry commit_instr_o, input logic commit_ack_i ); - + // --------------------------------------------------- // Global signals + // --------------------------------------------------- logic full; // --------------------------------------------------- // Scoreboard (SB) <-> Issue and Read Operands (iro) @@ -102,6 +103,10 @@ module id_stage #( // ----------------- // Branch logic // ----------------- + // This should basically prevent the scoreboard from accepting + // instructions past a branch. We need to resolve the branch beforehand. + // This limitation is in place to ease the backtracking of mis-predicted branches as they + // can simply be in the front-end of the processor. logic unresolved_branch_n, unresolved_branch_q; always_comb begin : unresolved_branch From 61afdb9e30c113e0c4f7a4cf43f6809dac9f5a21 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Wed, 10 May 2017 15:08:39 +0200 Subject: [PATCH 07/43] Complete branch prediction path through IF, ID, EX --- src/ariane.sv | 18 +++- src/branch_engine.sv | 7 +- src/ex_stage.sv | 4 +- src/id_stage.sv | 34 +++++- src/if_stage.sv | 243 ++++++++++++++++++++++++------------------- src/pcgen.sv | 2 +- 6 files changed, 191 insertions(+), 117 deletions(-) diff --git a/src/ariane.sv b/src/ariane.sv index 47b13ab26..66adcbf32 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -112,6 +112,9 @@ module ariane logic illegal_c_insn_id_if; logic [63:0] pc_id_if_id; exception exception_if_id; + logic branch_valid_if_id; + logic [63:0] predict_address_if_id; + logic predict_taken_if_id; // -------------- // ID <-> EX // -------------- @@ -199,6 +202,8 @@ module ariane // EX <-> CSR // -------------- + // * -> CTRL + logic flush_csr_ctrl; // TODO: Preliminary signal assignments logic flush_tlb; assign flush_tlb = 1'b0; @@ -214,7 +219,7 @@ module ariane .branchpredict_i ( branchpredict ), .pc_if_o ( pc_pcgen_if ), .set_pc_o ( set_pc_pcgen_if ), - 
.is_branch_o ( is_branch_o ), + .is_branch_o ( is_branch_pcgen_if ), .boot_addr_i ( boot_addr_i ), .epc_i ( epc_commit_pcgen ), .trap_vector_base_i ( trap_vector_base_commit_pcgen ), @@ -231,6 +236,10 @@ module ariane .id_ready_i ( ready_id_if ), .halt_if_i ( halt_if ), .set_pc_i ( set_pc_pcgen_if ), + .is_branch_i ( is_branch_pcgen_if ), + .branch_valid_o ( branch_valid_if_id ), + .predict_address_o ( predict_address_if_id ), + .predict_taken_o ( predict_taken_if_id ), .fetch_addr_i ( pc_pcgen_if ), .instr_req_o ( fetch_req_if_ex ), .instr_addr_o ( fetch_vaddr_if_ex ), @@ -275,6 +284,9 @@ module ariane .alu_ready_i ( alu_ready_ex_id ), .alu_valid_o ( alu_valid_id_ex ), // Branches and Jumps + .branch_valid_i ( branch_valid_if_id ), + .predict_address_i ( predict_address_if_id ), + .predict_taken_i ( predict_taken_if_id ), .branch_valid_o ( branch_valid_id_ex ), .predict_address_o ( predict_address_id_ex ), .predict_taken_o ( predict_taken_id_ex ), @@ -387,7 +399,7 @@ module ariane .ASID_WIDTH ( ASID_WIDTH ) ) csr_regfile_i ( - .flush_o ( flus_csr_ctrl ), + .flush_o ( flush_csr_ctrl ), .ex_i ( ex_commit ), .csr_op_i ( csr_op_commit_csr ), .csr_addr_i ( csr_addr_ex_csr ), @@ -417,7 +429,7 @@ module ariane .clk_i ( clk_i ), .rst_ni ( rst_ni ), .flush_commit_i ( flush_commit_i ), - .flush_csr_i ( flus_csr_ctrl ), + .flush_csr_i ( flsh_csr_ctrl ), .branchpredict_i ( branchpredict ) ); diff --git a/src/branch_engine.sv b/src/branch_engine.sv index 238ea84d1..beadaf860 100644 --- a/src/branch_engine.sv +++ b/src/branch_engine.sv @@ -26,6 +26,7 @@ module branch_engine ( input logic comparison_result_i, // result of comparison input logic [63:0] predict_address_i, // this is the address we predicted + input logic predict_taken_i, output branchpredict branchpredict_o, // this is the actual address we are targeting output exception branch_ex_o // branch exception out ); @@ -46,8 +47,10 @@ module branch_engine ( branchpredict_o.target_address = target_address; 
branchpredict_o.is_taken = comparison_result_i; // we mis-predicted e.g.: the predicted address is unequal to the actual address - if (target_address != predict_address_i && target_address[1:0] == 2'b0) begin - branchpredict_o.is_mispredict = 1'b0; + if (target_address[1:0] == 2'b0) begin + if (target_address != predict_address_i || predict_taken_i != comparison_result_i) begin + branchpredict_o.is_mispredict = 1'b1; + end end end end diff --git a/src/ex_stage.sv b/src/ex_stage.sv index 5650b662d..6dd965189 100644 --- a/src/ex_stage.sv +++ b/src/ex_stage.sv @@ -43,8 +43,8 @@ module ex_stage #( // Branches and Jumps input logic branch_valid_i, input logic [63:0] predict_address_i, - output branchpredict branchpredict_o, input logic predict_taken_i, + output branchpredict branchpredict_o, // LSU output logic lsu_ready_o, // FU is ready input logic lsu_valid_i, // Input is valid @@ -123,8 +123,6 @@ module ex_stage #( .operand_b_i ( imm_i ), .valid_i ( branch_valid_i ), .comparison_result_i ( comparison_result_alu_branch ), - .predict_address_i ( predict_address_i ), - .branchpredict_o ( branchpredict_o ), .branch_ex_o ( alu_exception_o ), .* ); diff --git a/src/id_stage.sv b/src/id_stage.sv index b7f9109b9..b0579abcc 100644 --- a/src/id_stage.sv +++ b/src/id_stage.sv @@ -45,7 +45,11 @@ module id_stage #( input logic alu_ready_i, output logic alu_valid_o, - + // Branch predict In + input logic branch_valid_i, + input logic [63:0] predict_address_i, + input logic predict_taken_i, + // Branch predict Out output logic branch_valid_o, output logic [63:0] predict_address_o, output logic predict_taken_o, @@ -108,6 +112,10 @@ module id_stage #( // This limitation is in place to ease the backtracking of mis-predicted branches as they // can simply be in the front-end of the processor. 
logic unresolved_branch_n, unresolved_branch_q; + // branch predict registers + logic branch_valid_n, branch_valid_q; + logic [63:0] predict_address_n, predict_address_q; + logic predict_taken_n, predict_taken_q; always_comb begin : unresolved_branch unresolved_branch_n = unresolved_branch_q; @@ -119,9 +127,23 @@ module id_stage #( if (instruction_valid_i && is_control_flow_instr) begin unresolved_branch_n = 1'b1; end + + branch_valid_n = branch_valid_q; + predict_address_n = predict_address_q; + predict_taken_n = predict_taken_q; + // save branch prediction information until the ex stage resolves the prediction + if (~unresolved_branch_q) begin + branch_valid_n = branch_valid_i; + predict_address_n = predict_address_i; + predict_taken_n = predict_taken_i; + end end // we are ready if we are not full and don't have any unresolved branches - assign ready_o = ~full & ~unresolved_branch_q; + assign ready_o = ~full & ~unresolved_branch_q; + // output branch prediction bits + assign branch_valid_o = branch_valid_q; + assign predict_address_o = predict_address_q; + assign predict_taken_o = predict_taken_q; decoder decoder_i ( .clk_i ( clk_i ), @@ -178,10 +200,16 @@ module id_stage #( ); always_ff @(posedge clk_i or negedge rst_ni) begin - if(~rst_ni) begin + if (~rst_ni) begin unresolved_branch_q <= 1'b0; + branch_valid_q <= 1'b0; + predict_address_q <= 64'b0; + predict_taken_q <= 1'b0; end else begin unresolved_branch_q <= unresolved_branch_n; + branch_valid_q <= branch_valid_n; + predict_address_q <= predict_address_n; + predict_taken_q <= predict_taken_n; end end diff --git a/src/if_stage.sv b/src/if_stage.sv index b46ad891b..bf63d94dd 100644 --- a/src/if_stage.sv +++ b/src/if_stage.sv @@ -36,8 +36,14 @@ module if_stage ( output logic if_busy_o, // is the IF stage busy fetching instructions? 
input logic id_ready_i, input logic halt_if_i, // pipeline stall - input logic set_pc_i, // set new PC + // ctrl flow instruction in input logic [63:0] fetch_addr_i, + input logic set_pc_i, // set new PC + input logic is_branch_i, // the new PC was a branch e.g.: branch or jump + // branchpredict out + output logic branch_valid_o, + output logic [63:0] predict_address_o, + output logic predict_taken_o, // instruction cache interface output logic instr_req_o, output logic [63:0] instr_addr_o, @@ -64,6 +70,11 @@ module if_stage ( logic [31:0] fetch_rdata; logic [63:0] fetch_addr; + // branch predict registers + logic branch_valid_n, branch_valid_q; + logic [63:0] predict_address_n, predict_address_q; + logic predict_taken_n, predict_taken_q; + // offset FSM enum logic[0:0] {WAIT, IDLE} offset_fsm_cs, offset_fsm_ns; logic [31:0] instr_decompressed; @@ -71,26 +82,20 @@ module if_stage ( logic instr_compressed_int; logic clear_instr_valid_i; - - assign pc_if_o = fetch_addr; - // id stage acknowledged - assign clear_instr_valid_i = id_ready_i; // compressed instruction decoding, or more precisely compressed instruction // expander // // since it does not matter where we decompress instructions, we do it here // to ease timing closure - compressed_decoder compressed_decoder_i - ( + compressed_decoder compressed_decoder_i ( .instr_i ( fetch_rdata ), .instr_o ( instr_decompressed ), .is_compressed_o ( instr_compressed_int ), .illegal_instr_o ( illegal_c_insn ) - ); + ); - // prefetch buffer, caches a fixed number of instructions - prefetch_buffer prefetch_buffer_i - ( + // Pre-fetch buffer, caches a fixed number of instructions + prefetch_buffer prefetch_buffer_i ( .clk ( clk_i ), .rst_n ( rst_ni ), @@ -113,110 +118,138 @@ module if_stage ( // Prefetch Buffer Status .busy_o ( prefetch_busy ) - ); + ); - // offset FSM state - always_ff @(posedge clk_i, negedge rst_ni) - begin - if (rst_ni == 1'b0) begin - offset_fsm_cs <= IDLE; - end else begin - offset_fsm_cs <= 
offset_fsm_ns; - end - end + // offset FSM state transition logic + always_comb begin + offset_fsm_ns = offset_fsm_cs; - // offset FSM state transition logic - always_comb - begin - offset_fsm_ns = offset_fsm_cs; + fetch_ready = 1'b0; + branch_req = 1'b0; + valid = 1'b0; - fetch_ready = 1'b0; - branch_req = 1'b0; - valid = 1'b0; - - unique case (offset_fsm_cs) - // no valid instruction data for ID stage - // assume aligned - IDLE: begin - if (req_i) begin - branch_req = 1'b1; - offset_fsm_ns = WAIT; - end - end - - // serving aligned 32 bit or 16 bit instruction, we don't know yet - WAIT: begin - if (fetch_valid) begin - valid = 1'b1; // an instruction is ready for ID stage - - if (req_i && if_valid) begin - fetch_ready = 1'b1; - offset_fsm_ns = WAIT; - end - end - end - - default: begin - offset_fsm_ns = IDLE; - end - endcase - - - // take care of control flow changes - if (set_pc_i) begin - valid = 1'b0; - - // switch to new PC from ID stage - branch_req = 1'b1; + unique case (offset_fsm_cs) + // no valid instruction data for ID stage + // assume aligned + IDLE: begin + if (req_i) begin + branch_req = 1'b1; offset_fsm_ns = WAIT; end end - // IF-ID pipeline registers, frozen when the ID stage is stalled - always_ff @(posedge clk_i, negedge rst_ni) - begin : IF_ID_PIPE_REGISTERS - if (rst_ni == 1'b0) - begin - instr_valid_id_o <= 1'b0; - instr_rdata_id_o <= '0; - illegal_c_insn_id_o <= 1'b0; - is_compressed_id_o <= 1'b0; - pc_id_o <= '0; - ex_o <= '{default: 0}; - end - else - begin - - if (if_valid) - begin - instr_valid_id_o <= 1'b1; - instr_rdata_id_o <= instr_decompressed; - illegal_c_insn_id_o <= illegal_c_insn; - is_compressed_id_o <= instr_compressed_int; - pc_id_o <= pc_if_o; - ex_o.cause <= 64'b0; // TODO: Output exception - ex_o.tval <= 64'b0; // TODO: Output exception - ex_o.valid <= 1'b0; // TODO: Output exception - end else if (clear_instr_valid_i) begin - instr_valid_id_o <= 1'b0; - end + // serving aligned 32 bit or 16 bit instruction, we don't know 
yet + WAIT: begin + if (fetch_valid) begin + valid = 1'b1; // an instruction is ready for ID stage + if (req_i && if_valid) begin + fetch_ready = 1'b1; + offset_fsm_ns = WAIT; end + end end + default: begin + offset_fsm_ns = IDLE; + end + endcase - assign if_ready = valid & id_ready_i; - assign if_valid = (~halt_if_i) & if_ready; - assign if_busy_o = prefetch_busy; - //------------- - // Assertions - //------------- - `ifndef SYNTHESIS - `ifndef VERILATOR - // there should never be a grant when there was no request - assert property ( - @(posedge clk_i) (instr_gnt_i) |-> (instr_req_o) ) - else $warning("There was a grant without a request"); - `endif - `endif + // take care of control flow changes + if (set_pc_i) begin + valid = 1'b0; + // switch to new PC from ID stage + branch_req = 1'b1; + offset_fsm_ns = WAIT; + end + end + + // ------------- + // Branch Logic + // ------------- + // We need to pass those registers on to ID in the case we've set + // a new branch target (or jump) and we got a valid instruction + always_comb begin + branch_valid_n = branch_valid_q; + predict_address_n = predict_address_q; + predict_taken_n = predict_taken_q; + // we got a branch redirect from PCGEN + if (is_branch_i) begin + // set the registers to the correct address + branch_valid_n = 1'b1; + predict_address_n = fetch_addr_i; + // whether we took the branch or not can be seen from the set PC + // nevertheless we also need to keep branches not taken + predict_taken_n = set_pc_i; + end + // we have a valid instruction and id excepted it so we consider all the + // branch information to be sampled correctly + if (if_valid && clear_instr_valid_i) begin + branch_valid_n = 1'b0; + end + + end + + // -------------------------------------------------------------- + // IF-ID pipeline registers, frozen when the ID stage is stalled + // -------------------------------------------------------------- + always_ff @(posedge clk_i, negedge rst_ni) begin : IF_ID_PIPE_REGISTERS + if (~rst_ni) 
begin + // offset FSM state + offset_fsm_cs <= IDLE; + instr_valid_id_o <= 1'b0; + instr_rdata_id_o <= '0; + illegal_c_insn_id_o <= 1'b0; + is_compressed_id_o <= 1'b0; + pc_id_o <= '0; + ex_o <= '{default: 0}; + branch_valid_q <= 1'b0; + predict_address_q <= 64'b0; + predict_taken_q <= 1'b0; + end + else + begin + offset_fsm_cs <= offset_fsm_ns; + branch_valid_q <= branch_valid_n; + predict_address_q <= predict_address_n; + predict_taken_q <= predict_taken_n; + + if (if_valid) begin + instr_valid_id_o <= 1'b1; + instr_rdata_id_o <= instr_decompressed; + illegal_c_insn_id_o <= illegal_c_insn; + is_compressed_id_o <= instr_compressed_int; + pc_id_o <= pc_if_o; + ex_o.cause <= 64'b0; // TODO: Output exception + ex_o.tval <= 64'b0; // TODO: Output exception + ex_o.valid <= 1'b0; // TODO: Output exception + end else if (clear_instr_valid_i) begin + instr_valid_id_o <= 1'b0; + end + + end + end + + // Assignments + assign pc_if_o = fetch_addr; + // id stage acknowledged + assign clear_instr_valid_i = id_ready_i; + assign if_ready = valid & id_ready_i; + assign if_valid = (~halt_if_i) & if_ready; + assign if_busy_o = prefetch_busy; + assign branch_valid_o = branch_valid_q; + assign predict_address_o = predict_address_q; + assign predict_taken_o = predict_taken_q; + + //------------- + // Assertions + //------------- + `ifndef SYNTHESIS + `ifndef VERILATOR + // there should never be a grant when there was no request + assert property ( + @(posedge clk_i) (instr_gnt_i) |-> (instr_req_o) ) + else $warning("There was a grant without a request"); + `endif + `endif endmodule \ No newline at end of file diff --git a/src/pcgen.sv b/src/pcgen.sv index 7a4d1e91c..d5619d69d 100644 --- a/src/pcgen.sv +++ b/src/pcgen.sv @@ -25,7 +25,7 @@ module pcgen ( input logic flush_i, input logic [63:0] pc_if_i, - input branchpredict branchpredict_i, // from controller signaling a branchpredict -> update BTB + input branchpredict branchpredict_i, // from controller signaling a branchpredict -> 
update BTB // to IF output logic [63:0] pc_if_o, // new PC output logic set_pc_o, // request the PC to be set to pc_if_o From 0f1e08fb91f0ff794ccf526e8ed0ed226d655b50 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Wed, 10 May 2017 20:09:03 +0200 Subject: [PATCH 08/43] :bug: Fixes in scoreboard, issue and branches --- src/ariane.sv | 178 +++++++++++++++++++------------------ src/branch_engine.sv | 17 ++-- src/decoder.sv | 6 +- src/ex_stage.sv | 4 +- src/id_stage.sv | 21 +++-- src/issue_read_operands.sv | 14 ++- src/scoreboard.sv | 8 +- tb/wave/wave_core.do | 21 +---- test/add_test.S | 12 ++- 9 files changed, 155 insertions(+), 126 deletions(-) diff --git a/src/ariane.sv b/src/ariane.sv index 66adcbf32..9f7e39b64 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -125,6 +125,7 @@ module ariane logic [63:0] operand_a_id_ex; logic [63:0] operand_b_id_ex; logic [63:0] operand_c_id_ex; + logic [63:0] pc_id_ex; // ALU logic alu_ready_ex_id; logic alu_valid_id_ex; @@ -134,6 +135,7 @@ module ariane exception alu_exception_ex_id; // Branches and Jumps logic branch_valid_id_ex; + logic predict_branch_valid_id_ex; logic [63:0] predict_address_id_ex; logic predict_taken_id_ex; // LSU @@ -265,111 +267,115 @@ module ariane .NR_WB_PORTS ( NR_WB_PORTS ) ) id_stage_i ( - .test_en_i ( test_en_i ), - .flush_i ( flush ), - .instruction_i ( instr_rdata_if_id ), - .instruction_valid_i ( instr_valid_if_id ), - .is_compressed_i ( is_compressed_if_id ), - .pc_if_i ( pc_if ), // PC from if - .ex_if_i ( exception_if_id ), // exception from if - .ready_o ( ready_id_if ), + .test_en_i ( test_en_i ), + .flush_i ( flush ), + .instruction_i ( instr_rdata_if_id ), + .instruction_valid_i ( instr_valid_if_id ), + .is_compressed_i ( is_compressed_if_id ), + .pc_if_i ( pc_id_if_id ), // PC from if + .ex_if_i ( exception_if_id ), // exception from if + .ready_o ( ready_id_if ), // Functional Units - .operator_o ( operator_id_ex ), - .operand_a_o ( operand_a_id_ex ), - .operand_b_o ( operand_b_id_ex 
), - .operand_c_o ( operand_c_id_ex ), - .imm_o ( imm_id_ex ), - .trans_id_o ( trans_id_id_ex ), + .operator_o ( operator_id_ex ), + .operand_a_o ( operand_a_id_ex ), + .operand_b_o ( operand_b_id_ex ), + .operand_c_o ( operand_c_id_ex ), + .imm_o ( imm_id_ex ), + .trans_id_o ( trans_id_id_ex ), + .pc_o ( pc_id_ex ), // ALU - .alu_ready_i ( alu_ready_ex_id ), - .alu_valid_o ( alu_valid_id_ex ), + .alu_ready_i ( alu_ready_ex_id ), + .alu_valid_o ( alu_valid_id_ex ), // Branches and Jumps - .branch_valid_i ( branch_valid_if_id ), - .predict_address_i ( predict_address_if_id ), - .predict_taken_i ( predict_taken_if_id ), - .branch_valid_o ( branch_valid_id_ex ), - .predict_address_o ( predict_address_id_ex ), - .predict_taken_o ( predict_taken_id_ex ), - .branchpredict_i ( branchpredict ), // in order to resolve the branch + .branch_valid_i ( branch_valid_if_id ), + .predict_address_i ( predict_address_if_id ), + .predict_taken_i ( predict_taken_if_id ), + .branch_valid_o ( branch_valid_id_ex ), + .predict_branch_valid_o ( predict_branch_valid_id_ex ), + .predict_address_o ( predict_address_id_ex ), + .predict_taken_o ( predict_taken_id_ex ), + .branchpredict_i ( branchpredict ), // in order to resolve the branch // LSU - .lsu_ready_i ( lsu_ready_ex_id ), - .lsu_valid_o ( lsu_valid_id_ex ), + .lsu_ready_i ( lsu_ready_ex_id ), + .lsu_valid_o ( lsu_valid_id_ex ), // Multiplier - .mult_ready_i ( mult_ready_ex_id ), - .mult_valid_o ( mult_valid_id_ex ), + .mult_ready_i ( mult_ready_ex_id ), + .mult_valid_o ( mult_valid_id_ex ), // CSR - .csr_ready_i ( csr_ready_ex_id ), - .csr_valid_o ( csr_valid_id_ex ), + .csr_ready_i ( csr_ready_ex_id ), + .csr_valid_o ( csr_valid_id_ex ), - .trans_id_i ( {alu_trans_id_ex_id, lsu_trans_id_ex_id, csr_trans_id_ex_id }), - .wdata_i ( {alu_result_ex_id, lsu_result_ex_id, csr_result_ex_id }), - .ex_ex_i ( {alu_exception_ex_id, lsu_exception_ex_id, {$bits(exception){1'b0}} }), - .wb_valid_i ( {alu_valid_ex_id, lsu_valid_ex_id, 
csr_valid_ex_id }), + .trans_id_i ( {alu_trans_id_ex_id, lsu_trans_id_ex_id, csr_trans_id_ex_id }), + .wdata_i ( {alu_result_ex_id, lsu_result_ex_id, csr_result_ex_id }), + .ex_ex_i ( {alu_exception_ex_id, lsu_exception_ex_id, {$bits(exception){1'b0}} }), + .wb_valid_i ( {alu_valid_ex_id, lsu_valid_ex_id, csr_valid_ex_id }), - .waddr_a_i ( waddr_a_commit_id ), - .wdata_a_i ( wdata_a_commit_id ), - .we_a_i ( we_a_commit_id ), + .waddr_a_i ( waddr_a_commit_id ), + .wdata_a_i ( wdata_a_commit_id ), + .we_a_i ( we_a_commit_id ), - .commit_instr_o ( commit_instr_id_commit ), - .commit_ack_i ( commit_ack_commit_id ), + .commit_instr_o ( commit_instr_id_commit ), + .commit_ack_i ( commit_ack_commit_id ), .* ); // --------- // EX // --------- ex_stage ex_stage_i ( - .flush_i ( flush ), - .operator_i ( operator_id_ex ), - .operand_a_i ( operand_a_id_ex ), - .operand_b_i ( operand_b_id_ex ), - .operand_c_i ( operand_c_id_ex ), - .imm_i ( imm_id_ex ), - .trans_id_i ( trans_id_id_ex ), + .flush_i ( flush ), + .operator_i ( operator_id_ex ), + .operand_a_i ( operand_a_id_ex ), + .operand_b_i ( operand_b_id_ex ), + .operand_c_i ( operand_c_id_ex ), + .imm_i ( imm_id_ex ), + .trans_id_i ( trans_id_id_ex ), + .pc_i ( pc_id_ex ), // ALU - .alu_ready_o ( alu_ready_ex_id ), - .alu_valid_i ( alu_valid_id_ex ), - .alu_result_o ( alu_result_ex_id ), - .alu_trans_id_o ( alu_trans_id_ex_id ), - .alu_valid_o ( alu_valid_ex_id ), - .alu_exception_o ( alu_exception_ex_id ), + .alu_ready_o ( alu_ready_ex_id ), + .alu_valid_i ( alu_valid_id_ex ), + .alu_result_o ( alu_result_ex_id ), + .alu_trans_id_o ( alu_trans_id_ex_id ), + .alu_valid_o ( alu_valid_ex_id ), + .alu_exception_o ( alu_exception_ex_id ), // Branches and Jumps - .branch_valid_i ( branch_valid_id_ex ), - .predict_address_i ( predict_address_id_ex ), - .predict_taken_i ( predict_taken_id_ex ), - .branchpredict_o ( branchpredict ), + .branch_valid_i ( branch_valid_id_ex ), + .predict_branch_valid_i ( predict_branch_valid_id_ex ), + 
.predict_address_i ( predict_address_id_ex ), + .predict_taken_i ( predict_taken_id_ex ), + .branchpredict_o ( branchpredict ), // LSU - .lsu_ready_o ( lsu_ready_ex_id ), - .lsu_valid_i ( lsu_valid_id_ex ), - .lsu_result_o ( lsu_result_ex_id ), - .lsu_trans_id_o ( lsu_trans_id_ex_id ), - .lsu_valid_o ( lsu_valid_ex_id ), - .lsu_commit_i ( lsu_commit_commit_ex ), // from commit - .lsu_exception_o ( lsu_exception_ex_id ), + .lsu_ready_o ( lsu_ready_ex_id ), + .lsu_valid_i ( lsu_valid_id_ex ), + .lsu_result_o ( lsu_result_ex_id ), + .lsu_trans_id_o ( lsu_trans_id_ex_id ), + .lsu_valid_o ( lsu_valid_ex_id ), + .lsu_commit_i ( lsu_commit_commit_ex ), // from commit + .lsu_exception_o ( lsu_exception_ex_id ), // CSR - .csr_ready_o ( csr_ready_ex_id ), - .csr_valid_i ( csr_valid_id_ex ), - .csr_trans_id_o ( csr_trans_id_ex_id ), - .csr_result_o ( csr_result_ex_id ), - .csr_valid_o ( csr_valid_ex_id ), - .csr_addr_o ( csr_addr_ex_csr ), - .csr_commit_i ( csr_commit_commit_ex ), // from commit + .csr_ready_o ( csr_ready_ex_id ), + .csr_valid_i ( csr_valid_id_ex ), + .csr_trans_id_o ( csr_trans_id_ex_id ), + .csr_result_o ( csr_result_ex_id ), + .csr_valid_o ( csr_valid_ex_id ), + .csr_addr_o ( csr_addr_ex_csr ), + .csr_commit_i ( csr_commit_commit_ex ), // from commit // memory management - .enable_translation_i ( enable_translation_csr_ex ), // from CSR - .fetch_req_i ( fetch_req_if_ex ), - .fetch_gnt_o ( fetch_gnt_ex_if ), - .fetch_valid_o ( fetch_valid_ex_if ), - .fetch_err_o ( fetch_err_ex_if ), - .fetch_vaddr_i ( fetch_vaddr_if_ex ), - .fetch_rdata_o ( fetch_rdata_ex_if ), - .priv_lvl_i ( priv_lvl ), // from CSR - .flag_pum_i ( flag_pum_csr_ex ), // from CSR - .flag_mxr_i ( flag_mxr_csr_ex ), // from CSR - .pd_ppn_i ( pd_ppn_csr_ex ), // from CSR - .asid_i ( asid_csr_ex ), // from CSR - .flush_tlb_i ( flush_tlb ), + .enable_translation_i ( enable_translation_csr_ex ), // from CSR + .fetch_req_i ( fetch_req_if_ex ), + .fetch_gnt_o ( fetch_gnt_ex_if ), + .fetch_valid_o ( 
fetch_valid_ex_if ), + .fetch_err_o ( fetch_err_ex_if ), + .fetch_vaddr_i ( fetch_vaddr_if_ex ), + .fetch_rdata_o ( fetch_rdata_ex_if ), + .priv_lvl_i ( priv_lvl ), // from CSR + .flag_pum_i ( flag_pum_csr_ex ), // from CSR + .flag_mxr_i ( flag_mxr_csr_ex ), // from CSR + .pd_ppn_i ( pd_ppn_csr_ex ), // from CSR + .asid_i ( asid_csr_ex ), // from CSR + .flush_tlb_i ( flush_tlb ), - .mult_ready_o ( mult_ready_ex_id ), - .mult_valid_i ( mult_valid_id_ex ), + .mult_ready_o ( mult_ready_ex_id ), + .mult_valid_i ( mult_valid_id_ex ), .* ); // --------- @@ -425,11 +431,11 @@ module ariane logic flush_commit_i; logic branchpredict_i; - controller i_controller ( + controller controller_i ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), .flush_commit_i ( flush_commit_i ), - .flush_csr_i ( flsh_csr_ctrl ), + .flush_csr_i ( flush_csr_ctrl ), .branchpredict_i ( branchpredict ) ); diff --git a/src/branch_engine.sv b/src/branch_engine.sv index beadaf860..cc5521fa4 100644 --- a/src/branch_engine.sv +++ b/src/branch_engine.sv @@ -22,13 +22,15 @@ import ariane_pkg::*; module branch_engine ( input logic [63:0] operand_a_i, input logic [63:0] operand_b_i, + input logic [63:0] pc_i, input logic valid_i, - input logic comparison_result_i, // result of comparison - input logic [63:0] predict_address_i, // this is the address we predicted + input logic comparison_result_i, // result of comparison + input logic [63:0] predict_address_i, // this is the address we predicted + input logic predict_branch_valid_i, // we predicted that this was a valid branch input logic predict_taken_i, - output branchpredict branchpredict_o, // this is the actual address we are targeting - output exception branch_ex_o // branch exception out + output branchpredict branchpredict_o, // this is the actual address we are targeting + output exception branch_ex_o // branch exception out ); logic [63:0] target_address; @@ -41,6 +43,8 @@ module branch_engine ( branchpredict_o.is_mispredict = 1'b0; if (valid_i) begin + // save 
pc + branchpredict_o.pc = pc_i; // calculate target address simple 64 bit addition target_address = $signed(operand_a_i) + $signed(operand_b_i); // write target address @@ -48,7 +52,10 @@ module branch_engine ( branchpredict_o.is_taken = comparison_result_i; // we mis-predicted e.g.: the predicted address is unequal to the actual address if (target_address[1:0] == 2'b0) begin - if (target_address != predict_address_i || predict_taken_i != comparison_result_i) begin + if ( target_address != predict_address_i // we mis-predicted the address of the branch + || predict_taken_i != comparison_result_i // we mis-predicted the outcome of the branch + || predict_branch_valid_i == 1'b0 // this means branch-prediction thought it was no branch but in real it was one + ) begin branchpredict_o.is_mispredict = 1'b1; end end diff --git a/src/decoder.sv b/src/decoder.sv index 55d6fdd9b..01c10f8a3 100644 --- a/src/decoder.sv +++ b/src/decoder.sv @@ -402,8 +402,8 @@ module decoder ( // Exception handling // -------------------------------- always_comb begin : exception_handling - instruction_o.ex = ex_i; - instruction_o.valid = 1'b0; + instruction_o.ex = ex_i; + instruction_o.valid = 1'b0; // look if we didn't already get an exception in any previous // stage - we should not overwrite it as we retain order regarding the exception if (~ex_i.valid && illegal_instr) begin @@ -413,6 +413,8 @@ module decoder ( instruction_o.ex.valid = 1'b1; // we decoded an illegal exception here instruction_o.ex.cause = ILLEGAL_INSTR; + // if we decoded an illegal instruction save the faulting instruction to tval + instruction_o.ex.tval = instruction_i; end end endmodule \ No newline at end of file diff --git a/src/ex_stage.sv b/src/ex_stage.sv index 6dd965189..ba8dc6f18 100644 --- a/src/ex_stage.sv +++ b/src/ex_stage.sv @@ -32,7 +32,7 @@ module ex_stage #( input logic [63:0] operand_c_i, input logic [63:0] imm_i, input logic [TRANS_ID_BITS-1:0] trans_id_i, - + input logic [63:0] pc_i, // PC of current 
instruction // ALU 1 output logic alu_ready_o, // FU is ready input logic alu_valid_i, // Output is valid @@ -42,6 +42,7 @@ module ex_stage #( output exception alu_exception_o, // Branches and Jumps input logic branch_valid_i, + input logic predict_branch_valid_i, input logic [63:0] predict_address_i, input logic predict_taken_i, output branchpredict branchpredict_o, @@ -139,6 +140,7 @@ module ex_stage #( .commit_i ( lsu_commit_i ), .* ); + // ----- // CSR // ----- diff --git a/src/id_stage.sv b/src/id_stage.sv index b0579abcc..725a034d0 100644 --- a/src/id_stage.sv +++ b/src/id_stage.sv @@ -42,6 +42,7 @@ module id_stage #( output logic [63:0] operand_c_o, output logic [63:0] imm_o, output logic [TRANS_ID_BITS-1:0] trans_id_o, + output logic [63:0] pc_o, input logic alu_ready_i, output logic alu_valid_o, @@ -51,6 +52,7 @@ module id_stage #( input logic predict_taken_i, // Branch predict Out output logic branch_valid_o, + output logic predict_branch_valid_o, // this is a valid prediction output logic [63:0] predict_address_o, output logic predict_taken_o, // ex just resolved our predicted branch, we are ready to accept new requests @@ -111,11 +113,11 @@ module id_stage #( // instructions past a branch. We need to resolve the branch beforehand. // This limitation is in place to ease the backtracking of mis-predicted branches as they // can simply be in the front-end of the processor. 
- logic unresolved_branch_n, unresolved_branch_q; + logic unresolved_branch_n, unresolved_branch_q; // branch predict registers - logic branch_valid_n, branch_valid_q; - logic [63:0] predict_address_n, predict_address_q; - logic predict_taken_n, predict_taken_q; + logic branch_valid_n, branch_valid_q; + logic [63:0] predict_address_n, predict_address_q; + logic predict_taken_n, predict_taken_q; always_comb begin : unresolved_branch unresolved_branch_n = unresolved_branch_q; @@ -138,12 +140,13 @@ module id_stage #( predict_taken_n = predict_taken_i; end end - // we are ready if we are not full and don't have any unresolved branches - assign ready_o = ~full & ~unresolved_branch_q; + // we are ready if we are not full and don't have any unresolved branches, but it can be + // the case that we have an unresolved branch which is cleared in that cycle (branchpredict_i.valid == 1) + assign ready_o = ~full & (~unresolved_branch_q || branchpredict_i.valid); // output branch prediction bits - assign branch_valid_o = branch_valid_q; - assign predict_address_o = predict_address_q; - assign predict_taken_o = predict_taken_q; + assign predict_branch_valid_o = branch_valid_q; + assign predict_address_o = predict_address_q; + assign predict_taken_o = predict_taken_q; decoder decoder_i ( .clk_i ( clk_i ), diff --git a/src/issue_read_operands.sv b/src/issue_read_operands.sv index cbfbff651..504941c09 100644 --- a/src/issue_read_operands.sv +++ b/src/issue_read_operands.sv @@ -45,6 +45,7 @@ module issue_read_operands ( output logic [63:0] operand_c_o, output logic [63:0] imm_o, // output immediate for the LSU output logic [TRANS_ID_BITS-1:0] trans_id_o, + output logic [63:0] pc_o, // ALU 1 input logic alu_ready_i, // FU is ready output logic alu_valid_o, // Output is valid @@ -102,7 +103,7 @@ module issue_read_operands ( // We can issue an instruction if we do not detect that any other instruction is writing the same // destination register. 
// We also need to check if there is an unresolved branch in the scoreboard. - always_comb begin : issue + always_comb begin : issue_scoreboard // default assignment issue_ack_o = 1'b0; // check that we didn't stall, that the instruction we got is valid @@ -114,6 +115,11 @@ module issue_read_operands ( if (rd_clobber_i[issue_instr_i.rd] == NONE) begin issue_ack_o = 1'b1; end + // or check that the target destination register will be written in this cycle by the + // commit stage + if (we_a_i && waddr_a_i == issue_instr_i.rd) begin + issue_ack_o = 1'b1; + end end // we can also issue the instruction under the following two circumstances: // we can do this even if we are stalled or no functional unit is ready (as we don't need one) @@ -254,7 +260,7 @@ module issue_read_operands ( // Exception pass through // if an exception has occurred simply pass it through // we do not want to issue this instruction - if (~issue_instr_i.ex.valid && issue_instr_valid_i) begin + if (~issue_instr_i.ex.valid && issue_instr_valid_i && issue_ack_o) begin case (issue_instr_i.fu) ALU: alu_valid_n = 1'b1; @@ -301,6 +307,7 @@ module issue_read_operands ( operand_a_q <= '{default: 0}; operand_b_q <= '{default: 0}; operand_c_q <= '{default: 0}; + imm_q <= 64'b0; alu_valid_q <= 1'b0; branch_valid_q <= 1'b0; mult_valid_q <= 1'b0; @@ -308,10 +315,12 @@ module issue_read_operands ( csr_valid_q <= 1'b0; operator_q <= ADD; trans_id_q <= 5'b0; + pc_o <= 64'b0; end else begin operand_a_q <= operand_a_n; operand_b_q <= operand_b_n; operand_c_q <= operand_c_n; + imm_q <= imm_n; alu_valid_q <= alu_valid_n; branch_valid_q <= branch_valid_n; mult_valid_q <= mult_valid_n; @@ -319,6 +328,7 @@ module issue_read_operands ( csr_valid_q <= csr_valid_n; operator_q <= operator_n; trans_id_q <= trans_id_n; + pc_o <= issue_instr_i.pc; end end endmodule diff --git a/src/scoreboard.sv b/src/scoreboard.sv index 345c55043..2060621a2 100644 --- a/src/scoreboard.sv +++ b/src/scoreboard.sv @@ -100,6 +100,8 @@ always_comb 
begin : clobber_output if (i[BITS_ENTRIES-1:0] >= commit_pointer_q && i[BITS_ENTRIES-1:0] < issue_pointer_q) rd_clobber_o[mem_q[i].rd] = mem_q[i].fu; end + end else if (commit_pointer_q == issue_pointer_q) begin // everything committed + rd_clobber_o = '{default: NONE}; end else begin // the issue pointer has overflowed, invert logic, depicted on the right for (int unsigned i = 0; i < NR_ENTRIES; i++) begin if (i[BITS_ENTRIES-1:0] >= commit_pointer_q || i[BITS_ENTRIES-1:0] < issue_pointer_q) @@ -209,7 +211,7 @@ always_comb begin : issue_instruction // provide a combinatorial path in case the scoreboard is empty - if (top_pointer_q == issue_pointer_q) begin + if (top_pointer_q == issue_pointer_q && ~full_o) begin issue_instr_o = decoded_instr_i; issue_instr_o.trans_id = issue_pointer_q; issue_instr_valid_o = decoded_instr_valid_i; @@ -218,7 +220,7 @@ always_comb begin : issue_instruction issue_instr_o = mem_q[$unsigned(issue_pointer_q)]; // we have not reached the top of the buffer // issue pointer has overflowed - if (issue_pointer_q <= commit_pointer_q) begin + if (issue_pointer_q < commit_pointer_q) begin if (issue_pointer_q < top_pointer_q) issue_instr_valid_o = 1'b1; else @@ -272,7 +274,7 @@ always_ff @(posedge clk_i or negedge rst_ni) begin : sequential commit_pointer_q <= commit_pointer_n; top_pointer_q <= top_pointer_n; mem_q <= mem_n; - if (decoded_instr_valid_i) // only advance if we decoded instruction + if (decoded_instr_valid_i && ~full_o) // only advance if we decoded instruction and we are not full top_pointer_qq <= top_pointer_q; end end diff --git a/tb/wave/wave_core.do b/tb/wave/wave_core.do index f916e25f9..0508adff1 100644 --- a/tb/wave/wave_core.do +++ b/tb/wave/wave_core.do @@ -1,20 +1,4 @@ -add wave -noupdate -group instr_if /core_tb/instr_if/* - -add wave -noupdate -group Core /core_tb/dut/clk_i -add wave -noupdate -group Core /core_tb/dut/clock_en_i -add wave -noupdate -group Core /core_tb/dut/test_en_i -add wave -noupdate -group Core 
/core_tb/dut/fetch_enable_i -add wave -noupdate -group Core /core_tb/dut/core_busy_o -add wave -noupdate -group Core /core_tb/dut/ext_perf_counters_i -add wave -noupdate -group Core /core_tb/dut/boot_addr_i -add wave -noupdate -group Core /core_tb/dut/core_id_i -add wave -noupdate -group Core /core_tb/dut/cluster_id_i -add wave -noupdate -group Core /core_tb/dut/irq_i -add wave -noupdate -group Core /core_tb/dut/irq_id_i -add wave -noupdate -group Core /core_tb/dut/irq_ack_o -add wave -noupdate -group Core /core_tb/dut/irq_sec_i -add wave -noupdate -group Core /core_tb/dut/sec_lvl_o - +add wave -noupdate -group core /core_tb/dut/* add wave -noupdate -group pcgen_stage -group btb /core_tb/dut/pcgen_i/btb_i/* add wave -noupdate -group pcgen_stage /core_tb/dut/pcgen_i/* add wave -noupdate -group if_stage -group prefetch_buffer -group fifo /core_tb/dut/if_stage_i/prefetch_buffer_i/fifo_i/* @@ -23,11 +7,14 @@ add wave -noupdate -group if_stage /core_tb/dut/if_stage_i/* add wave -noupdate -group id_stage -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/* add wave -noupdate -group id_stage -group decoder /core_tb/dut/id_stage_i/decoder_i/* add wave -noupdate -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/* +add wave -noupdate -group id_stage /core_tb/dut/id_stage_i/* add wave -noupdate -group ex_stage -group ALU /core_tb/dut/ex_stage_i/alu_i/* add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/* +add wave -noupdate -group ex_stage -group branch_engine /core_tb/dut/ex_stage_i/branch_engine_i/* add wave -noupdate -group ex_stage -expand -group csr_buffer /core_tb/dut/ex_stage_i/csr_buffer_i/* add wave -noupdate -group ex_stage /core_tb/dut/ex_stage_i/* add wave -noupdate -group commit_stage /core_tb/dut/commit_stage_i/* add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/* +add wave -noupdate -group controller /core_tb/dut/controller_i/* TreeUpdate [SetDefaultTree] diff --git 
a/test/add_test.S b/test/add_test.S index aa79ea501..e90108cec 100755 --- a/test/add_test.S +++ b/test/add_test.S @@ -5,6 +5,7 @@ addi x1, x0, 1 addi x2, x0, 1 add x3, x1, x2 + add x3, x1, x2 add x4, x2, x3 add x5, x3, x4 add x6, x4, x5 @@ -13,6 +14,15 @@ csrw mstatus, x7 add x9, x7, x8 csrr x1, mstatus + jal L1 nop nop - nop \ No newline at end of file + nop + nop + nop + nop + nop +L1: nop + nop + nop + addi x1, x0, 55 \ No newline at end of file From b0ebde437c36b0a0bdcf7a04e94ee7abaa7cbcd2 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Thu, 11 May 2017 10:46:53 +0200 Subject: [PATCH 09/43] :green_heart: Fix scoreboard underflow bug --- src/scoreboard.sv | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/scoreboard.sv b/src/scoreboard.sv index 2060621a2..d789c767a 100644 --- a/src/scoreboard.sv +++ b/src/scoreboard.sv @@ -225,6 +225,10 @@ always_comb begin : issue_instruction issue_instr_valid_o = 1'b1; else issue_instr_valid_o = 1'b0; + end else if (issue_pointer_q == commit_pointer_q) begin + // commit and issue pointer are the same, so we are waiting + // for instructions to be written back + issue_instr_valid_o = 1'b0; end else begin // issue pointer has not overflowed if (pointer_overflow) issue_instr_valid_o = 1'b1; From d624a4163c2bd202391da8df03026abb79eedd1c Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Thu, 11 May 2017 10:57:26 +0200 Subject: [PATCH 10/43] Increase prefetch depth from 3 to 4 --- Makefile | 2 +- src/fetch_fifo.sv | 2 +- tb/scoreboard_tb.sv | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 352f75230..b6aeff4c6 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ tbs = $(wildcard tb/*_tb.sv) # Search here for include files (e.g.: non-standalone components) incdir = ./includes # Test case to run -test_case = alu_test +test_case = core_test # QuestaSim Version questa_version = -10.5c compile_flag = +cover=bcfst+/dut diff --git a/src/fetch_fifo.sv b/src/fetch_fifo.sv index 
ec9e404bb..455613a90 100644 --- a/src/fetch_fifo.sv +++ b/src/fetch_fifo.sv @@ -49,7 +49,7 @@ module fetch_fifo // included. This signal is available immediately as it comes directly out of FFs ); - localparam DEPTH = 3; // must be 3 or greater + localparam DEPTH = 4; // must be 3 or greater /* verilator lint_off LITENDIAN */ // index 0 is used for output logic [0:DEPTH-1] [63:0] addr_n, addr_int, addr_Q; diff --git a/tb/scoreboard_tb.sv b/tb/scoreboard_tb.sv index ded47b41c..fff661d9f 100644 --- a/tb/scoreboard_tb.sv +++ b/tb/scoreboard_tb.sv @@ -110,7 +110,6 @@ module scoreboard_tb; @(scoreboard_if.mck); - // if we are not full then load another instruction if (scoreboard_if.issue_instr_valid == 1'b1) begin scoreboard_if.mck.issue_ack <= 1'b1; issue_instruction <= scoreboard_if.mck.issue_instr; From 2b790db79bdc11cbcc6d60e809171289fbc30896 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Thu, 11 May 2017 11:11:51 +0200 Subject: [PATCH 11/43] Add support for compressed branches --- src/ariane.sv | 2 ++ src/branch_engine.sv | 7 +++-- src/ex_stage.sv | 2 ++ src/id_stage.sv | 1 + src/issue_read_operands.sv | 60 ++++++++++++++++++++------------------ 5 files changed, 41 insertions(+), 31 deletions(-) diff --git a/src/ariane.sv b/src/ariane.sv index 9f7e39b64..1a2abc6e5 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -283,6 +283,7 @@ module ariane .imm_o ( imm_id_ex ), .trans_id_o ( trans_id_id_ex ), .pc_o ( pc_id_ex ), + .is_compressed_instr_o ( is_compressed_instr_id_ex ), // ALU .alu_ready_i ( alu_ready_ex_id ), .alu_valid_o ( alu_valid_id_ex ), @@ -330,6 +331,7 @@ module ariane .imm_i ( imm_id_ex ), .trans_id_i ( trans_id_id_ex ), .pc_i ( pc_id_ex ), + .is_compressed_instr_i ( is_compressed_instr_id_ex ), // ALU .alu_ready_o ( alu_ready_ex_id ), .alu_valid_i ( alu_valid_id_ex ), diff --git a/src/branch_engine.sv b/src/branch_engine.sv index cc5521fa4..ef92f0f86 100644 --- a/src/branch_engine.sv +++ b/src/branch_engine.sv @@ -23,6 +23,7 @@ module branch_engine 
( input logic [63:0] operand_a_i, input logic [63:0] operand_b_i, input logic [63:0] pc_i, + input logic is_compressed_instr_i, input logic valid_i, input logic comparison_result_i, // result of comparison @@ -47,14 +48,14 @@ module branch_engine ( branchpredict_o.pc = pc_i; // calculate target address simple 64 bit addition target_address = $signed(operand_a_i) + $signed(operand_b_i); - // write target address - branchpredict_o.target_address = target_address; + // write target address which goes to pc gen: the branch target if the branch is taken, otherwise the next sequential PC (pc_i plus 2 for a compressed instruction, pc_i plus 4 otherwise); the inner conditional is parenthesized because the addition binds tighter than the conditional operator + branchpredict_o.target_address = (comparison_result_i) ? target_address : pc_i + ((is_compressed_instr_i) ? 64'h2 : 64'h4); branchpredict_o.is_taken = comparison_result_i; // we mis-predicted e.g.: the predicted address is unequal to the actual address if (target_address[1:0] == 2'b0) begin if ( target_address != predict_address_i // we mis-predicted the address of the branch || predict_taken_i != comparison_result_i // we mis-predicted the outcome of the branch - || predict_branch_valid_i == 1'b0 // this means branch-prediction thought it was no branch but in real it was one + || predict_branch_valid_i == 1'b0 // this means branch-prediction thought it was no branch but in reality it was one ) begin branchpredict_o.is_mispredict = 1'b1; end diff --git a/src/ex_stage.sv b/src/ex_stage.sv index ba8dc6f18..81b212739 100644 --- a/src/ex_stage.sv +++ b/src/ex_stage.sv @@ -33,6 +33,8 @@ module ex_stage #( input logic [63:0] imm_i, input logic [TRANS_ID_BITS-1:0] trans_id_i, input logic [63:0] pc_i, // PC of current instruction + input logic is_compressed_instr_i, // we need to know if this was a compressed instruction + // in order to calculate the next PC on a mis-predict // ALU 1 output logic alu_ready_o, // FU is ready input logic alu_valid_i, // Output is valid diff --git a/src/id_stage.sv b/src/id_stage.sv index 725a034d0..ec57bfd69 100644 --- a/src/id_stage.sv +++ b/src/id_stage.sv @@ -43,6 +43,7 @@ module id_stage #( output logic [63:0] imm_o, output logic
[TRANS_ID_BITS-1:0] trans_id_o, output logic [63:0] pc_o, + output logic is_compressed_instr_o, input logic alu_ready_i, output logic alu_valid_o, diff --git a/src/issue_read_operands.sv b/src/issue_read_operands.sv index 504941c09..9a161d0ee 100644 --- a/src/issue_read_operands.sv +++ b/src/issue_read_operands.sv @@ -46,6 +46,7 @@ module issue_read_operands ( output logic [63:0] imm_o, // output immediate for the LSU output logic [TRANS_ID_BITS-1:0] trans_id_o, output logic [63:0] pc_o, + output logic is_compressed_instr_o, // ALU 1 input logic alu_ready_i, // FU is ready output logic alu_valid_o, // Output is valid @@ -74,6 +75,7 @@ module issue_read_operands ( operand_b_n, operand_b_q, operand_c_n, operand_c_q, imm_n, imm_q; + logic alu_valid_n, alu_valid_q; logic mult_valid_n, mult_valid_q; logic lsu_valid_n, lsu_valid_q; @@ -232,15 +234,15 @@ module issue_read_operands ( JAL: begin operator_n = ADD; // output 4 as operand b as we - // need to save PC + 4 - operand_b_n = 64'h4; + // need to save PC + 4 or in case of a compressed instruction PC + 2 + operand_b_n = (issue_instr_i.is_compressed) ? 64'h2 : 64'h4; end JALR: begin operator_n = ADD; // output 4 as operand b as we - // need to save PC + 4 - operand_b_n = 64'h4; + // need to save PC + 4 or in case of a compressed instruction PC + 2 + operand_b_n = (issue_instr_i.is_compressed) ?
64'h2 : 64'h4; // get RS1 as operand C operand_c_n = operand_a_regfile; // forward rs1 @@ -304,31 +306,33 @@ module issue_read_operands ( // Registers always_ff @(posedge clk_i or negedge rst_ni) begin if(~rst_ni) begin - operand_a_q <= '{default: 0}; - operand_b_q <= '{default: 0}; - operand_c_q <= '{default: 0}; - imm_q <= 64'b0; - alu_valid_q <= 1'b0; - branch_valid_q <= 1'b0; - mult_valid_q <= 1'b0; - lsu_valid_q <= 1'b0; - csr_valid_q <= 1'b0; - operator_q <= ADD; - trans_id_q <= 5'b0; - pc_o <= 64'b0; + operand_a_q <= '{default: 0}; + operand_b_q <= '{default: 0}; + operand_c_q <= '{default: 0}; + imm_q <= 64'b0; + alu_valid_q <= 1'b0; + branch_valid_q <= 1'b0; + mult_valid_q <= 1'b0; + lsu_valid_q <= 1'b0; + csr_valid_q <= 1'b0; + operator_q <= ADD; + trans_id_q <= 5'b0; + pc_o <= 64'b0; + is_compressed_instr_o <= 1'b0; end else begin - operand_a_q <= operand_a_n; - operand_b_q <= operand_b_n; - operand_c_q <= operand_c_n; - imm_q <= imm_n; - alu_valid_q <= alu_valid_n; - branch_valid_q <= branch_valid_n; - mult_valid_q <= mult_valid_n; - lsu_valid_q <= lsu_valid_n; - csr_valid_q <= csr_valid_n; - operator_q <= operator_n; - trans_id_q <= trans_id_n; - pc_o <= issue_instr_i.pc; + operand_a_q <= operand_a_n; + operand_b_q <= operand_b_n; + operand_c_q <= operand_c_n; + imm_q <= imm_n; + alu_valid_q <= alu_valid_n; + branch_valid_q <= branch_valid_n; + mult_valid_q <= mult_valid_n; + lsu_valid_q <= lsu_valid_n; + csr_valid_q <= csr_valid_n; + operator_q <= operator_n; + trans_id_q <= trans_id_n; + pc_o <= issue_instr_i.pc; + is_compressed_instr_o <= issue_instr_i.is_compressed; end end endmodule From 742fb2b307fa18234f16778607413aa7e3344651 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Thu, 11 May 2017 13:26:20 +0200 Subject: [PATCH 12/43] Basic branching in place --- src/ariane.sv | 25 ++++++++++++++++----- src/controller.sv | 24 +++++++++++++++++--- src/id_stage.sv | 4 +++- src/if_stage.sv | 50 +++++++++++++++++++++++++++--------------- src/pcgen.sv | 
6 +++++ src/prefetch_buffer.sv | 6 ++--- src/scoreboard.sv | 20 +++++++++++++---- 7 files changed, 99 insertions(+), 36 deletions(-) diff --git a/src/ariane.sv b/src/ariane.sv index 1a2abc6e5..cda25df5d 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -206,6 +206,10 @@ module ariane // * -> CTRL logic flush_csr_ctrl; + logic flush_unissued_instr_ctrl_id; + logic flush_scoreboard_ctrl_id; + logic flush_ctrl_if; + // TODO: Preliminary signal assignments logic flush_tlb; assign flush_tlb = 1'b0; @@ -232,7 +236,7 @@ module ariane // IF // --------- if_stage if_stage_i ( - .flush_i ( flush ), + .flush_i ( flush_ctrl_if ), .req_i ( fetch_enable ), .if_busy_o ( ), // ? .id_ready_i ( ready_id_if ), @@ -269,6 +273,8 @@ module ariane id_stage_i ( .test_en_i ( test_en_i ), .flush_i ( flush ), + .flush_unissued_instr_i ( flush_unissued_instr_ctrl_id ), + .flush_scoreboard_i ( flush_scoreboard_ctrl_id ), .instruction_i ( instr_rdata_if_id ), .instruction_valid_i ( instr_valid_if_id ), .is_compressed_i ( is_compressed_if_id ), @@ -434,11 +440,18 @@ module ariane logic branchpredict_i; controller controller_i ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .flush_commit_i ( flush_commit_i ), - .flush_csr_i ( flush_csr_ctrl ), - .branchpredict_i ( branchpredict ) + .flush_bp_o ( ), + .flush_scoreboard_o ( flush_scoreboard_ctrl_id ), + .flush_unissued_instr_o ( flush_unissued_instr_ctrl_id ), + .flush_if_o ( flush_ctrl_if ), + .flush_id_o ( ), + .flush_ex_o ( ), + + .flush_ready_lsu_i ( ), + .flush_commit_i ( flush_commit_i ), + .flush_csr_i ( flush_csr_ctrl ), + .branchpredict_i ( branchpredict ), + .* ); diff --git a/src/controller.sv b/src/controller.sv index 2510caf7e..4dd49968c 100644 --- a/src/controller.sv +++ b/src/controller.sv @@ -23,12 +23,30 @@ module controller ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low - input logic flush_commit_i, // flush request from commit stage in + output logic flush_bp_o, // flush branch prediction data 
structures + output logic flush_unissued_instr_o, + output logic flush_scoreboard_o, + output logic flush_if_o, + output logic flush_id_o, + output logic flush_ex_o, + + input logic flush_ready_lsu_i, // we need to wait for this signal from LSU + input logic flush_commit_i, // flush request from commit stage in input logic flush_csr_i, input branchpredict branchpredict_i ); + assign flush_bp_o = 1'b0; -// flush on mispredict + always_comb begin : flush_ctrl + flush_unissued_instr_o = 1'b0; + flush_scoreboard_o = 1'b0; + flush_if_o = 1'b0; + // flush on mispredict + if (branchpredict_i.is_mispredict) begin + flush_unissued_instr_o = 1'b1; + flush_if_o = 1'b1; + end -// flush on exception + end + // flush on exception endmodule diff --git a/src/id_stage.sv b/src/id_stage.sv index ec57bfd69..ed8c964c7 100644 --- a/src/id_stage.sv +++ b/src/id_stage.sv @@ -28,6 +28,8 @@ module id_stage #( input logic test_en_i, // Test Enable input logic flush_i, + input logic flush_unissued_instr_i, + input logic flush_scoreboard_i, // from IF input logic [31:0] instruction_i, input logic instruction_valid_i, @@ -167,7 +169,7 @@ module id_stage #( scoreboard_i ( .full_o ( full ), - .flush_i ( flush_i ), + .flush_i ( flush_scoreboard_i ), .rd_clobber_o ( rd_clobber_sb_iro ), .rs1_i ( rs1_iro_sb ), .rs1_o ( rs1_sb_iro ), diff --git a/src/if_stage.sv b/src/if_stage.sv index bf63d94dd..12e7d1ac6 100644 --- a/src/if_stage.sv +++ b/src/if_stage.sv @@ -97,8 +97,8 @@ module if_stage ( // Pre-fetch buffer, caches a fixed number of instructions prefetch_buffer prefetch_buffer_i ( .clk ( clk_i ), - .rst_n ( rst_ni ), - + .rst_n ( rst_ni ), + .flush_i ( flush_i ), .req_i ( req_i ), .branch_i ( branch_req ), // kill everything @@ -209,24 +209,38 @@ module if_stage ( end else begin - offset_fsm_cs <= offset_fsm_ns; - branch_valid_q <= branch_valid_n; - predict_address_q <= predict_address_n; - predict_taken_q <= predict_taken_n; + if (flush_i) begin + // offset FSM state + offset_fsm_cs <= IDLE; + 
instr_valid_id_o <= 1'b0; + instr_rdata_id_o <= '0; + illegal_c_insn_id_o <= 1'b0; + is_compressed_id_o <= 1'b0; + pc_id_o <= '0; + ex_o <= '{default: 0}; + branch_valid_q <= 1'b0; + predict_address_q <= 64'b0; + predict_taken_q <= 1'b0; + end else begin - if (if_valid) begin - instr_valid_id_o <= 1'b1; - instr_rdata_id_o <= instr_decompressed; - illegal_c_insn_id_o <= illegal_c_insn; - is_compressed_id_o <= instr_compressed_int; - pc_id_o <= pc_if_o; - ex_o.cause <= 64'b0; // TODO: Output exception - ex_o.tval <= 64'b0; // TODO: Output exception - ex_o.valid <= 1'b0; // TODO: Output exception - end else if (clear_instr_valid_i) begin - instr_valid_id_o <= 1'b0; + offset_fsm_cs <= offset_fsm_ns; + branch_valid_q <= branch_valid_n; + predict_address_q <= predict_address_n; + predict_taken_q <= predict_taken_n; + + if (if_valid) begin + instr_valid_id_o <= 1'b1; + instr_rdata_id_o <= instr_decompressed; + illegal_c_insn_id_o <= illegal_c_insn; + is_compressed_id_o <= instr_compressed_int; + pc_id_o <= pc_if_o; + ex_o.cause <= 64'b0; // TODO: Output exception + ex_o.tval <= 64'b0; // TODO: Output exception + ex_o.valid <= 1'b0; // TODO: Output exception + end else if (clear_instr_valid_i) begin + instr_valid_id_o <= 1'b0; + end end - end end diff --git a/src/pcgen.sv b/src/pcgen.sv index d5619d69d..ac67543fc 100644 --- a/src/pcgen.sv +++ b/src/pcgen.sv @@ -84,6 +84,12 @@ module pcgen ( end // 1.Debug + // 3. Control flow change request + if (branchpredict_i.is_mispredict) begin + set_pc_n = 1'b1; + // we already got the correct target address + npc_n = branchpredict_i.target_address; + end // 2. 
Exception if (ex_i.valid) begin npc_n = trap_vector_base_i; diff --git a/src/prefetch_buffer.sv b/src/prefetch_buffer.sv index 86b520cc6..fc47edbe4 100644 --- a/src/prefetch_buffer.sv +++ b/src/prefetch_buffer.sv @@ -26,6 +26,7 @@ module prefetch_buffer ( input logic clk, input logic rst_n, + input logic flush_i, input logic req_i, @@ -97,11 +98,8 @@ module prefetch_buffer //--------------- assign fetch_addr = {instr_addr_q[63:2], 2'b00} + 64'd4; + assign fifo_clear = branch_i || flush_i; - always_comb - begin - fifo_clear = branch_i; - end //------------------------- // Instruction fetch FSM diff --git a/src/scoreboard.sv b/src/scoreboard.sv index d789c767a..697762bda 100644 --- a/src/scoreboard.sv +++ b/src/scoreboard.sv @@ -29,7 +29,8 @@ module scoreboard #( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low output logic full_o, // We can't take anymore data - input logic flush_i, + input logic flush_i, // flush whole scoreboard + input logic flush_unissued_instr_i, // list of clobbered registers to issue stage output fu_t [31:0] rd_clobber_o, @@ -199,8 +200,12 @@ always_comb begin : push_instruction_and_wb end end end - - // flush signal + // flush all instructions which are not issued, e.g. 
set the top pointer back to the issue pointer + // -> everything we decoded so far was garbage + if (flush_unissued_instr_i) begin + top_pointer_n = issue_pointer_q; + end + // flush signal, e.g.: flush everything we need to backtrack after an exception if (flush_i) mem_n = '{default: 0}; @@ -246,6 +251,10 @@ always_comb begin : issue_instruction issue_pointer_n = issue_pointer_q + 1; end + // if we are flushing we should not issue the current instruction + if (flush_unissued_instr_i) + issue_instr_valid_o = 1'b0; + end // commit instruction: remove from scoreboard, advance pointer @@ -279,7 +288,10 @@ always_ff @(posedge clk_i or negedge rst_ni) begin : sequential top_pointer_q <= top_pointer_n; mem_q <= mem_n; if (decoded_instr_valid_i && ~full_o) // only advance if we decoded instruction and we are not full - top_pointer_qq <= top_pointer_q; + if (flush_unissued_instr_i) + top_pointer_qq <= top_pointer_n; + else + top_pointer_qq <= top_pointer_q; end end From 5ec741da4cd7e898aa33907686e9a0611c50a43f Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Thu, 11 May 2017 18:18:13 +0200 Subject: [PATCH 13/43] :bug: Branching in frontend works now --- src/btb.sv | 15 +++++++----- src/if_stage.sv | 62 +++++++++++++++++++++---------------------------- src/lsu.sv | 2 +- src/pcgen.sv | 25 +++++++++++++++++--- src/tlb.sv | 21 ++++------------- test/add_test.S | 6 ++--- 6 files changed, 65 insertions(+), 66 deletions(-) diff --git a/src/btb.sv b/src/btb.sv index 8df46cb06..0f669c611 100644 --- a/src/btb.sv +++ b/src/btb.sv @@ -68,13 +68,15 @@ module btb #( btb_n[$unsigned(update_pc)].valid = 1'b1; // update saturation counter // first check if counter is already saturated in the positive regime e.g.: branch taken - if (saturation_counter == {BITS_SATURATION_COUNTER{1'b1}} && ~branchpredict_i.is_taken) begin + if (saturation_counter == {BITS_SATURATION_COUNTER{1'b1}}) begin // we can safely decrease it - btb_n[$unsigned(update_pc)].saturation_counter = saturation_counter - 
1; + if (~branchpredict_i.is_taken) + btb_n[$unsigned(update_pc)].saturation_counter = saturation_counter - 1; // then check if it saturated in the negative regime e.g.: branch not taken - end else if (saturation_counter == {BITS_SATURATION_COUNTER{1'b0}} && branchpredict_i.is_taken) begin + end else if (saturation_counter == {BITS_SATURATION_COUNTER{1'b0}}) begin // we can safely increase it - btb_n[$unsigned(update_pc)].saturation_counter = saturation_counter + 1; + if (branchpredict_i.is_taken) + btb_n[$unsigned(update_pc)].saturation_counter = saturation_counter + 1; end else begin // otherwise we are not in any boundaries and can decrease or increase it if (branchpredict_i.is_taken) btb_n[$unsigned(update_pc)].saturation_counter = saturation_counter + 1; @@ -89,8 +91,9 @@ module btb #( // sequential process always_ff @(posedge clk_i or negedge rst_ni) begin if(~rst_ni) begin - // TODO: think about the reset value - btb_q <= '{default: 0}; + // Bias the branches to be taken upon first arrival + for (int i = 0; i < NR_ENTRIES; i++) + btb_q[i] <= '{1'b0, 64'b0, 2'b10}; end else begin // evict all entries if (flush_i) begin diff --git a/src/if_stage.sv b/src/if_stage.sv index 12e7d1ac6..14b05ef19 100644 --- a/src/if_stage.sv +++ b/src/if_stage.sv @@ -38,7 +38,7 @@ module if_stage ( input logic halt_if_i, // pipeline stall // ctrl flow instruction in input logic [63:0] fetch_addr_i, - input logic set_pc_i, // set new PC + input logic set_pc_i, // set new PC input logic is_branch_i, // the new PC was a branch e.g.: branch or jump // branchpredict out output logic branch_valid_o, @@ -170,24 +170,23 @@ module if_stage ( // We need to pass those registers on to ID in the case we've set // a new branch target (or jump) and we got a valid instruction always_comb begin - branch_valid_n = branch_valid_q; + // this is the latch case we keep the values predict_address_n = predict_address_q; predict_taken_n = predict_taken_q; - // we got a branch redirect from PCGEN - if 
(is_branch_i) begin - // set the registers to the correct address - branch_valid_n = 1'b1; + branch_valid_n = branch_valid_q; + // a new branch target has been set by PCGEN + // save this in the register stage + if (set_pc_i && is_branch_i) begin predict_address_n = fetch_addr_i; // whether we took the branch or not can be seen from the set PC // nevertheless we also need to keep branches not taken predict_taken_n = set_pc_i; - end - // we have a valid instruction and id excepted it so we consider all the - // branch information to be sampled correctly - if (if_valid && clear_instr_valid_i) begin - branch_valid_n = 1'b0; + branch_valid_n = is_branch_i; end + if (if_valid) begin + branch_valid_n = is_branch_i; + end end // -------------------------------------------------------------- @@ -209,38 +208,29 @@ module if_stage ( end else begin - if (flush_i) begin - // offset FSM state - offset_fsm_cs <= IDLE; - instr_valid_id_o <= 1'b0; - instr_rdata_id_o <= '0; - illegal_c_insn_id_o <= 1'b0; - is_compressed_id_o <= 1'b0; - pc_id_o <= '0; - ex_o <= '{default: 0}; - branch_valid_q <= 1'b0; - predict_address_q <= 64'b0; - predict_taken_q <= 1'b0; - end else begin - offset_fsm_cs <= offset_fsm_ns; - branch_valid_q <= branch_valid_n; predict_address_q <= predict_address_n; predict_taken_q <= predict_taken_n; + branch_valid_q <= branch_valid_n; if (if_valid) begin - instr_valid_id_o <= 1'b1; - instr_rdata_id_o <= instr_decompressed; - illegal_c_insn_id_o <= illegal_c_insn; - is_compressed_id_o <= instr_compressed_int; - pc_id_o <= pc_if_o; - ex_o.cause <= 64'b0; // TODO: Output exception - ex_o.tval <= 64'b0; // TODO: Output exception - ex_o.valid <= 1'b0; // TODO: Output exception + // in case of a flush simply say that the next instruction + // is not valid anymore + if (flush_i) begin + instr_valid_id_o <= 1'b0; + end else + instr_valid_id_o <= 1'b1; + instr_rdata_id_o <= instr_decompressed; + illegal_c_insn_id_o <= illegal_c_insn; + is_compressed_id_o <= 
instr_compressed_int; + pc_id_o <= pc_if_o; + ex_o.cause <= 64'b0; // TODO: Output exception + ex_o.tval <= 64'b0; // TODO: Output exception + ex_o.valid <= 1'b0; // TODO: Output exception end else if (clear_instr_valid_i) begin - instr_valid_id_o <= 1'b0; + instr_valid_id_o <= 1'b0; end - end + end end diff --git a/src/lsu.sv b/src/lsu.sv index 6ec80f70b..228c3c31b 100644 --- a/src/lsu.sv +++ b/src/lsu.sv @@ -170,7 +170,7 @@ module lsu #( .INSTR_TLB_ENTRIES ( 16 ), .DATA_TLB_ENTRIES ( 16 ), .ASID_WIDTH ( ASID_WIDTH ) - ) i_mmu ( + ) mmu_i ( .lsu_req_i ( translation_req ), .lsu_vaddr_i ( vaddr ), .lsu_valid_o ( translation_valid ), diff --git a/src/pcgen.sv b/src/pcgen.sv index ac67543fc..71acd2349 100644 --- a/src/pcgen.sv +++ b/src/pcgen.sv @@ -29,7 +29,7 @@ module pcgen ( // to IF output logic [63:0] pc_if_o, // new PC output logic set_pc_o, // request the PC to be set to pc_if_o - output logic is_branch_o, // to check if we branchpredicted we need to save whether this was a branch or not + output logic is_branch_o, // to check if we branchpredicted we need to save whether this was a branch or not <- LOL // global input input logic [63:0] boot_addr_i, // CSR input @@ -44,18 +44,36 @@ module pcgen ( logic is_branch; logic is_branch_n, is_branch_q; logic set_pc_n, set_pc_q; + // pc which is used to look up the prediction in the BTB + logic [63:0] predict_pc; assign pc_if_o = npc_q; assign set_pc_o = set_pc_q; assign is_branch_o = is_branch_q; + // Predict PC source select + // the PC which we use for lookup in the BTB can come from two sources: + // 1. PC from if stage plus + 4 + // 2. 
or PC which we just predicted + 4 + always_comb begin : pc_btb_lookup + // Ad 2: From PC of previous cycle (which is now in IF) + if (set_pc_q) begin + predict_pc = npc_q; + // Ad 1: + // in the previous cycle we set the PC to npc_q + // calculate the plus one version + end else begin + predict_pc = {pc_if_i[62:2], 2'b0} + 64'h4; + end + end + btb #( .NR_ENTRIES(64), .BITS_SATURATION_COUNTER(2) ) btb_i ( - .vpc_i ( pc_if_i ), + .vpc_i ( predict_pc ), .branchpredict_i ( branchpredict_i ), .is_branch_o ( is_branch ), .predict_taken_o ( predict_taken ), @@ -79,7 +97,8 @@ module pcgen ( set_pc_n = 1'b0; is_branch_n = is_branch; // 4. Predict taken - if (predict_taken) begin + if (is_branch && predict_taken) begin + set_pc_n = 1'b1; npc_n = branch_target_address; end // 1.Debug diff --git a/src/tlb.sv b/src/tlb.sv index 1bdc9bbf2..c464a9227 100644 --- a/src/tlb.sv +++ b/src/tlb.sv @@ -61,16 +61,13 @@ module tlb #( logic [8:0] vpn0, vpn1, vpn2; logic [TLB_ENTRIES-1:0] lu_hit; // to replacement logic logic [TLB_ENTRIES-1:0] replace_en; // replace the following entry, set by replacement strategy - // register signals - logic lu_access_q; - logic [63:0] lu_vaddr_q; //------------- // Translation //------------- always_comb begin : translation - vpn0 = lu_vaddr_q[20:12]; - vpn1 = lu_vaddr_q[29:21]; - vpn2 = lu_vaddr_q[38:30]; + vpn0 = lu_vaddr_i[20:12]; + vpn1 = lu_vaddr_i[29:21]; + vpn2 = lu_vaddr_i[38:30]; // default assignment lu_hit = '{default: 0}; @@ -168,7 +165,7 @@ module tlb #( // endcase for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin // we got a hit so update the pointer as it was least recently used - if (lu_hit[i] & lu_access_q) begin + if (lu_hit[i] & lu_access_i) begin // Set the nodes to the values we would expect for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin automatic int unsigned idx_base = $unsigned((2**lvl)-1); @@ -225,16 +222,6 @@ module tlb #( plru_tree_q <= plru_tree_n; end end - // sequential process - always_ff @(posedge 
clk_i or negedge rst_ni) begin : proc_ - if(~rst_ni) begin - lu_access_q <= 1'b0; - lu_vaddr_q <= 64'b0; - end else begin - lu_access_q <= lu_access_i; - lu_vaddr_q <= lu_vaddr_i; - end - end //-------------- // Sanity checks //-------------- diff --git a/test/add_test.S b/test/add_test.S index e90108cec..64c23bbcc 100755 --- a/test/add_test.S +++ b/test/add_test.S @@ -14,15 +14,15 @@ csrw mstatus, x7 add x9, x7, x8 csrr x1, mstatus - jal L1 + nop +L0: jal L1 nop nop nop nop nop nop - nop -L1: nop +L1: jal L0 nop nop addi x1, x0, 55 \ No newline at end of file From 0a81031e5a94bc76aef9159c26a7ac7ec042a2ec Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Thu, 11 May 2017 20:31:14 +0200 Subject: [PATCH 14/43] Add IF <-> ID acknowledge signal --- include/ariane_pkg.svh | 10 +++++- src/ariane.sv | 3 ++ src/id_stage.sv | 5 +-- src/if_stage.sv | 78 ++++++++++++++++++++++++++++-------------- src/scoreboard.sv | 10 ++++-- 5 files changed, 75 insertions(+), 31 deletions(-) diff --git a/include/ariane_pkg.svh b/include/ariane_pkg.svh index 8e892d4d6..c1aa822e3 100644 --- a/include/ariane_pkg.svh +++ b/include/ariane_pkg.svh @@ -38,7 +38,7 @@ package ariane_pkg; logic valid; } exception; - // miss-predict + // branch-predict typedef struct packed { logic [63:0] pc; // pc of predict or mis-predict logic [63:0] target_address; // target address at which to jump, or not @@ -47,6 +47,13 @@ package ariane_pkg; logic valid; // prediction with all its values is valid } branchpredict; + // branchpredict scoreboard entry + typedef struct packed { + logic [63:0] predict_address_i; // target address at which to jump, or not + logic predict_taken_i; // set if this was a mis-predict + logic valid; // branch is taken + } branchpredict_sbe; + typedef enum logic[3:0] { NONE, LSU, ALU, CTRL_FLOW, MULT, CSR } fu_t; @@ -92,6 +99,7 @@ package ariane_pkg; logic use_zimm; // use zimm as operand a logic use_pc; // set if we need to use the PC as operand a, PC from exception exception ex; // 
exception has occurred + branchpredict_sbe bp; // branch predict scoreboard data structure logic is_compressed; // signals a compressed instructions, we need this information at the commit stage if // we want jump accordingly e.g.: +4, +2 } scoreboard_entry; diff --git a/src/ariane.sv b/src/ariane.sv index cda25df5d..78bbcb104 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -107,6 +107,7 @@ module ariane logic [31:0] fetch_rdata_id_if; logic instr_valid_if_id; logic [31:0] instr_rdata_if_id; + logic decode_ack_id_if; logic illegal_c_insn_if_id; logic is_compressed_if_id; logic illegal_c_insn_id_if; @@ -252,6 +253,7 @@ module ariane .instr_gnt_i ( fetch_gnt_ex_if ), .instr_rvalid_i ( fetch_valid_ex_if ), .instr_rdata_i ( fetch_rdata_ex_if ), + .instr_ack_i ( decode_ack_id_if ), .instr_valid_id_o ( instr_valid_if_id ), .instr_rdata_id_o ( instr_rdata_if_id ), @@ -277,6 +279,7 @@ module ariane .flush_scoreboard_i ( flush_scoreboard_ctrl_id ), .instruction_i ( instr_rdata_if_id ), .instruction_valid_i ( instr_valid_if_id ), + .decoded_instr_ack_o ( decode_ack_id_if ), .is_compressed_i ( is_compressed_if_id ), .pc_if_i ( pc_id_if_id ), // PC from if .ex_if_i ( exception_if_id ), // exception from if diff --git a/src/id_stage.sv b/src/id_stage.sv index ed8c964c7..6d62156dc 100644 --- a/src/id_stage.sv +++ b/src/id_stage.sv @@ -33,6 +33,7 @@ module id_stage #( // from IF input logic [31:0] instruction_i, input logic instruction_valid_i, + output logic decoded_instr_ack_o, input logic is_compressed_i, input logic [63:0] pc_if_i, input exception ex_if_i, // we already got an exception in IF @@ -54,7 +55,7 @@ module id_stage #( input logic [63:0] predict_address_i, input logic predict_taken_i, // Branch predict Out - output logic branch_valid_o, + output logic branch_valid_o, // use the branch engine output logic predict_branch_valid_o, // this is a valid prediction output logic [63:0] predict_address_o, output logic predict_taken_o, @@ -145,7 +146,7 @@ module id_stage #( 
end // we are ready if we are not full and don't have any unresolved branches, but it can be // the case that we have an unresolved branch which is cleared in that cycle (branchpredict_i.valid == 1) - assign ready_o = ~full & (~unresolved_branch_q || branchpredict_i.valid); + assign ready_o = ~full && (~unresolved_branch_q || branchpredict_i.valid) && ~(instruction_valid_i && is_control_flow_instr); // output branch prediction bits assign predict_branch_valid_o = branch_valid_q; assign predict_address_o = predict_address_q; diff --git a/src/if_stage.sv b/src/if_stage.sv index 14b05ef19..dc4e7e0cf 100644 --- a/src/if_stage.sv +++ b/src/if_stage.sv @@ -53,6 +53,7 @@ module if_stage ( // Output of IF Pipeline stage output logic instr_valid_id_o, // instruction in IF/ID pipeline is valid output logic [31:0] instr_rdata_id_o, // read instruction is sampled and sent to ID stage for decoding + input logic instr_ack_i, output logic is_compressed_id_o, // compressed decoder thinks this is a compressed instruction output logic illegal_c_insn_id_o, // compressed decoder thinks this is an invalid instruction output logic [63:0] pc_if_o, @@ -76,7 +77,7 @@ module if_stage ( logic predict_taken_n, predict_taken_q; // offset FSM - enum logic[0:0] {WAIT, IDLE} offset_fsm_cs, offset_fsm_ns; + enum logic[1:0] {WAIT, IDLE, WAIT_BRANCHED} offset_fsm_cs, offset_fsm_ns; logic [31:0] instr_decompressed; logic illegal_c_insn; logic instr_compressed_int; @@ -132,22 +133,55 @@ module if_stage ( // no valid instruction data for ID stage // assume aligned IDLE: begin - if (req_i) begin - branch_req = 1'b1; - offset_fsm_ns = WAIT; - end + if (req_i) begin + branch_req = 1'b1; + offset_fsm_ns = WAIT; + end + + // take care of control flow changes + if (set_pc_i) begin + valid = 1'b0; + // switch to new PC from ID stage + branch_req = 1'b1; + offset_fsm_ns = WAIT; + end end // serving aligned 32 bit or 16 bit instruction, we don't know yet WAIT: begin - if (fetch_valid) begin - valid = 1'b1; // 
an instruction is ready for ID stage + if (fetch_valid) begin + valid = 1'b1; // an instruction is ready for ID stage - if (req_i && if_valid) begin - fetch_ready = 1'b1; - offset_fsm_ns = WAIT; + if (req_i && if_valid) begin + fetch_ready = 1'b1; + offset_fsm_ns = WAIT; + end + end + // take care of control flow changes + if (set_pc_i) begin + valid = 1'b0; + // switch to new PC from ID stage + branch_req = 1'b1; + offset_fsm_ns = WAIT_BRANCHED; + end + end + // we just branched so keep this instruction as valid + WAIT_BRANCHED: begin + if (fetch_valid) begin + valid = 1'b1; // an instruction is ready for ID stage + + if (req_i && if_valid) begin + fetch_ready = 1'b1; + offset_fsm_ns = WAIT; + end + end + + // take care of control flow changes + if (set_pc_i) begin + // switch to new PC from ID stage + branch_req = 1'b1; + offset_fsm_ns = WAIT_BRANCHED; end - end end default: begin @@ -155,13 +189,7 @@ module if_stage ( end endcase - // take care of control flow changes - if (set_pc_i) begin - valid = 1'b0; - // switch to new PC from ID stage - branch_req = 1'b1; - offset_fsm_ns = WAIT; - end + end // ------------- @@ -208,10 +236,10 @@ module if_stage ( end else begin - offset_fsm_cs <= offset_fsm_ns; - predict_address_q <= predict_address_n; - predict_taken_q <= predict_taken_n; - branch_valid_q <= branch_valid_n; + offset_fsm_cs <= offset_fsm_ns; + predict_address_q <= predict_address_n; + predict_taken_q <= predict_taken_n; + branch_valid_q <= branch_valid_n; if (if_valid) begin // in case of a flush simply say that the next instruction @@ -227,7 +255,8 @@ module if_stage ( ex_o.cause <= 64'b0; // TODO: Output exception ex_o.tval <= 64'b0; // TODO: Output exception ex_o.valid <= 1'b0; // TODO: Output exception - end else if (clear_instr_valid_i) begin + // id stage acknowledged + end else if (instr_ack_i) begin instr_valid_id_o <= 1'b0; end @@ -236,8 +265,7 @@ module if_stage ( // Assignments assign pc_if_o = fetch_addr; - // id stage acknowledged - assign 
clear_instr_valid_i = id_ready_i; + assign if_ready = valid & id_ready_i; assign if_valid = (~halt_if_i) & if_ready; assign if_busy_o = prefetch_busy; diff --git a/src/scoreboard.sv b/src/scoreboard.sv index 697762bda..7058cbbb3 100644 --- a/src/scoreboard.sv +++ b/src/scoreboard.sv @@ -51,6 +51,7 @@ module scoreboard #( // we can always put this instruction to the to p unless we signal with asserted full_o input dtype decoded_instr_i, input logic decoded_instr_valid_i, + output logic decoded_instr_ack_o, // instruction to issue logic, if issue_instr_valid and issue_ready is asserted, advance the issue pointer output dtype issue_instr_o, @@ -176,14 +177,17 @@ end // write-back instruction: update value of RD register in scoreboard always_comb begin : push_instruction_and_wb // default assignment - top_pointer_n = top_pointer_q; - mem_n = mem_q; + top_pointer_n = top_pointer_q; + mem_n = mem_q; + // acknowledge decoded instruction + decoded_instr_ack_o = 1'b0; // if we are not full we can push a new instruction if (~full_o && decoded_instr_valid_i) begin mem_n[$unsigned(top_pointer_q)] = decoded_instr_i; // label the transaction ID with the current top pointer mem_n[$unsigned(top_pointer_q)].trans_id = top_pointer_q; - top_pointer_n = top_pointer_q + 1; + top_pointer_n = top_pointer_q + 1; + decoded_instr_ack_o = 1'b1; end // write back: From 74c33bf1ba95bce77c6284d706503caeaf8840c3 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Fri, 12 May 2017 00:14:48 +0200 Subject: [PATCH 15/43] Removing unnecessary branch operators --- include/ariane_pkg.svh | 4 ++-- src/alu.sv | 37 ++++++++++--------------------- src/ariane.sv | 1 + src/branch_engine.sv | 18 +++++++++------ src/lsu.sv | 50 +++++++++++++++++++++--------------------- 5 files changed, 50 insertions(+), 60 deletions(-) diff --git a/include/ariane_pkg.svh b/include/ariane_pkg.svh index c1aa822e3..6443eb8ca 100644 --- a/include/ariane_pkg.svh +++ b/include/ariane_pkg.svh @@ -63,14 +63,14 @@ package ariane_pkg; 
// --------------- // EX Stage // --------------- - typedef enum logic [7:0] { // basic ALU op + typedef enum logic [5:0] { // basic ALU op ADD, SUB, ADDW, SUBW, // logic operations XORL, ORL, ANDL, // shifts SRA, SRL, SLL, SRLW, SLLW, SRAW, // comparisons - LTS, LTU, LES, LEU, GTS, GTU, GES, GEU, EQ, NE, + LTS, LTU, GES, GEU, EQ, NE, // jumps JAL, JALR, // set lower than operations diff --git a/src/alu.sv b/src/alu.sv index c22f048e1..f11f75f97 100644 --- a/src/alu.sv +++ b/src/alu.sv @@ -58,11 +58,8 @@ module alu SUB, SUBW, // COMPARATOR OPs EQ, NE, - GTU, GEU, - LTU, LEU, - GTS, GES, - LTS, LES, - SLTS, SLTU, + GEU, LTU, + GES, LTS, SLETS, SLETU: adder_op_b_negate = 1'b1; default: ; @@ -150,12 +147,7 @@ module alu cmp_signed = 1'b0; unique case (operator_i) - GTS, - GES, - LTS, - LES, - SLTS, - SLETS: begin + GES, LTS: begin cmp_signed = 1'b1; end @@ -187,15 +179,12 @@ module alu cmp_result = 1'b1; unique case (operator_i) - EQ: cmp_result = is_equal; - NE: cmp_result = (~is_equal); - GTS, GTU: cmp_result = is_greater_equal && (~is_equal); + EQ: cmp_result = is_equal; + NE: cmp_result = (~is_equal); + // GTS, GTU: cmp_result = is_greater_equal && (~is_equal); GES, GEU: cmp_result = is_greater_equal; - LTS, SLTS, - LTU, SLTU: cmp_result = (~is_greater_equal); - SLETS, - SLETU, - LES, LEU: cmp_result = (~is_greater_equal) || is_equal; + LTS, LTU: cmp_result = (~is_greater_equal); + // LES, LEU: cmp_result = (~is_greater_equal) || is_equal; default: ; endcase @@ -228,13 +217,9 @@ module alu SRLW, SRAW: result_o = {{32{shift_result32[31]}}, shift_result32[31:0]}; // Comparison Operations - EQ, NE, - GTU, GEU, - LTU, LEU, - GTS, GES, - LTS, LES, - SLTS, SLTU, - SLETS, SLETU: result_o = {63'b0, cmp_result}; + EQ, NE, + LTU, GEU, + GES, LTS : result_o = {63'b0, cmp_result}; default: ; // default case to suppress unique warning endcase diff --git a/src/ariane.sv b/src/ariane.sv index 78bbcb104..85dc67927 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -127,6 +127,7 @@ 
module ariane logic [63:0] operand_b_id_ex; logic [63:0] operand_c_id_ex; logic [63:0] pc_id_ex; + logic is_compressed_instr_id_ex; // ALU logic alu_ready_ex_id; logic alu_valid_id_ex; diff --git a/src/branch_engine.sv b/src/branch_engine.sv index ef92f0f86..6eecc2227 100644 --- a/src/branch_engine.sv +++ b/src/branch_engine.sv @@ -34,6 +34,7 @@ module branch_engine ( output exception branch_ex_o // branch exception out ); logic [63:0] target_address; + logic [63:0] next_pc; always_comb begin : target_address_calc target_address = 64'b0; @@ -42,17 +43,20 @@ module branch_engine ( branchpredict_o.is_taken = 1'b0; branchpredict_o.valid = valid_i; branchpredict_o.is_mispredict = 1'b0; + // calculate next PC, depending on whether the instruction is compressed or not this may be different + next_pc = pc_i + (is_compressed_instr_i) ? 64'h2 : 64'h4; + // calculate target address simple 64 bit addition + target_address = $signed(operand_a_i) + $signed(operand_b_i); + // save pc + branchpredict_o.pc = pc_i; + // write target address which goes to pc gen + branchpredict_o.target_address = (comparison_result_i) ? target_address : next_pc; + branchpredict_o.is_taken = comparison_result_i; if (valid_i) begin - // save pc - branchpredict_o.pc = pc_i; - // calculate target address simple 64 bit addition - target_address = $signed(operand_a_i) + $signed(operand_b_i); - // write target address which goes to pc gen - branchpredict_o.target_address = (comparison_result_i) ? target_address : pc_i + (is_compressed_instr_i) ? 
64'h2 : 64'h4; - branchpredict_o.is_taken = comparison_result_i; // we mis-predicted e.g.: the predicted address is unequal to the actual address if (target_address[1:0] == 2'b0) begin + // TODO in case of branch which is not taken it is not necessary to check for the address if ( target_address != predict_address_i // we mis-predicted the address of the branch || predict_taken_i != comparison_result_i // we mis-predicted the outcome of the branch || predict_branch_valid_i == 1'b0 // this means branch-prediction thought it was no branch but in reality it was one diff --git a/src/lsu.sv b/src/lsu.sv index 228c3c31b..d56e7a062 100644 --- a/src/lsu.sv +++ b/src/lsu.sv @@ -353,32 +353,32 @@ module lsu #( // essentially the same part as in IDLE but we can't accept a new store // as the store could immediately be performed and we would collide on the // trans id part (e.g.: a structural hazard) - if (op == LD_OP & lsu_valid_i) begin - translation_req = 1'b1; - // we can never handle a load in a single cycle - // but at least on a tlb hit we can output it to the memory - if (translation_valid) begin - // check if the address is in the store buffer otherwise we need - // to wait until the store buffer has cleared its entry - if (~address_match) begin - // lets request this read - data_req_i[1] = 1'b1; - // we already got a grant here so lets wait for the rvalid - if (data_gnt_o[1]) begin - NS = LOAD_WAIT_RVALID; - end else begin // we didn't get a grant so wait for it in a separate stage - NS = LOAD_WAIT_GNT; - end - end - end else begin// otherwise we need to wait for the translation - NS = LOAD_WAIT_TRANSLATION; - end - // STORE - end else if (op == ST_OP & lsu_valid_i) begin - NS = STORE; - end else begin + // if (op == LD_OP & lsu_valid_i) begin + // translation_req = 1'b1; + // // we can never handle a load in a single cycle + // // but at least on a tlb hit we can output it to the memory + // if (translation_valid) begin + // // check if the address is in the store 
buffer otherwise we need + // // to wait until the store buffer has cleared its entry + // if (~address_match) begin + // // lets request this read + // data_req_i[1] = 1'b1; + // // we already got a grant here so lets wait for the rvalid + // if (data_gnt_o[1]) begin + // NS = LOAD_WAIT_RVALID; + // end else begin // we didn't get a grant so wait for it in a separate stage + // NS = LOAD_WAIT_GNT; + // end + // end + // end else begin// otherwise we need to wait for the translation + // NS = LOAD_WAIT_TRANSLATION; + // end + // // STORE + // end else if (op == ST_OP & lsu_valid_i) begin + // NS = STORE; + // end else begin NS = IDLE; - end + // end end else begin // and stall From 5ffb9ef36cff401b47c5e3d93d998a02c904c1df Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Fri, 12 May 2017 10:42:03 +0200 Subject: [PATCH 16/43] :bug: Fix remove ALU operators --- include/ariane_pkg.svh | 2 +- src/alu.sv | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/ariane_pkg.svh b/include/ariane_pkg.svh index 6443eb8ca..8522d7bd9 100644 --- a/include/ariane_pkg.svh +++ b/include/ariane_pkg.svh @@ -74,7 +74,7 @@ package ariane_pkg; // jumps JAL, JALR, // set lower than operations - SLTS, SLTU, SLETS, SLETU, + SLTS, SLTU, // CSR functions MRET, SRET, URET, ECALL, CSR_WRITE, CSR_READ, CSR_SET, CSR_CLEAR, // LSU functions diff --git a/src/alu.sv b/src/alu.sv index f11f75f97..cdef04797 100644 --- a/src/alu.sv +++ b/src/alu.sv @@ -59,8 +59,7 @@ module alu // COMPARATOR OPs EQ, NE, GEU, LTU, - GES, LTS, - SLETS, SLETU: adder_op_b_negate = 1'b1; + GES, LTS: adder_op_b_negate = 1'b1; default: ; endcase @@ -147,7 +146,9 @@ module alu cmp_signed = 1'b0; unique case (operator_i) - GES, LTS: begin + GES, + LTS, + SLTS: begin cmp_signed = 1'b1; end @@ -181,11 +182,9 @@ module alu unique case (operator_i) EQ: cmp_result = is_equal; NE: cmp_result = (~is_equal); - // GTS, GTU: cmp_result = is_greater_equal && (~is_equal); GES, GEU: cmp_result = 
is_greater_equal; - LTS, LTU: cmp_result = (~is_greater_equal); - // LES, LEU: cmp_result = (~is_greater_equal) || is_equal; - + LTS, SLTS, + LTU, SLTU: cmp_result = (~is_greater_equal); default: ; endcase end @@ -217,9 +216,10 @@ module alu SRLW, SRAW: result_o = {{32{shift_result32[31]}}, shift_result32[31:0]}; // Comparison Operations - EQ, NE, - LTU, GEU, - GES, LTS : result_o = {63'b0, cmp_result}; + EQ, NE, + GEU, LTU, + LTS, GES, + SLTS, SLTU: result_o = {63'b0, cmp_result}; default: ; // default case to suppress unique warning endcase From ea8fde0a3f79f29250336acc6be65a487f2838ec Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Fri, 12 May 2017 12:22:56 +0200 Subject: [PATCH 17/43] Move branch comparisons to branch engine --- src/branch_engine.sv | 34 +++++++++++++++++++++++++++++----- src/ex_stage.sv | 11 +++-------- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/src/branch_engine.sv b/src/branch_engine.sv index 6eecc2227..3542b45e9 100644 --- a/src/branch_engine.sv +++ b/src/branch_engine.sv @@ -20,13 +20,15 @@ import ariane_pkg::*; module branch_engine ( + input fu_op operator_i, input logic [63:0] operand_a_i, input logic [63:0] operand_b_i, + input logic [63:0] operand_c_i, + input logic [63:0] imm_i, input logic [63:0] pc_i, input logic is_compressed_instr_i, input logic valid_i, - input logic comparison_result_i, // result of comparison input logic [63:0] predict_address_i, // this is the address we predicted input logic predict_branch_valid_i, // we predicted that this was a valid branch input logic predict_taken_i, @@ -35,6 +37,28 @@ module branch_engine ( ); logic [63:0] target_address; logic [63:0] next_pc; + logic comparison_result; // result of comparison + logic sgn; // sign extend + + always_comb begin : branch_resolve + // by default e.g.: when this is a jump, the branch is taken + // so set the comparison result to 1 + comparison_result = 1'b1; + // sign switch + sgn = 1'b1; + // if this is an unsigned operation clear 
the sign bit + // this should ease data-path extraction + if (operator_i inside {LTU, GEU}) + sgn = 1'b0; + // get the right comparison result + case (operator_i) + EQ: comparison_result = operand_a_i == operand_b_i; + NE: comparison_result = operand_a_i != operand_b_i; + LTS: comparison_result = ($signed({sgn & operand_a_i[63], operand_a_i}) < $signed({sgn & operand_b_i[63], operand_b_i})); + GES: comparison_result = ($signed({sgn & operand_a_i[63], operand_a_i}) >= $signed({sgn & operand_b_i[63], operand_b_i})); + default: comparison_result = 1'b1; + endcase + end always_comb begin : target_address_calc target_address = 64'b0; @@ -46,19 +70,19 @@ module branch_engine ( // calculate next PC, depending on whether the instruction is compressed or not this may be different next_pc = pc_i + (is_compressed_instr_i) ? 64'h2 : 64'h4; // calculate target address simple 64 bit addition - target_address = $signed(operand_a_i) + $signed(operand_b_i); + target_address = $signed(operand_c_i) + $signed(imm_i); // save pc branchpredict_o.pc = pc_i; // write target address which goes to pc gen - branchpredict_o.target_address = (comparison_result_i) ? target_address : next_pc; - branchpredict_o.is_taken = comparison_result_i; + branchpredict_o.target_address = (comparison_result) ? 
target_address : next_pc; + branchpredict_o.is_taken = comparison_result; if (valid_i) begin // we mis-predicted e.g.: the predicted address is unequal to the actual address if (target_address[1:0] == 2'b0) begin // TODO in case of branch which is not taken it is not necessary to check for the address if ( target_address != predict_address_i // we mis-predicted the address of the branch - || predict_taken_i != comparison_result_i // we mis-predicted the outcome of the branch + || predict_taken_i != comparison_result // we mis-predicted the outcome of the branch || predict_branch_valid_i == 1'b0 // this means branch-prediction thought it was no branch but in reality it was one ) begin branchpredict_o.is_mispredict = 1'b1; diff --git a/src/ex_stage.sv b/src/ex_stage.sv index 81b212739..e0d99af37 100644 --- a/src/ex_stage.sv +++ b/src/ex_stage.sv @@ -43,11 +43,11 @@ module ex_stage #( output logic [TRANS_ID_BITS-1:0] alu_trans_id_o, // ID of scoreboard entry at which to write back output exception alu_exception_o, // Branches and Jumps - input logic branch_valid_i, + input logic branch_valid_i, // we are using the branch unit input logic predict_branch_valid_i, input logic [63:0] predict_address_i, input logic predict_taken_i, - output branchpredict branchpredict_o, + output branchpredict branchpredict_o, // the branch engine uses the write back from the ALU // LSU output logic lsu_ready_o, // FU is ready input logic lsu_valid_i, // Input is valid @@ -99,8 +99,6 @@ module ex_stage #( output logic mult_ready_o, // FU is ready input logic mult_valid_i // Output is valid ); - // Wires - logic comparison_result_alu_branch; // ALU is a single cycle instructions, hence it is always ready assign alu_ready_o = 1'b1; @@ -113,8 +111,8 @@ module ex_stage #( .adder_result_o ( ), .adder_result_ext_o ( ), .result_o ( alu_result_o ), - .comparison_result_o ( comparison_result_alu_branch ), .is_equal_result_o ( ), + .comparison_result_o( ), .* ); @@ -122,10 +120,7 @@ module ex_stage 
#( // Branch Engine // -------------------- branch_engine branch_engine_i ( - .operand_a_i ( operand_c_i ), - .operand_b_i ( imm_i ), .valid_i ( branch_valid_i ), - .comparison_result_i ( comparison_result_alu_branch ), .branch_ex_o ( alu_exception_o ), .* ); From 03cc04f96abda2d89975f831a35eb2527de740be Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Fri, 12 May 2017 14:51:13 +0200 Subject: [PATCH 18/43] Change flush assignment in scoreboard --- src/scoreboard.sv | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/scoreboard.sv b/src/scoreboard.sv index 7058cbbb3..6a904dfd7 100644 --- a/src/scoreboard.sv +++ b/src/scoreboard.sv @@ -210,8 +210,11 @@ always_comb begin : push_instruction_and_wb top_pointer_n = issue_pointer_q; end // flush signal, e.g.: flush everything we need to backtrack after an exception - if (flush_i) - mem_n = '{default: 0}; + if (flush_i) begin + for (int i = 0; i < NR_ENTRIES; i++) begin + mem_n[i] = {$bits(scoreboard_entry){1'b0}}; + end + end end From b3173ab6f228cf373e089783cee0b7dad114312c Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Fri, 12 May 2017 14:54:21 +0200 Subject: [PATCH 19/43] Put flush to sequential process --- src/scoreboard.sv | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/scoreboard.sv b/src/scoreboard.sv index 6a904dfd7..9f945d27b 100644 --- a/src/scoreboard.sv +++ b/src/scoreboard.sv @@ -209,13 +209,6 @@ always_comb begin : push_instruction_and_wb if (flush_unissued_instr_i) begin top_pointer_n = issue_pointer_q; end - // flush signal, e.g.: flush everything we need to backtrack after an exception - if (flush_i) begin - for (int i = 0; i < NR_ENTRIES; i++) begin - mem_n[i] = {$bits(scoreboard_entry){1'b0}}; - end - end - end // issue instruction: advance the issue pointer @@ -282,13 +275,18 @@ always_ff @(posedge clk_i or negedge rst_ni) begin : sequential commit_pointer_q <= '{default: 0}; top_pointer_q <= '{default: 0}; top_pointer_qq <= 
'{default: 0}; - mem_q <= '{default: 0}; + for (int i = 0; i < NR_ENTRIES; i++) begin + mem_q[i] <= {$bits(scoreboard_entry){1'b0}}; + end end else if (flush_i) begin // reset pointers on flush + // flush signal, e.g.: flush everything we need to backtrack after an exception issue_pointer_q <= '{default: 0}; commit_pointer_q <= '{default: 0}; top_pointer_q <= '{default: 0}; top_pointer_qq <= '{default: 0}; - mem_q <= '{default: 0}; + for (int i = 0; i < NR_ENTRIES; i++) begin + mem_q[i] <= {$bits(scoreboard_entry){1'b0}}; + end end else begin issue_pointer_q <= issue_pointer_n; commit_pointer_q <= commit_pointer_n; From 18090c8c1ccaee1fcbb9abfbdadaad48b461f044 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Fri, 12 May 2017 19:21:42 +0200 Subject: [PATCH 20/43] Fix issue #31 --- src/ariane.sv | 37 +++++++-------------- src/branch_engine.sv | 67 ++++++++++++++++++++------------------ src/controller.sv | 4 +-- src/decoder.sv | 18 +++++----- src/ex_stage.sv | 10 +++--- src/id_stage.sv | 52 +++++++---------------------- src/if_stage.sv | 9 ++--- src/issue_read_operands.sv | 3 ++ src/pcgen.sv | 8 ++--- 9 files changed, 84 insertions(+), 124 deletions(-) diff --git a/src/ariane.sv b/src/ariane.sv index 85dc67927..4d45b65e4 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -84,7 +84,7 @@ module ariane logic halt_if; logic [63:0] pc_if; exception ex_commit; // exception from commit stage - branchpredict branchpredict; + branchpredict resolved_branch; // -------------- // PCGEN <-> IF // -------------- @@ -113,9 +113,7 @@ module ariane logic illegal_c_insn_id_if; logic [63:0] pc_id_if_id; exception exception_if_id; - logic branch_valid_if_id; - logic [63:0] predict_address_if_id; - logic predict_taken_if_id; + branchpredict_sbe branch_predict_if_id; // -------------- // ID <-> EX // -------------- @@ -137,9 +135,7 @@ module ariane exception alu_exception_ex_id; // Branches and Jumps logic branch_valid_id_ex; - logic predict_branch_valid_id_ex; - logic [63:0] 
predict_address_id_ex; - logic predict_taken_id_ex; + branchpredict_sbe branch_predict_id_ex; // LSU logic [TRANS_ID_BITS-1:0] lsu_trans_id_ex_id; logic lsu_valid_id_ex; @@ -224,7 +220,7 @@ module ariane pcgen pcgen_i ( .flush_i ( flush ), .pc_if_i ( pc_if ), - .branchpredict_i ( branchpredict ), + .resolved_branch_i ( resolved_branch ), .pc_if_o ( pc_pcgen_if ), .set_pc_o ( set_pc_pcgen_if ), .is_branch_o ( is_branch_pcgen_if ), @@ -245,9 +241,7 @@ module ariane .halt_if_i ( halt_if ), .set_pc_i ( set_pc_pcgen_if ), .is_branch_i ( is_branch_pcgen_if ), - .branch_valid_o ( branch_valid_if_id ), - .predict_address_o ( predict_address_if_id ), - .predict_taken_o ( predict_taken_if_id ), + .branch_predict_o ( branch_predict_if_id ), .fetch_addr_i ( pc_pcgen_if ), .instr_req_o ( fetch_req_if_ex ), .instr_addr_o ( fetch_vaddr_if_ex ), @@ -298,14 +292,10 @@ module ariane .alu_ready_i ( alu_ready_ex_id ), .alu_valid_o ( alu_valid_id_ex ), // Branches and Jumps - .branch_valid_i ( branch_valid_if_id ), - .predict_address_i ( predict_address_if_id ), - .predict_taken_i ( predict_taken_if_id ), - .branch_valid_o ( branch_valid_id_ex ), - .predict_branch_valid_o ( predict_branch_valid_id_ex ), - .predict_address_o ( predict_address_id_ex ), - .predict_taken_o ( predict_taken_id_ex ), - .branchpredict_i ( branchpredict ), // in order to resolve the branch + .branch_valid_o ( branch_valid_id_ex ), // branch is valid + .branch_predict_i ( branch_predict_if_id ), // branch predict from if + .branch_predict_o ( branch_predict_id_ex ), // branch predict to ex + .resolved_branch_i ( resolved_branch ), // in order to resolve the branch // LSU .lsu_ready_i ( lsu_ready_ex_id ), .lsu_valid_o ( lsu_valid_id_ex ), @@ -351,10 +341,8 @@ module ariane .alu_exception_o ( alu_exception_ex_id ), // Branches and Jumps .branch_valid_i ( branch_valid_id_ex ), - .predict_branch_valid_i ( predict_branch_valid_id_ex ), - .predict_address_i ( predict_address_id_ex ), - .predict_taken_i ( 
predict_taken_id_ex ), - .branchpredict_o ( branchpredict ), + .branch_predict_i ( branch_predict_id_ex ), // branch predict to ex + .resolved_branch_o ( resolved_branch ), // LSU .lsu_ready_o ( lsu_ready_ex_id ), .lsu_valid_i ( lsu_valid_id_ex ), @@ -441,7 +429,6 @@ module ariane // Controller // ------------ logic flush_commit_i; - logic branchpredict_i; controller controller_i ( .flush_bp_o ( ), @@ -454,7 +441,7 @@ module ariane .flush_ready_lsu_i ( ), .flush_commit_i ( flush_commit_i ), .flush_csr_i ( flush_csr_ctrl ), - .branchpredict_i ( branchpredict ), + .resolved_branch_i ( resolved_branch ), .* ); diff --git a/src/branch_engine.sv b/src/branch_engine.sv index 3542b45e9..b974efdc7 100644 --- a/src/branch_engine.sv +++ b/src/branch_engine.sv @@ -20,20 +20,18 @@ import ariane_pkg::*; module branch_engine ( - input fu_op operator_i, - input logic [63:0] operand_a_i, - input logic [63:0] operand_b_i, - input logic [63:0] operand_c_i, - input logic [63:0] imm_i, - input logic [63:0] pc_i, - input logic is_compressed_instr_i, - input logic valid_i, + input fu_op operator_i, + input logic [63:0] operand_a_i, + input logic [63:0] operand_b_i, + input logic [63:0] operand_c_i, + input logic [63:0] imm_i, + input logic [63:0] pc_i, + input logic is_compressed_instr_i, + input logic valid_i, - input logic [63:0] predict_address_i, // this is the address we predicted - input logic predict_branch_valid_i, // we predicted that this was a valid branch - input logic predict_taken_i, - output branchpredict branchpredict_o, // this is the actual address we are targeting - output exception branch_ex_o // branch exception out + input branchpredict_sbe branch_predict_i, // this is the address we predicted + output branchpredict resolved_branch_o, // this is the actual address we are targeting + output exception branch_ex_o // branch exception out ); logic [63:0] target_address; logic [63:0] next_pc; @@ -59,36 +57,43 @@ module branch_engine ( default: comparison_result = 1'b1; 
endcase end - - always_comb begin : target_address_calc - target_address = 64'b0; - branchpredict_o.pc = 64'b0; - branchpredict_o.target_address = 64'b0; - branchpredict_o.is_taken = 1'b0; - branchpredict_o.valid = valid_i; - branchpredict_o.is_mispredict = 1'b0; + // here we handle the various possibilities of mis-predicts + always_comb begin : mispredict_handler + target_address = 64'b0; + resolved_branch_o.pc = 64'b0; + resolved_branch_o.target_address = 64'b0; + resolved_branch_o.is_taken = 1'b0; + resolved_branch_o.valid = valid_i; + resolved_branch_o.is_mispredict = 1'b0; // calculate next PC, depending on whether the instruction is compressed or not this may be different next_pc = pc_i + (is_compressed_instr_i) ? 64'h2 : 64'h4; // calculate target address simple 64 bit addition target_address = $signed(operand_c_i) + $signed(imm_i); // save pc - branchpredict_o.pc = pc_i; + resolved_branch_o.pc = pc_i; // write target address which goes to pc gen - branchpredict_o.target_address = (comparison_result) ? target_address : next_pc; - branchpredict_o.is_taken = comparison_result; - + resolved_branch_o.target_address = (comparison_result) ? 
target_address : next_pc; + resolved_branch_o.is_taken = comparison_result; + // we've detected a branch in ID with the following parameters if (valid_i) begin // we mis-predicted e.g.: the predicted address is unequal to the actual address if (target_address[1:0] == 2'b0) begin // TODO in case of branch which is not taken it is not necessary to check for the address - if ( target_address != predict_address_i // we mis-predicted the address of the branch - || predict_taken_i != comparison_result // we mis-predicted the outcome of the branch - || predict_branch_valid_i == 1'b0 // this means branch-prediction thought it was no branch but in reality it was one + if ( target_address != branch_predict_i.predict_address_i // we mis-predicted the address of the branch + || branch_predict_i.predict_taken_i != comparison_result // we mis-predicted the outcome of the branch + || branch_predict_i.valid == 1'b0 // this means branch-prediction thought it was no branch but in reality it was one ) begin - branchpredict_o.is_mispredict = 1'b1; + resolved_branch_o.is_mispredict = 1'b1; end end end + // the other case would be that this instruction was no branch but branchprediction thought that it was one + // this is essentially also a mis-predict + if (branch_predict_i.valid) begin + // re-set the branch to the next PC + resolved_branch_o.is_mispredict = 1'b1; + resolved_branch_o.target_address = next_pc; + end end // use ALU exception signal for storing instruction fetch exceptions if // the target address is not aligned to a 4 byte boundary @@ -96,8 +101,8 @@ module branch_engine ( branch_ex_o.cause = INSTR_ADDR_MISALIGNED; branch_ex_o.tval = 64'b0; // TODO branch_ex_o.valid = 1'b0; - - if (target_address[1:0] != 2'b0) + // only throw exception if this is indeed a branch + if (valid_i && target_address[1:0] != 2'b0) branch_ex_o.valid = 1'b1; end endmodule \ No newline at end of file diff --git a/src/controller.sv b/src/controller.sv index 4dd49968c..ba9b161c9 100644 --- 
a/src/controller.sv +++ b/src/controller.sv @@ -33,7 +33,7 @@ module controller ( input logic flush_ready_lsu_i, // we need to wait for this signal from LSU input logic flush_commit_i, // flush request from commit stage in input logic flush_csr_i, - input branchpredict branchpredict_i + input branchpredict resolved_branch_i ); assign flush_bp_o = 1'b0; @@ -42,7 +42,7 @@ module controller ( flush_scoreboard_o = 1'b0; flush_if_o = 1'b0; // flush on mispredict - if (branchpredict_i.is_mispredict) begin + if (resolved_branch_i.is_mispredict) begin flush_unissued_instr_o = 1'b1; flush_if_o = 1'b1; end diff --git a/src/decoder.sv b/src/decoder.sv index 01c10f8a3..7523c13c7 100644 --- a/src/decoder.sv +++ b/src/decoder.sv @@ -11,14 +11,15 @@ import ariane_pkg::*; module decoder ( - input logic clk_i, // Clock - input logic rst_ni, // Asynchronous reset active low - input logic [63:0] pc_i, // PC from IF - input logic is_compressed_i, // is a compressed instruction - input logic [31:0] instruction_i, // instruction from IF - input exception ex_i, // if an exception occured in if - output scoreboard_entry instruction_o, // scoreboard entry to scoreboard - output logic is_control_flow_instr_o // this instruction will change the control flow + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic [63:0] pc_i, // PC from IF + input logic is_compressed_i, // is a compressed instruction + input logic [31:0] instruction_i, // instruction from IF + input branchpredict_sbe branch_predict_i, + input exception ex_i, // if an exception occured in if + output scoreboard_entry instruction_o, // scoreboard entry to scoreboard + output logic is_control_flow_instr_o // this instruction will change the control flow ); logic illegal_instr; instruction instr; @@ -58,6 +59,7 @@ module decoder ( instruction_o.trans_id = 5'b0; instruction_o.is_compressed = is_compressed_i; instruction_o.use_zimm = 1'b0; + instruction_o.bp = branch_predict_i; if 
(~ex_i.valid) begin case (instr.rtype.opcode) diff --git a/src/ex_stage.sv b/src/ex_stage.sv index e0d99af37..045a53bd5 100644 --- a/src/ex_stage.sv +++ b/src/ex_stage.sv @@ -44,10 +44,8 @@ module ex_stage #( output exception alu_exception_o, // Branches and Jumps input logic branch_valid_i, // we are using the branch unit - input logic predict_branch_valid_i, - input logic [63:0] predict_address_i, - input logic predict_taken_i, - output branchpredict branchpredict_o, // the branch engine uses the write back from the ALU + input branchpredict_sbe branch_predict_i, // branch prediction in + output branchpredict resolved_branch_o, // the branch engine uses the write back from the ALU // LSU output logic lsu_ready_o, // FU is ready input logic lsu_valid_i, // Input is valid @@ -112,7 +110,7 @@ module ex_stage #( .adder_result_ext_o ( ), .result_o ( alu_result_o ), .is_equal_result_o ( ), - .comparison_result_o( ), + .comparison_result_o ( ), .* ); @@ -121,7 +119,7 @@ module ex_stage #( // -------------------- branch_engine branch_engine_i ( .valid_i ( branch_valid_i ), - .branch_ex_o ( alu_exception_o ), + .branch_ex_o ( alu_exception_o ), // we use the ALU exception WB for the branch exception .* ); diff --git a/src/id_stage.sv b/src/id_stage.sv index 6d62156dc..8803def82 100644 --- a/src/id_stage.sv +++ b/src/id_stage.sv @@ -50,23 +50,18 @@ module id_stage #( input logic alu_ready_i, output logic alu_valid_o, + output logic branch_valid_o, // use branch prediction unit // Branch predict In - input logic branch_valid_i, - input logic [63:0] predict_address_i, - input logic predict_taken_i, - // Branch predict Out - output logic branch_valid_o, // use the branch engine - output logic predict_branch_valid_o, // this is a valid prediction - output logic [63:0] predict_address_o, - output logic predict_taken_o, + input branchpredict_sbe branch_predict_i, // ex just resolved our predicted branch, we are ready to accept new requests - input branchpredict branchpredict_i, 
+ input branchpredict resolved_branch_i, input logic lsu_ready_i, output logic lsu_valid_o, input logic mult_ready_i, - output logic mult_valid_o, + output logic mult_valid_o, // Branch predict Out + output branchpredict_sbe branch_predict_o, input logic csr_ready_i, output logic csr_valid_o, @@ -117,50 +112,31 @@ module id_stage #( // instructions past a branch. We need to resolve the branch beforehand. // This limitation is in place to ease the backtracking of mis-predicted branches as they // can simply be in the front-end of the processor. - logic unresolved_branch_n, unresolved_branch_q; - // branch predict registers - logic branch_valid_n, branch_valid_q; - logic [63:0] predict_address_n, predict_address_q; - logic predict_taken_n, predict_taken_q; + logic unresolved_branch_n, unresolved_branch_q; always_comb begin : unresolved_branch unresolved_branch_n = unresolved_branch_q; // we just resolved the branch - if (branchpredict_i.valid) begin + if (resolved_branch_i.valid) begin unresolved_branch_n = 1'b0; end // if the instruction is valid and it is a control flow instruction if (instruction_valid_i && is_control_flow_instr) begin unresolved_branch_n = 1'b1; end - - branch_valid_n = branch_valid_q; - predict_address_n = predict_address_q; - predict_taken_n = predict_taken_q; - // save branch prediction information until the ex stage resolves the prediction - if (~unresolved_branch_q) begin - branch_valid_n = branch_valid_i; - predict_address_n = predict_address_i; - predict_taken_n = predict_taken_i; - end end // we are ready if we are not full and don't have any unresolved branches, but it can be - // the case that we have an unresolved branch which is cleared in that cycle (branchpredict_i.valid == 1) - assign ready_o = ~full && (~unresolved_branch_q || branchpredict_i.valid) && ~(instruction_valid_i && is_control_flow_instr); - // output branch prediction bits - assign predict_branch_valid_o = branch_valid_q; - assign predict_address_o = predict_address_q; 
- assign predict_taken_o = predict_taken_q; + // the case that we have an unresolved branch which is cleared in that cycle (resolved_branch_i.valid == 1) + assign ready_o = ~full && (~unresolved_branch_q || resolved_branch_i.valid) && ~(instruction_valid_i && is_control_flow_instr); decoder decoder_i ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), .pc_i ( pc_if_i ), .is_compressed_i ( is_compressed_i ), .instruction_i ( instruction_i ), .ex_i ( ex_if_i ), .instruction_o ( decoded_instr_dc_sb ), - .is_control_flow_instr_o ( is_control_flow_instr ) + .is_control_flow_instr_o ( is_control_flow_instr ), + .* ); scoreboard #( @@ -209,14 +185,8 @@ module id_stage #( always_ff @(posedge clk_i or negedge rst_ni) begin if (~rst_ni) begin unresolved_branch_q <= 1'b0; - branch_valid_q <= 1'b0; - predict_address_q <= 64'b0; - predict_taken_q <= 1'b0; end else begin unresolved_branch_q <= unresolved_branch_n; - branch_valid_q <= branch_valid_n; - predict_address_q <= predict_address_n; - predict_taken_q <= predict_taken_n; end end diff --git a/src/if_stage.sv b/src/if_stage.sv index dc4e7e0cf..b9771e14c 100644 --- a/src/if_stage.sv +++ b/src/if_stage.sv @@ -41,9 +41,7 @@ module if_stage ( input logic set_pc_i, // set new PC input logic is_branch_i, // the new PC was a branch e.g.: branch or jump // branchpredict out - output logic branch_valid_o, - output logic [63:0] predict_address_o, - output logic predict_taken_o, + output branchpredict_sbe branch_predict_o, // instruction cache interface output logic instr_req_o, output logic [63:0] instr_addr_o, @@ -269,10 +267,7 @@ module if_stage ( assign if_ready = valid & id_ready_i; assign if_valid = (~halt_if_i) & if_ready; assign if_busy_o = prefetch_busy; - assign branch_valid_o = branch_valid_q; - assign predict_address_o = predict_address_q; - assign predict_taken_o = predict_taken_q; - + assign branch_predict_o = {predict_address_q, predict_taken_q, branch_valid_q}; //------------- // Assertions //------------- diff --git 
a/src/issue_read_operands.sv b/src/issue_read_operands.sv index 9a161d0ee..52e288314 100644 --- a/src/issue_read_operands.sv +++ b/src/issue_read_operands.sv @@ -52,6 +52,7 @@ module issue_read_operands ( output logic alu_valid_o, // Output is valid // Branches and Jumps output logic branch_valid_o, // this is a valid branch instruction + output branchpredict_sbe branch_predict_o, // LSU input logic lsu_ready_i, // FU is ready output logic lsu_valid_o, // Output is valid @@ -319,6 +320,7 @@ module issue_read_operands ( trans_id_q <= 5'b0; pc_o <= 64'b0; is_compressed_instr_o <= 1'b0; + branch_predict_o <= '{default: 0}; end else begin operand_a_q <= operand_a_n; operand_b_q <= operand_b_n; @@ -333,6 +335,7 @@ module issue_read_operands ( trans_id_q <= trans_id_n; pc_o <= issue_instr_i.pc; is_compressed_instr_o <= issue_instr_i.is_compressed; + branch_predict_o <= issue_instr_i.bp; end end endmodule diff --git a/src/pcgen.sv b/src/pcgen.sv index 71acd2349..ef3c85645 100644 --- a/src/pcgen.sv +++ b/src/pcgen.sv @@ -25,7 +25,7 @@ module pcgen ( input logic flush_i, input logic [63:0] pc_if_i, - input branchpredict branchpredict_i, // from controller signaling a branchpredict -> update BTB + input branchpredict resolved_branch_i, // from controller signaling a branchpredict -> update BTB // to IF output logic [63:0] pc_if_o, // new PC output logic set_pc_o, // request the PC to be set to pc_if_o @@ -74,7 +74,7 @@ module pcgen ( btb_i ( .vpc_i ( predict_pc ), - .branchpredict_i ( branchpredict_i ), + .branchpredict_i ( resolved_branch_i ), .is_branch_o ( is_branch ), .predict_taken_o ( predict_taken ), .branch_target_address_o ( branch_target_address ), @@ -104,10 +104,10 @@ module pcgen ( // 1.Debug // 3. 
Control flow change request - if (branchpredict_i.is_mispredict) begin + if (resolved_branch_i.is_mispredict) begin set_pc_n = 1'b1; // we already got the correct target address - npc_n = branchpredict_i.target_address; + npc_n = resolved_branch_i.target_address; end // 2. Exception if (ex_i.valid) begin From 1fba6db68800b154b496699989360ee82ad8ae7d Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Fri, 12 May 2017 23:56:04 +0200 Subject: [PATCH 21/43] :bug: Fix wrong next pc calculation in branch engine --- src/branch_engine.sv | 6 +-- src/ex_stage.sv | 5 ++- src/id_stage.sv | 4 +- src/if_stage.sv | 103 ++++++++++++++++++------------------------- src/pcgen.sv | 19 ++++---- test/add_test.S | 8 +++- 6 files changed, 67 insertions(+), 78 deletions(-) diff --git a/src/branch_engine.sv b/src/branch_engine.sv index b974efdc7..44ba8ebb0 100644 --- a/src/branch_engine.sv +++ b/src/branch_engine.sv @@ -27,6 +27,7 @@ module branch_engine ( input logic [63:0] imm_i, input logic [63:0] pc_i, input logic is_compressed_instr_i, + input logic fu_valid_i, // any functional unit is valid, check that there is no accidental mis-predict input logic valid_i, input branchpredict_sbe branch_predict_i, // this is the address we predicted @@ -66,7 +67,7 @@ module branch_engine ( resolved_branch_o.valid = valid_i; resolved_branch_o.is_mispredict = 1'b0; // calculate next PC, depending on whether the instruction is compressed or not this may be different - next_pc = pc_i + (is_compressed_instr_i) ? 64'h2 : 64'h4; + next_pc = pc_i + ((is_compressed_instr_i) ? 
64'h2 : 64'h4); // calculate target address simple 64 bit addition target_address = $signed(operand_c_i) + $signed(imm_i); // save pc @@ -86,10 +87,9 @@ module branch_engine ( resolved_branch_o.is_mispredict = 1'b1; end end - end // the other case would be that this instruction was no branch but branchprediction thought that it was one // this is essentially also a mis-predict - if (branch_predict_i.valid) begin + end else if (fu_valid_i && branch_predict_i.valid) begin // re-set the branch to the next PC resolved_branch_o.is_mispredict = 1'b1; resolved_branch_o.target_address = next_pc; diff --git a/src/ex_stage.sv b/src/ex_stage.sv index 045a53bd5..87e8b49df 100644 --- a/src/ex_stage.sv +++ b/src/ex_stage.sv @@ -118,8 +118,9 @@ module ex_stage #( // Branch Engine // -------------------- branch_engine branch_engine_i ( - .valid_i ( branch_valid_i ), - .branch_ex_o ( alu_exception_o ), // we use the ALU exception WB for the branch exception + .fu_valid_i ( alu_valid_i & lsu_valid_i & csr_valid_i ), + .valid_i ( branch_valid_i ), + .branch_ex_o ( alu_exception_o ), // we use the ALU exception WB for the branch exception .* ); diff --git a/src/id_stage.sv b/src/id_stage.sv index 8803def82..a2c251a43 100644 --- a/src/id_stage.sv +++ b/src/id_stage.sv @@ -121,13 +121,13 @@ module id_stage #( unresolved_branch_n = 1'b0; end // if the instruction is valid and it is a control flow instruction - if (instruction_valid_i && is_control_flow_instr) begin + if (instruction_valid_i && is_control_flow_instr && ~flush_unissued_instr_i) begin unresolved_branch_n = 1'b1; end end // we are ready if we are not full and don't have any unresolved branches, but it can be // the case that we have an unresolved branch which is cleared in that cycle (resolved_branch_i.valid == 1) - assign ready_o = ~full && (~unresolved_branch_q || resolved_branch_i.valid) && ~(instruction_valid_i && is_control_flow_instr); + assign ready_o = ~full && (~unresolved_branch_q || resolved_branch_i.valid); 
decoder decoder_i ( .pc_i ( pc_if_i ), diff --git a/src/if_stage.sv b/src/if_stage.sv index b9771e14c..2303db867 100644 --- a/src/if_stage.sv +++ b/src/if_stage.sv @@ -79,7 +79,6 @@ module if_stage ( logic [31:0] instr_decompressed; logic illegal_c_insn; logic instr_compressed_int; - logic clear_instr_valid_i; // compressed instruction decoding, or more precisely compressed instruction // expander @@ -121,73 +120,55 @@ module if_stage ( // offset FSM state transition logic always_comb begin - offset_fsm_ns = offset_fsm_cs; + offset_fsm_ns = offset_fsm_cs; - fetch_ready = 1'b0; - branch_req = 1'b0; - valid = 1'b0; + fetch_ready = 1'b0; + branch_req = 1'b0; + valid = 1'b0; - unique case (offset_fsm_cs) - // no valid instruction data for ID stage - // assume aligned - IDLE: begin - if (req_i) begin - branch_req = 1'b1; - offset_fsm_ns = WAIT; - end - - // take care of control flow changes - if (set_pc_i) begin - valid = 1'b0; - // switch to new PC from ID stage - branch_req = 1'b1; - offset_fsm_ns = WAIT; - end - end - - // serving aligned 32 bit or 16 bit instruction, we don't know yet - WAIT: begin - if (fetch_valid) begin - valid = 1'b1; // an instruction is ready for ID stage - - if (req_i && if_valid) begin - fetch_ready = 1'b1; - offset_fsm_ns = WAIT; + unique case (offset_fsm_cs) + // no valid instruction data for ID stage + // assume aligned + IDLE: begin + if (req_i) begin + branch_req = 1'b1; + offset_fsm_ns = WAIT; end - end - // take care of control flow changes - if (set_pc_i) begin - valid = 1'b0; - // switch to new PC from ID stage - branch_req = 1'b1; - offset_fsm_ns = WAIT_BRANCHED; - end - end - // we just branched so keep this instruction as valid - WAIT_BRANCHED: begin - if (fetch_valid) begin - valid = 1'b1; // an instruction is ready for ID stage - if (req_i && if_valid) begin - fetch_ready = 1'b1; - offset_fsm_ns = WAIT; + // take care of control flow changes + if (set_pc_i) begin + valid = 1'b0; + // switch to new PC from ID stage + branch_req 
= 1'b1; + offset_fsm_ns = WAIT; end - end + end - // take care of control flow changes - if (set_pc_i) begin - // switch to new PC from ID stage - branch_req = 1'b1; - offset_fsm_ns = WAIT_BRANCHED; - end + // serving aligned 32 bit or 16 bit instruction, we don't know yet + WAIT: begin + if (fetch_valid) begin + valid = 1'b1; // an instruction is ready for ID stage + + if (req_i && if_valid) begin + fetch_ready = 1'b1; + offset_fsm_ns = WAIT; + end + end + + end + + default: begin + offset_fsm_ns = IDLE; + end + endcase + + // take care of control flow changes + if (set_pc_i) begin + valid = 1'b0; + // switch to new PC from PCGEN stage + branch_req = 1'b1; + offset_fsm_ns = WAIT; end - - default: begin - offset_fsm_ns = IDLE; - end - endcase - - end // ------------- diff --git a/src/pcgen.sv b/src/pcgen.sv index ef3c85645..ac7450ac7 100644 --- a/src/pcgen.sv +++ b/src/pcgen.sv @@ -57,14 +57,13 @@ module pcgen ( // 2. or PC which we just predicted + 4 always_comb begin : pc_btb_lookup // Ad 2: From PC of previous cycle (which is now in IF) - if (set_pc_q) begin - predict_pc = npc_q; - // Ad 1: - // in the previous cycle we set the PC to npc_q - // calculate the plus one version - end else begin + // predict_pc = npc_q; + // // Ad 1: + // // in the previous cycle we set the PC to npc_q + // // calculate the plus one version + // end else begin predict_pc = {pc_if_i[62:2], 2'b0} + 64'h4; - end + // end end btb #( @@ -96,8 +95,12 @@ module pcgen ( npc_n = npc_q; set_pc_n = 1'b0; is_branch_n = is_branch; + + // we already set the PC a cycle earlier + if (set_pc_q) + is_branch_n = 1'b0; // 4. 
Predict taken - if (is_branch && predict_taken) begin + if (is_branch && predict_taken && ~set_pc_q) begin set_pc_n = 1'b1; npc_n = branch_target_address; end diff --git a/test/add_test.S b/test/add_test.S index 64c23bbcc..49a2173ab 100755 --- a/test/add_test.S +++ b/test/add_test.S @@ -15,14 +15,18 @@ add x9, x7, x8 csrr x1, mstatus nop -L0: jal L1 +L0: nop nop nop nop + jal L1 + nop + nop +L1: nop nop nop nop -L1: jal L0 + jal L0 nop nop addi x1, x0, 55 \ No newline at end of file From ba6bacb708380e7dcb27691f9eff09534019ba7c Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Sat, 13 May 2017 10:16:38 +0200 Subject: [PATCH 22/43] Correct branch misaligned exception --- src/branch_engine.sv | 8 ++++---- src/fetch_fifo.sv | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/branch_engine.sv b/src/branch_engine.sv index 44ba8ebb0..fadbdfcc1 100644 --- a/src/branch_engine.sv +++ b/src/branch_engine.sv @@ -78,7 +78,7 @@ module branch_engine ( // we've detected a branch in ID with the following parameters if (valid_i) begin // we mis-predicted e.g.: the predicted address is unequal to the actual address - if (target_address[1:0] == 2'b0) begin + if (target_address[0] == 1'b0) begin // TODO in case of branch which is not taken it is not necessary to check for the address if ( target_address != branch_predict_i.predict_address_i // we mis-predicted the address of the branch || branch_predict_i.predict_taken_i != comparison_result // we mis-predicted the outcome of the branch @@ -96,13 +96,13 @@ module branch_engine ( end end // use ALU exception signal for storing instruction fetch exceptions if - // the target address is not aligned to a 4 byte boundary + // the target address is not aligned to a 2 byte boundary always_comb begin : exception_handling branch_ex_o.cause = INSTR_ADDR_MISALIGNED; - branch_ex_o.tval = 64'b0; // TODO branch_ex_o.valid = 1'b0; // only throw exception if this is indeed a branch - if (valid_i && target_address[1:0] != 2'b0) 
+ if (valid_i && target_address[0] != 1'b0) + branch_ex_o.tval = pc_i; branch_ex_o.valid = 1'b1; end endmodule \ No newline at end of file diff --git a/src/fetch_fifo.sv b/src/fetch_fifo.sv index 455613a90..1226ddfcc 100644 --- a/src/fetch_fifo.sv +++ b/src/fetch_fifo.sv @@ -192,8 +192,7 @@ module fetch_fifo end else begin // move to next entry in FIFO addr_n[0] = {addr_next[63:2], 2'b00}; - for (int i = 0; i < DEPTH - 1; i++) - begin + for (int i = 0; i < DEPTH - 1; i++) begin rdata_n[i] = rdata_int[i + 1]; end rdata_n[DEPTH - 1] = 32'b0; From da521c9f2612d7689b59ba0db16165b9a74ed4ea Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Sat, 13 May 2017 10:17:20 +0200 Subject: [PATCH 23/43] Removed accidental latch in branch engine --- src/branch_engine.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/branch_engine.sv b/src/branch_engine.sv index fadbdfcc1..2549f4568 100644 --- a/src/branch_engine.sv +++ b/src/branch_engine.sv @@ -100,9 +100,9 @@ module branch_engine ( always_comb begin : exception_handling branch_ex_o.cause = INSTR_ADDR_MISALIGNED; branch_ex_o.valid = 1'b0; + branch_ex_o.tval = pc_i; // only throw exception if this is indeed a branch if (valid_i && target_address[0] != 1'b0) - branch_ex_o.tval = pc_i; branch_ex_o.valid = 1'b1; end endmodule \ No newline at end of file From dc823ae54a8a84a3313b795b8dc6f65cdde428f8 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Sat, 13 May 2017 12:15:23 +0200 Subject: [PATCH 24/43] :art: Code cleanup --- src/fetch_fifo.sv | 315 ++++++++++++++++++----------------------- src/prefetch_buffer.sv | 32 ++--- test/add_test.S | 2 - 3 files changed, 150 insertions(+), 199 deletions(-) diff --git a/src/fetch_fifo.sv b/src/fetch_fifo.sv index 1226ddfcc..911059d93 100644 --- a/src/fetch_fifo.sv +++ b/src/fetch_fifo.sv @@ -23,219 +23,178 @@ import ariane_pkg::*; // input port: send address one cycle before the data -// clear_i clears the FIFO for the following cycle. 
in_addr_i can be sent in -// this cycle already +// clear_i clears the FIFO for the following cycle. module fetch_fifo ( input logic clk, input logic rst_n, - // control signals input logic clear_i, // clears the contents of the fifo - // input port input logic [63:0] in_addr_i, input logic [31:0] in_rdata_i, input logic in_valid_i, output logic in_ready_o, - // output port output logic [63:0] out_addr_o, output logic [31:0] out_rdata_o, output logic out_valid_o, - input logic out_ready_i, + input logic out_ready_i +); - output logic out_valid_stored_o // same as out_valid_o, except that if something is incoming now it is not - // included. This signal is available immediately as it comes directly out of FFs - ); + localparam DEPTH = 4; // must be 3 or greater + /* verilator lint_off LITENDIAN */ + // index 0 is used for output + logic [0:DEPTH-1] [63:0] addr_n, addr_int, addr_Q; + logic [0:DEPTH-1] [31:0] rdata_n, rdata_int, rdata_Q; + logic [0:DEPTH-1] valid_n, valid_int, valid_Q; - localparam DEPTH = 4; // must be 3 or greater - /* verilator lint_off LITENDIAN */ - // index 0 is used for output - logic [0:DEPTH-1] [63:0] addr_n, addr_int, addr_Q; - logic [0:DEPTH-1] [31:0] rdata_n, rdata_int, rdata_Q; - logic [0:DEPTH-1] valid_n, valid_int, valid_Q; + logic [63:0] addr_next; + logic [31:0] rdata, rdata_unaligned; + logic valid, valid_unaligned; - logic [63:0] addr_next; - logic [31:0] rdata, rdata_unaligned; - logic valid, valid_unaligned; + logic aligned_is_compressed, unaligned_is_compressed; + logic aligned_is_compressed_st, unaligned_is_compressed_st; + /* lint_on */ - logic aligned_is_compressed, unaligned_is_compressed; - logic aligned_is_compressed_st, unaligned_is_compressed_st; - /* lint_on */ - //---------------------------------------------------------------------------- - // output port - //---------------------------------------------------------------------------- + //---------------------------------------------------------------------------- + // 
output port + //---------------------------------------------------------------------------- + assign rdata = (valid_Q[0]) ? rdata_Q[0] : in_rdata_i; + assign valid = valid_Q[0] || in_valid_i; + assign rdata_unaligned = (valid_Q[1]) ? {rdata_Q[1][15:0], rdata[31:16]} : {in_rdata_i[15:0], rdata[31:16]}; + // it is implied that rdata_valid_Q[0] is set + assign valid_unaligned = (valid_Q[1] || (valid_Q[0] && in_valid_i)); - assign rdata = (valid_Q[0]) ? rdata_Q[0] : in_rdata_i; - assign valid = valid_Q[0] || in_valid_i; + assign unaligned_is_compressed = rdata[17:16] != 2'b11; + assign aligned_is_compressed = rdata[1:0] != 2'b11; + assign unaligned_is_compressed_st = rdata_Q[0][17:16] != 2'b11; + assign aligned_is_compressed_st = rdata_Q[0][1:0] != 2'b11; - assign rdata_unaligned = (valid_Q[1]) ? {rdata_Q[1][15:0], rdata[31:16]} : {in_rdata_i[15:0], rdata[31:16]}; - // it is implied that rdata_valid_Q[0] is set - assign valid_unaligned = (valid_Q[1] || (valid_Q[0] && in_valid_i)); + //---------------------------------------------------------------------------- + // instruction aligner (if unaligned) + //---------------------------------------------------------------------------- + always_comb begin + // serve the aligned case even though the output address is unaligned when + // the next instruction will be from a hardware loop target + // in this case the current instruction is already prealigned in element 0 + if (out_addr_o[1]) begin + // unaligned case + out_rdata_o = rdata_unaligned; - assign unaligned_is_compressed = rdata[17:16] != 2'b11; - assign aligned_is_compressed = rdata[1:0] != 2'b11; - assign unaligned_is_compressed_st = rdata_Q[0][17:16] != 2'b11; - assign aligned_is_compressed_st = rdata_Q[0][1:0] != 2'b11; - - //---------------------------------------------------------------------------- - // instruction aligner (if unaligned) - //---------------------------------------------------------------------------- - - always_comb - begin - // serve the 
aligned case even though the output address is unaligned when - // the next instruction will be from a hardware loop target - // in this case the current instruction is already prealigned in element 0 - if (out_addr_o[1]) begin - // unaligned case - out_rdata_o = rdata_unaligned; - - if (unaligned_is_compressed) + if (unaligned_is_compressed) + out_valid_o = valid; + else + out_valid_o = valid_unaligned; + end else begin + // aligned case + out_rdata_o = rdata; out_valid_o = valid; - else - out_valid_o = valid_unaligned; - end else begin - // aligned case - out_rdata_o = rdata; - out_valid_o = valid; - end - end - - assign out_addr_o = (valid_Q[0]) ? addr_Q[0] : in_addr_i; - - // this valid signal must not depend on signals from outside! - always_comb - begin - out_valid_stored_o = 1'b1; - - if (out_addr_o[1]) begin - if (unaligned_is_compressed_st) - out_valid_stored_o = 1'b1; - else - out_valid_stored_o = valid_Q[1]; - end else begin - out_valid_stored_o = valid_Q[0]; - end - end - - - //---------------------------------------------------------------------------- - // input port - //---------------------------------------------------------------------------- - - // we accept data as long as our fifo is not full - // we don't care about clear here as the data will be received one cycle - // later anyway - assign in_ready_o = ~valid_Q[DEPTH-2]; - - - //---------------------------------------------------------------------------- - // FIFO management - //---------------------------------------------------------------------------- - - int j; - always_comb - begin - addr_int = addr_Q; - rdata_int = rdata_Q; - valid_int = valid_Q; - - - if (in_valid_i) begin - for(j = 0; j < DEPTH; j++) begin - if (~valid_Q[j]) begin - addr_int[j] = in_addr_i; - rdata_int[j] = in_rdata_i; - valid_int[j] = 1'b1; - - break; - end end - end - end - assign addr_next = {addr_int[0][63:2], 2'b00} + 64'h4; + assign out_addr_o = (valid_Q[0]) ? 
addr_Q[0] : in_addr_i; - // move everything by one step - always_comb - begin - addr_n = addr_int; - rdata_n = rdata_int; - valid_n = valid_int; + //---------------------------------------------------------------------------- + // input port + //---------------------------------------------------------------------------- + // we accept data as long as our fifo is not full + // we don't care about clear here as the data will be received one cycle + // later anyway + assign in_ready_o = ~valid_Q[DEPTH-2]; - if (out_ready_i && out_valid_o) begin - begin - if (addr_int[0][1]) begin - // unaligned case - if (unaligned_is_compressed) begin - addr_n[0] = {addr_next[63:2], 2'b00}; - end else begin - addr_n[0] = {addr_next[63:2], 2'b10}; - end + //---------------------------------------------------------------------------- + // FIFO management + //---------------------------------------------------------------------------- + always_comb begin + addr_int = addr_Q; + rdata_int = rdata_Q; + valid_int = valid_Q; - for (int i = 0; i < DEPTH - 1; i++) - begin - rdata_n[i] = rdata_int[i + 1]; - end - rdata_n[DEPTH - 1] = 32'b0; + if (in_valid_i) begin + for (int j = 0; j < DEPTH; j++) begin + if (~valid_Q[j]) begin + addr_int[j] = in_addr_i; + rdata_int[j] = in_rdata_i; + valid_int[j] = 1'b1; + break; + end + end + end + end - valid_n = {valid_int[1:DEPTH-1], 1'b0}; + assign addr_next = {addr_int[0][63:2], 2'b00} + 64'h4; + + // move everything by one step + always_comb begin + addr_n = addr_int; + rdata_n = rdata_int; + valid_n = valid_int; + + if (out_ready_i && out_valid_o) begin + if (addr_int[0][1]) begin + // unaligned case + if (unaligned_is_compressed) begin + addr_n[0] = {addr_next[63:2], 2'b00}; + end else begin + addr_n[0] = {addr_next[63:2], 2'b10}; + end + + // shift everything on ene step + for (int i = 0; i < DEPTH - 1; i++) + rdata_n[i] = rdata_int[i + 1]; + + rdata_n[DEPTH - 1] = 32'b0; + + valid_n = {valid_int[1:DEPTH-1], 1'b0}; + end else begin + if 
(aligned_is_compressed) begin + // just increase address, do not move to next entry in FIFO + addr_n[0] = {addr_int[0][63:2], 2'b10}; + end else begin + // move to next entry in FIFO + addr_n[0] = {addr_next[63:2], 2'b00}; + // shift entry + for (int i = 0; i < DEPTH - 1; i++) + rdata_n[i] = rdata_int[i + 1]; + + rdata_n[DEPTH - 1] = 32'b0; + valid_n = {valid_int[1:DEPTH-1], 1'b0}; + end + end + end + // on a clear signal from outside we invalidate the content of the FIFO + // completely and start from an empty state + if (clear_i) + valid_n = '0; + end + + //---------------------------------------------------------------------------- + // registers + //---------------------------------------------------------------------------- + + always_ff @(posedge clk, negedge rst_n) begin + if(rst_n == 1'b0) begin + addr_Q <= '{default: '0}; + rdata_Q <= '{default: '0}; + valid_Q <= '0; end else begin - if (aligned_is_compressed) begin - // just increase address, do not move to next entry in FIFO - addr_n[0] = {addr_int[0][63:2], 2'b10}; - end else begin - // move to next entry in FIFO - addr_n[0] = {addr_next[63:2], 2'b00}; - for (int i = 0; i < DEPTH - 1; i++) begin - rdata_n[i] = rdata_int[i + 1]; - end - rdata_n[DEPTH - 1] = 32'b0; - valid_n = {valid_int[1:DEPTH-1], 1'b0}; + addr_Q <= addr_n; + rdata_Q <= rdata_n; + valid_Q <= valid_n; end end - end end - end - //---------------------------------------------------------------------------- - // registers - //---------------------------------------------------------------------------- - - always_ff @(posedge clk, negedge rst_n) - begin - if(rst_n == 1'b0) - begin - addr_Q <= '{default: '0}; - rdata_Q <= '{default: '0}; - valid_Q <= '0; - end - else - begin - // on a clear signal from outside we invalidate the content of the FIFO - // completely and start from an empty state - if (clear_i) begin - valid_Q <= '0; - end else begin - addr_Q <= addr_n; - rdata_Q <= rdata_n; - valid_Q <= valid_n; - end - end - end - - 
//---------------------------------------------------------------------------- - // Assertions - //---------------------------------------------------------------------------- - `ifndef SYNTHESIS - `ifndef VERILATOR - assert property ( - @(posedge clk) (in_valid_i) |-> ((valid_Q[DEPTH-1] == 1'b0) || (clear_i == 1'b1)) ); - `endif - `endif + //---------------------------------------------------------------------------- + // Assertions + //---------------------------------------------------------------------------- + `ifndef SYNTHESIS + `ifndef VERILATOR + assert property ( + @(posedge clk) (in_valid_i) |-> ((valid_Q[DEPTH-1] == 1'b0) || (clear_i == 1'b1)) ); + `endif + `endif endmodule \ No newline at end of file diff --git a/src/prefetch_buffer.sv b/src/prefetch_buffer.sv index fc47edbe4..5dd07ead8 100644 --- a/src/prefetch_buffer.sv +++ b/src/prefetch_buffer.sv @@ -58,9 +58,6 @@ module prefetch_buffer logic fifo_ready; logic fifo_clear; - logic valid_stored; - - //--------------------------------- // Prefetch buffer status //--------------------------------- @@ -71,26 +68,23 @@ module prefetch_buffer // Fetch FIFO // consumes addresses and rdata //--------------------------------- - fetch_fifo fifo_i - ( - .clk ( clk ), - .rst_n ( rst_n ), + fetch_fifo fifo_i ( + .clk ( clk ), + .rst_n ( rst_n ), - .clear_i ( fifo_clear ), + .clear_i ( fifo_clear ), - .in_addr_i ( instr_addr_q ), - .in_rdata_i ( instr_rdata_i ), - .in_valid_i ( fifo_valid ), - .in_ready_o ( fifo_ready ), + .in_addr_i ( instr_addr_q ), + .in_rdata_i ( instr_rdata_i ), + .in_valid_i ( fifo_valid ), + .in_ready_o ( fifo_ready ), - .out_valid_o ( valid_o ), - .out_ready_i ( ready_i ), - .out_rdata_o ( rdata_o ), - .out_addr_o ( addr_o ), - - .out_valid_stored_o ( valid_stored ) - ); + .out_valid_o ( valid_o ), + .out_ready_i ( ready_i ), + .out_rdata_o ( rdata_o ), + .out_addr_o ( addr_o ) + ); //--------------- diff --git a/test/add_test.S b/test/add_test.S index 49a2173ab..1e26140ab 100755 --- 
a/test/add_test.S +++ b/test/add_test.S @@ -1,6 +1,4 @@ .text - nop - nop nop addi x1, x0, 1 addi x2, x0, 1 From 8332c41ddb0a435c2d48b4c3a734356370b5f80b Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Sat, 13 May 2017 21:45:14 +0200 Subject: [PATCH 25/43] Start implementing new prefetcher unit --- src/ariane.sv | 18 +-- src/branch_engine.sv | 18 ++- src/btb.sv | 2 +- src/fetch_fifo.sv | 344 ++++++++++++++++++++++++----------------- src/fifo.sv | 20 +-- src/id_stage.sv | 22 ++- src/if_stage.sv | 168 +++----------------- src/pcgen.sv | 63 +++----- src/prefetch_buffer.sv | 76 +++------ 9 files changed, 313 insertions(+), 418 deletions(-) diff --git a/src/ariane.sv b/src/ariane.sv index 4d45b65e4..b71bf58b9 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -91,6 +91,8 @@ module ariane logic [63:0] pc_pcgen_if; logic set_pc_pcgen_if; logic is_branch_pcgen_if; + logic if_ready_if_pcgen; + logic pc_valid_pcgen_if; // -------------- // PCGEN <-> EX // -------------- @@ -108,7 +110,6 @@ module ariane logic instr_valid_if_id; logic [31:0] instr_rdata_if_id; logic decode_ack_id_if; - logic illegal_c_insn_if_id; logic is_compressed_if_id; logic illegal_c_insn_id_if; logic [63:0] pc_id_if_id; @@ -218,11 +219,12 @@ module ariane // NPC Generation // -------------- pcgen pcgen_i ( + .fetch_enable_i ( fetch_enable ), .flush_i ( flush ), - .pc_if_i ( pc_if ), + .if_ready_i ( ~if_ready_if_pcgen ), .resolved_branch_i ( resolved_branch ), .pc_if_o ( pc_pcgen_if ), - .set_pc_o ( set_pc_pcgen_if ), + .pc_if_valid_o ( pc_valid_pcgen_if ), .is_branch_o ( is_branch_pcgen_if ), .boot_addr_i ( boot_addr_i ), .epc_i ( epc_commit_pcgen ), @@ -235,11 +237,9 @@ module ariane // --------- if_stage if_stage_i ( .flush_i ( flush_ctrl_if ), - .req_i ( fetch_enable ), - .if_busy_o ( ), // ? 
+ .pc_if_valid_i ( pc_valid_pcgen_if ), + .if_busy_o ( if_ready_if_pcgen ), .id_ready_i ( ready_id_if ), - .halt_if_i ( halt_if ), - .set_pc_i ( set_pc_pcgen_if ), .is_branch_i ( is_branch_pcgen_if ), .branch_predict_o ( branch_predict_if_id ), .fetch_addr_i ( pc_pcgen_if ), @@ -252,9 +252,6 @@ module ariane .instr_valid_id_o ( instr_valid_if_id ), .instr_rdata_id_o ( instr_rdata_if_id ), - .is_compressed_id_o ( is_compressed_if_id ), - .illegal_c_insn_id_o ( illegal_c_insn_if_id ), - .pc_if_o ( pc_if ), .pc_id_o ( pc_id_if_id ), .ex_o ( exception_if_id ), .* @@ -275,7 +272,6 @@ module ariane .instruction_i ( instr_rdata_if_id ), .instruction_valid_i ( instr_valid_if_id ), .decoded_instr_ack_o ( decode_ack_id_if ), - .is_compressed_i ( is_compressed_if_id ), .pc_if_i ( pc_id_if_id ), // PC from if .ex_if_i ( exception_if_id ), // exception from if .ready_o ( ready_id_if ), diff --git a/src/branch_engine.sv b/src/branch_engine.sv index 2549f4568..0c6c350c2 100644 --- a/src/branch_engine.sv +++ b/src/branch_engine.sv @@ -67,11 +67,19 @@ module branch_engine ( resolved_branch_o.valid = valid_i; resolved_branch_o.is_mispredict = 1'b0; // calculate next PC, depending on whether the instruction is compressed or not this may be different - next_pc = pc_i + ((is_compressed_instr_i) ? 64'h2 : 64'h4); + next_pc = pc_i + ((is_compressed_instr_i) ? 
64'h2 : 64'h4); // calculate target address simple 64 bit addition - target_address = $signed(operand_c_i) + $signed(imm_i); - // save pc - resolved_branch_o.pc = pc_i; + target_address = $signed(operand_c_i) + $signed(imm_i); + // save PC - we need this to get the target row in the branch target buffer + // we play this trick with the branch instruction which wraps a byte boundary: + // |---------- Place the prediction on this PC + // \/ + // ____________________________________________________ + // |branch [15:0] | branch[31:16] | compressed 1[15:0] | + // |____________________________________________________ + // This will relief the prefetcher to re-fetch partially fetched unaligned branch instructions e.g.: + // we don't have a back arch between prefetcher and decoder/instruction FIFO. + resolved_branch_o.pc = (is_compressed_instr_i || pc_i[1] == 1'b0) ? pc_i : (pc_i[63:2] + 64'h4); // write target address which goes to pc gen resolved_branch_o.target_address = (comparison_result) ? 
target_address : next_pc; resolved_branch_o.is_taken = comparison_result; @@ -81,7 +89,7 @@ module branch_engine ( if (target_address[0] == 1'b0) begin // TODO in case of branch which is not taken it is not necessary to check for the address if ( target_address != branch_predict_i.predict_address_i // we mis-predicted the address of the branch - || branch_predict_i.predict_taken_i != comparison_result // we mis-predicted the outcome of the branch + || branch_predict_i.predict_taken_i != comparison_result // we mis-predicted the outcome of the branch || branch_predict_i.valid == 1'b0 // this means branch-prediction thought it was no branch but in reality it was one ) begin resolved_branch_o.is_mispredict = 1'b1; diff --git a/src/btb.sv b/src/btb.sv index 0f669c611..78ef5e603 100644 --- a/src/btb.sv +++ b/src/btb.sv @@ -59,7 +59,7 @@ module btb #( assign predict_taken_o = btb_q[$unsigned(index)].saturation_counter[BITS_SATURATION_COUNTER-1]; assign branch_target_address_o = btb_q[$unsigned(index)].target_address; - // update on a miss-predict + // update on a mis-predict always_comb begin : update_branchpredict btb_n = btb_q; saturation_counter = btb_q[$unsigned(update_pc)].saturation_counter; diff --git a/src/fetch_fifo.sv b/src/fetch_fifo.sv index 911059d93..f8e5d44f7 100644 --- a/src/fetch_fifo.sv +++ b/src/fetch_fifo.sv @@ -26,175 +26,233 @@ import ariane_pkg::*; // clear_i clears the FIFO for the following cycle. 
module fetch_fifo ( - input logic clk, - input logic rst_n, + input logic clk_i, + input logic rst_ni, // control signals - input logic clear_i, // clears the contents of the fifo + input logic clear_i, // clears the contents of the fifo // input port - input logic [63:0] in_addr_i, - input logic [31:0] in_rdata_i, - input logic in_valid_i, - output logic in_ready_o, + // branch prediction at in_addr_i address, as this is an address and not PC it can be the case + // that we have two compressed instruction (or one compressed instruction and one unaligned instruction) so we need + // keep two prediction inputs: [c1|c0] <- prediction for c1 and c0 + input branchpredict_sbe branch_predict_i, + input logic [63:0] in_addr_i, + input logic [31:0] in_rdata_i, + input logic in_valid_i, + output logic in_ready_o, // output port - output logic [63:0] out_addr_o, - output logic [31:0] out_rdata_o, - output logic out_valid_o, - input logic out_ready_i + output branchpredict_sbe [1:0] branch_predict_o, + output logic [63:0] out_addr_o, + output logic [31:0] out_rdata_o, + output logic out_valid_o, + input logic out_ready_i + ); localparam DEPTH = 4; // must be 3 or greater - /* verilator lint_off LITENDIAN */ - // index 0 is used for output - logic [0:DEPTH-1] [63:0] addr_n, addr_int, addr_Q; - logic [0:DEPTH-1] [31:0] rdata_n, rdata_int, rdata_Q; - logic [0:DEPTH-1] valid_n, valid_int, valid_Q; + typedef struct packed { + branchpredict_sbe branch_predict; + logic [63:0] address; + logic [31:0] instruction; + } fetch_entry; + // input registers - bounding the path from memory + branchpredict_sbe branch_predict_n, branch_predict_q; + logic [63:0] in_addr_n, in_addr_q; + logic [31:0] in_rdata_n, in_rdata_q; + logic in_valid_n, in_valid_q; - logic [63:0] addr_next; - logic [31:0] rdata, rdata_unaligned; - logic valid, valid_unaligned; + fetch_entry mem_n[DEPTH-1:0], mem_q[DEPTH-1:0]; + logic [$clog2(DEPTH)-1:0] read_pointer_n, read_pointer_q; + logic [$clog2(DEPTH)-1:0] 
write_pointer_n, write_pointer_q; + int unsigned status_cnt_n, status_cnt_q; // this integer will be truncated by the synthesis tool - logic aligned_is_compressed, unaligned_is_compressed; - logic aligned_is_compressed_st, unaligned_is_compressed_st; - /* lint_on */ + // status signals + logic full, empty; + // the last instruction was unaligned + logic unaligned_n, unaligned_q; + // save the unaligned part of the instruction to this ff + logic [15:0] unaligned_instr_n, unaligned_instr_q; + // save the address of the unaligned instruction + logic [63:0] unaligned_address_n, unaligned_address_q; - //---------------------------------------------------------------------------- - // output port - //---------------------------------------------------------------------------- - assign rdata = (valid_Q[0]) ? rdata_Q[0] : in_rdata_i; - assign valid = valid_Q[0] || in_valid_i; + // we always need two empty places + // as it could happen that we get two compressed instructions/cycle + assign full = (status_cnt_q == DEPTH - 2); + assign empty = (status_cnt_q == 0); + assign out_valid_o = ~empty; + assign in_ready_o = ~full; - assign rdata_unaligned = (valid_Q[1]) ? 
{rdata_Q[1][15:0], rdata[31:16]} : {in_rdata_i[15:0], rdata[31:16]}; - // it is implied that rdata_valid_Q[0] is set - assign valid_unaligned = (valid_Q[1] || (valid_Q[0] && in_valid_i)); + // Output assignments + assign branch_predict_o = mem_q[read_pointer_q].branch_predict; + assign out_addr_o = mem_q[read_pointer_q].address; + assign out_rdata_o = mem_q[read_pointer_q].instruction; - assign unaligned_is_compressed = rdata[17:16] != 2'b11; - assign aligned_is_compressed = rdata[1:0] != 2'b11; - assign unaligned_is_compressed_st = rdata_Q[0][17:16] != 2'b11; - assign aligned_is_compressed_st = rdata_Q[0][1:0] != 2'b11; - - //---------------------------------------------------------------------------- - // instruction aligner (if unaligned) - //---------------------------------------------------------------------------- + // ---------------- + // Input Registers + // ---------------- always_comb begin - // serve the aligned case even though the output address is unaligned when - // the next instruction will be from a hardware loop target - // in this case the current instruction is already prealigned in element 0 - if (out_addr_o[1]) begin - // unaligned case - out_rdata_o = rdata_unaligned; - - if (unaligned_is_compressed) - out_valid_o = valid; - else - out_valid_o = valid_unaligned; - end else begin - // aligned case - out_rdata_o = rdata; - out_valid_o = valid; - end - end - - assign out_addr_o = (valid_Q[0]) ? 
addr_Q[0] : in_addr_i; - - //---------------------------------------------------------------------------- - // input port - //---------------------------------------------------------------------------- - // we accept data as long as our fifo is not full - // we don't care about clear here as the data will be received one cycle - // later anyway - assign in_ready_o = ~valid_Q[DEPTH-2]; - - //---------------------------------------------------------------------------- - // FIFO management - //---------------------------------------------------------------------------- - always_comb begin - addr_int = addr_Q; - rdata_int = rdata_Q; - valid_int = valid_Q; - - if (in_valid_i) begin - for (int j = 0; j < DEPTH; j++) begin - if (~valid_Q[j]) begin - addr_int[j] = in_addr_i; - rdata_int[j] = in_rdata_i; - valid_int[j] = 1'b1; - break; - end - end + // if we are not ready latch the values + in_addr_n = in_addr_q; + in_rdata_n = in_rdata_q; + in_valid_n = in_rdata_q; + branch_predict_n = branch_predict_q; + // if we are ready to accept new data - do so! 
+ if (out_valid_o) begin + in_addr_n = in_addr_i; + in_rdata_n = in_rdata_i; + in_valid_n = in_valid_i; + branch_predict_n = branch_predict_i; + end + // flush the input registers + if (clear_i) begin + in_valid_n = 1'b0; end end - assign addr_next = {addr_int[0][63:2], 2'b00} + 64'h4; + // -------------- + // FIFO Management + // -------------- + always_comb begin : output_port + // counter + automatic int status_cnt = status_cnt_q; + automatic int write_pointer = write_pointer_q; - // move everything by one step - always_comb begin - addr_n = addr_int; - rdata_n = rdata_int; - valid_n = valid_int; + write_pointer_n = write_pointer_q; + read_pointer_n = read_pointer_q; + mem_n = mem_q; + unaligned_n = unaligned_q; + unaligned_instr_n = unaligned_instr_q; + unaligned_address_n = unaligned_address_q; + // --------------------------------- + // Input port & Instruction Aligner + // --------------------------------- + if (in_valid_i && !unaligned_q) begin + // we got a valid instruction so we can satisfy the unaligned instruction + unaligned_n = 1'b0; + // check if the instruction is compressed + if(in_rdata_i[1:0] != 2'b11) begin + // it is compressed + mem_n[write_pointer_q].branch_predict = branch_predict_q; + mem_n[write_pointer_q].address = in_addr_q; + mem_n[write_pointer_q].instruction = in_rdata_q[15:0]; - if (out_ready_i && out_valid_o) begin - if (addr_int[0][1]) begin - // unaligned case - if (unaligned_is_compressed) begin - addr_n[0] = {addr_next[63:2], 2'b00}; + status_cnt++; + write_pointer++; + // is the second instruction also compressed, like: + // _____________________________________________ + // | compressed 2 [31:16] | compressed 1[15:0] | + // |____________________________________________ + if (in_rdata_i[17:16] != 2'b11) begin + mem_n[write_pointer_q + 1].branch_predict = branch_predict_q; + mem_n[write_pointer_q + 1].address = {in_addr_q[63:2], 2'b10}; + mem_n[write_pointer_q + 1].instruction = in_rdata_q[31:16]; + + status_cnt++; + 
write_pointer++; + // or is it an unaligned 32 bit instruction like + // ____________________________________________________ + // |instr [15:0] | instr [31:16] | compressed 1[15:0] | + // |____________________________________________________ end else begin - addr_n[0] = {addr_next[63:2], 2'b10}; + // we've got an unaligned 32 bit instruction + // save the lower 16 bit + unaligned_instr_n = in_rdata_q[31:16]; + // and that it was unaligned + unaligned_n = 1'b1; + // save the address as well + unaligned_address_n = {in_addr_q[63:2], 2'b10}; + // this does not consume space in the FIFO end - - // shift everything on ene step - for (int i = 0; i < DEPTH - 1; i++) - rdata_n[i] = rdata_int[i + 1]; - - rdata_n[DEPTH - 1] = 32'b0; - - valid_n = {valid_int[1:DEPTH-1], 1'b0}; end else begin - if (aligned_is_compressed) begin - // just increase address, do not move to next entry in FIFO - addr_n[0] = {addr_int[0][63:2], 2'b10}; - end else begin - // move to next entry in FIFO - addr_n[0] = {addr_next[63:2], 2'b00}; - // shift entry - for (int i = 0; i < DEPTH - 1; i++) - rdata_n[i] = rdata_int[i + 1]; - - rdata_n[DEPTH - 1] = 32'b0; - valid_n = {valid_int[1:DEPTH-1], 1'b0}; - end + // this is a full 32 bit instruction like + // _______________________ + // | instruction [31:0] | + // |______________________ + mem_n[write_pointer_q].branch_predict = branch_predict_q; + mem_n[write_pointer_q].address = in_addr_q; + mem_n[write_pointer_q].instruction = in_rdata_q; + status_cnt++; + write_pointer++; end end - // on a clear signal from outside we invalidate the content of the FIFO - // completely and start from an empty state + // we have an outstanding unaligned instruction + if (in_valid_i && unaligned_q) begin + mem_n[write_pointer_q].branch_predict = branch_predict_q; + mem_n[write_pointer_q].address = unaligned_address_q; + mem_n[write_pointer_q].instruction = {in_rdata_q[15:0], unaligned_instr_q}; + status_cnt++; + write_pointer++; + // whats up with the other upper 16 bit 
of this instruction + // is the second instruction also compressed, like: + // _____________________________________________ + // | compressed 2 [31:16] | compressed 1[15:0] | + // |____________________________________________ + if (in_rdata_i[17:16] != 2'b11) begin + mem_n[write_pointer_q + 1].branch_predict = branch_predict_q; + mem_n[write_pointer_q + 1].address = {in_addr_q[63:2], 2'b10}; + mem_n[write_pointer_q + 1].instruction = in_rdata_q[31:16]; + status_cnt++; + write_pointer++; + // unaligned access served + unaligned_n = 1'b0; + // or is it an unaligned 32 bit instruction like + // ____________________________________________________ + // |instr [15:0] | instr [31:16] | compressed 1[15:0] | + // |____________________________________________________ + end else begin + // we've got an unaligned 32 bit instruction + // save the lower 16 bit + unaligned_instr_n = in_rdata_q[31:16]; + // and that it was unaligned + unaligned_n = 1'b1; + // save the address as well + unaligned_address_n = {in_addr_q[63:2], 2'b10}; + // this does not consume space in the FIFO + end + end + + // ------------- + // Output port + // ------------- + // we are ready to accept a new request if we still have two places in the queue + if (out_ready_i) begin + read_pointer_n = read_pointer_q + 1; + status_cnt--; + end + write_pointer_n = write_pointer; + status_cnt_n = status_cnt; + if (clear_i) - valid_n = '0; + status_cnt_n = '0; + end - //---------------------------------------------------------------------------- - // registers - //---------------------------------------------------------------------------- - - always_ff @(posedge clk, negedge rst_n) begin - if(rst_n == 1'b0) begin - addr_Q <= '{default: '0}; - rdata_Q <= '{default: '0}; - valid_Q <= '0; + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + status_cnt_q <= '{default: 0}; + mem_q <= '{default: 0}; + read_pointer_q <= '{default: 0}; + write_pointer_q <= '{default: 0}; + unaligned_q <= 1'b0; + 
unaligned_instr_q <= 16'b0; + unaligned_address_q <= 64'b0; + // input registers + in_addr_q <= 64'b0; + in_rdata_q <= 32'b0; + in_valid_q <= 1'b0; + branch_predict_q <= '{default: 0}; end else begin - - addr_Q <= addr_n; - rdata_Q <= rdata_n; - valid_Q <= valid_n; - end + status_cnt_q <= status_cnt_n; + mem_q <= mem_n; + read_pointer_q <= read_pointer_n; + write_pointer_q <= write_pointer_n; + unaligned_q <= unaligned_n; + unaligned_instr_q <= unaligned_instr_n; + unaligned_address_q <= unaligned_address_n; + // input registers + in_addr_q <= in_addr_n; + in_rdata_q <= in_rdata_n; + in_valid_q <= in_rdata_n; + branch_predict_q <= branch_predict_n; end end - - //---------------------------------------------------------------------------- - // Assertions - //---------------------------------------------------------------------------- - `ifndef SYNTHESIS - `ifndef VERILATOR - assert property ( - @(posedge clk) (in_valid_i) |-> ((valid_Q[DEPTH-1] == 1'b0) || (clear_i == 1'b1)) ); - `endif - `endif endmodule \ No newline at end of file diff --git a/src/fifo.sv b/src/fifo.sv index 7810465c4..2f70d0566 100644 --- a/src/fifo.sv +++ b/src/fifo.sv @@ -21,19 +21,19 @@ module fifo #( parameter type dtype = logic[63:0], parameter int unsigned DEPTH = 4 )( - input logic clk_i, // Clock - input logic rst_ni, // Asynchronous reset active low - input logic flush_i, // flush the queue + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // flush the queue // status flags - output logic full_o, // queue is full - output logic empty_o, // queue is empty + output logic full_o, // queue is full + output logic empty_o, // queue is empty output logic single_element_o, // there is just a single element in the queue // as long as the queue is not full we can push new data - input dtype data_i, // data to push into the queue - input logic push_i, // data is valid and can be pushed to the queue + input dtype data_i, // data to push into 
the queue + input logic push_i, // data is valid and can be pushed to the queue // as long as the queue is not empty we can pop new elements - output dtype data_o, // output data - input logic pop_i // pop head from queue + output dtype data_o, // output data + input logic pop_i // pop head from queue ); // pointer to the read and write section of the queue logic [$clog2(DEPTH) - 1:0] read_pointer_n, read_pointer_q, write_pointer_n, write_pointer_q; @@ -44,7 +44,7 @@ module fifo #( assign full_o = (status_cnt_q == DEPTH); assign empty_o = (status_cnt_q == 0); - assign single_element_o = (status_cnt_q == 1); + assign single_element_o = (status_cnt_q == 1); // read and write queue logic always_comb begin : read_write_comb // default assignment diff --git a/src/id_stage.sv b/src/id_stage.sv index a2c251a43..f7498ff97 100644 --- a/src/id_stage.sv +++ b/src/id_stage.sv @@ -34,7 +34,6 @@ module id_stage #( input logic [31:0] instruction_i, input logic instruction_valid_i, output logic decoded_instr_ack_o, - input logic is_compressed_i, input logic [63:0] pc_if_i, input exception ex_if_i, // we already got an exception in IF @@ -97,6 +96,11 @@ module id_stage #( logic issue_instr_valid_sb_iro; logic issue_ack_iro_sb; // --------------------------------------------------- + // Compressed Decoder <-> Decoder + // --------------------------------------------------- + logic [31:0] instruction_decompressed; + logic instructio_compressed; + // --------------------------------------------------- // Decoder (DC) <-> Scoreboard (SB) // --------------------------------------------------- scoreboard_entry decoded_instr_dc_sb; @@ -129,10 +133,22 @@ module id_stage #( // the case that we have an unresolved branch which is cleared in that cycle (resolved_branch_i.valid == 1) assign ready_o = ~full && (~unresolved_branch_q || resolved_branch_i.valid); + // compressed instruction decoding, or more precisely compressed instruction + // expander + // + // since it does not matter where we 
decompress instructions, we do it here + // to ease timing closure + compressed_decoder compressed_decoder_i ( + .instr_i ( instruction_i ), + .instr_o ( instruction_decompressed ), + .is_compressed_o ( instr_compressed ), + .illegal_instr_o ( ) // TODO + ); + decoder decoder_i ( .pc_i ( pc_if_i ), - .is_compressed_i ( is_compressed_i ), - .instruction_i ( instruction_i ), + .is_compressed_i ( instr_compressed ), + .instruction_i ( instruction_decompressed ), .ex_i ( ex_if_i ), .instruction_o ( decoded_instr_dc_sb ), .is_control_flow_instr_o ( is_control_flow_instr ), diff --git a/src/if_stage.sv b/src/if_stage.sv index 2303db867..c66368970 100644 --- a/src/if_stage.sv +++ b/src/if_stage.sv @@ -32,13 +32,11 @@ module if_stage ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low input logic flush_i, - input logic req_i, // request new instructions output logic if_busy_o, // is the IF stage busy fetching instructions? input logic id_ready_i, - input logic halt_if_i, // pipeline stall // ctrl flow instruction in input logic [63:0] fetch_addr_i, - input logic set_pc_i, // set new PC + input logic pc_if_valid_i, input logic is_branch_i, // the new PC was a branch e.g.: branch or jump // branchpredict out output branchpredict_sbe branch_predict_o, @@ -52,60 +50,30 @@ module if_stage ( output logic instr_valid_id_o, // instruction in IF/ID pipeline is valid output logic [31:0] instr_rdata_id_o, // read instruction is sampled and sent to ID stage for decoding input logic instr_ack_i, - output logic is_compressed_id_o, // compressed decoder thinks this is a compressed instruction - output logic illegal_c_insn_id_o, // compressed decoder thinks this is an invalid instruction - output logic [63:0] pc_if_o, output logic [63:0] pc_id_o, output exception ex_o ); - logic if_ready, if_valid; - logic branch_req; - logic valid; - logic prefetch_busy; - logic fetch_valid; - logic fetch_ready; - logic [31:0] fetch_rdata; - logic [63:0] fetch_addr; // 
branch predict registers logic branch_valid_n, branch_valid_q; logic [63:0] predict_address_n, predict_address_q; logic predict_taken_n, predict_taken_q; - // offset FSM - enum logic[1:0] {WAIT, IDLE, WAIT_BRANCHED} offset_fsm_cs, offset_fsm_ns; - logic [31:0] instr_decompressed; - logic illegal_c_insn; - logic instr_compressed_int; - - // compressed instruction decoding, or more precisely compressed instruction - // expander - // - // since it does not matter where we decompress instructions, we do it here - // to ease timing closure - compressed_decoder compressed_decoder_i ( - .instr_i ( fetch_rdata ), - .instr_o ( instr_decompressed ), - .is_compressed_o ( instr_compressed_int ), - .illegal_instr_o ( illegal_c_insn ) - ); - // Pre-fetch buffer, caches a fixed number of instructions prefetch_buffer prefetch_buffer_i ( .clk ( clk_i ), .rst_n ( rst_ni ), .flush_i ( flush_i ), - .req_i ( req_i ), - .branch_i ( branch_req ), // kill everything - .addr_i ( {fetch_addr_i[63:1], 1'b0} ), + .fetch_addr_i ( {fetch_addr_i[63:1], 1'b0} ), + .fetch_valid_i ( pc_if_valid_i ), - .ready_i ( fetch_ready ), + .ready_i ( instr_ack_i ), .valid_o ( fetch_valid ), - .rdata_o ( fetch_rdata ), - .addr_o ( fetch_addr ), + .rdata_o ( instr_rdata_id_o ), + .addr_o ( pc_id_o ), // goes to instruction memory / instruction cache .instr_req_o ( instr_req_o ), @@ -118,136 +86,36 @@ module if_stage ( .busy_o ( prefetch_busy ) ); - // offset FSM state transition logic + assign instr_valid_id_o = fetch_valid & id_ready_i; + assign if_busy_o = prefetch_busy; + always_comb begin - offset_fsm_ns = offset_fsm_cs; - fetch_ready = 1'b0; - branch_req = 1'b0; - valid = 1'b0; + // if (flush_i) begin - unique case (offset_fsm_cs) - // no valid instruction data for ID stage - // assume aligned - IDLE: begin - if (req_i) begin - branch_req = 1'b1; - offset_fsm_ns = WAIT; - end - - // take care of control flow changes - if (set_pc_i) begin - valid = 1'b0; - // switch to new PC from ID stage - branch_req = 
1'b1; - offset_fsm_ns = WAIT; - end - end - - // serving aligned 32 bit or 16 bit instruction, we don't know yet - WAIT: begin - if (fetch_valid) begin - valid = 1'b1; // an instruction is ready for ID stage - - if (req_i && if_valid) begin - fetch_ready = 1'b1; - offset_fsm_ns = WAIT; - end - end - - end - - default: begin - offset_fsm_ns = IDLE; - end - endcase - - // take care of control flow changes - if (set_pc_i) begin - valid = 1'b0; - // switch to new PC from PCGEN stage - branch_req = 1'b1; - offset_fsm_ns = WAIT; - end + // end end - - // ------------- - // Branch Logic - // ------------- - // We need to pass those registers on to ID in the case we've set - // a new branch target (or jump) and we got a valid instruction - always_comb begin - // this is the latch case we keep the values - predict_address_n = predict_address_q; - predict_taken_n = predict_taken_q; - branch_valid_n = branch_valid_q; - // a new branch target has been set by PCGEN - // save this in the register stage - if (set_pc_i && is_branch_i) begin - predict_address_n = fetch_addr_i; - // whether we took the branch or not can be seen from the set PC - // nevertheless we also need to keep branches not taken - predict_taken_n = set_pc_i; - branch_valid_n = is_branch_i; - end - - if (if_valid) begin - branch_valid_n = is_branch_i; - end - end - // -------------------------------------------------------------- // IF-ID pipeline registers, frozen when the ID stage is stalled // -------------------------------------------------------------- always_ff @(posedge clk_i, negedge rst_ni) begin : IF_ID_PIPE_REGISTERS if (~rst_ni) begin - // offset FSM state - offset_fsm_cs <= IDLE; - instr_valid_id_o <= 1'b0; - instr_rdata_id_o <= '0; - illegal_c_insn_id_o <= 1'b0; - is_compressed_id_o <= 1'b0; - pc_id_o <= '0; ex_o <= '{default: 0}; branch_valid_q <= 1'b0; predict_address_q <= 64'b0; predict_taken_q <= 1'b0; end - else - begin - offset_fsm_cs <= offset_fsm_ns; - predict_address_q <= 
predict_address_n; - predict_taken_q <= predict_taken_n; - branch_valid_q <= branch_valid_n; - - if (if_valid) begin - // in case of a flush simply say that the next instruction - // is not valid anymore - if (flush_i) begin - instr_valid_id_o <= 1'b0; - end else - instr_valid_id_o <= 1'b1; - instr_rdata_id_o <= instr_decompressed; - illegal_c_insn_id_o <= illegal_c_insn; - is_compressed_id_o <= instr_compressed_int; - pc_id_o <= pc_if_o; - ex_o.cause <= 64'b0; // TODO: Output exception - ex_o.tval <= 64'b0; // TODO: Output exception - ex_o.valid <= 1'b0; // TODO: Output exception - // id stage acknowledged - end else if (instr_ack_i) begin - instr_valid_id_o <= 1'b0; - end + else begin + predict_address_q <= predict_address_n; + predict_taken_q <= predict_taken_n; + branch_valid_q <= branch_valid_n; + ex_o.cause <= 64'b0; // TODO: Output exception + ex_o.tval <= 64'b0; // TODO: Output exception + ex_o.valid <= 1'b0; // TODO: Output exception end end - // Assignments - assign pc_if_o = fetch_addr; - - assign if_ready = valid & id_ready_i; - assign if_valid = (~halt_if_i) & if_ready; - assign if_busy_o = prefetch_busy; assign branch_predict_o = {predict_address_q, predict_taken_q, branch_valid_q}; //------------- // Assertions diff --git a/src/pcgen.sv b/src/pcgen.sv index ac7450ac7..0cb51ce70 100644 --- a/src/pcgen.sv +++ b/src/pcgen.sv @@ -23,13 +23,14 @@ module pcgen ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low + input logic fetch_enable_i, input logic flush_i, - input logic [63:0] pc_if_i, - input branchpredict resolved_branch_i, // from controller signaling a branchpredict -> update BTB + input logic if_ready_i, + input branchpredict resolved_branch_i, // from controller signaling a branchpredict -> update BTB // to IF output logic [63:0] pc_if_o, // new PC - output logic set_pc_o, // request the PC to be set to pc_if_o - output logic is_branch_o, // to check if we branchpredicted we need to save whether this was a branch 
or not <- LOL + output logic pc_if_valid_o, // the PC is valid + output logic is_branch_o, // global input input logic [63:0] boot_addr_i, // CSR input @@ -43,44 +44,25 @@ module pcgen ( logic [63:0] npc_n, npc_q; logic is_branch; logic is_branch_n, is_branch_q; - logic set_pc_n, set_pc_q; - // pc which is used to look up the prediction in the BTB - logic [63:0] predict_pc; + assign pc_if_o = npc_q; - assign set_pc_o = set_pc_q; assign is_branch_o = is_branch_q; - // Predict PC source select - // the PC which we use for lookup in the BTB can come from two sources: - // 1. PC from if stage plus + 4 - // 2. or PC which we just predicted + 4 - always_comb begin : pc_btb_lookup - // Ad 2: From PC of previous cycle (which is now in IF) - // predict_pc = npc_q; - // // Ad 1: - // // in the previous cycle we set the PC to npc_q - // // calculate the plus one version - // end else begin - predict_pc = {pc_if_i[62:2], 2'b0} + 64'h4; - // end - end - btb #( .NR_ENTRIES(64), .BITS_SATURATION_COUNTER(2) ) btb_i ( - .vpc_i ( predict_pc ), + // Use the PC from last cycle to perform branch lookup + .vpc_i ( npc_q ), .branchpredict_i ( resolved_branch_i ), .is_branch_o ( is_branch ), .predict_taken_o ( predict_taken ), .branch_target_address_o ( branch_target_address ), .* ); - - // TODO: on flush output exception or other things but do not take branch // ------------------- // Next PC // ------------------- @@ -92,23 +74,23 @@ module pcgen ( // 5. Boot address always_comb begin : npc_select // default assignment - npc_n = npc_q; - set_pc_n = 1'b0; - is_branch_n = is_branch; + // default is a consecutive PC + if (if_ready_i && fetch_enable_i) + npc_n = {npc_q[62:2], 2'b0} + 64'h4; + else // or keep the PC stable if IF is not ready + npc_n = npc_q; + + pc_if_valid_o = 1'b0; + is_branch_n = is_branch; - // we already set the PC a cycle earlier - if (set_pc_q) - is_branch_n = 1'b0; // 4. 
Predict taken - if (is_branch && predict_taken && ~set_pc_q) begin - set_pc_n = 1'b1; - npc_n = branch_target_address; + if (is_branch && predict_taken) begin + npc_n = branch_target_address; end // 1.Debug // 3. Control flow change request if (resolved_branch_i.is_mispredict) begin - set_pc_n = 1'b1; // we already got the correct target address npc_n = resolved_branch_i.target_address; end @@ -120,7 +102,10 @@ module pcgen ( // 3. Return from exception - + // fetch enable + if (fetch_enable_i) begin + pc_if_valid_o = 1'b1; + end end // ------------------- // Sequential Process @@ -128,12 +113,10 @@ module pcgen ( // PCGEN -> IF Register always_ff @(posedge clk_i or negedge rst_ni) begin if(~rst_ni) begin - npc_q <= 64'b0; - set_pc_q <= 1'b0; + npc_q <= boot_addr_i; is_branch_q <= 1'b0; end else begin npc_q <= npc_n; - set_pc_q <= set_pc_n; is_branch_q <= is_branch_n; end end diff --git a/src/prefetch_buffer.sv b/src/prefetch_buffer.sv index 5dd07ead8..324770286 100644 --- a/src/prefetch_buffer.sv +++ b/src/prefetch_buffer.sv @@ -28,10 +28,8 @@ module prefetch_buffer input logic rst_n, input logic flush_i, - input logic req_i, - - input logic branch_i, - input logic [63:0] addr_i, + input logic [63:0] fetch_addr_i, + input logic fetch_valid_i, input logic ready_i, output logic valid_o, @@ -51,9 +49,8 @@ module prefetch_buffer enum logic [1:0] {IDLE, WAIT_GNT, WAIT_RVALID, WAIT_ABORTED } CS, NS; - logic [63:0] instr_addr_q, fetch_addr; logic addr_valid; - + logic [63:0] instr_addr_q; logic fifo_valid; logic fifo_ready; logic fifo_clear; @@ -61,64 +58,50 @@ module prefetch_buffer //--------------------------------- // Prefetch buffer status //--------------------------------- - - assign busy_o = (CS != IDLE) || instr_req_o; + // we are busy if we are either waiting for a grant + // or if the fifo is full + assign busy_o = (CS inside {WAIT_GNT, WAIT_ABORTED}) && fifo_ready; //--------------------------------- // Fetch FIFO // consumes addresses and rdata 
//--------------------------------- fetch_fifo fifo_i ( - .clk ( clk ), - .rst_n ( rst_n ), + .clk_i ( clk ), + .rst_ni ( rst_n ), - .clear_i ( fifo_clear ), + .clear_i ( flush_i ), .in_addr_i ( instr_addr_q ), .in_rdata_i ( instr_rdata_i ), .in_valid_i ( fifo_valid ), .in_ready_o ( fifo_ready ), - .out_valid_o ( valid_o ), .out_ready_i ( ready_i ), .out_rdata_o ( rdata_o ), .out_addr_o ( addr_o ) ); - - //--------------- - // Fetch address - //--------------- - - assign fetch_addr = {instr_addr_q[63:2], 2'b00} + 64'd4; - assign fifo_clear = branch_i || flush_i; - - - //------------------------- + //-------------------------------------------------- // Instruction fetch FSM // deals with instruction memory / instruction cache - //------------------------- + //-------------------------------------------------- always_comb begin instr_req_o = 1'b0; - instr_addr_o = fetch_addr; + instr_addr_o = fetch_addr_i; fifo_valid = 1'b0; - addr_valid = 1'b0; NS = CS; unique case(CS) // default state, not waiting for requested data - IDLE: - begin - instr_addr_o = fetch_addr; + IDLE: begin + instr_addr_o = fetch_addr_i; instr_req_o = 1'b0; - if (branch_i) - instr_addr_o = addr_i; - - if (req_i & (fifo_ready | branch_i )) begin + if (fifo_ready && fetch_valid_i) begin instr_req_o = 1'b1; addr_valid = 1'b1; @@ -132,16 +115,10 @@ module prefetch_buffer end // case: IDLE // we sent a request but did not yet get a grant - WAIT_GNT: - begin + WAIT_GNT: begin instr_addr_o = instr_addr_q; instr_req_o = 1'b1; - if (branch_i) begin - instr_addr_o = addr_i; - addr_valid = 1'b1; - end - if(instr_gnt_i) NS = WAIT_RVALID; else @@ -150,16 +127,12 @@ module prefetch_buffer // we wait for rvalid, after that we are ready to serve a new request WAIT_RVALID: begin - instr_addr_o = fetch_addr; + instr_addr_o = fetch_addr_i; - if (branch_i) - instr_addr_o = addr_i; - - - if (req_i & (fifo_ready | branch_i)) begin + if (fifo_ready) begin // prepare for next request - if (instr_rvalid_i) begin + if 
(fifo_ready && fetch_valid_i) begin instr_req_o = 1'b1; fifo_valid = 1'b1; addr_valid = 1'b1; @@ -173,14 +146,12 @@ module prefetch_buffer end else begin // we are requested to abort our current request // we didn't get an rvalid yet, so wait for it - if (branch_i) begin - addr_valid = 1'b1; - NS = WAIT_ABORTED; + if (flush_i) begin + NS = WAIT_ABORTED; end end end else begin // just wait for rvalid and go back to IDLE, no new request - if (instr_rvalid_i) begin fifo_valid = 1'b1; NS = IDLE; @@ -194,11 +165,6 @@ module prefetch_buffer WAIT_ABORTED: begin instr_addr_o = instr_addr_q; - if (branch_i) begin - instr_addr_o = addr_i; - addr_valid = 1'b1; - end - if (instr_rvalid_i) begin instr_req_o = 1'b1; // no need to send address, already done in WAIT_RVALID From 2fdc3da6b8342a670a4498c6548cd98f6f60c1e6 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Sun, 14 May 2017 00:01:18 +0200 Subject: [PATCH 26/43] More complete prefetcher implementation Still a lot of bugs to fix --- src/ariane.sv | 116 +++++++++++++++++++++-------------------- src/fetch_fifo.sv | 57 +++++++++++++------- src/id_stage.sv | 19 ++----- src/if_stage.sv | 97 ++++++++++++++++++++++++---------- src/prefetch_buffer.sv | 2 +- 5 files changed, 170 insertions(+), 121 deletions(-) diff --git a/src/ariane.sv b/src/ariane.sv index b71bf58b9..340bc5d9c 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -111,10 +111,10 @@ module ariane logic [31:0] instr_rdata_if_id; logic decode_ack_id_if; logic is_compressed_if_id; - logic illegal_c_insn_id_if; logic [63:0] pc_id_if_id; exception exception_if_id; branchpredict_sbe branch_predict_if_id; + logic instr_is_compressed_if_id; // -------------- // ID <-> EX // -------------- @@ -236,24 +236,25 @@ module ariane // IF // --------- if_stage if_stage_i ( - .flush_i ( flush_ctrl_if ), - .pc_if_valid_i ( pc_valid_pcgen_if ), - .if_busy_o ( if_ready_if_pcgen ), - .id_ready_i ( ready_id_if ), - .is_branch_i ( is_branch_pcgen_if ), - .branch_predict_o ( 
branch_predict_if_id ), - .fetch_addr_i ( pc_pcgen_if ), - .instr_req_o ( fetch_req_if_ex ), - .instr_addr_o ( fetch_vaddr_if_ex ), - .instr_gnt_i ( fetch_gnt_ex_if ), - .instr_rvalid_i ( fetch_valid_ex_if ), - .instr_rdata_i ( fetch_rdata_ex_if ), - .instr_ack_i ( decode_ack_id_if ), + .flush_i ( flush_ctrl_if ), + .pc_if_valid_i ( pc_valid_pcgen_if ), + .if_busy_o ( if_ready_if_pcgen ), + .id_ready_i ( ready_id_if ), + .is_branch_i ( is_branch_pcgen_if ), + .fetch_addr_i ( pc_pcgen_if ), + .instr_req_o ( fetch_req_if_ex ), + .instr_addr_o ( fetch_vaddr_if_ex ), + .instr_gnt_i ( fetch_gnt_ex_if ), + .instr_rvalid_i ( fetch_valid_ex_if ), + .instr_rdata_i ( fetch_rdata_ex_if ), - .instr_valid_id_o ( instr_valid_if_id ), - .instr_rdata_id_o ( instr_rdata_if_id ), - .pc_id_o ( pc_id_if_id ), - .ex_o ( exception_if_id ), + .pc_id_o ( pc_id_if_id ), + .instr_valid_id_o ( instr_valid_if_id ), + .instr_ack_i ( decode_ack_id_if ), + .instr_rdata_id_o ( instr_rdata_if_id ), + .instr_is_compressed_o ( instr_is_compressed_if_id ), + .branch_predict_o ( branch_predict_if_id ), + .ex_o ( exception_if_id ), .* ); // --------- @@ -265,54 +266,55 @@ module ariane .NR_WB_PORTS ( NR_WB_PORTS ) ) id_stage_i ( - .test_en_i ( test_en_i ), - .flush_i ( flush ), - .flush_unissued_instr_i ( flush_unissued_instr_ctrl_id ), - .flush_scoreboard_i ( flush_scoreboard_ctrl_id ), - .instruction_i ( instr_rdata_if_id ), - .instruction_valid_i ( instr_valid_if_id ), - .decoded_instr_ack_o ( decode_ack_id_if ), - .pc_if_i ( pc_id_if_id ), // PC from if - .ex_if_i ( exception_if_id ), // exception from if - .ready_o ( ready_id_if ), + .test_en_i ( test_en_i ), + .flush_i ( flush ), + .flush_unissued_instr_i ( flush_unissued_instr_ctrl_id ), + .flush_scoreboard_i ( flush_scoreboard_ctrl_id ), + .instruction_i ( instr_rdata_if_id ), + .instr_is_compressed_i ( instr_is_compressed_if_id ), + .instruction_valid_i ( instr_valid_if_id ), + .decoded_instr_ack_o ( decode_ack_id_if ), + .pc_if_i ( 
pc_id_if_id ), // PC from if + .ex_if_i ( exception_if_id ), // exception from if + .ready_o ( ready_id_if ), // Functional Units - .operator_o ( operator_id_ex ), - .operand_a_o ( operand_a_id_ex ), - .operand_b_o ( operand_b_id_ex ), - .operand_c_o ( operand_c_id_ex ), - .imm_o ( imm_id_ex ), - .trans_id_o ( trans_id_id_ex ), - .pc_o ( pc_id_ex ), - .is_compressed_instr_o ( is_compressed_instr_id_ex ), + .operator_o ( operator_id_ex ), + .operand_a_o ( operand_a_id_ex ), + .operand_b_o ( operand_b_id_ex ), + .operand_c_o ( operand_c_id_ex ), + .imm_o ( imm_id_ex ), + .trans_id_o ( trans_id_id_ex ), + .pc_o ( pc_id_ex ), + .is_compressed_instr_o ( is_compressed_instr_id_ex ), // ALU - .alu_ready_i ( alu_ready_ex_id ), - .alu_valid_o ( alu_valid_id_ex ), + .alu_ready_i ( alu_ready_ex_id ), + .alu_valid_o ( alu_valid_id_ex ), // Branches and Jumps - .branch_valid_o ( branch_valid_id_ex ), // branch is valid - .branch_predict_i ( branch_predict_if_id ), // branch predict from if - .branch_predict_o ( branch_predict_id_ex ), // branch predict to ex - .resolved_branch_i ( resolved_branch ), // in order to resolve the branch + .branch_valid_o ( branch_valid_id_ex ), // branch is valid + .branch_predict_i ( branch_predict_if_id ), // branch predict from if + .branch_predict_o ( branch_predict_id_ex ), // branch predict to ex + .resolved_branch_i ( resolved_branch ), // in order to resolve the branch // LSU - .lsu_ready_i ( lsu_ready_ex_id ), - .lsu_valid_o ( lsu_valid_id_ex ), + .lsu_ready_i ( lsu_ready_ex_id ), + .lsu_valid_o ( lsu_valid_id_ex ), // Multiplier - .mult_ready_i ( mult_ready_ex_id ), - .mult_valid_o ( mult_valid_id_ex ), + .mult_ready_i ( mult_ready_ex_id ), + .mult_valid_o ( mult_valid_id_ex ), // CSR - .csr_ready_i ( csr_ready_ex_id ), - .csr_valid_o ( csr_valid_id_ex ), + .csr_ready_i ( csr_ready_ex_id ), + .csr_valid_o ( csr_valid_id_ex ), - .trans_id_i ( {alu_trans_id_ex_id, lsu_trans_id_ex_id, csr_trans_id_ex_id }), - .wdata_i ( {alu_result_ex_id, 
lsu_result_ex_id, csr_result_ex_id }), - .ex_ex_i ( {alu_exception_ex_id, lsu_exception_ex_id, {$bits(exception){1'b0}} }), - .wb_valid_i ( {alu_valid_ex_id, lsu_valid_ex_id, csr_valid_ex_id }), + .trans_id_i ( {alu_trans_id_ex_id, lsu_trans_id_ex_id, csr_trans_id_ex_id }), + .wdata_i ( {alu_result_ex_id, lsu_result_ex_id, csr_result_ex_id }), + .ex_ex_i ( {alu_exception_ex_id, lsu_exception_ex_id, {$bits(exception){1'b0}} }), + .wb_valid_i ( {alu_valid_ex_id, lsu_valid_ex_id, csr_valid_ex_id }), - .waddr_a_i ( waddr_a_commit_id ), - .wdata_a_i ( wdata_a_commit_id ), - .we_a_i ( we_a_commit_id ), + .waddr_a_i ( waddr_a_commit_id ), + .wdata_a_i ( wdata_a_commit_id ), + .we_a_i ( we_a_commit_id ), - .commit_instr_o ( commit_instr_id_commit ), - .commit_ack_i ( commit_ack_commit_id ), + .commit_instr_o ( commit_instr_id_commit ), + .commit_ack_i ( commit_ack_commit_id ), .* ); // --------- diff --git a/src/fetch_fifo.sv b/src/fetch_fifo.sv index f8e5d44f7..83ed497d9 100644 --- a/src/fetch_fifo.sv +++ b/src/fetch_fifo.sv @@ -63,10 +63,10 @@ module fetch_fifo fetch_entry mem_n[DEPTH-1:0], mem_q[DEPTH-1:0]; logic [$clog2(DEPTH)-1:0] read_pointer_n, read_pointer_q; logic [$clog2(DEPTH)-1:0] write_pointer_n, write_pointer_q; - int unsigned status_cnt_n, status_cnt_q; // this integer will be truncated by the synthesis tool + int unsigned status_cnt_n, status_cnt_q; // this integer will be truncated by the synthesis tool // status signals - logic full, empty; + logic full, empty, two_left; // the last instruction was unaligned logic unaligned_n, unaligned_q; // save the unaligned part of the instruction to this ff @@ -76,15 +76,14 @@ module fetch_fifo // we always need two empty places // as it could happen that we get two compressed instructions/cycle - assign full = (status_cnt_q == DEPTH - 2); + assign full = (status_cnt_q >= DEPTH - 2); + assign one_left = (status_cnt_q == DEPTH - 1); // two spaces are left assign empty = (status_cnt_q == 0); - assign out_valid_o = 
~empty; - assign in_ready_o = ~full; - - // Output assignments - assign branch_predict_o = mem_q[read_pointer_q].branch_predict; - assign out_addr_o = mem_q[read_pointer_q].address; - assign out_rdata_o = mem_q[read_pointer_q].instruction; + // the output is valid if we are either empty or just got an valid + assign out_valid_o = !empty || in_valid_q; + // we need space for at least two instructions: the full flag is conditioned on that + // but if we pop in the current cycle and we have one place left we can still fit two instructions alt + assign in_ready_o = !full || (out_ready_i && one_left); // ---------------- // Input Registers @@ -93,10 +92,10 @@ module fetch_fifo // if we are not ready latch the values in_addr_n = in_addr_q; in_rdata_n = in_rdata_q; - in_valid_n = in_rdata_q; + in_valid_n = 1'b0; branch_predict_n = branch_predict_q; // if we are ready to accept new data - do so! - if (out_valid_o) begin + if (in_ready_o) begin in_addr_n = in_addr_i; in_rdata_n = in_rdata_i; in_valid_n = in_valid_i; @@ -125,11 +124,11 @@ module fetch_fifo // --------------------------------- // Input port & Instruction Aligner // --------------------------------- - if (in_valid_i && !unaligned_q) begin + if (in_valid_q && !unaligned_q) begin // we got a valid instruction so we can satisfy the unaligned instruction unaligned_n = 1'b0; // check if the instruction is compressed - if(in_rdata_i[1:0] != 2'b11) begin + if (in_rdata_q[1:0] != 2'b11) begin // it is compressed mem_n[write_pointer_q].branch_predict = branch_predict_q; mem_n[write_pointer_q].address = in_addr_q; @@ -141,7 +140,7 @@ module fetch_fifo // _____________________________________________ // | compressed 2 [31:16] | compressed 1[15:0] | // |____________________________________________ - if (in_rdata_i[17:16] != 2'b11) begin + if (in_rdata_q[17:16] != 2'b11) begin mem_n[write_pointer_q + 1].branch_predict = branch_predict_q; mem_n[write_pointer_q + 1].address = {in_addr_q[63:2], 2'b10}; mem_n[write_pointer_q + 
1].instruction = in_rdata_q[31:16]; @@ -175,7 +174,7 @@ module fetch_fifo end end // we have an outstanding unaligned instruction - if (in_valid_i && unaligned_q) begin + if (in_valid_q && unaligned_q) begin mem_n[write_pointer_q].branch_predict = branch_predict_q; mem_n[write_pointer_q].address = unaligned_address_q; mem_n[write_pointer_q].instruction = {in_rdata_q[15:0], unaligned_instr_q}; @@ -186,7 +185,7 @@ module fetch_fifo // _____________________________________________ // | compressed 2 [31:16] | compressed 1[15:0] | // |____________________________________________ - if (in_rdata_i[17:16] != 2'b11) begin + if (in_rdata_q[17:16] != 2'b11) begin mem_n[write_pointer_q + 1].branch_predict = branch_predict_q; mem_n[write_pointer_q + 1].address = {in_addr_q[63:2], 2'b10}; mem_n[write_pointer_q + 1].instruction = in_rdata_q[31:16]; @@ -214,10 +213,30 @@ module fetch_fifo // Output port // ------------- // we are ready to accept a new request if we still have two places in the queue - if (out_ready_i) begin + + // Output assignments + branch_predict_o = mem_q[read_pointer_q].branch_predict; + out_addr_o = mem_q[read_pointer_q].address; + out_rdata_o = mem_q[read_pointer_q].instruction; + + // pass-through if queue is empty but we are currently expanding or re-aligning an instruction + if (empty && in_valid_q && out_ready_i) begin + // we either have a full 32 bit instruction a compressed 16 bit instruction + branch_predict_o = branch_predict_q; + out_addr_o = in_addr_q; + // depending on whether the instruction is compressed or not output the correct thing + if (in_rdata_q[1:0] != 2'b11) + out_rdata_o = in_rdata_q[15:0]; + else + out_rdata_o = in_rdata_q; + // regular read but do not issue if we are already empty + // this can happen since we have an output latch in the IF stage and the ID stage will only know a cycle + // later that we do not have any valid instructions anymore + end else if (out_ready_i && !empty) begin read_pointer_n = read_pointer_q + 1; 
status_cnt--; end + write_pointer_n = write_pointer; status_cnt_n = status_cnt; @@ -251,7 +270,7 @@ module fetch_fifo // input registers in_addr_q <= in_addr_n; in_rdata_q <= in_rdata_n; - in_valid_q <= in_rdata_n; + in_valid_q <= in_valid_n; branch_predict_q <= branch_predict_n; end end diff --git a/src/id_stage.sv b/src/id_stage.sv index f7498ff97..c43dfffd7 100644 --- a/src/id_stage.sv +++ b/src/id_stage.sv @@ -32,6 +32,7 @@ module id_stage #( input logic flush_scoreboard_i, // from IF input logic [31:0] instruction_i, + input logic instr_is_compressed_i, input logic instruction_valid_i, output logic decoded_instr_ack_o, input logic [63:0] pc_if_i, @@ -98,8 +99,6 @@ module id_stage #( // --------------------------------------------------- // Compressed Decoder <-> Decoder // --------------------------------------------------- - logic [31:0] instruction_decompressed; - logic instructio_compressed; // --------------------------------------------------- // Decoder (DC) <-> Scoreboard (SB) // --------------------------------------------------- @@ -133,22 +132,10 @@ module id_stage #( // the case that we have an unresolved branch which is cleared in that cycle (resolved_branch_i.valid == 1) assign ready_o = ~full && (~unresolved_branch_q || resolved_branch_i.valid); - // compressed instruction decoding, or more precisely compressed instruction - // expander - // - // since it does not matter where we decompress instructions, we do it here - // to ease timing closure - compressed_decoder compressed_decoder_i ( - .instr_i ( instruction_i ), - .instr_o ( instruction_decompressed ), - .is_compressed_o ( instr_compressed ), - .illegal_instr_o ( ) // TODO - ); - decoder decoder_i ( .pc_i ( pc_if_i ), - .is_compressed_i ( instr_compressed ), - .instruction_i ( instruction_decompressed ), + .is_compressed_i ( instr_is_compressed_i ), + .instruction_i ( instruction_i ), .ex_i ( ex_if_i ), .instruction_o ( decoded_instr_dc_sb ), .is_control_flow_instr_o ( is_control_flow_instr 
), diff --git a/src/if_stage.sv b/src/if_stage.sv index c66368970..f9dde15d6 100644 --- a/src/if_stage.sv +++ b/src/if_stage.sv @@ -38,8 +38,6 @@ module if_stage ( input logic [63:0] fetch_addr_i, input logic pc_if_valid_i, input logic is_branch_i, // the new PC was a branch e.g.: branch or jump - // branchpredict out - output branchpredict_sbe branch_predict_o, // instruction cache interface output logic instr_req_o, output logic [63:0] instr_addr_o, @@ -47,19 +45,43 @@ module if_stage ( input logic instr_rvalid_i, input logic [31:0] instr_rdata_i, // Output of IF Pipeline stage - output logic instr_valid_id_o, // instruction in IF/ID pipeline is valid - output logic [31:0] instr_rdata_id_o, // read instruction is sampled and sent to ID stage for decoding - input logic instr_ack_i, output logic [63:0] pc_id_o, - output exception ex_o + output logic instr_valid_id_o, // instruction in IF/ID pipeline is valid + input logic instr_ack_i, + output logic [31:0] instr_rdata_id_o, // read instruction is sampled and sent to ID stage for decoding + output logic instr_is_compressed_o, + output exception ex_o, + output branchpredict_sbe branch_predict_o // branchpredict out ); - + // output logic illegal_compressed_instr_o -> in exception logic fetch_valid; + logic [31:0] instr_rdata; + logic instr_is_compressed; + logic [31:0] decompressed_instruction; + logic [63:0] addr_o; + logic illegal_compressed_instr; + logic prefetch_busy; + // --------------------- + // IF <-> ID Registers + // --------------------- + logic [63:0] pc_id_n, pc_id_q; + logic instr_valid_id_n, instr_valid_id_q; + logic [31:0] instr_rdata_id_n, instr_rdata_id_q; + logic instr_is_compressed_n, instr_is_compressed_q; // branch predict registers - logic branch_valid_n, branch_valid_q; - logic [63:0] predict_address_n, predict_address_q; - logic predict_taken_n, predict_taken_q; + logic branch_valid_n, branch_valid_q; + logic [63:0] predict_address_n, predict_address_q; + logic predict_taken_n, 
predict_taken_q; + + // compressed instruction decoding, or more precisely compressed instruction expander + // since it does not matter where we decompress instructions, we do it here to ease timing closure + compressed_decoder compressed_decoder_i ( + .instr_i ( instr_rdata ), + .instr_o ( decompressed_instruction ), + .is_compressed_o ( instr_is_compressed ), + .illegal_instr_o ( illegal_compressed_instr ) + ); // Pre-fetch buffer, caches a fixed number of instructions prefetch_buffer prefetch_buffer_i ( @@ -72,8 +94,8 @@ module if_stage ( .ready_i ( instr_ack_i ), .valid_o ( fetch_valid ), - .rdata_o ( instr_rdata_id_o ), - .addr_o ( pc_id_o ), + .rdata_o ( instr_rdata ), + .addr_o ( addr_o ), // goes to instruction memory / instruction cache .instr_req_o ( instr_req_o ), @@ -86,33 +108,52 @@ module if_stage ( .busy_o ( prefetch_busy ) ); - assign instr_valid_id_o = fetch_valid & id_ready_i; assign if_busy_o = prefetch_busy; + assign pc_id_o = pc_id_q; + assign instr_valid_id_o = instr_valid_id_q; + assign instr_rdata_id_o = instr_rdata_id_q; + assign instr_is_compressed_o = instr_is_compressed_q; + // Pipeline registers always_comb begin + // Instruction is valid + pc_id_n = addr_o; + instr_valid_id_n = fetch_valid; + instr_rdata_id_n = decompressed_instruction; + instr_is_compressed_n = instr_is_compressed; - // if (flush_i) begin - - // end + if (flush_i) begin + instr_valid_id_n = 1'b0; + end + // exception forwarding in here end + // -------------------------------------------------------------- // IF-ID pipeline registers, frozen when the ID stage is stalled // -------------------------------------------------------------- always_ff @(posedge clk_i, negedge rst_ni) begin : IF_ID_PIPE_REGISTERS if (~rst_ni) begin - ex_o <= '{default: 0}; - branch_valid_q <= 1'b0; - predict_address_q <= 64'b0; - predict_taken_q <= 1'b0; - end - else begin - predict_address_q <= predict_address_n; - predict_taken_q <= predict_taken_n; - branch_valid_q <= branch_valid_n; + 
ex_o <= '{default: 0}; + branch_valid_q <= 1'b0; + predict_address_q <= 64'b0; + predict_taken_q <= 1'b0; + pc_id_q <= 64'b0; + instr_valid_id_q <= 1'b0; + instr_rdata_id_q <= 32'b0; + instr_is_compressed_q <= 1'b0; + end else begin + pc_id_q <= pc_id_n; + instr_valid_id_q <= instr_valid_id_n; + instr_rdata_id_q <= instr_rdata_id_n; + instr_is_compressed_q <= instr_is_compressed_n; - ex_o.cause <= 64'b0; // TODO: Output exception - ex_o.tval <= 64'b0; // TODO: Output exception - ex_o.valid <= 1'b0; // TODO: Output exception + predict_address_q <= predict_address_n; + predict_taken_q <= predict_taken_n; + branch_valid_q <= branch_valid_n; + + ex_o.cause <= 64'b0; // TODO: Output exception + ex_o.tval <= 64'b0; // TODO: Output exception + ex_o.valid <= 1'b0; //illegal_compressed_instr; // TODO: Output exception end end diff --git a/src/prefetch_buffer.sv b/src/prefetch_buffer.sv index 324770286..ee8bd7a16 100644 --- a/src/prefetch_buffer.sv +++ b/src/prefetch_buffer.sv @@ -60,7 +60,7 @@ module prefetch_buffer //--------------------------------- // we are busy if we are either waiting for a grant // or if the fifo is full - assign busy_o = (CS inside {WAIT_GNT, WAIT_ABORTED}) && fifo_ready; + assign busy_o = (CS inside {WAIT_GNT, WAIT_ABORTED}) || !fifo_ready; //--------------------------------- // Fetch FIFO From e3c7a439dd651f987fed61da1704c6fa96d1e752 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Sun, 14 May 2017 01:18:58 +0200 Subject: [PATCH 27/43] :bug: Fixes in fetch fifo, correct resource util --- src/fetch_fifo.sv | 54 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 16 deletions(-) diff --git a/src/fetch_fifo.sv b/src/fetch_fifo.sv index 83ed497d9..8f5c29924 100644 --- a/src/fetch_fifo.sv +++ b/src/fetch_fifo.sv @@ -59,6 +59,8 @@ module fetch_fifo logic [63:0] in_addr_n, in_addr_q; logic [31:0] in_rdata_n, in_rdata_q; logic in_valid_n, in_valid_q; + // this bit indicates whether there is a instruction waiting in the 
pipeline register or not + logic pipelein_register_valid_n, pipelein_register_valid_q; fetch_entry mem_n[DEPTH-1:0], mem_q[DEPTH-1:0]; logic [$clog2(DEPTH)-1:0] read_pointer_n, read_pointer_q; @@ -66,7 +68,7 @@ module fetch_fifo int unsigned status_cnt_n, status_cnt_q; // this integer will be truncated by the synthesis tool // status signals - logic full, empty, two_left; + logic full, empty, one_left; // the last instruction was unaligned logic unaligned_n, unaligned_q; // save the unaligned part of the instruction to this ff @@ -76,10 +78,10 @@ module fetch_fifo // we always need two empty places // as it could happen that we get two compressed instructions/cycle - assign full = (status_cnt_q >= DEPTH - 2); + assign full = (status_cnt_q > DEPTH - 2); assign one_left = (status_cnt_q == DEPTH - 1); // two spaces are left assign empty = (status_cnt_q == 0); - // the output is valid if we are either empty or just got an valid + // the output is valid if we are either empty or just got a valid assign out_valid_o = !empty || in_valid_q; // we need space for at least two instructions: the full flag is conditioned on that // but if we pop in the current cycle and we have one place left we can still fit two instructions alt @@ -115,12 +117,13 @@ module fetch_fifo automatic int status_cnt = status_cnt_q; automatic int write_pointer = write_pointer_q; - write_pointer_n = write_pointer_q; - read_pointer_n = read_pointer_q; - mem_n = mem_q; - unaligned_n = unaligned_q; - unaligned_instr_n = unaligned_instr_q; - unaligned_address_n = unaligned_address_q; + write_pointer_n = write_pointer_q; + read_pointer_n = read_pointer_q; + mem_n = mem_q; + unaligned_n = unaligned_q; + unaligned_instr_n = unaligned_instr_q; + unaligned_address_n = unaligned_address_q; + pipelein_register_valid_n = pipelein_register_valid_q; // --------------------------------- // Input port & Instruction Aligner // --------------------------------- @@ -220,29 +223,46 @@ module fetch_fifo out_rdata_o = 
mem_q[read_pointer_q].instruction; // pass-through if queue is empty but we are currently expanding or re-aligning an instruction - if (empty && in_valid_q && out_ready_i) begin + if (empty && in_valid_q) begin // we either have a full 32 bit instruction a compressed 16 bit instruction branch_predict_o = branch_predict_q; out_addr_o = in_addr_q; // depending on whether the instruction is compressed or not output the correct thing - if (in_rdata_q[1:0] != 2'b11) - out_rdata_o = in_rdata_q[15:0]; - else - out_rdata_o = in_rdata_q; + if (!unaligned_q) begin + if (in_rdata_q[1:0] != 2'b11) + out_rdata_o = {16'b0, in_rdata_q[15:0]}; + else + out_rdata_o = in_rdata_q; + // serve unaligned + end else begin + out_addr_o = unaligned_address_q; + out_rdata_o = {in_rdata_q[15:0], unaligned_instr_q}; + end + // if the output is ready or there is currently no valid instruction in the pipeline register, push this instruction + if (out_ready_i || !pipelein_register_valid_q) begin + pipelein_register_valid_n = 1'b1; + read_pointer_n = read_pointer_q + 1; + status_cnt--; + end // regular read but do not issue if we are already empty // this can happen since we have an output latch in the IF stage and the ID stage will only know a cycle // later that we do not have any valid instructions anymore - end else if (out_ready_i && !empty) begin + end + + if (out_ready_i && !empty) begin read_pointer_n = read_pointer_q + 1; status_cnt--; end + if (out_ready_i) begin + pipelein_register_valid_n = 1'b0; + end + write_pointer_n = write_pointer; status_cnt_n = status_cnt; if (clear_i) status_cnt_n = '0; - end always_ff @(posedge clk_i or negedge rst_ni) begin @@ -259,6 +279,7 @@ module fetch_fifo in_rdata_q <= 32'b0; in_valid_q <= 1'b0; branch_predict_q <= '{default: 0}; + pipelein_register_valid_q <= 1'b0; end else begin status_cnt_q <= status_cnt_n; mem_q <= mem_n; @@ -272,6 +293,7 @@ module fetch_fifo in_rdata_q <= in_rdata_n; in_valid_q <= in_valid_n; branch_predict_q <= branch_predict_n; + pipelein_register_valid_q 
<= pipelein_register_valid_n; end end endmodule \ No newline at end of file From 63beafc6ab79808f00a15fab1464f2fda5facc0d Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Sun, 14 May 2017 12:06:29 +0200 Subject: [PATCH 28/43] Increase fetch fifo size in order not to stall --- Makefile | 2 +- src/fetch_fifo.sv | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index b6aeff4c6..91b0f148a 100644 --- a/Makefile +++ b/Makefile @@ -62,7 +62,7 @@ build-interfaces: ${interfaces} # Run the specified test case sim: # vsim${questa_version} ${top_level}_optimized -c -do "run -a" - vsim${questa_version} ${top_level}_optimized +UVM_TESTNAME=${test_case} + vsim${questa_version} ${top_level}_optimized +UVM_TESTNAME=${test_case} -coverage -classdebug -do "do tb/wave/wave_core.do" $(tests): # Optimize top level diff --git a/src/fetch_fifo.sv b/src/fetch_fifo.sv index 8f5c29924..91cea8a78 100644 --- a/src/fetch_fifo.sv +++ b/src/fetch_fifo.sv @@ -48,7 +48,7 @@ module fetch_fifo ); - localparam DEPTH = 4; // must be 3 or greater + localparam DEPTH = 8; // must be a power of two typedef struct packed { branchpredict_sbe branch_predict; logic [63:0] address; @@ -85,7 +85,7 @@ module fetch_fifo assign out_valid_o = !empty || in_valid_q; // we need space for at least two instructions: the full flag is conditioned on that // but if we pop in the current cycle and we have one place left we can still fit two instructions alt - assign in_ready_o = !full || (out_ready_i && one_left); + assign in_ready_o = !full; // ---------------- // Input Registers From f602edb90ea305d6d59cb7acb461044a78e44ff5 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Sun, 14 May 2017 12:51:30 +0200 Subject: [PATCH 29/43] :shirt: Remove various warnings --- include/ariane_pkg.svh | 35 +++++++++++++++++------------------ src/branch_engine.sv | 12 ++++++++---- src/btb.sv | 25 ++++++++++++++----------- src/fetch_fifo.sv | 16 +++++++++------- 4 files changed, 48 insertions(+), 
40 deletions(-) diff --git a/include/ariane_pkg.svh b/include/ariane_pkg.svh index 8522d7bd9..41a72ea3c 100644 --- a/include/ariane_pkg.svh +++ b/include/ariane_pkg.svh @@ -34,20 +34,26 @@ package ariane_pkg; typedef struct packed { logic [63:0] cause; // cause of exception logic [63:0] tval; // additional information of causing exception (e.g.: instruction causing it), - // address of ld/st fault + // address of LD/ST fault logic valid; } exception; // branch-predict + // this is the struct we get back from ex stage and we will use it to update + // all the necessary data structures typedef struct packed { logic [63:0] pc; // pc of predict or mis-predict logic [63:0] target_address; // target address at which to jump, or not logic is_mispredict; // set if this was a mis-predict logic is_taken; // branch is taken + logic is_lower_16; // branch instruction is compressed and resides + // in the lower 16 bit of the word logic valid; // prediction with all its values is valid } branchpredict; // branchpredict scoreboard entry + // this is the struct which we will inject into the pipeline to guide the various + // units towards the correct branch decision and resolve typedef struct packed { logic [63:0] predict_address_i; // target address at which to jump, or not logic predict_taken_i; // set if this was a mis-predict @@ -114,7 +120,7 @@ package ariane_pkg; logic [14:12] funct3; logic [11:7] rd; logic [6:0] opcode; - } rtype; + } rtype_t; typedef struct packed { logic [31:20] imm; @@ -122,7 +128,7 @@ package ariane_pkg; logic [14:12] funct3; logic [11:7] rd; logic [6:0] opcode; - } itype; + } itype_t; typedef struct packed { logic [31:25] imm1; @@ -131,27 +137,21 @@ package ariane_pkg; logic [14:12] funct3; logic [11:7] imm0; logic [6:0] opcode; - } stype; + } stype_t; typedef struct packed { logic [31:12] funct3; logic [11:7] rd; logic [6:0] opcode; - } utype; + } utype_t; - // for some reason verilator complains about this union - // since I am not using it for 
simulation anyway and linting only - // it is not too bad to deactivate it, but a future me (or you) - // should look into that more thoroughly - `ifndef verilator typedef union packed { - logic [31:0] instr; - rtype rtype; - itype itype; - stype stype; - utype utype; + logic [31:0] instr; + rtype_t rtype; + itype_t itype; + stype_t stype; + utype_t utype; } instruction; - `endif // -------------------- // Opcodes @@ -247,10 +247,9 @@ package ariane_pkg; logic [7:0] address; } csr_addr_t; - // `ifndef VERILATOR typedef union packed { csr_reg_t address; csr_addr_t csr_decode; } csr_t; - // `endif + endpackage diff --git a/src/branch_engine.sv b/src/branch_engine.sv index 0c6c350c2..46b3b90f1 100644 --- a/src/branch_engine.sv +++ b/src/branch_engine.sv @@ -31,8 +31,8 @@ module branch_engine ( input logic valid_i, input branchpredict_sbe branch_predict_i, // this is the address we predicted - output branchpredict resolved_branch_o, // this is the actual address we are targeting - output exception branch_ex_o // branch exception out + output branchpredict resolved_branch_o, // this is the actual address we are targeting + output exception branch_ex_o // branch exception out ); logic [63:0] target_address; logic [63:0] next_pc; @@ -69,7 +69,7 @@ module branch_engine ( // calculate next PC, depending on whether the instruction is compressed or not this may be different next_pc = pc_i + ((is_compressed_instr_i) ? 
64'h2 : 64'h4); // calculate target address simple 64 bit addition - target_address = $signed(operand_c_i) + $signed(imm_i); + target_address = $unsigned($signed(operand_c_i) + $signed(imm_i)); // save PC - we need this to get the target row in the branch target buffer // we play this trick with the branch instruction which wraps a byte boundary: // |---------- Place the prediction on this PC @@ -79,7 +79,11 @@ module branch_engine ( // |____________________________________________________ // This will relief the prefetcher to re-fetch partially fetched unaligned branch instructions e.g.: // we don't have a back arch between prefetcher and decoder/instruction FIFO. - resolved_branch_o.pc = (is_compressed_instr_i || pc_i[1] == 1'b0) ? pc_i : (pc_i[63:2] + 64'h4); + resolved_branch_o.pc = (is_compressed_instr_i || pc_i[1] == 1'b0) ? pc_i : ({pc_i[63:2], 2'b0} + 64'h4); + // save if the branch instruction was in the lower 16 bit of the instruction word + // the first case is a compressed instruction which is in slot 0 + // the other case is a misaligned uncompressed instruction which we only predict in the next cycle (see notes above) + resolved_branch_o.is_lower_16 = (is_compressed_instr_i && pc_i[1] == 1'b0) || (!is_compressed_instr_i && pc_i[1] == 1'b1); // write target address which goes to pc gen resolved_branch_o.target_address = (comparison_result) ? 
target_address : next_pc; resolved_branch_o.is_taken = comparison_result; diff --git a/src/btb.sv b/src/btb.sv index 78ef5e603..ce04f5ba0 100644 --- a/src/btb.sv +++ b/src/btb.sv @@ -43,6 +43,7 @@ module btb #( logic valid; logic [63:0] target_address; logic [BITS_SATURATION_COUNTER-1:0] saturation_counter; + logic is_lower_16; } btb_n [NR_ENTRIES-1:0], btb_q [NR_ENTRIES-1:0]; logic [$clog2(NR_ENTRIES)-1:0] index, update_pc; @@ -55,36 +56,38 @@ module btb #( assign index = vpc_i[$clog2(NR_ENTRIES) + OFFSET - 1:OFFSET]; // we combinatorially predict the branch and the target address - assign is_branch_o = btb_q[$unsigned(index)].valid; - assign predict_taken_o = btb_q[$unsigned(index)].saturation_counter[BITS_SATURATION_COUNTER-1]; - assign branch_target_address_o = btb_q[$unsigned(index)].target_address; + assign is_branch_o = btb_q[index].valid; + assign predict_taken_o = btb_q[index].saturation_counter[BITS_SATURATION_COUNTER-1]; + assign branch_target_address_o = btb_q[index].target_address; // update on a mis-predict always_comb begin : update_branchpredict btb_n = btb_q; - saturation_counter = btb_q[$unsigned(update_pc)].saturation_counter; + saturation_counter = btb_q[update_pc].saturation_counter; if (branchpredict_i.valid) begin - btb_n[$unsigned(update_pc)].valid = 1'b1; + btb_n[update_pc].valid = 1'b1; // update saturation counter // first check if counter is already saturated in the positive regime e.g.: branch taken if (saturation_counter == {BITS_SATURATION_COUNTER{1'b1}}) begin // we can safely decrease it if (~branchpredict_i.is_taken) - btb_n[$unsigned(update_pc)].saturation_counter = saturation_counter - 1; + btb_n[update_pc].saturation_counter = saturation_counter - 1; // then check if it saturated in the negative regime e.g.: branch not taken end else if (saturation_counter == {BITS_SATURATION_COUNTER{1'b0}}) begin // we can safely increase it if (branchpredict_i.is_taken) - btb_n[$unsigned(update_pc)].saturation_counter = saturation_counter + 1; 
+ btb_n[update_pc].saturation_counter = saturation_counter + 1; end else begin // otherwise we are not in any boundaries and can decrease or increase it if (branchpredict_i.is_taken) - btb_n[$unsigned(update_pc)].saturation_counter = saturation_counter + 1; + btb_n[update_pc].saturation_counter = saturation_counter + 1; else - btb_n[$unsigned(update_pc)].saturation_counter = saturation_counter - 1; + btb_n[update_pc].saturation_counter = saturation_counter - 1; end // the target address is simply updated - btb_n[$unsigned(update_pc)].target_address = branchpredict_i.target_address; + btb_n[update_pc].target_address = branchpredict_i.target_address; + // as is the information whether this was a compressed branch + btb_n[update_pc].is_lower_16 = branchpredict_i.is_lower_16; end end @@ -93,7 +96,7 @@ module btb #( if(~rst_ni) begin // Bias the branches to be taken upon first arrival for (int i = 0; i < NR_ENTRIES; i++) - btb_q[i] <= '{1'b0, 64'b0, 2'b10}; + btb_q[i] <= '{1'b0, 64'b0, 2'b10, 1'b0}; end else begin // evict all entries if (flush_i) begin diff --git a/src/fetch_fifo.sv b/src/fetch_fifo.sv index 91cea8a78..ba39d1713 100644 --- a/src/fetch_fifo.sv +++ b/src/fetch_fifo.sv @@ -40,7 +40,7 @@ module fetch_fifo input logic in_valid_i, output logic in_ready_o, // output port - output branchpredict_sbe [1:0] branch_predict_o, + output branchpredict_sbe branch_predict_o, output logic [63:0] out_addr_o, output logic [31:0] out_rdata_o, output logic out_valid_o, @@ -65,7 +65,7 @@ module fetch_fifo fetch_entry mem_n[DEPTH-1:0], mem_q[DEPTH-1:0]; logic [$clog2(DEPTH)-1:0] read_pointer_n, read_pointer_q; logic [$clog2(DEPTH)-1:0] write_pointer_n, write_pointer_q; - int unsigned status_cnt_n, status_cnt_q; // this integer will be truncated by the synthesis tool + logic [$clog2(DEPTH)-1:0] status_cnt_n, status_cnt_q; // this integer will be truncated by the synthesis tool // status signals logic full, empty, one_left; @@ -78,9 +78,11 @@ module fetch_fifo // we always need 
two empty places // as it could happen that we get two compressed instructions/cycle + /* verilator lint_off WIDTH */ assign full = (status_cnt_q > DEPTH - 2); assign one_left = (status_cnt_q == DEPTH - 1); // two spaces are left assign empty = (status_cnt_q == 0); + /* verilator lint_on WIDTH */ // the output is valid if we are either empty or just got a valid assign out_valid_o = !empty || in_valid_q; // we need space for at least two instructions: the full flag is conditioned on that @@ -114,8 +116,8 @@ module fetch_fifo // -------------- always_comb begin : output_port // counter - automatic int status_cnt = status_cnt_q; - automatic int write_pointer = write_pointer_q; + automatic logic [$clog2(DEPTH)-1:0] status_cnt = status_cnt_q; + automatic logic [$clog2(DEPTH)-1:0] write_pointer = write_pointer_q; write_pointer_n = write_pointer_q; read_pointer_n = read_pointer_q; @@ -135,7 +137,7 @@ module fetch_fifo // it is compressed mem_n[write_pointer_q].branch_predict = branch_predict_q; mem_n[write_pointer_q].address = in_addr_q; - mem_n[write_pointer_q].instruction = in_rdata_q[15:0]; + mem_n[write_pointer_q].instruction = {16'b0, in_rdata_q[15:0]}; status_cnt++; write_pointer++; @@ -146,7 +148,7 @@ module fetch_fifo if (in_rdata_q[17:16] != 2'b11) begin mem_n[write_pointer_q + 1].branch_predict = branch_predict_q; mem_n[write_pointer_q + 1].address = {in_addr_q[63:2], 2'b10}; - mem_n[write_pointer_q + 1].instruction = in_rdata_q[31:16]; + mem_n[write_pointer_q + 1].instruction = {16'b0, in_rdata_q[31:16]}; status_cnt++; write_pointer++; @@ -191,7 +193,7 @@ module fetch_fifo if (in_rdata_q[17:16] != 2'b11) begin mem_n[write_pointer_q + 1].branch_predict = branch_predict_q; mem_n[write_pointer_q + 1].address = {in_addr_q[63:2], 2'b10}; - mem_n[write_pointer_q + 1].instruction = in_rdata_q[31:16]; + mem_n[write_pointer_q + 1].instruction = {16'b0, in_rdata_q[31:16]}; status_cnt++; write_pointer++; // unaligned access served From 
5fe47f078602b4aec9dea7ef4a23f5241c12e023 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Sun, 14 May 2017 13:59:51 +0200 Subject: [PATCH 30/43] Implement branch prediction path --- include/ariane_pkg.svh | 8 ++-- src/ariane.sv | 54 ++++++++++----------- src/branch_engine.sv | 4 +- src/btb.sv | 37 +++++++-------- src/fetch_fifo.sv | 40 ++++++++++------ src/if_stage.sv | 105 +++++++++++++++++------------------------ src/pcgen.sv | 75 +++++++++++++---------------- src/prefetch_buffer.sv | 70 ++++++++++++++------------- 8 files changed, 190 insertions(+), 203 deletions(-) diff --git a/include/ariane_pkg.svh b/include/ariane_pkg.svh index 41a72ea3c..6ec3f40eb 100644 --- a/include/ariane_pkg.svh +++ b/include/ariane_pkg.svh @@ -55,9 +55,11 @@ package ariane_pkg; // this is the struct which we will inject into the pipeline to guide the various // units towards the correct branch decision and resolve typedef struct packed { - logic [63:0] predict_address_i; // target address at which to jump, or not - logic predict_taken_i; // set if this was a mis-predict - logic valid; // branch is taken + logic [63:0] predict_address; // target address at which to jump, or not + logic predict_taken; // branch is taken + logic is_lower_16; // branch instruction is compressed and resides + // in the lower 16 bit of the word + logic valid; // this is a valid hint } branchpredict_sbe; typedef enum logic[3:0] { diff --git a/src/ariane.sv b/src/ariane.sv index 340bc5d9c..881892324 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -88,11 +88,11 @@ module ariane // -------------- // PCGEN <-> IF // -------------- - logic [63:0] pc_pcgen_if; - logic set_pc_pcgen_if; - logic is_branch_pcgen_if; + logic [63:0] fetch_address_pcgen_if; + logic set_fetch_address_pcgen_if; + branchpredict_sbe branch_predict_pcgen_if; logic if_ready_if_pcgen; - logic pc_valid_pcgen_if; + logic fetch_valid_pcgen_if; // -------------- // PCGEN <-> EX // -------------- @@ -111,7 +111,7 @@ module ariane logic [31:0] 
instr_rdata_if_id; logic decode_ack_id_if; logic is_compressed_if_id; - logic [63:0] pc_id_if_id; + logic [63:0] pc_if_id; exception exception_if_id; branchpredict_sbe branch_predict_if_id; logic instr_is_compressed_if_id; @@ -223,9 +223,9 @@ module ariane .flush_i ( flush ), .if_ready_i ( ~if_ready_if_pcgen ), .resolved_branch_i ( resolved_branch ), - .pc_if_o ( pc_pcgen_if ), - .pc_if_valid_o ( pc_valid_pcgen_if ), - .is_branch_o ( is_branch_pcgen_if ), + .fetch_address_o ( fetch_address_pcgen_if ), + .fetch_valid_o ( fetch_valid_pcgen_if ), + .branch_predict_o ( branch_predict_pcgen_if ), .boot_addr_i ( boot_addr_i ), .epc_i ( epc_commit_pcgen ), .trap_vector_base_i ( trap_vector_base_commit_pcgen ), @@ -236,25 +236,25 @@ module ariane // IF // --------- if_stage if_stage_i ( - .flush_i ( flush_ctrl_if ), - .pc_if_valid_i ( pc_valid_pcgen_if ), - .if_busy_o ( if_ready_if_pcgen ), - .id_ready_i ( ready_id_if ), - .is_branch_i ( is_branch_pcgen_if ), - .fetch_addr_i ( pc_pcgen_if ), - .instr_req_o ( fetch_req_if_ex ), - .instr_addr_o ( fetch_vaddr_if_ex ), - .instr_gnt_i ( fetch_gnt_ex_if ), - .instr_rvalid_i ( fetch_valid_ex_if ), - .instr_rdata_i ( fetch_rdata_ex_if ), + .flush_i ( flush_ctrl_if ), + .if_busy_o ( if_ready_if_pcgen ), + .id_ready_i ( ready_id_if ), + .fetch_address_i ( fetch_address_pcgen_if ), + .fetch_valid_i ( fetch_valid_pcgen_if ), + .branch_predict_i ( branch_predict_pcgen_if ), + .instr_req_o ( fetch_req_if_ex ), + .instr_addr_o ( fetch_vaddr_if_ex ), + .instr_gnt_i ( fetch_gnt_ex_if ), + .instr_rvalid_i ( fetch_valid_ex_if ), + .instr_rdata_i ( fetch_rdata_ex_if ), - .pc_id_o ( pc_id_if_id ), - .instr_valid_id_o ( instr_valid_if_id ), - .instr_ack_i ( decode_ack_id_if ), - .instr_rdata_id_o ( instr_rdata_if_id ), - .instr_is_compressed_o ( instr_is_compressed_if_id ), - .branch_predict_o ( branch_predict_if_id ), - .ex_o ( exception_if_id ), + .pc_o ( pc_if_id ), + .instr_valid_o ( instr_valid_if_id ), + .instr_ack_i ( decode_ack_id_if ), 
+ .instr_rdata_o ( instr_rdata_if_id ), + .instr_is_compressed_o ( instr_is_compressed_if_id ), + .branch_predict_o ( branch_predict_if_id ), + .ex_o ( exception_if_id ), .* ); // --------- @@ -274,7 +274,7 @@ module ariane .instr_is_compressed_i ( instr_is_compressed_if_id ), .instruction_valid_i ( instr_valid_if_id ), .decoded_instr_ack_o ( decode_ack_id_if ), - .pc_if_i ( pc_id_if_id ), // PC from if + .pc_if_i ( pc_if_id ), // PC from if .ex_if_i ( exception_if_id ), // exception from if .ready_o ( ready_id_if ), // Functional Units diff --git a/src/branch_engine.sv b/src/branch_engine.sv index 46b3b90f1..465ffb6ca 100644 --- a/src/branch_engine.sv +++ b/src/branch_engine.sv @@ -92,8 +92,8 @@ module branch_engine ( // we mis-predicted e.g.: the predicted address is unequal to the actual address if (target_address[0] == 1'b0) begin // TODO in case of branch which is not taken it is not necessary to check for the address - if ( target_address != branch_predict_i.predict_address_i // we mis-predicted the address of the branch - || branch_predict_i.predict_taken_i != comparison_result // we mis-predicted the outcome of the branch + if ( target_address != branch_predict_i.predict_address // we mis-predicted the address of the branch + || branch_predict_i.predict_taken != comparison_result // we mis-predicted the outcome of the branch || branch_predict_i.valid == 1'b0 // this means branch-prediction thought it was no branch but in reality it was one ) begin resolved_branch_o.is_mispredict = 1'b1; diff --git a/src/btb.sv b/src/btb.sv index ce04f5ba0..2ece9e2c1 100644 --- a/src/btb.sv +++ b/src/btb.sv @@ -23,16 +23,14 @@ module btb #( parameter int BITS_SATURATION_COUNTER = 2 ) ( - input logic clk_i, // Clock - input logic rst_ni, // Asynchronous reset active low - input logic flush_i, // flush the btb + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // flush the btb - input logic [63:0] vpc_i, // virtual PC 
from IF stage - input branchpredict branchpredict_i, // a miss-predict happened -> update data structure + input logic [63:0] vpc_i, // virtual PC from IF stage + input branchpredict branch_predict_i, // a mis-predict happened -> update data structure - output logic is_branch_o, // instruction at vpc_i is a branch - output logic predict_taken_o, // the branch is taken - output logic [63:0] branch_target_address_o // instruction has the following target address + output branchpredict_sbe branch_predict_o // branch prediction for issuing to the pipeline ); // number of bits which are not used for indexing localparam OFFSET = 2; @@ -52,42 +50,43 @@ module btb #( // get actual index positions // we ignore the 0th bit since all instructions are aligned on // a half word boundary - assign update_pc = branchpredict_i.pc[$clog2(NR_ENTRIES) + OFFSET - 1:OFFSET]; + assign update_pc = branch_predict_i.pc[$clog2(NR_ENTRIES) + OFFSET - 1:OFFSET]; assign index = vpc_i[$clog2(NR_ENTRIES) + OFFSET - 1:OFFSET]; // we combinatorially predict the branch and the target address - assign is_branch_o = btb_q[index].valid; - assign predict_taken_o = btb_q[index].saturation_counter[BITS_SATURATION_COUNTER-1]; - assign branch_target_address_o = btb_q[index].target_address; + assign branch_predict_o.valid = btb_q[index].valid; + assign branch_predict_o.predict_taken = btb_q[index].saturation_counter[BITS_SATURATION_COUNTER-1]; + assign branch_predict_o.predict_address = btb_q[index].target_address; + assign branch_predict_o.is_lower_16 = btb_q[index].is_lower_16; // update on a mis-predict - always_comb begin : update_branchpredict + always_comb begin : update_branch_predict btb_n = btb_q; saturation_counter = btb_q[update_pc].saturation_counter; - if (branchpredict_i.valid) begin + if (branch_predict_i.valid) begin btb_n[update_pc].valid = 1'b1; // update saturation counter // first check if counter is already saturated in the positive regime e.g.: branch taken if (saturation_counter == 
{BITS_SATURATION_COUNTER{1'b1}}) begin // we can safely decrease it - if (~branchpredict_i.is_taken) + if (~branch_predict_i.is_taken) btb_n[update_pc].saturation_counter = saturation_counter - 1; // then check if it saturated in the negative regime e.g.: branch not taken end else if (saturation_counter == {BITS_SATURATION_COUNTER{1'b0}}) begin // we can safely increase it - if (branchpredict_i.is_taken) + if (branch_predict_i.is_taken) btb_n[update_pc].saturation_counter = saturation_counter + 1; end else begin // otherwise we are not in any boundaries and can decrease or increase it - if (branchpredict_i.is_taken) + if (branch_predict_i.is_taken) btb_n[update_pc].saturation_counter = saturation_counter + 1; else btb_n[update_pc].saturation_counter = saturation_counter - 1; end // the target address is simply updated - btb_n[update_pc].target_address = branchpredict_i.target_address; + btb_n[update_pc].target_address = branch_predict_i.target_address; // as is the information whether this was a compressed branch - btb_n[update_pc].is_lower_16 = branchpredict_i.is_lower_16; + btb_n[update_pc].is_lower_16 = branch_predict_i.is_lower_16; end end diff --git a/src/fetch_fifo.sv b/src/fetch_fifo.sv index ba39d1713..6377de336 100644 --- a/src/fetch_fifo.sv +++ b/src/fetch_fifo.sv @@ -145,7 +145,9 @@ module fetch_fifo // _____________________________________________ // | compressed 2 [31:16] | compressed 1[15:0] | // |____________________________________________ - if (in_rdata_q[17:16] != 2'b11) begin + // check if the lower compressed instruction was no branch otherwise we will need to squash this instruction + // but only if we predicted it to be taken, the predict was on the lower 16 bit compressed instruction + if (in_rdata_q[17:16] != 2'b11 && !(branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16)) begin mem_n[write_pointer_q + 1].branch_predict = branch_predict_q; mem_n[write_pointer_q + 1].address = {in_addr_q[63:2], 2'b10}; 
mem_n[write_pointer_q + 1].instruction = {16'b0, in_rdata_q[31:16]}; @@ -158,12 +160,15 @@ module fetch_fifo // |____________________________________________________ end else begin // we've got an unaligned 32 bit instruction - // save the lower 16 bit - unaligned_instr_n = in_rdata_q[31:16]; - // and that it was unaligned - unaligned_n = 1'b1; - // save the address as well - unaligned_address_n = {in_addr_q[63:2], 2'b10}; + // check if the previous instruction was no predicted taken branch + if (!(branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16)) begin + // save the lower 16 bit + unaligned_instr_n = in_rdata_q[31:16]; + // and that it was unaligned + unaligned_n = 1'b1; + // save the address as well + unaligned_address_n = {in_addr_q[63:2], 2'b10}; + end // this does not consume space in the FIFO end end else begin @@ -188,9 +193,11 @@ module fetch_fifo // whats up with the other upper 16 bit of this instruction // is the second instruction also compressed, like: // _____________________________________________ - // | compressed 2 [31:16] | compressed 1[15:0] | + // | compressed 2 [31:16] | unaligned[31:16] | // |____________________________________________ - if (in_rdata_q[17:16] != 2'b11) begin + // check if the lower compressed instruction was no branch otherwise we will need to squash this instruction + // but only if we predicted it to be taken, the predict was on the lower 16 bit compressed instruction + if (in_rdata_q[17:16] != 2'b11 && !(branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16)) begin mem_n[write_pointer_q + 1].branch_predict = branch_predict_q; mem_n[write_pointer_q + 1].address = {in_addr_q[63:2], 2'b10}; mem_n[write_pointer_q + 1].instruction = {16'b0, in_rdata_q[31:16]}; @@ -204,12 +211,15 @@ module fetch_fifo // |____________________________________________________ end else begin // we've got an unaligned 32 bit instruction - // save the lower 16 bit - 
unaligned_instr_n = in_rdata_q[31:16]; - // and that it was unaligned - unaligned_n = 1'b1; - // save the address as well - unaligned_address_n = {in_addr_q[63:2], 2'b10}; + // check if the previous instruction was no predicted taken branch + if (!(branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16)) begin + // save the lower 16 bit + unaligned_instr_n = in_rdata_q[31:16]; + // and that it was unaligned + unaligned_n = 1'b1; + // save the address as well + unaligned_address_n = {in_addr_q[63:2], 2'b10}; + end // this does not consume space in the FIFO end end diff --git a/src/if_stage.sv b/src/if_stage.sv index f9dde15d6..c5e99f773 100644 --- a/src/if_stage.sv +++ b/src/if_stage.sv @@ -31,13 +31,14 @@ import ariane_pkg::*; module if_stage ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low + // control signals input logic flush_i, output logic if_busy_o, // is the IF stage busy fetching instructions? input logic id_ready_i, - // ctrl flow instruction in - input logic [63:0] fetch_addr_i, - input logic pc_if_valid_i, - input logic is_branch_i, // the new PC was a branch e.g.: branch or jump + // fetch direction from PC Gen + input logic [63:0] fetch_address_i, + input logic fetch_valid_i, + input branchpredict_sbe branch_predict_i, // instruction cache interface output logic instr_req_o, output logic [63:0] instr_addr_o, @@ -45,13 +46,13 @@ module if_stage ( input logic instr_rvalid_i, input logic [31:0] instr_rdata_i, // Output of IF Pipeline stage - output logic [63:0] pc_id_o, - output logic instr_valid_id_o, // instruction in IF/ID pipeline is valid + output logic [63:0] pc_o, + output logic instr_valid_o, // instruction in IF/ID pipeline is valid input logic instr_ack_i, - output logic [31:0] instr_rdata_id_o, // read instruction is sampled and sent to ID stage for decoding + output logic [31:0] instr_rdata_o, // read instruction is sampled and sent to ID stage for decoding output logic 
instr_is_compressed_o, output exception ex_o, - output branchpredict_sbe branch_predict_o // branchpredict out + output branchpredict_sbe branch_predict_o // branch prediction out ); // output logic illegal_compressed_instr_o -> in exception logic fetch_valid; @@ -61,18 +62,16 @@ module if_stage ( logic [63:0] addr_o; logic illegal_compressed_instr; logic prefetch_busy; + branchpredict_sbe branch_predict; // --------------------- // IF <-> ID Registers // --------------------- - logic [63:0] pc_id_n, pc_id_q; - logic instr_valid_id_n, instr_valid_id_q; - logic [31:0] instr_rdata_id_n, instr_rdata_id_q; - logic instr_is_compressed_n, instr_is_compressed_q; - + logic [63:0] pc_n, pc_q; + logic instr_valid_n, instr_valid_q; + logic [31:0] instr_rdata_n, instr_rdata_q; + logic instr_is_compressed_n, instr_is_compressed_q; // branch predict registers - logic branch_valid_n, branch_valid_q; - logic [63:0] predict_address_n, predict_address_q; - logic predict_taken_n, predict_taken_q; + logic branch_predict_n, branch_predict_q; // compressed instruction decoding, or more precisely compressed instruction expander // since it does not matter where we decompress instructions, we do it here to ease timing closure @@ -85,45 +84,37 @@ module if_stage ( // Pre-fetch buffer, caches a fixed number of instructions prefetch_buffer prefetch_buffer_i ( - .clk ( clk_i ), - .rst_n ( rst_ni ), - .flush_i ( flush_i ), - - .fetch_addr_i ( {fetch_addr_i[63:1], 1'b0} ), - .fetch_valid_i ( pc_if_valid_i ), .ready_i ( instr_ack_i ), .valid_o ( fetch_valid ), .rdata_o ( instr_rdata ), .addr_o ( addr_o ), - + .branch_predict_o ( branch_predict ), // goes to instruction memory / instruction cache - .instr_req_o ( instr_req_o ), - .instr_addr_o ( instr_addr_o ), - .instr_gnt_i ( instr_gnt_i ), - .instr_rvalid_i ( instr_rvalid_i ), - .instr_rdata_i ( instr_rdata_i ), // Prefetch Buffer Status - .busy_o ( prefetch_busy ) + .busy_o ( prefetch_busy ), + .* ); - assign if_busy_o = prefetch_busy; + 
assign if_busy_o = prefetch_busy; - assign pc_id_o = pc_id_q; - assign instr_valid_id_o = instr_valid_id_q; - assign instr_rdata_id_o = instr_rdata_id_q; + assign pc_o = pc_q; + assign instr_valid_o = instr_valid_q; + assign instr_rdata_o = instr_rdata_q; assign instr_is_compressed_o = instr_is_compressed_q; + assign branch_predict_o = branch_predict_q; // Pipeline registers always_comb begin - // Instruction is valid - pc_id_n = addr_o; - instr_valid_id_n = fetch_valid; - instr_rdata_id_n = decompressed_instruction; - instr_is_compressed_n = instr_is_compressed; + // Instruction is valid, latch new data + pc_n = addr_o; + instr_valid_n = fetch_valid; + instr_rdata_n = decompressed_instruction; + instr_is_compressed_n = instr_is_compressed; + branch_predict_n = branch_predict; if (flush_i) begin - instr_valid_id_n = 1'b0; + instr_valid_n = 1'b0; end // exception forwarding in here end @@ -133,31 +124,23 @@ module if_stage ( // -------------------------------------------------------------- always_ff @(posedge clk_i, negedge rst_ni) begin : IF_ID_PIPE_REGISTERS if (~rst_ni) begin - ex_o <= '{default: 0}; - branch_valid_q <= 1'b0; - predict_address_q <= 64'b0; - predict_taken_q <= 1'b0; - pc_id_q <= 64'b0; - instr_valid_id_q <= 1'b0; - instr_rdata_id_q <= 32'b0; - instr_is_compressed_q <= 1'b0; + ex_o <= '{default: 0}; + branch_predict_q <= '{default: 0}; + pc_q <= 64'b0; + instr_valid_q <= 1'b0; + instr_rdata_q <= 32'b0; + instr_is_compressed_q <= 1'b0; end else begin - pc_id_q <= pc_id_n; - instr_valid_id_q <= instr_valid_id_n; - instr_rdata_id_q <= instr_rdata_id_n; - instr_is_compressed_q <= instr_is_compressed_n; - - predict_address_q <= predict_address_n; - predict_taken_q <= predict_taken_n; - branch_valid_q <= branch_valid_n; - - ex_o.cause <= 64'b0; // TODO: Output exception - ex_o.tval <= 64'b0; // TODO: Output exception - ex_o.valid <= 1'b0; //illegal_compressed_instr; // TODO: Output exception + pc_q <= pc_n; + instr_valid_q <= instr_valid_n; + 
instr_rdata_q <= instr_rdata_n; + instr_is_compressed_q <= instr_is_compressed_n; + branch_predict_q <= branch_predict_n; + ex_o.cause <= 64'b0; // TODO: Output exception + ex_o.tval <= 64'b0; // TODO: Output exception + ex_o.valid <= 1'b0; //illegal_compressed_instr; // TODO: Output exception end end - - assign branch_predict_o = {predict_address_q, predict_taken_q, branch_valid_q}; //------------- // Assertions //------------- diff --git a/src/pcgen.sv b/src/pcgen.sv index 0cb51ce70..f969b82b8 100644 --- a/src/pcgen.sv +++ b/src/pcgen.sv @@ -20,34 +20,29 @@ import ariane_pkg::*; module pcgen ( - input logic clk_i, // Clock - input logic rst_ni, // Asynchronous reset active low - - input logic fetch_enable_i, - input logic flush_i, - input logic if_ready_i, - input branchpredict resolved_branch_i, // from controller signaling a branchpredict -> update BTB + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + // control signals + input logic fetch_enable_i, + input logic flush_i, + input logic if_ready_i, + input branchpredict resolved_branch_i, // from controller signaling a branch_predict -> update BTB // to IF - output logic [63:0] pc_if_o, // new PC - output logic pc_if_valid_o, // the PC is valid - output logic is_branch_o, + output logic [63:0] fetch_address_o, // new PC (address because we do not distinguish instructions) + output logic fetch_valid_o, // the PC (address) is valid + output branchpredict_sbe branch_predict_o, // pass on the information if this is speculative // global input - input logic [63:0] boot_addr_i, + input logic [63:0] boot_addr_i, // CSR input - input logic [63:0] epc_i, // return from exception - input logic [63:0] trap_vector_base_i, // base of trap vector - input exception ex_i // exception in - from commit + input logic [63:0] epc_i, // return from exception + input logic [63:0] trap_vector_base_i, // base of trap vector + input exception ex_i // exception in - from commit ); - logic [63:0] 
branch_target_address; - logic predict_taken; - logic [63:0] npc_n, npc_q; - logic is_branch; - logic is_branch_n, is_branch_q; + logic [63:0] npc_n, npc_q; + branchpredict_sbe branch_predict_btb; - - assign pc_if_o = npc_q; - assign is_branch_o = is_branch_q; + assign pc_if_o = npc_q; btb #( .NR_ENTRIES(64), @@ -55,40 +50,36 @@ module pcgen ( ) btb_i ( - // Use the PC from last cycle to perform branch lookup + // Use the PC from last cycle to perform branch lookup for the current cycle .vpc_i ( npc_q ), - .branchpredict_i ( resolved_branch_i ), - .is_branch_o ( is_branch ), - .predict_taken_o ( predict_taken ), - .branch_target_address_o ( branch_target_address ), + .branch_predict_i ( resolved_branch_i ), // update port + .branch_predict_o ( branch_predict_btb ), // read port .* ); // ------------------- // Next PC // ------------------- - // next PC (npc) can come from: + // next PC (NPC) can come from: // 1. Exception // 2. Return from exception // 3. Predicted branch // 4. Debug // 5. Boot address always_comb begin : npc_select - // default assignment + branch_predict_o = branch_predict_btb; + fetch_valid_o = 1'b1; + + // 0. Default assignment // default is a consecutive PC if (if_ready_i && fetch_enable_i) npc_n = {npc_q[62:2], 2'b0} + 64'h4; else // or keep the PC stable if IF is not ready npc_n = npc_q; - - pc_if_valid_o = 1'b0; - is_branch_n = is_branch; - // 4. Predict taken - if (is_branch && predict_taken) begin - npc_n = branch_target_address; + if (branch_predict_btb.valid && branch_predict_btb.predict_taken) begin + npc_n = branch_predict_btb.predict_address; end // 1.Debug - // 3. Control flow change request if (resolved_branch_i.is_mispredict) begin // we already got the correct target address @@ -96,28 +87,26 @@ module pcgen ( end // 2. Exception if (ex_i.valid) begin - npc_n = trap_vector_base_i; - is_branch_n = 1'b0; + npc_n = trap_vector_base_i; + branch_predict_o.valid = 1'b0; end // 3. 
Return from exception // fetch enable - if (fetch_enable_i) begin - pc_if_valid_o = 1'b1; + if (!fetch_enable_i) begin + fetch_valid_o = 1'b0; end end // ------------------- // Sequential Process // ------------------- - // PCGEN -> IF Register + // PCGEN -> IF Pipeline Stage always_ff @(posedge clk_i or negedge rst_ni) begin if(~rst_ni) begin npc_q <= boot_addr_i; - is_branch_q <= 1'b0; end else begin npc_q <= npc_n; - is_branch_q <= is_branch_n; end end diff --git a/src/prefetch_buffer.sv b/src/prefetch_buffer.sv index ee8bd7a16..4af831a73 100644 --- a/src/prefetch_buffer.sv +++ b/src/prefetch_buffer.sv @@ -21,27 +21,31 @@ // long critical paths to the instruction cache // // // //////////////////////////////////////////////////////////////////////////////// +import ariane_pkg::*; module prefetch_buffer ( - input logic clk, - input logic rst_n, - input logic flush_i, + input logic clk_i, + input logic rst_ni, + input logic flush_i, - input logic [63:0] fetch_addr_i, - input logic fetch_valid_i, - - input logic ready_i, - output logic valid_o, - output logic [63:0] addr_o, - output logic [31:0] rdata_o, + // input side + input logic [63:0] fetch_address_i, + input logic fetch_valid_i, + input branchpredict_sbe branch_predict_i, + // output side + input logic ready_i, + output logic valid_o, + output logic [63:0] addr_o, + output logic [31:0] rdata_o, + output branchpredict_sbe branch_predict_o, // goes to instruction memory / instruction cache - output logic instr_req_o, - input logic instr_gnt_i, - output logic [63:0] instr_addr_o, - input logic [31:0] instr_rdata_i, - input logic instr_rvalid_i, + output logic instr_req_o, + input logic instr_gnt_i, + output logic [63:0] instr_addr_o, + input logic [31:0] instr_rdata_i, + input logic instr_rvalid_i, // Prefetch Buffer Status output logic busy_o @@ -49,12 +53,12 @@ module prefetch_buffer enum logic [1:0] {IDLE, WAIT_GNT, WAIT_RVALID, WAIT_ABORTED } CS, NS; - logic addr_valid; - logic [63:0] instr_addr_q; - logic 
fifo_valid; - logic fifo_ready; - logic fifo_clear; - + logic addr_valid; + logic [63:0] instr_addr_q; + logic fifo_valid; + logic fifo_ready; + logic fifo_clear; + branchpredict_sbe branchpredict_q; //--------------------------------- // Prefetch buffer status //--------------------------------- @@ -67,16 +71,18 @@ module prefetch_buffer // consumes addresses and rdata //--------------------------------- fetch_fifo fifo_i ( - .clk_i ( clk ), - .rst_ni ( rst_n ), + .clk_i ( clk_i ), + .rst_ni ( rst_n_i ), .clear_i ( flush_i ), + .branch_predict_i ( branchpredict_q ), .in_addr_i ( instr_addr_q ), .in_rdata_i ( instr_rdata_i ), .in_valid_i ( fifo_valid ), .in_ready_o ( fifo_ready ), + .branch_predict_o ( branch_predict_o ), .out_valid_o ( valid_o ), .out_ready_i ( ready_i ), .out_rdata_o ( rdata_o ), @@ -91,14 +97,14 @@ module prefetch_buffer always_comb begin instr_req_o = 1'b0; - instr_addr_o = fetch_addr_i; + instr_addr_o = fetch_address_i; fifo_valid = 1'b0; NS = CS; unique case(CS) // default state, not waiting for requested data IDLE: begin - instr_addr_o = fetch_addr_i; + instr_addr_o = fetch_address_i; instr_req_o = 1'b0; if (fifo_ready && fetch_valid_i) begin @@ -127,7 +133,7 @@ module prefetch_buffer // we wait for rvalid, after that we are ready to serve a new request WAIT_RVALID: begin - instr_addr_o = fetch_addr_i; + instr_addr_o = fetch_address_i; if (fifo_ready) begin // prepare for next request @@ -189,19 +195,17 @@ module prefetch_buffer // Registers //------------- - always_ff @(posedge clk, negedge rst_n) + always_ff @(posedge clk_i, negedge rst_n_i) begin - if(rst_n == 1'b0) - begin + if (~rst_ni) begin CS <= IDLE; instr_addr_q <= '0; - end - else - begin + branchpredict_q <= '{default: 0}; + end else begin CS <= NS; - if (addr_valid) begin instr_addr_q <= instr_addr_o; + branchpredict_q <= branch_predict_i; end end end From 851faa78515548a15eb2e05947ee76a00831e3ed Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Sun, 14 May 2017 20:11:39 +0200 
Subject: [PATCH 31/43] :white_check_mark: Add fetch fifo testbench stub --- Makefile | 2 +- src/branch_engine.sv | 38 +++---- src/btb.sv | 2 +- src/fetch_fifo.sv | 127 ++++++++++++----------- src/if_stage.sv | 14 +-- src/pcgen.sv | 4 +- src/prefetch_buffer.sv | 48 +++++---- tb/agents/fetch_fifo_if/fetch_fifo_if.sv | 53 ++++++++++ tb/fetch_fifo_tb.sv | 75 +++++++++++++ tb/test/fetch_fifo/fetch_fifo_pkg.sv | 23 ++++ 10 files changed, 280 insertions(+), 106 deletions(-) create mode 100755 tb/agents/fetch_fifo_if/fetch_fifo_if.sv create mode 100755 tb/fetch_fifo_tb.sv create mode 100755 tb/test/fetch_fifo/fetch_fifo_pkg.sv diff --git a/Makefile b/Makefile index 91b0f148a..e8d4e763b 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ library = work top_level = core_tb test_top_level = core_tb # test targets -tests = alu scoreboard fifo mem_arbiter store_queue lsu core +tests = alu scoreboard fifo mem_arbiter store_queue lsu core fetch_fifo # UVM agents agents = include/ariane_pkg.svh $(wildcard tb/agents/*/*.sv) # path to interfaces diff --git a/src/branch_engine.sv b/src/branch_engine.sv index 465ffb6ca..a896b2b66 100644 --- a/src/branch_engine.sv +++ b/src/branch_engine.sv @@ -66,29 +66,31 @@ module branch_engine ( resolved_branch_o.is_taken = 1'b0; resolved_branch_o.valid = valid_i; resolved_branch_o.is_mispredict = 1'b0; + resolved_branch_o.is_lower_16 = 1'b0; // calculate next PC, depending on whether the instruction is compressed or not this may be different next_pc = pc_i + ((is_compressed_instr_i) ? 
64'h2 : 64'h4); // calculate target address simple 64 bit addition target_address = $unsigned($signed(operand_c_i) + $signed(imm_i)); - // save PC - we need this to get the target row in the branch target buffer - // we play this trick with the branch instruction which wraps a byte boundary: - // |---------- Place the prediction on this PC - // \/ - // ____________________________________________________ - // |branch [15:0] | branch[31:16] | compressed 1[15:0] | - // |____________________________________________________ - // This will relief the prefetcher to re-fetch partially fetched unaligned branch instructions e.g.: - // we don't have a back arch between prefetcher and decoder/instruction FIFO. - resolved_branch_o.pc = (is_compressed_instr_i || pc_i[1] == 1'b0) ? pc_i : ({pc_i[63:2], 2'b0} + 64'h4); - // save if the branch instruction was in the lower 16 bit of the instruction word - // the first case is a compressed instruction which is in slot 0 - // the other case is a misaligned uncompressed instruction which we only predict in the next cycle (see notes above) - resolved_branch_o.is_lower_16 = (is_compressed_instr_i && pc_i[1] == 1'b0) || (!is_compressed_instr_i && pc_i[1] == 1'b1); - // write target address which goes to pc gen - resolved_branch_o.target_address = (comparison_result) ? 
target_address : next_pc; - resolved_branch_o.is_taken = comparison_result; - // we've detected a branch in ID with the following parameters + if (valid_i) begin + // save PC - we need this to get the target row in the branch target buffer + // we play this trick with the branch instruction which wraps a byte boundary: + // |---------- Place the prediction on this PC + // \/ + // ____________________________________________________ + // |branch [15:0] | branch[31:16] | compressed 1[15:0] | + // |____________________________________________________ + // This will relief the prefetcher to re-fetch partially fetched unaligned branch instructions e.g.: + // we don't have a back arch between prefetcher and decoder/instruction FIFO. + resolved_branch_o.pc = (is_compressed_instr_i || pc_i[1] == 1'b0) ? pc_i : ({pc_i[63:2], 2'b0} + 64'h4); + // save if the branch instruction was in the lower 16 bit of the instruction word + // the first case is a compressed instruction which is in slot 0 + // the other case is a misaligned uncompressed instruction which we only predict in the next cycle (see notes above) + resolved_branch_o.is_lower_16 = (is_compressed_instr_i && pc_i[1] == 1'b0) || (!is_compressed_instr_i && pc_i[1] == 1'b1); + // write target address which goes to pc gen + resolved_branch_o.target_address = (comparison_result) ? 
target_address : next_pc; + resolved_branch_o.is_taken = comparison_result; + // we've detected a branch in ID with the following parameters // we mis-predicted e.g.: the predicted address is unequal to the actual address if (target_address[0] == 1'b0) begin // TODO in case of branch which is not taken it is not necessary to check for the address diff --git a/src/btb.sv b/src/btb.sv index 2ece9e2c1..75364a8a2 100644 --- a/src/btb.sv +++ b/src/btb.sv @@ -19,7 +19,7 @@ import ariane_pkg::*; module btb #( - parameter int NR_ENTRIES = 64, + parameter int NR_ENTRIES = 1024, parameter int BITS_SATURATION_COUNTER = 2 ) ( diff --git a/src/fetch_fifo.sv b/src/fetch_fifo.sv index 6377de336..884e5c7d8 100644 --- a/src/fetch_fifo.sv +++ b/src/fetch_fifo.sv @@ -1,36 +1,30 @@ -//////////////////////////////////////////////////////////////////////////////// -// Copyright (C) 2017 ETH Zurich, University of Bologna // -// All rights reserved. // -// // -// This code is under development and not yet released to the public. // -// Until it is released, the code is under the copyright of ETH Zurich // -// and the University of Bologna, and may contain unpublished work. // -// Any reuse/redistribution should only be under explicit permission. // -// // -// Bug fixes and contributions will eventually be released under the // -// SolderPad open hardware license and under the copyright of ETH Zurich // -// and the University of Bologna. 
// -// // -// Engineer: Andreas Traber - atraber@iis.ee.ethz.ch // -// // -// Design Name: Fetch Fifo for 32 bit memory interface // -// Project Name: zero-riscy // -// Language: SystemVerilog // -// // -// Description: Fetch fifo // -//////////////////////////////////////////////////////////////////////////////// +// Author: Florian Zaruba, ETH Zurich +// Date: 14.05.2017 +// Description: Dual Port fetch FIFO with instruction aligner and support for compressed instructions +// +// Copyright (C) 2017 ETH Zurich, University of Bologna +// All rights reserved. +// +// This code is under development and not yet released to the public. +// Until it is released, the code is under the copyright of ETH Zurich and +// the University of Bologna, and may contain confidential and/or unpublished +// work. Any reuse/redistribution is strictly forbidden without written +// permission from ETH Zurich. +// +// Bug fixes and contributions will eventually be released under the +// SolderPad open hardware license in the context of the PULP platform +// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the +// University of Bologna. +// import ariane_pkg::*; -// input port: send address one cycle before the data -// clear_i clears the FIFO for the following cycle. 
module fetch_fifo ( input logic clk_i, input logic rst_ni, // control signals - input logic clear_i, // clears the contents of the fifo - // input port + input logic flush_i, // clears the contents of the FIFO -> quasi reset // branch prediction at in_addr_i address, as this is an address and not PC it can be the case // that we have two compressed instruction (or one compressed instruction and one unaligned instruction) so we need // keep two prediction inputs: [c1|c0] <- prediction for c1 and c0 @@ -68,7 +62,7 @@ module fetch_fifo logic [$clog2(DEPTH)-1:0] status_cnt_n, status_cnt_q; // this integer will be truncated by the synthesis tool // status signals - logic full, empty, one_left; + logic full, empty; // the last instruction was unaligned logic unaligned_n, unaligned_q; // save the unaligned part of the instruction to this ff @@ -79,8 +73,7 @@ module fetch_fifo // we always need two empty places // as it could happen that we get two compressed instructions/cycle /* verilator lint_off WIDTH */ - assign full = (status_cnt_q > DEPTH - 2); - assign one_left = (status_cnt_q == DEPTH - 1); // two spaces are left + assign full = (status_cnt_q >= DEPTH - 2); assign empty = (status_cnt_q == 0); /* verilator lint_on WIDTH */ // the output is valid if we are either empty or just got a valid @@ -106,7 +99,7 @@ module fetch_fifo branch_predict_n = branch_predict_i; end // flush the input registers - if (clear_i) begin + if (flush_i) begin in_valid_n = 1'b0; end end @@ -251,17 +244,17 @@ module fetch_fifo out_rdata_o = {in_rdata_q[15:0], unaligned_instr_q}; end // there is currently no valid instruction in the pipeline register push this instruction - if (out_ready_i || !pipelein_register_valid_q) begin - pipelein_register_valid_n = 1'b1; - read_pointer_n = read_pointer_q + 1; - status_cnt--; - end + // if (out_ready_i) begin + // pipelein_register_valid_n = 1'b1; + // read_pointer_n = read_pointer_q + 1; + // status_cnt--; + // end // regular read but do not issue if we 
are already empty // this can happen since we have an output latch in the IF stage and the ID stage will only know a cycle // later that we do not have any valid instructions anymore end - if (out_ready_i && !empty) begin + if (out_ready_i) begin read_pointer_n = read_pointer_q + 1; status_cnt--; end @@ -273,39 +266,55 @@ module fetch_fifo write_pointer_n = write_pointer; status_cnt_n = status_cnt; - if (clear_i) - status_cnt_n = '0; + if (flush_i) begin + status_cnt_n = '0; + write_pointer_n = 'b0; + read_pointer_n = 'b0; + end end always_ff @(posedge clk_i or negedge rst_ni) begin if (~rst_ni) begin - status_cnt_q <= '{default: 0}; - mem_q <= '{default: 0}; - read_pointer_q <= '{default: 0}; - write_pointer_q <= '{default: 0}; - unaligned_q <= 1'b0; - unaligned_instr_q <= 16'b0; - unaligned_address_q <= 64'b0; + status_cnt_q <= '{default: 0}; + mem_q <= '{default: 0}; + read_pointer_q <= '{default: 0}; + write_pointer_q <= '{default: 0}; + unaligned_q <= 1'b0; + unaligned_instr_q <= 16'b0; + unaligned_address_q <= 64'b0; // input registers - in_addr_q <= 64'b0; - in_rdata_q <= 32'b0; - in_valid_q <= 1'b0; - branch_predict_q <= '{default: 0}; + in_addr_q <= 64'b0; + in_rdata_q <= 32'b0; + in_valid_q <= 1'b0; + branch_predict_q <= '{default: 0}; pipelein_register_valid_q <= 1'b0; end else begin - status_cnt_q <= status_cnt_n; - mem_q <= mem_n; - read_pointer_q <= read_pointer_n; - write_pointer_q <= write_pointer_n; - unaligned_q <= unaligned_n; - unaligned_instr_q <= unaligned_instr_n; - unaligned_address_q <= unaligned_address_n; + status_cnt_q <= status_cnt_n; + mem_q <= mem_n; + read_pointer_q <= read_pointer_n; + write_pointer_q <= write_pointer_n; + unaligned_q <= unaligned_n; + unaligned_instr_q <= unaligned_instr_n; + unaligned_address_q <= unaligned_address_n; // input registers - in_addr_q <= in_addr_n; - in_rdata_q <= in_rdata_n; - in_valid_q <= in_valid_n; - branch_predict_q <= branch_predict_n; + in_addr_q <= in_addr_n; + in_rdata_q <= in_rdata_n; + 
in_valid_q <= in_valid_n; + branch_predict_q <= branch_predict_n; pipelein_register_valid_q <= pipelein_register_valid_n; end end + + //------------- + // Assertions + //------------- + `ifndef SYNTHESIS + `ifndef VERILATOR + // since this is a dual port queue the status count of the queue should never change more than two + assert property (@(posedge clk_i) ((status_cnt_n - status_cnt_q) < 3 || (status_cnt_n - status_cnt_q) > 3)) else $error("FIFO underflowed or overflowed"); + // assert property ( + // @(posedge clk_i) (instr_gnt_i) |-> (instr_req_o) ) + // else $warning("There was a grant without a request"); + `endif + `endif endmodule \ No newline at end of file diff --git a/src/if_stage.sv b/src/if_stage.sv index c5e99f773..7b97f7b37 100644 --- a/src/if_stage.sv +++ b/src/if_stage.sv @@ -66,12 +66,12 @@ module if_stage ( // --------------------- // IF <-> ID Registers // --------------------- - logic [63:0] pc_n, pc_q; - logic instr_valid_n, instr_valid_q; - logic [31:0] instr_rdata_n, instr_rdata_q; - logic instr_is_compressed_n, instr_is_compressed_q; + logic [63:0] pc_n, pc_q; + logic instr_valid_n, instr_valid_q; + logic [31:0] instr_rdata_n, instr_rdata_q; + logic instr_is_compressed_n, instr_is_compressed_q; // branch predict registers - logic branch_predict_n, branch_predict_q; + branchpredict_sbe branch_predict_n, branch_predict_q; // compressed instruction decoding, or more precisely compressed instruction expander // since it does not matter where we decompress instructions, we do it here to ease timing closure @@ -116,7 +116,7 @@ module if_stage ( if (flush_i) begin instr_valid_n = 1'b0; end - // exception forwarding in here + // TODO: exception forwarding in here end // -------------------------------------------------------------- @@ -125,7 +125,7 @@ module if_stage ( always_ff @(posedge clk_i, negedge rst_ni) begin : IF_ID_PIPE_REGISTERS if (~rst_ni) begin ex_o <= '{default: 0}; - branch_predict_q <= '{default: 0}; + branch_predict_q <= '0; pc_q 
<= 64'b0; instr_valid_q <= 1'b0; instr_rdata_q <= 32'b0; diff --git a/src/pcgen.sv b/src/pcgen.sv index f969b82b8..d20d8c4a2 100644 --- a/src/pcgen.sv +++ b/src/pcgen.sv @@ -42,10 +42,10 @@ module pcgen ( logic [63:0] npc_n, npc_q; branchpredict_sbe branch_predict_btb; - assign pc_if_o = npc_q; + assign fetch_address_o = npc_q; btb #( - .NR_ENTRIES(64), + .NR_ENTRIES(1024), .BITS_SATURATION_COUNTER(2) ) btb_i diff --git a/src/prefetch_buffer.sv b/src/prefetch_buffer.sv index 4af831a73..e1969f468 100644 --- a/src/prefetch_buffer.sv +++ b/src/prefetch_buffer.sv @@ -64,18 +64,13 @@ module prefetch_buffer //--------------------------------- // we are busy if we are either waiting for a grant // or if the fifo is full - assign busy_o = (CS inside {WAIT_GNT, WAIT_ABORTED}) || !fifo_ready; + assign busy_o = (CS inside {WAIT_GNT, WAIT_ABORTED} && !instr_req_o) || !fifo_ready; //--------------------------------- // Fetch FIFO // consumes addresses and rdata //--------------------------------- fetch_fifo fifo_i ( - .clk_i ( clk_i ), - .rst_ni ( rst_n_i ), - - .clear_i ( flush_i ), - .branch_predict_i ( branchpredict_q ), .in_addr_i ( instr_addr_q ), .in_rdata_i ( instr_rdata_i ), @@ -86,7 +81,8 @@ module prefetch_buffer .out_valid_o ( valid_o ), .out_ready_i ( ready_i ), .out_rdata_o ( rdata_o ), - .out_addr_o ( addr_o ) + .out_addr_o ( addr_o ), + .* ); //-------------------------------------------------- @@ -113,7 +109,11 @@ module prefetch_buffer if(instr_gnt_i) //~> granted request - NS = WAIT_RVALID; + // we have one outstanding rvalid: wait for it + if (flush_i) + NS = WAIT_ABORTED; + else + NS = WAIT_RVALID; else begin //~> got a request but no grant NS = WAIT_GNT; end @@ -126,7 +126,11 @@ module prefetch_buffer instr_req_o = 1'b1; if(instr_gnt_i) - NS = WAIT_RVALID; + // we have one outstanding rvalid: wait for it + if (flush_i) + NS = WAIT_ABORTED; + else + NS = WAIT_RVALID; else NS = WAIT_GNT; end // case: WAIT_GNT @@ -137,15 +141,18 @@ module prefetch_buffer if 
(fifo_ready) begin // prepare for next request - if (fifo_ready && fetch_valid_i) begin instr_req_o = 1'b1; + // if we are receiving a data item during a flush ignore it fifo_valid = 1'b1; addr_valid = 1'b1; - if (instr_gnt_i) begin - NS = WAIT_RVALID; + // we have one outstanding rvalid: wait for it + if (flush_i) + NS = WAIT_ABORTED; + else + NS = WAIT_RVALID; end else begin NS = WAIT_GNT; end @@ -153,12 +160,13 @@ module prefetch_buffer // we are requested to abort our current request // we didn't get an rvalid yet, so wait for it if (flush_i) begin - NS = WAIT_ABORTED; + NS = WAIT_ABORTED; end end end else begin // just wait for rvalid and go back to IDLE, no new request if (instr_rvalid_i) begin + // if we are receiving a data item during a flush ignore it fifo_valid = 1'b1; NS = IDLE; end @@ -169,33 +177,37 @@ module prefetch_buffer // there was no new request sent yet // we assume that req_i is set to high WAIT_ABORTED: begin - instr_addr_o = instr_addr_q; + instr_addr_o = fetch_address_i; if (instr_rvalid_i) begin instr_req_o = 1'b1; // no need to send address, already done in WAIT_RVALID if (instr_gnt_i) begin - NS = WAIT_RVALID; + // we have one outstanding rvalid + if (flush_i) + NS = WAIT_ABORTED; + else + NS = WAIT_RVALID; end else begin NS = WAIT_GNT; end end end - default: - begin + default: begin NS = IDLE; instr_req_o = 1'b0; end endcase + end //------------- // Registers //------------- - always_ff @(posedge clk_i, negedge rst_n_i) + always_ff @(posedge clk_i, negedge rst_ni) begin if (~rst_ni) begin CS <= IDLE; diff --git a/tb/agents/fetch_fifo_if/fetch_fifo_if.sv b/tb/agents/fetch_fifo_if/fetch_fifo_if.sv new file mode 100755 index 000000000..97c1d6a67 --- /dev/null +++ b/tb/agents/fetch_fifo_if/fetch_fifo_if.sv @@ -0,0 +1,53 @@ +// Author: Florian Zaruba, ETH Zurich +// Date: 14.5.2017 +// Description: Fetch FIFO interface +// +// +// Copyright (C) 2017 ETH Zurich, University of Bologna +// All rights reserved. 
+// +// This code is under development and not yet released to the public. +// Until it is released, the code is under the copyright of ETH Zurich and +// the University of Bologna, and may contain confidential and/or unpublished +// work. Any reuse/redistribution is strictly forbidden without written +// permission from ETH Zurich. +// +// Bug fixes and contributions will eventually be released under the +// SolderPad open hardware license in the context of the PULP platform +// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the +// University of Bologna. +// +`ifndef FETCH_FIFO_IF_SV +`define FETCH_FIFO_IF_SV +import ariane_pkg::*; + +interface fetch_fifo_if #( + parameter type dtype = logic[7:0] + )( + input clk + ); + + wire flush; + wire [$bits(branchpredict_sbe)-1:0] in_branch_predict; + wire [63:0] in_addr; + wire [31:0] in_rdata; + wire in_valid; + wire in_ready; + wire [$bits(branchpredict_sbe)-1:0] out_branch_predict; + wire [63:0] out_addr; + wire [31:0] out_rdata; + wire out_valid; + wire out_ready; + + clocking mck @(posedge clk); + input in_ready, out_branch_predict, out_addr, out_rdata, out_valid; + output flush, in_branch_predict, in_addr, in_rdata, in_valid, out_ready; + endclocking + + clocking pck @(posedge clk); + input in_ready, out_branch_predict, out_addr, out_rdata, out_valid, + flush, in_branch_predict, in_addr, in_rdata, in_valid, out_ready; + endclocking + +endinterface +`endif \ No newline at end of file diff --git a/tb/fetch_fifo_tb.sv b/tb/fetch_fifo_tb.sv new file mode 100755 index 000000000..5eef448fe --- /dev/null +++ b/tb/fetch_fifo_tb.sv @@ -0,0 +1,75 @@ +// Author: Florian Zaruba, ETH Zurich +// Date: 14.5.2017 +// Description: Fetch FIFO testbench +// +// +// Copyright (C) 2017 ETH Zurich, University of Bologna +// All rights reserved. +// +// This code is under development and not yet released to the public. 
+// Until it is released, the code is under the copyright of ETH Zurich and +// the University of Bologna, and may contain confidential and/or unpublished +// work. Any reuse/redistribution is strictly forbidden without written +// permission from ETH Zurich. +// +// Bug fixes and contributions will eventually be released under the +// SolderPad open hardware license in the context of the PULP platform +// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the +// University of Bologna. +// + +module fetch_fifo_tb; + + logic rst_ni, clk_i; + fetch_fifo_if fetch_fifo_if (clk); + + fetch_fifo + dut ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i ( fetch_fifo_if.flush ), + .branch_predict_i ( fetch_fifo_if.in_branch_predict ), + .in_addr_i ( fetch_fifo_if.in_addr ), + .in_rdata_i ( fetch_fifo_if.in_rdata ), + .in_valid_i ( fetch_fifo_if.in_valid ), + .in_ready_o ( fetch_fifo_if.in_ready ), + .branch_predict_o ( fetch_fifo_if.out_branch_predict ), + .out_addr_o ( fetch_fifo_if.out_addr ), + .out_rdata_o ( fetch_fifo_if.out_rdata ), + .out_valid_o ( fetch_fifo_if.out_valid ), + .out_ready_i ( fetch_fifo_if.out_ready ) + ); + + initial begin + clk_i = 1'b0; + rst_ni = 1'b0; + repeat(8) + #10ns clk_i = ~clk_i; + + rst_ni = 1'b1; + forever + #10ns clk_i = ~clk_i; + end + + // simulator stopper, this is suboptimal better go for coverage + initial begin + #10000000ns + $stop; + end + + program testbench (fetch_fifo_if fetch_fifo_if); + + initial begin + fetch_fifo_if.mck.flush <= 1'b0; + fetch_fifo_if.mck.in_branch_predict <= 'b0; + fetch_fifo_if.mck.in_addr <= 'b0; + fetch_fifo_if.mck.in_rdata <= 'b0; + fetch_fifo_if.mck.in_valid <= 'b0; + fetch_fifo_if.mck.out_ready <= 'b0; + + end + + endprogram + + testbench tb(fetch_fifo_if); +endmodule \ No newline at end of file diff --git a/tb/test/fetch_fifo/fetch_fifo_pkg.sv b/tb/test/fetch_fifo/fetch_fifo_pkg.sv new file mode 100755 index 000000000..d08e8bb3e --- /dev/null +++ 
b/tb/test/fetch_fifo/fetch_fifo_pkg.sv @@ -0,0 +1,23 @@ +// Author: Florian Zaruba, ETH Zurich +// Date: 14.5.2017 +// Description: Fetch FIFO Pkg +// +// +// Copyright (C) 2017 ETH Zurich, University of Bologna +// All rights reserved. +// +// This code is under development and not yet released to the public. +// Until it is released, the code is under the copyright of ETH Zurich and +// the University of Bologna, and may contain confidential and/or unpublished +// work. Any reuse/redistribution is strictly forbidden without written +// permission from ETH Zurich. +// +// Bug fixes and contributions will eventually be released under the +// SolderPad open hardware license in the context of the PULP platform +// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the +// University of Bologna. +// + +package fetch_fifo_pkg; + +endpackage \ No newline at end of file From a6c81e7cab624a5f027564e324a006751afdd8a5 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Sun, 14 May 2017 21:12:18 +0200 Subject: [PATCH 32/43] Add golden model and instruction generator --- Makefile | 2 +- tb/fetch_fifo_tb.sv | 10 +++ tb/test/fetch_fifo/fetch_fifo_model.svh | 69 ++++++++++++++++ tb/test/fetch_fifo/fetch_fifo_pkg.sv | 3 +- tb/test/fetch_fifo/instruction_stream.svh | 97 +++++++++++++++++++++++ 5 files changed, 179 insertions(+), 2 deletions(-) create mode 100755 tb/test/fetch_fifo/fetch_fifo_model.svh create mode 100755 tb/test/fetch_fifo/instruction_stream.svh diff --git a/Makefile b/Makefile index e8d4e763b..b96d84c3e 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,7 @@ envs = $(wildcard tb/env/*/*.sv) # UVM Sequences sequences = $(wildcard tb/sequences/*/*.sv) # Test packages -test_pkg = $(wildcard tb/test/*/*sequence_pkg.sv) $(wildcard tb/test/*/*lib_pkg.sv) +test_pkg = $(wildcard tb/test/*/*_pkg.sv) # this list contains the standalone components src = $(wildcard src/util/*.sv) $(wildcard src/*.sv) diff --git a/tb/fetch_fifo_tb.sv b/tb/fetch_fifo_tb.sv 
index 5eef448fe..017b30240 100755 --- a/tb/fetch_fifo_tb.sv +++ b/tb/fetch_fifo_tb.sv @@ -18,6 +18,9 @@ // University of Bologna. // +import ariane_pkg::*; +import fetch_fifo_pkg::*; + module fetch_fifo_tb; logic rst_ni, clk_i; @@ -59,6 +62,9 @@ module fetch_fifo_tb; program testbench (fetch_fifo_if fetch_fifo_if); + instruction_stream is = new; + fetch_fifo_model model = new; + initial begin fetch_fifo_if.mck.flush <= 1'b0; fetch_fifo_if.mck.in_branch_predict <= 'b0; @@ -67,6 +73,10 @@ module fetch_fifo_tb; fetch_fifo_if.mck.in_valid <= 'b0; fetch_fifo_if.mck.out_ready <= 'b0; + forever begin + is.get_instruction(); + // @(fetch_fifo_if.mck); + end end endprogram diff --git a/tb/test/fetch_fifo/fetch_fifo_model.svh b/tb/test/fetch_fifo/fetch_fifo_model.svh new file mode 100755 index 000000000..863cb7b1e --- /dev/null +++ b/tb/test/fetch_fifo/fetch_fifo_model.svh @@ -0,0 +1,69 @@ +// Author: Florian Zaruba, ETH Zurich +// Date: 14.5.2017 +// Description: Fetch FIFO Golden Model +// +// +// Copyright (C) 2017 ETH Zurich, University of Bologna +// All rights reserved. +// +// This code is under development and not yet released to the public. +// Until it is released, the code is under the copyright of ETH Zurich and +// the University of Bologna, and may contain confidential and/or unpublished +// work. Any reuse/redistribution is strictly forbidden without written +// permission from ETH Zurich. +// +// Bug fixes and contributions will eventually be released under the +// SolderPad open hardware license in the context of the PULP platform +// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the +// University of Bologna. 
+// + +// Read 32 bit instruction, separate and re-align them +class fetch_fifo_model; + + logic [15:0] unaligned_part; + int is_unaligned = 0; + + logic [31:0] instruction_queue[$]; + + function void put(logic [31:0] instr); + + if (is_unaligned == 0) begin + // we've generated a compressed instruction so generate another one + if (instr[1:0] != 2'b11) begin + instruction_queue.push_back({16'b0, instr[15:0]}); + + if (instr[17:16] == 2'b11) begin + is_unaligned = 1; + unaligned_part = instr[31:16]; + end + // normal instruction + end else begin + instruction_queue.push_back(instr); + end + // the last generation iteration produced an outstanding instruction + end else begin + instruction_queue.push_back({instr[15:0], unaligned_part}); + + if (instr[17:16] != 2'b11) begin + instruction_queue.push_back({16'b0, instr[31:16]}); + is_unaligned = 0; + end else begin + // again we have an unaligned instruction + is_unaligned = 1; + unaligned_part = instr[31:16]; + end + end + endfunction : put + + function logic [31:0] pull(); + return instruction_queue.pop_front(); + endfunction : pull + + function void flush(); + // delete() without an index empties the whole queue; deleting by ascending + // index while size() shrinks stopped early and left every other entry behind + instruction_queue.delete(); + endfunction : flush + +endclass : fetch_fifo_model \ No newline at end of file diff --git a/tb/test/fetch_fifo/fetch_fifo_pkg.sv b/tb/test/fetch_fifo/fetch_fifo_pkg.sv index d08e8bb3e..9bcc30b7d 100755 --- a/tb/test/fetch_fifo/fetch_fifo_pkg.sv +++ b/tb/test/fetch_fifo/fetch_fifo_pkg.sv @@ -19,5 +19,6 @@ // package fetch_fifo_pkg; - + `include "instruction_stream.svh" + `include "fetch_fifo_model.svh" endpackage \ No newline at end of file diff --git a/tb/test/fetch_fifo/instruction_stream.svh b/tb/test/fetch_fifo/instruction_stream.svh new file mode 100755 index 000000000..f6d8ded28 --- /dev/null +++ b/tb/test/fetch_fifo/instruction_stream.svh @@ -0,0 +1,97 @@ +// Author: Florian Zaruba, ETH Zurich +// Date: 14.5.2017 +// Description: Random instruction class +// +// 
+// Copyright (C) 2017 ETH Zurich, University of Bologna +// All rights reserved. +// +// This code is under development and not yet released to the public. +// Until it is released, the code is under the copyright of ETH Zurich and +// the University of Bologna, and may contain confidential and/or unpublished +// work. Any reuse/redistribution is strictly forbidden without written +// permission from ETH Zurich. +// +// Bug fixes and contributions will eventually be released under the +// SolderPad open hardware license in the context of the PULP platform +// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the +// University of Bologna. +// +class instruction; + rand logic [31:0] instruction; + rand bit is_compressed; + + constraint compressed_constraint { + (is_compressed) -> { + instruction[1:0] != 2'b11; + } + (!is_compressed) -> { + instruction[1:0] == 2'b11; + instruction[4:2] != 3'b111; + } + } + + // Return readable representation + function string convert2string(); + + string s; + $sformat(s, "Instruction: %0h\nCompressed: %h", instruction, is_compressed); + return s; + + endfunction : convert2string +endclass : instruction + +class instruction_stream; + + instruction instr; + logic [15:0] unaligned_part; + int is_unaligned = 0; + // get an instruction stream of consecutive data + function logic [31:0] get_instruction(); + logic [31:0] return_instruction; + // generate a new instruction + if (is_unaligned == 0) begin + instr = new; + void'(randomize(instr)); + // we've generated a compressed instruction so generate another one + if (instr.is_compressed) begin + return_instruction [15:0] = instr.instruction[15:0]; + // get a new instruction + instr = new; + void'(randomize(instr)); + return_instruction[31:0] = instr.instruction[15:0]; + // $display("Instruction: [ c | c ]"); + // was this a compressed instruction as well? 
+ // if not than store that this was an unaligned access + if (!instr.is_compressed) begin + // $display("Instruction: [ i0 | c ]"); + is_unaligned = 1; + unaligned_part = instr.instruction[31:16]; + end + // normal instruction + end else begin + return_instruction = instr.instruction; + // $display("Instruction: [ i ]"); + end + // the last generation iteration produced an outstanding instruction + end else begin + return_instruction [15:0] = unaligned_part; + // generate a new isntruction + instr = new; + void'(randomize(instr)); + // was it compressed? + if (instr.is_compressed) begin + return_instruction [31:16] = instr.instruction[15:0]; + is_unaligned = 0; + // $display("Instruction: [ c | i1 ]"); + end else begin + // again we have an unaligned instruction + unaligned_part = instr.instruction[31:16]; + // $display("Instruction: [ i0 | i1 ]"); + end + end + + return return_instruction; + endfunction : get_instruction + +endclass : instruction_stream \ No newline at end of file From 650f514bb222e3a424e722b9a0e046d8fc1382c1 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Mon, 15 May 2017 00:37:31 +0200 Subject: [PATCH 33/43] :bug: Fix FIFO wrap around bug --- src/fetch_fifo.sv | 55 ++++--- src/prefetch_buffer.sv | 172 +++++++++++----------- tb/agents/fetch_fifo_if/fetch_fifo_if.sv | 4 +- tb/fetch_fifo_tb.sv | 21 ++- tb/test/fetch_fifo/fetch_fifo_model.svh | 57 +++++-- tb/test/fetch_fifo/fetch_fifo_pkg.sv | 3 +- tb/test/fetch_fifo/instruction_stream.svh | 18 ++- 7 files changed, 191 insertions(+), 139 deletions(-) diff --git a/src/fetch_fifo.sv b/src/fetch_fifo.sv index 884e5c7d8..32f3bdb59 100644 --- a/src/fetch_fifo.sv +++ b/src/fetch_fifo.sv @@ -73,7 +73,7 @@ module fetch_fifo // we always need two empty places // as it could happen that we get two compressed instructions/cycle /* verilator lint_off WIDTH */ - assign full = (status_cnt_q >= DEPTH - 2); + assign full = (status_cnt_q >= DEPTH - 3); assign empty = (status_cnt_q == 0); /* verilator lint_on 
WIDTH */ // the output is valid if we are either empty or just got a valid @@ -141,27 +141,25 @@ module fetch_fifo // check if the lower compressed instruction was no branch otherwise we will need to squash this instruction // but only if we predicted it to be taken, the predict was on the lower 16 bit compressed instruction if (in_rdata_q[17:16] != 2'b11 && !(branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16)) begin - mem_n[write_pointer_q + 1].branch_predict = branch_predict_q; - mem_n[write_pointer_q + 1].address = {in_addr_q[63:2], 2'b10}; - mem_n[write_pointer_q + 1].instruction = {16'b0, in_rdata_q[31:16]}; + mem_n[(write_pointer_q + 1) % DEPTH].branch_predict = branch_predict_q; + mem_n[(write_pointer_q + 1) % DEPTH].address = {in_addr_q[63:2], 2'b10}; + mem_n[(write_pointer_q + 1) % DEPTH].instruction = {16'b0, in_rdata_q[31:16]}; status_cnt++; write_pointer++; + $display("Instruction: [ c | c ] @ %t", $time); // or is it an unaligned 32 bit instruction like // ____________________________________________________ // |instr [15:0] | instr [31:16] | compressed 1[15:0] | // |____________________________________________________ - end else begin - // we've got an unaligned 32 bit instruction - // check if the previous instruction was no predicted taken branch - if (!(branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16)) begin - // save the lower 16 bit - unaligned_instr_n = in_rdata_q[31:16]; - // and that it was unaligned - unaligned_n = 1'b1; - // save the address as well - unaligned_address_n = {in_addr_q[63:2], 2'b10}; - end + end else if (!(branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16)) begin + // save the lower 16 bit + unaligned_instr_n = in_rdata_q[31:16]; + // and that it was unaligned + unaligned_n = 1'b1; + // save the address as well + unaligned_address_n = {in_addr_q[63:2], 2'b10}; + $display("Instruction: [ i0 | c ] @ %t", $time); 
// this does not consume space in the FIFO end end else begin @@ -174,6 +172,7 @@ module fetch_fifo mem_n[write_pointer_q].instruction = in_rdata_q; status_cnt++; write_pointer++; + $display("Instruction: [ i ] @ %t", $time); end end // we have an outstanding unaligned instruction @@ -191,28 +190,26 @@ module fetch_fifo // check if the lower compressed instruction was no branch otherwise we will need to squash this instruction // but only if we predicted it to be taken, the predict was on the lower 16 bit compressed instruction if (in_rdata_q[17:16] != 2'b11 && !(branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16)) begin - mem_n[write_pointer_q + 1].branch_predict = branch_predict_q; - mem_n[write_pointer_q + 1].address = {in_addr_q[63:2], 2'b10}; - mem_n[write_pointer_q + 1].instruction = {16'b0, in_rdata_q[31:16]}; + mem_n[(write_pointer_q + 1) % DEPTH].branch_predict = branch_predict_q; + mem_n[(write_pointer_q + 1) % DEPTH].address = {in_addr_q[63:2], 2'b10}; + mem_n[(write_pointer_q + 1) % DEPTH].instruction = {16'b0, in_rdata_q[31:16]}; status_cnt++; write_pointer++; // unaligned access served unaligned_n = 1'b0; + $display("Instruction: [ c | i1 ] @ %t", $time); // or is it an unaligned 32 bit instruction like // ____________________________________________________ // |instr [15:0] | instr [31:16] | compressed 1[15:0] | // |____________________________________________________ - end else begin - // we've got an unaligned 32 bit instruction - // check if the previous instruction was no predicted taken branch - if (!(branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16)) begin - // save the lower 16 bit - unaligned_instr_n = in_rdata_q[31:16]; - // and that it was unaligned - unaligned_n = 1'b1; - // save the address as well - unaligned_address_n = {in_addr_q[63:2], 2'b10}; - end + end else if (!(branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16)) 
begin + // save the lower 16 bit + unaligned_instr_n = in_rdata_q[31:16]; + // and that it was unaligned + unaligned_n = 1'b1; + // save the address as well + unaligned_address_n = {in_addr_q[63:2], 2'b10}; + $display("Instruction: [ i0 | i1 ] @ %t", $time); // this does not consume space in the FIFO end end diff --git a/src/prefetch_buffer.sv b/src/prefetch_buffer.sv index e1969f468..3cfbb6f77 100644 --- a/src/prefetch_buffer.sv +++ b/src/prefetch_buffer.sv @@ -98,109 +98,107 @@ module prefetch_buffer NS = CS; unique case(CS) - // default state, not waiting for requested data - IDLE: begin - instr_addr_o = fetch_address_i; - instr_req_o = 1'b0; + // default state, not waiting for requested data + IDLE: begin + instr_addr_o = fetch_address_i; + instr_req_o = 1'b0; - if (fifo_ready && fetch_valid_i) begin - instr_req_o = 1'b1; - addr_valid = 1'b1; + // make a new request + if (fifo_ready && fetch_valid_i) begin + instr_req_o = 1'b1; + addr_valid = 1'b1; - if(instr_gnt_i) //~> granted request + if(instr_gnt_i) //~> granted request + // we have one outstanding rvalid: wait for it + if (flush_i) + NS = WAIT_ABORTED; + else + NS = WAIT_RVALID; + else begin //~> got a request but no grant + NS = WAIT_GNT; + end + end + end // case: IDLE + + // we sent a request but did not yet get a grant + WAIT_GNT: begin + instr_addr_o = instr_addr_q; + instr_req_o = 1'b1; + + if(instr_gnt_i) // we have one outstanding rvalid: wait for it if (flush_i) NS = WAIT_ABORTED; else NS = WAIT_RVALID; - else begin //~> got a request but no grant - NS = WAIT_GNT; - end - end - end // case: IDLE - - // we sent a request but did not yet get a grant - WAIT_GNT: begin - instr_addr_o = instr_addr_q; - instr_req_o = 1'b1; - - if(instr_gnt_i) - // we have one outstanding rvalid: wait for it - if (flush_i) - NS = WAIT_ABORTED; else - NS = WAIT_RVALID; - else - NS = WAIT_GNT; - end // case: WAIT_GNT + NS = WAIT_GNT; + end // case: WAIT_GNT - // we wait for rvalid, after that we are ready to serve a new 
request - WAIT_RVALID: begin - instr_addr_o = fetch_address_i; + // we wait for rvalid, after that we are ready to serve a new request + WAIT_RVALID: begin + instr_addr_o = fetch_address_i; + // prepare for next request + if (fifo_ready && fetch_valid_i) begin + // wait for the valid signal + if (instr_rvalid_i) begin + instr_req_o = 1'b1; + fifo_valid = 1'b1; + addr_valid = 1'b1; - if (fifo_ready) begin - // prepare for next request - if (fifo_ready && fetch_valid_i) begin - instr_req_o = 1'b1; - // if we are receiving a data item during a flush ignore it - fifo_valid = 1'b1; - addr_valid = 1'b1; - - if (instr_gnt_i) begin - // we have one outstanding rvalid: wait for it - if (flush_i) - NS = WAIT_ABORTED; - else - NS = WAIT_RVALID; + if (instr_gnt_i) begin + // we have one outstanding rvalid: wait for it + // if we are receiving a data item during a flush ignore it + if (flush_i) + NS = WAIT_ABORTED; + else + NS = WAIT_RVALID; + end else begin + NS = WAIT_GNT; + end + end end else begin - NS = WAIT_GNT; - end - end else begin - // we are requested to abort our current request - // we didn't get an rvalid yet, so wait for it - if (flush_i) begin - NS = WAIT_ABORTED; - end - end - end else begin - // just wait for rvalid and go back to IDLE, no new request - if (instr_rvalid_i) begin - // if we are receiving a data item during a flush ignore it - fifo_valid = 1'b1; - NS = IDLE; - end - end - end // case: WAIT_RVALID - - // our last request was aborted, but we didn't yet get a rvalid and - // there was no new request sent yet - // we assume that req_i is set to high - WAIT_ABORTED: begin - instr_addr_o = fetch_address_i; - - if (instr_rvalid_i) begin - instr_req_o = 1'b1; - // no need to send address, already done in WAIT_RVALID - - if (instr_gnt_i) begin - // we have one outstanding rvalid - if (flush_i) + // we are requested to abort our current request + // we didn't get an rvalid yet, so wait for it + if (flush_i) begin NS = WAIT_ABORTED; - else - NS = 
WAIT_RVALID; - end else begin - NS = WAIT_GNT; - end + end + // just wait for rvalid and go back to IDLE, no new request + if (instr_rvalid_i) begin + // if we are receiving a data item during a flush ignore it + fifo_valid = 1'b1; + NS = IDLE; + end + end + + end // case: WAIT_RVALID + + // our last request was aborted, but we didn't yet get a rvalid and + // there was no new request sent yet we assume that req_i is set to high + WAIT_ABORTED: begin + instr_addr_o = fetch_address_i; + + if (instr_rvalid_i) begin + instr_req_o = 1'b1; + + if (instr_gnt_i) begin + // we have one outstanding rvalid + if (flush_i) + NS = WAIT_ABORTED; + else + NS = WAIT_RVALID; + end else begin + NS = WAIT_GNT; + end + end end - end - default: begin - NS = IDLE; - instr_req_o = 1'b0; - end + default: begin + NS = IDLE; + instr_req_o = 1'b0; + end endcase - end //------------- diff --git a/tb/agents/fetch_fifo_if/fetch_fifo_if.sv b/tb/agents/fetch_fifo_if/fetch_fifo_if.sv index 97c1d6a67..308b7cfc9 100755 --- a/tb/agents/fetch_fifo_if/fetch_fifo_if.sv +++ b/tb/agents/fetch_fifo_if/fetch_fifo_if.sv @@ -21,9 +21,7 @@ `define FETCH_FIFO_IF_SV import ariane_pkg::*; -interface fetch_fifo_if #( - parameter type dtype = logic[7:0] - )( +interface fetch_fifo_if ( input clk ); diff --git a/tb/fetch_fifo_tb.sv b/tb/fetch_fifo_tb.sv index 017b30240..0e74493cc 100755 --- a/tb/fetch_fifo_tb.sv +++ b/tb/fetch_fifo_tb.sv @@ -24,7 +24,7 @@ import fetch_fifo_pkg::*; module fetch_fifo_tb; logic rst_ni, clk_i; - fetch_fifo_if fetch_fifo_if (clk); + fetch_fifo_if fetch_fifo_if (clk_i); fetch_fifo dut ( @@ -57,25 +57,38 @@ module fetch_fifo_tb; // simulator stopper, this is suboptimal better go for coverage initial begin #10000000ns - $stop; + $finish; end program testbench (fetch_fifo_if fetch_fifo_if); instruction_stream is = new; fetch_fifo_model model = new; + instruction_queue_entry_t iqe; initial begin + fetch_fifo_if.mck.flush <= 1'b0; fetch_fifo_if.mck.in_branch_predict <= 'b0; 
fetch_fifo_if.mck.in_addr <= 'b0; fetch_fifo_if.mck.in_rdata <= 'b0; fetch_fifo_if.mck.in_valid <= 'b0; fetch_fifo_if.mck.out_ready <= 'b0; + wait(rst_ni == 1'b1); + // Driver forever begin - is.get_instruction(); - // @(fetch_fifo_if.mck); + @(fetch_fifo_if.mck iff fetch_fifo_if.in_ready); + + do begin + iqe = is.get_instruction(); + fetch_fifo_if.mck.in_addr <= iqe.address; + fetch_fifo_if.mck.in_rdata <= iqe.instr; + fetch_fifo_if.mck.in_branch_predict <= iqe.bp; + fetch_fifo_if.mck.in_valid <= 1'b1; + @(fetch_fifo_if.mck); + end while (fetch_fifo_if.mck.in_ready); + fetch_fifo_if.mck.in_valid <= 1'b0; end end diff --git a/tb/test/fetch_fifo/fetch_fifo_model.svh b/tb/test/fetch_fifo/fetch_fifo_model.svh index 863cb7b1e..ff5c4343e 100755 --- a/tb/test/fetch_fifo/fetch_fifo_model.svh +++ b/tb/test/fetch_fifo/fetch_fifo_model.svh @@ -19,44 +19,79 @@ // // Read 32 bit instruction, separate and re-align them +typedef struct { + logic [63:0] address; + logic [31:0] instr; + branchpredict_sbe bp; +} instruction_queue_entry_t; + class fetch_fifo_model; logic [15:0] unaligned_part; int is_unaligned = 0; + logic [63:0] unaligend_address; - logic [31:0] instruction_queue[$]; + instruction_queue_entry_t instruction_queue[$]; - function void put(logic [31:0] instr); + function void put(logic [63:0] address, logic [31:0] instr, branchpredict_sbe bp); + instruction_queue_entry_t param; if (is_unaligned == 0) begin - // we've generated a compressed instruction so generate another one + // we've got a compressed instruction if (instr[1:0] != 2'b11) begin - instruction_queue.push_back({16'b0, instr[15:0]}); + param.address = address; + param.instr = {16'b0, instr[15:0]}; + param.bp = bp; + instruction_queue.push_back(param); + // the upper part is a unaligned 32 bit instruction if (instr[17:16] == 2'b11) begin - is_unaligned = 1; - unaligned_part = instr[31:16]; + unaligend_address = {address[63:2], 2'b10}; + is_unaligned = 1; + unaligned_part = instr[31:16]; + // there is 
another compressed instruction + // don't include if branch prediction predicted a compressed + // branch in the first instruction part + end else if (!(bp.predict_taken && bp.valid && bp.is_lower_16)) begin + param.address = {address[63:2], 2'b10}; + param.instr = instr[31:16]; + param.bp = bp; + instruction_queue.push_back(param); end // normal instruction end else begin - instruction_queue.push_back(instr); + param.address = address; + param.instr = instr; + param.bp = bp; + instruction_queue.push_back(param); end // the last generation iteration produced an outstanding instruction end else begin - instruction_queue.push_back({instr[15:0], unaligned_part}); - + param.address = unaligend_address; + param.instr = {instr[15:0], unaligned_part}; + param.bp = bp; + instruction_queue.push_back(param); + // there is another compressed instruction + // don't include if branch prediction predicted a compressed + // branch in the first instruction part if (instr[17:16] != 2'b11) begin - instruction_queue.push_back({16'b0, instr[31:16]}); + if (!(bp.predict_taken && bp.valid && bp.is_lower_16)) begin + param.address = {address[63:2], 2'b10}; + param.instr = instr[31:16]; + param.bp = bp; + instruction_queue.push_back(param); + end is_unaligned = 0; end else begin // again we have an unaligned instruction + unaligend_address = {address[63:2], 2'b10}; // remember where the next unaligned instruction starts is_unaligned = 1; unaligned_part = instr[31:16]; end end endfunction : put - function logic [31:0] pull(); + function instruction_queue_entry_t pull(); return instruction_queue.pop_front(); endfunction : pull diff --git a/tb/test/fetch_fifo/fetch_fifo_pkg.sv b/tb/test/fetch_fifo/fetch_fifo_pkg.sv index 9bcc30b7d..313a0c3d6 100755 --- a/tb/test/fetch_fifo/fetch_fifo_pkg.sv +++ b/tb/test/fetch_fifo/fetch_fifo_pkg.sv @@ -19,6 +19,7 @@ // package fetch_fifo_pkg; - `include "instruction_stream.svh" + import ariane_pkg::*; `include "fetch_fifo_model.svh" + `include "instruction_stream.svh" endpackage \ No newline at end of file diff 
--git a/tb/test/fetch_fifo/instruction_stream.svh b/tb/test/fetch_fifo/instruction_stream.svh index f6d8ded28..e88b41ca6 100755 --- a/tb/test/fetch_fifo/instruction_stream.svh +++ b/tb/test/fetch_fifo/instruction_stream.svh @@ -43,11 +43,16 @@ endclass : instruction class instruction_stream; + logic [63:0] address = 0; instruction instr; logic [15:0] unaligned_part; int is_unaligned = 0; // get an instruction stream of consecutive data - function logic [31:0] get_instruction(); + function instruction_queue_entry_t get_instruction(); + + branchpredict_sbe bp = '0; + instruction_queue_entry_t return_entry; + logic [31:0] return_instruction; // generate a new instruction if (is_unaligned == 0) begin @@ -59,7 +64,7 @@ class instruction_stream; // get a new instruction instr = new; void'(randomize(instr)); - return_instruction[31:0] = instr.instruction[15:0]; + return_instruction[31:16] = instr.instruction[15:0]; // $display("Instruction: [ c | c ]"); // was this a compressed instruction as well? // if not than store that this was an unaligned access @@ -79,9 +84,9 @@ class instruction_stream; // generate a new isntruction instr = new; void'(randomize(instr)); + return_instruction [31:16] = instr.instruction[15:0]; // was it compressed? 
if (instr.is_compressed) begin - return_instruction [31:16] = instr.instruction[15:0]; is_unaligned = 0; // $display("Instruction: [ c | i1 ]"); end else begin @@ -90,8 +95,13 @@ class instruction_stream; // $display("Instruction: [ i0 | i1 ]"); end end + return_entry.instr = return_instruction; + return_entry.bp = bp; + return_entry.address = address; - return return_instruction; + address = address + 4; + + return return_entry; endfunction : get_instruction endclass : instruction_stream \ No newline at end of file From 7fc20b1e5f9b84461d33085a644ce721fbe5edc2 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Mon, 15 May 2017 00:56:03 +0200 Subject: [PATCH 34/43] Fix CI build --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b96d84c3e..bb64b3c6a 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,7 @@ envs = $(wildcard tb/env/*/*.sv) # UVM Sequences sequences = $(wildcard tb/sequences/*/*.sv) # Test packages -test_pkg = $(wildcard tb/test/*/*_pkg.sv) +test_pkg = $(wildcard tb/test/*/*sequence_pkg.sv) $(wildcard tb/test/*/*_pkg.sv) # this list contains the standalone components src = $(wildcard src/util/*.sv) $(wildcard src/*.sv) From 7e4bdfb11c0db3003a81a114e2e4d6c4bbbe3992 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Mon, 15 May 2017 18:36:31 +0200 Subject: [PATCH 35/43] Restructure IF stage --- Makefile | 2 +- include/ariane_pkg.svh | 11 +++ src/ariane.sv | 16 ++-- src/compressed_decoder.sv | 15 +--- src/fetch_fifo.sv | 110 +++++++++-------------- src/id_stage.sv | 25 +++--- src/if_stage.sv | 106 ++++++---------------- src/prefetch_buffer.sv | 7 +- tb/agents/fetch_fifo_if/fetch_fifo_if.sv | 8 +- tb/fetch_fifo_tb.sv | 4 +- 10 files changed, 105 insertions(+), 199 deletions(-) diff --git a/Makefile b/Makefile index bb64b3c6a..8af73bebc 100644 --- a/Makefile +++ b/Makefile @@ -68,7 +68,7 @@ $(tests): # Optimize top level vopt${questa_version} ${compile_flag} $@_tb -o $@_tb_optimized +acc -check_synthesis 
# vsim${questa_version} $@_tb_optimized - # vsim${questa_version} -c +UVM_TESTNAME=$@_test -coverage -classdebug $@_tb_optimized + # vsim${questa_version} +UVM_TESTNAME=$@_test -coverage -classdebug $@_tb_optimized vsim${questa_version} +UVM_TESTNAME=$@_test +uvm_set_action="*,_ALL_,UVM_ERROR,UVM_DISPLAY|UVM_STOP" -c -coverage -classdebug -do "coverage save -onexit $@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" $@_tb_optimized build-moore: diff --git a/include/ariane_pkg.svh b/include/ariane_pkg.svh index 6ec3f40eb..5e517c181 100644 --- a/include/ariane_pkg.svh +++ b/include/ariane_pkg.svh @@ -88,6 +88,17 @@ package ariane_pkg; // LSU functions LD, SD, LW, LWU, SW, LH, LHU, SH, LB, SB, LBU } fu_op; + // --------------- + // ID/EX/WB Stage + // --------------- + // store the decompressed instruction + typedef struct packed { + branchpredict_sbe branch_predict; + logic [63:0] address; + logic [31:0] instruction; + logic is_compressed; + logic is_illegal; + } fetch_entry; // --------------- // ID/EX/WB Stage diff --git a/src/ariane.sv b/src/ariane.sv index 881892324..32efb3f10 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -107,7 +107,7 @@ module ariane logic busy_if_id; logic ready_id_if; logic [31:0] fetch_rdata_id_if; - logic instr_valid_if_id; + logic fetch_valid_if_id; logic [31:0] instr_rdata_if_id; logic decode_ack_id_if; logic is_compressed_if_id; @@ -248,12 +248,9 @@ module ariane .instr_rvalid_i ( fetch_valid_ex_if ), .instr_rdata_i ( fetch_rdata_ex_if ), - .pc_o ( pc_if_id ), - .instr_valid_o ( instr_valid_if_id ), + .fetch_entry_o ( fetch_entry_if_id ), + .fetch_entry_valid_i ( fetch_valid_if_id ), .instr_ack_i ( decode_ack_id_if ), - .instr_rdata_o ( instr_rdata_if_id ), - .instr_is_compressed_o ( instr_is_compressed_if_id ), - .branch_predict_o ( branch_predict_if_id ), .ex_o ( exception_if_id ), .* ); @@ -270,11 +267,9 @@ module ariane .flush_i ( flush ), .flush_unissued_instr_i ( flush_unissued_instr_ctrl_id ), 
.flush_scoreboard_i ( flush_scoreboard_ctrl_id ), - .instruction_i ( instr_rdata_if_id ), - .instr_is_compressed_i ( instr_is_compressed_if_id ), - .instruction_valid_i ( instr_valid_if_id ), + .fetch_entry_i ( fetch_entry_if_id ), + .fetch_entry_valid_i ( fetch_valid_if_id ), .decoded_instr_ack_o ( decode_ack_id_if ), - .pc_if_i ( pc_if_id ), // PC from if .ex_if_i ( exception_if_id ), // exception from if .ready_o ( ready_id_if ), // Functional Units @@ -291,7 +286,6 @@ module ariane .alu_valid_o ( alu_valid_id_ex ), // Branches and Jumps .branch_valid_o ( branch_valid_id_ex ), // branch is valid - .branch_predict_i ( branch_predict_if_id ), // branch predict from if .branch_predict_o ( branch_predict_id_ex ), // branch predict to ex .resolved_branch_i ( resolved_branch ), // in order to resolve the branch // LSU diff --git a/src/compressed_decoder.sv b/src/compressed_decoder.sv index 59986886d..b5c2e0c6a 100644 --- a/src/compressed_decoder.sv +++ b/src/compressed_decoder.sv @@ -27,18 +27,15 @@ import ariane_pkg::*; module compressed_decoder ( - input logic [31:0] instr_i, + input logic [15:0] instr_i, output logic [31:0] instr_o, - output logic is_compressed_o, output logic illegal_instr_o ); // ------------------- // Compressed Decoder // ------------------- - - always_comb - begin + always_comb begin illegal_instr_o = 1'b0; instr_o = '0; @@ -253,13 +250,7 @@ module compressed_decoder endcase end - default: begin - // 32 bit (or more) instruction - instr_o = instr_i; - end + default: ; endcase end - - assign is_compressed_o = (instr_i[1:0] != 2'b11); - endmodule \ No newline at end of file diff --git a/src/fetch_fifo.sv b/src/fetch_fifo.sv index 32f3bdb59..b0ae388c2 100644 --- a/src/fetch_fifo.sv +++ b/src/fetch_fifo.sv @@ -34,27 +34,21 @@ module fetch_fifo input logic in_valid_i, output logic in_ready_o, // output port - output branchpredict_sbe branch_predict_o, - output logic [63:0] out_addr_o, - output logic [31:0] out_rdata_o, + output fetch_entry 
fetch_entry_o, output logic out_valid_o, input logic out_ready_i - ); localparam DEPTH = 8; // must be a power of two - typedef struct packed { - branchpredict_sbe branch_predict; - logic [63:0] address; - logic [31:0] instruction; - } fetch_entry; + // input registers - bounding the path from memory branchpredict_sbe branch_predict_n, branch_predict_q; logic [63:0] in_addr_n, in_addr_q; logic [31:0] in_rdata_n, in_rdata_q; logic in_valid_n, in_valid_q; - // this bit indicates whether there is a instruction waiting in the pipeline register or not - logic pipelein_register_valid_n, pipelein_register_valid_q; + // compressed to decompressed instruction + logic [31:0] decompressed_instruction [2]; + logic is_illegal [2]; fetch_entry mem_n[DEPTH-1:0], mem_q[DEPTH-1:0]; logic [$clog2(DEPTH)-1:0] read_pointer_n, read_pointer_q; @@ -69,7 +63,6 @@ module fetch_fifo logic [15:0] unaligned_instr_n, unaligned_instr_q; // save the address of the unaligned instruction logic [63:0] unaligned_address_n, unaligned_address_q; - // we always need two empty places // as it could happen that we get two compressed instructions/cycle /* verilator lint_off WIDTH */ @@ -103,10 +96,25 @@ module fetch_fifo in_valid_n = 1'b0; end end + // -------------------- + // Compressed Decoders + // -------------------- + // compressed instruction decoding, or more precisely compressed instruction expander + // since it does not matter where we decompress instructions, we do it here to ease timing closure + genvar i; + generate + for (i = 0; i < 2; i++) begin + compressed_decoder compressed_decoder_i ( + .instr_i ( in_rdata_q[(16*(i+1)-1):(i*16)] ), + .instr_o ( decompressed_instruction[i] ), + .illegal_instr_o ( is_illegal[i] ) + ); + end + endgenerate - // -------------- - // FIFO Management - // -------------- + // -------------------------------------------- + // FIFO Management + Instruction (re)-aligner + // -------------------------------------------- always_comb begin : output_port // counter 
automatic logic [$clog2(DEPTH)-1:0] status_cnt = status_cnt_q; @@ -118,7 +126,6 @@ module fetch_fifo unaligned_n = unaligned_q; unaligned_instr_n = unaligned_instr_q; unaligned_address_n = unaligned_address_q; - pipelein_register_valid_n = pipelein_register_valid_q; // --------------------------------- // Input port & Instruction Aligner // --------------------------------- @@ -128,9 +135,9 @@ module fetch_fifo // check if the instruction is compressed if (in_rdata_q[1:0] != 2'b11) begin // it is compressed - mem_n[write_pointer_q].branch_predict = branch_predict_q; - mem_n[write_pointer_q].address = in_addr_q; - mem_n[write_pointer_q].instruction = {16'b0, in_rdata_q[15:0]}; + mem_n[write_pointer_q] = { + branch_predict_q, in_addr_q, decompressed_instruction[0], 1'b1, is_illegal[0] + }; status_cnt++; write_pointer++; @@ -141,9 +148,10 @@ module fetch_fifo // check if the lower compressed instruction was no branch otherwise we will need to squash this instruction // but only if we predicted it to be taken, the predict was on the lower 16 bit compressed instruction if (in_rdata_q[17:16] != 2'b11 && !(branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16)) begin - mem_n[(write_pointer_q + 1) % DEPTH].branch_predict = branch_predict_q; - mem_n[(write_pointer_q + 1) % DEPTH].address = {in_addr_q[63:2], 2'b10}; - mem_n[(write_pointer_q + 1) % DEPTH].instruction = {16'b0, in_rdata_q[31:16]}; + + mem_n[write_pointer_q + 1'b1] = { + branch_predict_q, {in_addr_q[63:2], 2'b10}, decompressed_instruction[1], 1'b1, is_illegal[1] + }; status_cnt++; write_pointer++; @@ -167,9 +175,9 @@ module fetch_fifo // _______________________ // | instruction [31:0] | // |______________________ - mem_n[write_pointer_q].branch_predict = branch_predict_q; - mem_n[write_pointer_q].address = in_addr_q; - mem_n[write_pointer_q].instruction = in_rdata_q; + mem_n[write_pointer_q] = { + branch_predict_q, in_addr_q, in_rdata_q, 1'b0, 1'b0 + }; status_cnt++; 
write_pointer++; $display("Instruction: [ i ] @ %t", $time); @@ -177,9 +185,11 @@ module fetch_fifo end // we have an outstanding unaligned instruction if (in_valid_q && unaligned_q) begin - mem_n[write_pointer_q].branch_predict = branch_predict_q; - mem_n[write_pointer_q].address = unaligned_address_q; - mem_n[write_pointer_q].instruction = {in_rdata_q[15:0], unaligned_instr_q}; + + mem_n[write_pointer_q] = { + branch_predict_q, unaligned_address_q, {in_rdata_q[15:0], unaligned_instr_q}, 1'b0, 1'b0 + }; + status_cnt++; write_pointer++; // whats up with the other upper 16 bit of this instruction @@ -190,9 +200,10 @@ module fetch_fifo // check if the lower compressed instruction was no branch otherwise we will need to squash this instruction // but only if we predicted it to be taken, the predict was on the lower 16 bit compressed instruction if (in_rdata_q[17:16] != 2'b11 && !(branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16)) begin - mem_n[(write_pointer_q + 1) % DEPTH].branch_predict = branch_predict_q; - mem_n[(write_pointer_q + 1) % DEPTH].address = {in_addr_q[63:2], 2'b10}; - mem_n[(write_pointer_q + 1) % DEPTH].instruction = {16'b0, in_rdata_q[31:16]}; + mem_n[write_pointer_q + 1'b1] = { + branch_predict_q, {in_addr_q[63:2], 2'b10}, decompressed_instruction[1], 1'b1, is_illegal[1] + }; + status_cnt++; write_pointer++; // unaligned access served @@ -220,46 +231,13 @@ module fetch_fifo // we are ready to accept a new request if we still have two places in the queue // Output assignments - branch_predict_o = mem_q[read_pointer_q].branch_predict; - out_addr_o = mem_q[read_pointer_q].address; - out_rdata_o = mem_q[read_pointer_q].instruction; - - // pass-through if queue is empty but we are currently expanding or re-aligning an instruction - if (empty && in_valid_q) begin - // we either have a full 32 bit instruction a compressed 16 bit instruction - branch_predict_o = branch_predict_q; - out_addr_o = in_addr_q; - // 
depending on whether the instruction is compressed or not output the correct thing - if (!unaligned_q) begin - if (in_rdata_q[1:0] != 2'b11) - out_rdata_o = {16'b0, in_rdata_q[15:0]}; - else - out_rdata_o = in_rdata_q; - // serve unaligned - end else begin - out_addr_o = unaligned_address_q; - out_rdata_o = {in_rdata_q[15:0], unaligned_instr_q}; - end - // there is currently no valid instruction in the pipeline register push this instruction - // if (out_ready_i) begin - // pipelein_register_valid_n = 1'b1; - // read_pointer_n = read_pointer_q + 1; - // status_cnt--; - // end - // regular read but do not issue if we are already empty - // this can happen since we have an output latch in the IF stage and the ID stage will only know a cycle - // later that we do not have any valid instructions anymore - end + fetch_entry_o = mem_q[read_pointer_q].branch_predict; if (out_ready_i) begin read_pointer_n = read_pointer_q + 1; status_cnt--; end - if (out_ready_i) begin - pipelein_register_valid_n = 1'b0; - end - write_pointer_n = write_pointer; status_cnt_n = status_cnt; @@ -284,7 +262,6 @@ module fetch_fifo in_rdata_q <= 32'b0; in_valid_q <= 1'b0; branch_predict_q <= '{default: 0}; - pipelein_register_valid_q <= 1'b0; end else begin status_cnt_q <= status_cnt_n; mem_q <= mem_n; @@ -298,7 +275,6 @@ module fetch_fifo in_rdata_q <= in_rdata_n; in_valid_q <= in_valid_n; branch_predict_q <= branch_predict_n; - pipelein_register_valid_q <= pipelein_register_valid_n; end end diff --git a/src/id_stage.sv b/src/id_stage.sv index c43dfffd7..43eb483e7 100644 --- a/src/id_stage.sv +++ b/src/id_stage.sv @@ -31,11 +31,9 @@ module id_stage #( input logic flush_unissued_instr_i, input logic flush_scoreboard_i, // from IF - input logic [31:0] instruction_i, - input logic instr_is_compressed_i, - input logic instruction_valid_i, + input fetch_entry fetch_entry_i, + input logic fetch_entry_valid_i, output logic decoded_instr_ack_o, - input logic [63:0] pc_if_i, input exception ex_if_i, // 
we already got an exception in IF output logic ready_o, // id is ready @@ -51,8 +49,6 @@ module id_stage #( input logic alu_ready_i, output logic alu_valid_o, output logic branch_valid_o, // use branch prediction unit - // Branch predict In - input branchpredict_sbe branch_predict_i, // ex just resolved our predicted branch, we are ready to accept new requests input branchpredict resolved_branch_i, @@ -124,7 +120,7 @@ module id_stage #( unresolved_branch_n = 1'b0; end // if the instruction is valid and it is a control flow instruction - if (instruction_valid_i && is_control_flow_instr && ~flush_unissued_instr_i) begin + if (fetch_entry_valid_i && is_control_flow_instr && ~flush_unissued_instr_i) begin unresolved_branch_n = 1'b1; end end @@ -133,12 +129,13 @@ module id_stage #( assign ready_o = ~full && (~unresolved_branch_q || resolved_branch_i.valid); decoder decoder_i ( - .pc_i ( pc_if_i ), - .is_compressed_i ( instr_is_compressed_i ), - .instruction_i ( instruction_i ), - .ex_i ( ex_if_i ), - .instruction_o ( decoded_instr_dc_sb ), - .is_control_flow_instr_o ( is_control_flow_instr ), + .pc_i ( fetch_entry_i.address ), + .is_compressed_i ( fetch_entry_i.is_compressed ), + .instruction_i ( fetch_entry_i.instruction ), + .branch_predict_i ( fetch_entry_ibranch_predict ), + .ex_i ( ex_if_i ), + .instruction_o ( decoded_instr_dc_sb ), + .is_control_flow_instr_o ( is_control_flow_instr ), .* ); @@ -160,7 +157,7 @@ module id_stage #( .commit_instr_o ( commit_instr_o ), .commit_ack_i ( commit_ack_i ), .decoded_instr_i ( decoded_instr_dc_sb ), - .decoded_instr_valid_i ( instruction_valid_i ), + .decoded_instr_valid_i ( fetch_entry_valid_i ), .issue_instr_o ( issue_instr_sb_iro ), .issue_instr_valid_o ( issue_instr_valid_sb_iro ), .issue_ack_i ( issue_ack_iro_sb ), diff --git a/src/if_stage.sv b/src/if_stage.sv index 7b97f7b37..9265aa21e 100644 --- a/src/if_stage.sv +++ b/src/if_stage.sv @@ -1,31 +1,21 @@ 
-//////////////////////////////////////////////////////////////////////////////// -// Copyright (C) 2017 ETH Zurich, University of Bologna // -// All rights reserved. // -// // -// This code is under development and not yet released to the public. // -// Until it is released, the code is under the copyright of ETH Zurich // -// and the University of Bologna, and may contain unpublished work. // -// Any reuse/redistribution should only be under explicit permission. // -// // -// Bug fixes and contributions will eventually be released under the // -// SolderPad open hardware license and under the copyright of ETH Zurich // -// and the University of Bologna. // -// /// -// Engineer: Renzo Andri - andrire@student.ethz.ch // -// // -// Additional contributions by: // -// Igor Loi - igor.loi@unibo.it // -// Andreas Traber - atraber@student.ethz.ch // -// Sven Stucki - svstucki@student.ethz.ch // -// // -// Design Name: Instruction Fetch Stage // -// Project Name: zero-riscy // -// Language: SystemVerilog // -// // -// Description: Instruction fetch unit: Selection of the next PC, and // -// buffering (sampling) of the read instruction // -// // -//////////////////////////////////////////////////////////////////////////////// +// Author: Florian Zaruba, ETH Zurich +// Date: 14.05.2017 +// Description: Instruction fetch stage +// +// Copyright (C) 2017 ETH Zurich, University of Bologna +// All rights reserved. +// +// This code is under development and not yet released to the public. +// Until it is released, the code is under the copyright of ETH Zurich and +// the University of Bologna, and may contain confidential and/or unpublished +// work. Any reuse/redistribution is strictly forbidden without written +// permission from ETH Zurich. 
+// +// Bug fixes and contributions will eventually be released under the +// SolderPad open hardware license in the context of the PULP platform +// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the +// University of Bologna. +// import ariane_pkg::*; module if_stage ( @@ -46,72 +36,36 @@ module if_stage ( input logic instr_rvalid_i, input logic [31:0] instr_rdata_i, // Output of IF Pipeline stage - output logic [63:0] pc_o, - output logic instr_valid_o, // instruction in IF/ID pipeline is valid + output fetch_entry fetch_entry_o, + output logic fetch_entry_valid_i, // instruction in IF/ID pipeline is valid input logic instr_ack_i, - output logic [31:0] instr_rdata_o, // read instruction is sampled and sent to ID stage for decoding - output logic instr_is_compressed_o, - output exception ex_o, - output branchpredict_sbe branch_predict_o // branch prediction out + output exception ex_o ); // output logic illegal_compressed_instr_o -> in exception logic fetch_valid; - logic [31:0] instr_rdata; - logic instr_is_compressed; - logic [31:0] decompressed_instruction; - logic [63:0] addr_o; - logic illegal_compressed_instr; logic prefetch_busy; - branchpredict_sbe branch_predict; // --------------------- // IF <-> ID Registers // --------------------- - logic [63:0] pc_n, pc_q; logic instr_valid_n, instr_valid_q; - logic [31:0] instr_rdata_n, instr_rdata_q; - logic instr_is_compressed_n, instr_is_compressed_q; - // branch predict registers - branchpredict_sbe branch_predict_n, branch_predict_q; - - // compressed instruction decoding, or more precisely compressed instruction expander - // since it does not matter where we decompress instructions, we do it here to ease timing closure - compressed_decoder compressed_decoder_i ( - .instr_i ( instr_rdata ), - .instr_o ( decompressed_instruction ), - .is_compressed_o ( instr_is_compressed ), - .illegal_instr_o ( illegal_compressed_instr ) - ); // Pre-fetch buffer, caches a fixed number of instructions 
prefetch_buffer prefetch_buffer_i ( .ready_i ( instr_ack_i ), .valid_o ( fetch_valid ), - .rdata_o ( instr_rdata ), - .addr_o ( addr_o ), - .branch_predict_o ( branch_predict ), - // goes to instruction memory / instruction cache - // Prefetch Buffer Status .busy_o ( prefetch_busy ), .* ); assign if_busy_o = prefetch_busy; + assign fetch_entry_valid_i = instr_valid_q; - assign pc_o = pc_q; - assign instr_valid_o = instr_valid_q; - assign instr_rdata_o = instr_rdata_q; - assign instr_is_compressed_o = instr_is_compressed_q; - assign branch_predict_o = branch_predict_q; // Pipeline registers always_comb begin // Instruction is valid, latch new data - pc_n = addr_o; instr_valid_n = fetch_valid; - instr_rdata_n = decompressed_instruction; - instr_is_compressed_n = instr_is_compressed; - branch_predict_n = branch_predict; if (flush_i) begin instr_valid_n = 1'b0; @@ -125,17 +79,9 @@ module if_stage ( always_ff @(posedge clk_i, negedge rst_ni) begin : IF_ID_PIPE_REGISTERS if (~rst_ni) begin ex_o <= '{default: 0}; - branch_predict_q <= '0; - pc_q <= 64'b0; instr_valid_q <= 1'b0; - instr_rdata_q <= 32'b0; - instr_is_compressed_q <= 1'b0; end else begin - pc_q <= pc_n; instr_valid_q <= instr_valid_n; - instr_rdata_q <= instr_rdata_n; - instr_is_compressed_q <= instr_is_compressed_n; - branch_predict_q <= branch_predict_n; ex_o.cause <= 64'b0; // TODO: Output exception ex_o.tval <= 64'b0; // TODO: Output exception ex_o.valid <= 1'b0; //illegal_compressed_instr; // TODO: Output exception @@ -146,10 +92,10 @@ module if_stage ( //------------- `ifndef SYNTHESIS `ifndef VERILATOR - // there should never be a grant when there was no request - assert property ( - @(posedge clk_i) (instr_gnt_i) |-> (instr_req_o) ) - else $warning("There was a grant without a request"); + // there should never be a grant when there was no request + assert property ( + @(posedge clk_i) (instr_gnt_i) |-> (instr_req_o) ) + else $warning("There was a grant without a request"); `endif `endif endmodule \ 
No newline at end of file diff --git a/src/prefetch_buffer.sv b/src/prefetch_buffer.sv index 3cfbb6f77..afd905d35 100644 --- a/src/prefetch_buffer.sv +++ b/src/prefetch_buffer.sv @@ -36,9 +36,7 @@ module prefetch_buffer // output side input logic ready_i, output logic valid_o, - output logic [63:0] addr_o, - output logic [31:0] rdata_o, - output branchpredict_sbe branch_predict_o, + output fetch_entry fetch_entry_o, // goes to instruction memory / instruction cache output logic instr_req_o, @@ -77,11 +75,8 @@ module prefetch_buffer .in_valid_i ( fifo_valid ), .in_ready_o ( fifo_ready ), - .branch_predict_o ( branch_predict_o ), .out_valid_o ( valid_o ), .out_ready_i ( ready_i ), - .out_rdata_o ( rdata_o ), - .out_addr_o ( addr_o ), .* ); diff --git a/tb/agents/fetch_fifo_if/fetch_fifo_if.sv b/tb/agents/fetch_fifo_if/fetch_fifo_if.sv index 308b7cfc9..4d7a036aa 100755 --- a/tb/agents/fetch_fifo_if/fetch_fifo_if.sv +++ b/tb/agents/fetch_fifo_if/fetch_fifo_if.sv @@ -31,19 +31,17 @@ interface fetch_fifo_if ( wire [31:0] in_rdata; wire in_valid; wire in_ready; - wire [$bits(branchpredict_sbe)-1:0] out_branch_predict; - wire [63:0] out_addr; - wire [31:0] out_rdata; + wire [$bits(fetch_entry)-1:0] fetch_entry; wire out_valid; wire out_ready; clocking mck @(posedge clk); - input in_ready, out_branch_predict, out_addr, out_rdata, out_valid; + input in_ready, fetch_entry, out_valid; output flush, in_branch_predict, in_addr, in_rdata, in_valid, out_ready; endclocking clocking pck @(posedge clk); - input in_ready, out_branch_predict, out_addr, out_rdata, out_valid, + input in_ready, fetch_entry, out_valid, flush, in_branch_predict, in_addr, in_rdata, in_valid, out_ready; endclocking diff --git a/tb/fetch_fifo_tb.sv b/tb/fetch_fifo_tb.sv index 0e74493cc..e917eef3c 100755 --- a/tb/fetch_fifo_tb.sv +++ b/tb/fetch_fifo_tb.sv @@ -36,9 +36,7 @@ module fetch_fifo_tb; .in_rdata_i ( fetch_fifo_if.in_rdata ), .in_valid_i ( fetch_fifo_if.in_valid ), .in_ready_o ( fetch_fifo_if.in_ready 
), - .branch_predict_o ( fetch_fifo_if.out_branch_predict ), - .out_addr_o ( fetch_fifo_if.out_addr ), - .out_rdata_o ( fetch_fifo_if.out_rdata ), + .fetch_entry_o ( fetch_fifo_if.fetch_entry ), .out_valid_o ( fetch_fifo_if.out_valid ), .out_ready_i ( fetch_fifo_if.out_ready ) ); From c8de1aaae96138b7bdebc688094e81430e9b6710 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Mon, 15 May 2017 19:00:57 +0200 Subject: [PATCH 36/43] Basic jump and branch prediction test passing --- src/ariane.sv | 7 +------ src/fetch_fifo.sv | 16 ++++++++-------- src/id_stage.sv | 2 +- src/if_stage.sv | 22 +--------------------- tb/fetch_fifo_tb.sv | 2 +- 5 files changed, 12 insertions(+), 37 deletions(-) diff --git a/src/ariane.sv b/src/ariane.sv index 32efb3f10..1ec66de5b 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -105,16 +105,11 @@ module ariane // IF <-> ID // -------------- logic busy_if_id; + fetch_entry fetch_entry_if_id; logic ready_id_if; - logic [31:0] fetch_rdata_id_if; logic fetch_valid_if_id; - logic [31:0] instr_rdata_if_id; logic decode_ack_id_if; - logic is_compressed_if_id; - logic [63:0] pc_if_id; exception exception_if_id; - branchpredict_sbe branch_predict_if_id; - logic instr_is_compressed_if_id; // -------------- // ID <-> EX // -------------- diff --git a/src/fetch_fifo.sv b/src/fetch_fifo.sv index b0ae388c2..c9d010a5c 100644 --- a/src/fetch_fifo.sv +++ b/src/fetch_fifo.sv @@ -69,8 +69,8 @@ module fetch_fifo assign full = (status_cnt_q >= DEPTH - 3); assign empty = (status_cnt_q == 0); /* verilator lint_on WIDTH */ - // the output is valid if we are either empty or just got a valid - assign out_valid_o = !empty || in_valid_q; + // the output is valid if we are are not empty + assign out_valid_o = !empty; // we need space for at least two instructions: the full flag is conditioned on that // but if we pop in the current cycle and we have one place left we can still fit two instructions alt assign in_ready_o = !full; @@ -155,7 +155,7 @@ module fetch_fifo 
status_cnt++; write_pointer++; - $display("Instruction: [ c | c ] @ %t", $time); + // $display("Instruction: [ c | c ] @ %t", $time); // or is it an unaligned 32 bit instruction like // ____________________________________________________ // |instr [15:0] | instr [31:16] | compressed 1[15:0] | @@ -167,7 +167,7 @@ module fetch_fifo unaligned_n = 1'b1; // save the address as well unaligned_address_n = {in_addr_q[63:2], 2'b10}; - $display("Instruction: [ i0 | c ] @ %t", $time); + // $display("Instruction: [ i0 | c ] @ %t", $time); // this does not consume space in the FIFO end end else begin @@ -180,7 +180,7 @@ module fetch_fifo }; status_cnt++; write_pointer++; - $display("Instruction: [ i ] @ %t", $time); + // $display("Instruction: [ i ] @ %t", $time); end end // we have an outstanding unaligned instruction @@ -208,7 +208,7 @@ module fetch_fifo write_pointer++; // unaligned access served unaligned_n = 1'b0; - $display("Instruction: [ c | i1 ] @ %t", $time); + // $display("Instruction: [ c | i1 ] @ %t", $time); // or is it an unaligned 32 bit instruction like // ____________________________________________________ // |instr [15:0] | instr [31:16] | compressed 1[15:0] | @@ -220,7 +220,7 @@ module fetch_fifo unaligned_n = 1'b1; // save the address as well unaligned_address_n = {in_addr_q[63:2], 2'b10}; - $display("Instruction: [ i0 | i1 ] @ %t", $time); + // $display("Instruction: [ i0 | i1 ] @ %t", $time); // this does not consume space in the FIFO end end @@ -231,7 +231,7 @@ module fetch_fifo // we are ready to accept a new request if we still have two places in the queue // Output assignments - fetch_entry_o = mem_q[read_pointer_q].branch_predict; + fetch_entry_o = mem_q[read_pointer_q]; if (out_ready_i) begin read_pointer_n = read_pointer_q + 1; diff --git a/src/id_stage.sv b/src/id_stage.sv index 43eb483e7..0a649cdcc 100644 --- a/src/id_stage.sv +++ b/src/id_stage.sv @@ -132,7 +132,7 @@ module id_stage #( .pc_i ( fetch_entry_i.address ), .is_compressed_i ( 
fetch_entry_i.is_compressed ), .instruction_i ( fetch_entry_i.instruction ), - .branch_predict_i ( fetch_entry_ibranch_predict ), + .branch_predict_i ( fetch_entry_i.branch_predict ), .ex_i ( ex_if_i ), .instruction_o ( decoded_instr_dc_sb ), .is_control_flow_instr_o ( is_control_flow_instr ), diff --git a/src/if_stage.sv b/src/if_stage.sv index 9265aa21e..784afe09a 100644 --- a/src/if_stage.sv +++ b/src/if_stage.sv @@ -41,37 +41,19 @@ module if_stage ( input logic instr_ack_i, output exception ex_o ); - // output logic illegal_compressed_instr_o -> in exception - logic fetch_valid; logic prefetch_busy; - // --------------------- - // IF <-> ID Registers - // --------------------- - logic instr_valid_n, instr_valid_q; // Pre-fetch buffer, caches a fixed number of instructions prefetch_buffer prefetch_buffer_i ( .ready_i ( instr_ack_i ), - .valid_o ( fetch_valid ), + .valid_o ( fetch_entry_valid_i ), // Prefetch Buffer Status .busy_o ( prefetch_busy ), .* ); assign if_busy_o = prefetch_busy; - assign fetch_entry_valid_i = instr_valid_q; - - // Pipeline registers - always_comb begin - // Instruction is valid, latch new data - instr_valid_n = fetch_valid; - - if (flush_i) begin - instr_valid_n = 1'b0; - end - // TODO: exception forwarding in here - end // -------------------------------------------------------------- // IF-ID pipeline registers, frozen when the ID stage is stalled @@ -79,9 +61,7 @@ module if_stage ( always_ff @(posedge clk_i, negedge rst_ni) begin : IF_ID_PIPE_REGISTERS if (~rst_ni) begin ex_o <= '{default: 0}; - instr_valid_q <= 1'b0; end else begin - instr_valid_q <= instr_valid_n; ex_o.cause <= 64'b0; // TODO: Output exception ex_o.tval <= 64'b0; // TODO: Output exception ex_o.valid <= 1'b0; //illegal_compressed_instr; // TODO: Output exception diff --git a/tb/fetch_fifo_tb.sv b/tb/fetch_fifo_tb.sv index e917eef3c..e21b454d7 100755 --- a/tb/fetch_fifo_tb.sv +++ b/tb/fetch_fifo_tb.sv @@ -36,7 +36,7 @@ module fetch_fifo_tb; .in_rdata_i ( 
fetch_fifo_if.in_rdata ), .in_valid_i ( fetch_fifo_if.in_valid ), .in_ready_o ( fetch_fifo_if.in_ready ), - .fetch_entry_o ( fetch_fifo_if.fetch_entry ), + .fetch_entry_o ( fetch_fifo_if.fetch_entry ), .out_valid_o ( fetch_fifo_if.out_valid ), .out_ready_i ( fetch_fifo_if.out_ready ) ); From 1b799bb1c20ca9dd5bbae034c2fdbdfda095cdb3 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Mon, 15 May 2017 21:11:13 +0200 Subject: [PATCH 37/43] Unaligned branches working --- src/ariane.sv | 5 ----- src/fetch_fifo.sv | 21 ++++++++++----------- test/add_test.S | 1 - 3 files changed, 10 insertions(+), 17 deletions(-) diff --git a/src/ariane.sv b/src/ariane.sv index 1ec66de5b..324792d32 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -82,14 +82,12 @@ module ariane logic flush; logic fetch_enable; logic halt_if; - logic [63:0] pc_if; exception ex_commit; // exception from commit stage branchpredict resolved_branch; // -------------- // PCGEN <-> IF // -------------- logic [63:0] fetch_address_pcgen_if; - logic set_fetch_address_pcgen_if; branchpredict_sbe branch_predict_pcgen_if; logic if_ready_if_pcgen; logic fetch_valid_pcgen_if; @@ -104,7 +102,6 @@ module ariane // -------------- // IF <-> ID // -------------- - logic busy_if_id; fetch_entry fetch_entry_if_id; logic ready_id_if; logic fetch_valid_if_id; @@ -114,7 +111,6 @@ module ariane // ID <-> EX // -------------- logic [63:0] imm_id_ex; - logic ready_id_ex; logic [TRANS_ID_BITS-1:0] trans_id_id_ex; fu_op operator_id_ex; logic [63:0] operand_a_id_ex; @@ -183,7 +179,6 @@ module ariane logic flag_mxr_csr_ex; logic [37:0] pd_ppn_csr_ex; logic [0:0] asid_csr_ex; - logic flush_tlb_csr_ex; logic [11:0] csr_addr_ex_csr; // -------------- // COMMIT <-> CSR diff --git a/src/fetch_fifo.sv b/src/fetch_fifo.sv index c9d010a5c..d0d39b4a4 100644 --- a/src/fetch_fifo.sv +++ b/src/fetch_fifo.sv @@ -79,18 +79,11 @@ module fetch_fifo // Input Registers // ---------------- always_comb begin - // if we are not ready latch the values - 
in_addr_n = in_addr_q; - in_rdata_n = in_rdata_q; - in_valid_n = 1'b0; - branch_predict_n = branch_predict_q; // if we are ready to accept new data - do so! - if (in_ready_o) begin - in_addr_n = in_addr_i; - in_rdata_n = in_rdata_i; - in_valid_n = in_valid_i; - branch_predict_n = branch_predict_i; - end + in_addr_n = in_addr_i; + in_rdata_n = in_rdata_i; + in_valid_n = in_valid_i; + branch_predict_n = branch_predict_i; // flush the input registers if (flush_i) begin in_valid_n = 1'b0; @@ -222,6 +215,10 @@ module fetch_fifo unaligned_address_n = {in_addr_q[63:2], 2'b10}; // $display("Instruction: [ i0 | i1 ] @ %t", $time); // this does not consume space in the FIFO + // we've got a predicted taken branch we need to clear the unaligned flag if it was decoded as a lower 16 instruction + end else if (branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16) begin + // the next fetch will start from a 4 byte boundary again + unaligned_n = 1'b0; end end @@ -245,6 +242,8 @@ module fetch_fifo status_cnt_n = '0; write_pointer_n = 'b0; read_pointer_n = 'b0; + // clear the unaligned instruction + unaligned_n = 1'b0; end end diff --git a/test/add_test.S b/test/add_test.S index 1e26140ab..19d368e3f 100755 --- a/test/add_test.S +++ b/test/add_test.S @@ -14,7 +14,6 @@ csrr x1, mstatus nop L0: nop - nop nop nop jal L1 From 555260743e15a1280600d62d102675fcb02f05a0 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Tue, 16 May 2017 11:37:26 +0200 Subject: [PATCH 38/43] ALU cleanup remove branch logic --- src/alu.sv | 297 ++++++++++++++++++++---------------------------- src/ex_stage.sv | 8 -- src/pcgen.sv | 2 +- 3 files changed, 126 insertions(+), 181 deletions(-) diff --git a/src/alu.sv b/src/alu.sv index cdef04797..f4ee73fc6 100644 --- a/src/alu.sv +++ b/src/alu.sv @@ -16,213 +16,166 @@ import ariane_pkg::*; module alu ( - input fu_op operator_i, - input logic [63:0] operand_a_i, - input logic [63:0] operand_b_i, - - output logic [63:0] 
adder_result_o, - output logic [65:0] adder_result_ext_o, - - output logic [63:0] result_o, - output logic comparison_result_o, - output logic is_equal_result_o + input logic [TRANS_ID_BITS-1:0] trans_id_i, + input logic alu_valid_i, + input fu_op operator_i, + input logic [63:0] operand_a_i, + input logic [63:0] operand_b_i, + output logic [63:0] result_o, + output logic alu_valid_o, + output logic alu_ready_o, + output logic [TRANS_ID_BITS-1:0] alu_trans_id_o ); + // ALU is a single cycle instructions, hence it is always ready - logic [63:0] operand_a_rev; - logic [31:0] operand_a_rev32; - logic [64:0] operand_b_neg; + assign alu_ready_o = 1'b1; + assign alu_valid_o = alu_valid_i; + assign alu_trans_id_o = trans_id_i; - // bit reverse operand_a for left shifts and bit counting - generate - genvar k; - for(k = 0; k < 64; k++) - assign operand_a_rev[k] = operand_a_i[63-k]; + logic [63:0] operand_a_rev; + logic [31:0] operand_a_rev32; + logic [64:0] operand_b_neg; + logic [65:0] adder_result_ext_o; + // bit reverse operand_a for left shifts and bit counting + generate + genvar k; + for(k = 0; k < 64; k++) + assign operand_a_rev[k] = operand_a_i[63-k]; - for (k = 0; k < 32; k++) - assign operand_a_rev32[k] = operand_a_i[31-k]; - endgenerate + for (k = 0; k < 32; k++) + assign operand_a_rev32[k] = operand_a_i[31-k]; + endgenerate - // ------ - // Adder - // ------ - logic adder_op_b_negate; - logic [64:0] adder_in_a, adder_in_b; - logic [63:0] adder_result; + // ------ + // Adder + // ------ + logic adder_op_b_negate; + logic [64:0] adder_in_a, adder_in_b; + logic [63:0] adder_result; - always_comb - begin - adder_op_b_negate = 1'b0; + always_comb begin + adder_op_b_negate = 1'b0; - unique case (operator_i) - // ADDER OPS - SUB, SUBW, - // COMPARATOR OPs - EQ, NE, - GEU, LTU, - GES, LTS: adder_op_b_negate = 1'b1; + unique case (operator_i) + // ADDER OPS + SUB, SUBW: adder_op_b_negate = 1'b1; - default: ; - endcase - end + default: ; + endcase + end - // prepare 
operand a - assign adder_in_a = {operand_a_i, 1'b1}; + // prepare operand a + assign adder_in_a = {operand_a_i, 1'b1}; - // prepare operand b - assign operand_b_neg = {operand_b_i, 1'b0} ^ {65{adder_op_b_negate}}; - assign adder_in_b = operand_b_neg ; + // prepare operand b + assign operand_b_neg = {operand_b_i, 1'b0} ^ {65{adder_op_b_negate}}; + assign adder_in_b = operand_b_neg ; - // actual adder - assign adder_result_ext_o = $unsigned(adder_in_a) + $unsigned(adder_in_b); + // actual adder + assign adder_result_ext_o = $unsigned(adder_in_a) + $unsigned(adder_in_b); + assign adder_result = adder_result_ext_o[64:1]; - assign adder_result = adder_result_ext_o[64:1]; + // --------- + // Shifts + // --------- - assign adder_result_o = adder_result; + // TODO: this can probably optimized significantly + logic shift_left; // should we shift left + logic shift_arithmetic; - // --------- - // Shifts - // --------- + logic [63:0] shift_amt; // amount of shift, to the right + logic [63:0] shift_op_a; // input of the shifter + logic [31:0] shift_op_a32; // input to the 32 bit shift operation - // TODO: this can probably optimized significantly - logic shift_left; // should we shift left - logic shift_arithmetic; + logic [63:0] shift_result; + logic [31:0] shift_result32; - logic [63:0] shift_amt; // amount of shift, to the right - logic [63:0] shift_op_a; // input of the shifter - logic [31:0] shift_op_a32; // input to the 32 bit shift operation + logic [64:0] shift_right_result; + logic [32:0] shift_right_result32; - logic [63:0] shift_result; - logic [31:0] shift_result32; + logic [63:0] shift_left_result; + logic [31:0] shift_left_result32; - logic [64:0] shift_right_result; - logic [32:0] shift_right_result32; + assign shift_amt = operand_b_i; - logic [63:0] shift_left_result; - logic [31:0] shift_left_result32; + assign shift_left = (operator_i == SLL) | (operator_i == SLLW); - assign shift_amt = operand_b_i; + assign shift_arithmetic = (operator_i == SRA) | 
(operator_i == SRAW); - assign shift_left = (operator_i == SLL) | (operator_i == SLLW); + // right shifts, we let the synthesizer optimize this + logic [64:0] shift_op_a_64; + logic [32:0] shift_op_a_32; - assign shift_arithmetic = (operator_i == SRA) | (operator_i == SRAW); + // choose the bit reversed or the normal input for shift operand a + assign shift_op_a = shift_left ? operand_a_rev : operand_a_i; + assign shift_op_a32 = shift_left ? operand_a_rev32 : operand_a_i[31:0]; - // right shifts, we let the synthesizer optimize this - logic [64:0] shift_op_a_64; - logic [32:0] shift_op_a_32; + assign shift_op_a_64 = { shift_arithmetic & shift_op_a[63], shift_op_a}; + assign shift_op_a_32 = { shift_arithmetic & shift_op_a[31], shift_op_a32}; - // choose the bit reversed or the normal input for shift operand a - assign shift_op_a = shift_left ? operand_a_rev : operand_a_i; - assign shift_op_a32 = shift_left ? operand_a_rev32 : operand_a_i[31:0]; + assign shift_right_result = $signed(shift_op_a_64) >>> shift_amt[5:0]; - assign shift_op_a_64 = { shift_arithmetic & shift_op_a[63], shift_op_a}; - assign shift_op_a_32 = { shift_arithmetic & shift_op_a[31], shift_op_a32}; + assign shift_right_result32 = $signed(shift_op_a_32) >>> shift_amt[4:0]; + // bit reverse the shift_right_result for left shifts + genvar j; + generate + for(j = 0; j < 64; j++) + assign shift_left_result[j] = shift_right_result[63-j]; - assign shift_right_result = $signed(shift_op_a_64) >>> shift_amt[5:0]; + for(j = 0; j < 32; j++) + assign shift_left_result32[j] = shift_right_result32[31-j]; - assign shift_right_result32 = $signed(shift_op_a_32) >>> shift_amt[4:0]; - // bit reverse the shift_right_result for left shifts - genvar j; - generate - for(j = 0; j < 64; j++) - assign shift_left_result[j] = shift_right_result[63-j]; + endgenerate - for(j = 0; j < 32; j++) - assign shift_left_result32[j] = shift_right_result32[31-j]; + assign shift_result = shift_left ? 
shift_left_result : shift_right_result[63:0]; + assign shift_result32 = shift_left ? shift_left_result32 : shift_right_result32[31:0]; - endgenerate + // ------------ + // Comparisons + // ------------ + logic is_greater_equal; // handles both signed and unsigned forms + logic cmp_signed; - assign shift_result = shift_left ? shift_left_result : shift_right_result[63:0]; - assign shift_result32 = shift_left ? shift_left_result32 : shift_right_result32[31:0]; + always_comb begin + cmp_signed = 1'b0; -// ------------ -// Comparisons -// ------------ - logic is_equal; - logic is_greater_equal; // handles both signed and unsigned forms - logic cmp_signed; + if (operator_i == SLTS) + cmp_signed = 1'b1; + // Is greater equal + if ((operand_a_i[63] ^ operand_b_i[63]) == 0) + is_greater_equal = (adder_result[63] == 0); + else + is_greater_equal = operand_a_i[63] ^ (cmp_signed); + end - always_comb - begin - cmp_signed = 1'b0; + // ----------- + // Result MUX + // ----------- + always_comb begin + result_o = '0; - unique case (operator_i) - GES, - LTS, - SLTS: begin - cmp_signed = 1'b1; - end + unique case (operator_i) + // Standard Operations + ANDL: result_o = operand_a_i & operand_b_i; + ORL: result_o = operand_a_i | operand_b_i; + XORL: result_o = operand_a_i ^ operand_b_i; - default:; - endcase - end + // Adder Operations + ADD, SUB: result_o = adder_result; + // Add word: Ignore the upper bits and sign extend to 64 bit + ADDW, SUBW: result_o = {{32{adder_result[31]}}, adder_result[31:0]}; + // Shift Operations + SLL, + SRL, SRA: result_o = shift_result; + // Shifts 32 bit + SLLW, + SRLW, SRAW: result_o = {{32{shift_result32[31]}}, shift_result32[31:0]}; - assign is_equal = (adder_result == 64'b0); - assign is_equal_result_o = is_equal; + // Comparison Operations + SLTS, SLTU: result_o = {63'b0, (~is_greater_equal)}; - - // Is greater equal - always_comb - begin - if ((operand_a_i[63] ^ operand_b_i[63]) == 0) - is_greater_equal = (adder_result[63] == 0); - else - 
is_greater_equal = operand_a_i[63] ^ (cmp_signed); - end - - // generate comparison result - logic cmp_result; - - always_comb - begin - // this is used only for branches - // as we are also using it for unconditional branches - // set it to 1 as a default - cmp_result = 1'b1; - - unique case (operator_i) - EQ: cmp_result = is_equal; - NE: cmp_result = (~is_equal); - GES, GEU: cmp_result = is_greater_equal; - LTS, SLTS, - LTU, SLTU: cmp_result = (~is_greater_equal); - default: ; - endcase - end - - assign comparison_result_o = cmp_result; - - // ----------- - // Result MUX - // ----------- - always_comb - begin - result_o = '0; - - unique case (operator_i) - // Standard Operations - ANDL: result_o = operand_a_i & operand_b_i; - ORL: result_o = operand_a_i | operand_b_i; - XORL: result_o = operand_a_i ^ operand_b_i; - - // Adder Operations - ADD, SUB: result_o = adder_result; - // Add word: Ignore the upper bits and sign extend to 64 bit - ADDW, SUBW: result_o = {{32{adder_result[31]}}, adder_result[31:0]}; - // Shift Operations - SLL, - SRL, SRA: result_o = shift_result; - // Shifts 32 bit - SLLW, - SRLW, SRAW: result_o = {{32{shift_result32[31]}}, shift_result32[31:0]}; - - // Comparison Operations - EQ, NE, - GEU, LTU, - LTS, GES, - SLTS, SLTU: result_o = {63'b0, cmp_result}; - - default: ; // default case to suppress unique warning - endcase - end + default: ; // default case to suppress unique warning + endcase + end endmodule diff --git a/src/ex_stage.sv b/src/ex_stage.sv index 87e8b49df..5fad63dbc 100644 --- a/src/ex_stage.sv +++ b/src/ex_stage.sv @@ -98,19 +98,11 @@ module ex_stage #( input logic mult_valid_i // Output is valid ); - // ALU is a single cycle instructions, hence it is always ready - assign alu_ready_o = 1'b1; - assign alu_valid_o = alu_valid_i; - assign alu_trans_id_o = trans_id_i; // ----- // ALU // ----- alu alu_i ( - .adder_result_o ( ), - .adder_result_ext_o ( ), .result_o ( alu_result_o ), - .is_equal_result_o ( ), - .comparison_result_o 
( ), .* ); diff --git a/src/pcgen.sv b/src/pcgen.sv index d20d8c4a2..aad1b99cf 100644 --- a/src/pcgen.sv +++ b/src/pcgen.sv @@ -45,7 +45,7 @@ module pcgen ( assign fetch_address_o = npc_q; btb #( - .NR_ENTRIES(1024), + .NR_ENTRIES(64), .BITS_SATURATION_COUNTER(2) ) btb_i From e629c6fbdd59bba8c9a07fbdeae65c18c8ce58c9 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Tue, 16 May 2017 11:40:53 +0200 Subject: [PATCH 39/43] Remove ALU signals from TB --- tb/alu_tb.sv | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tb/alu_tb.sv b/tb/alu_tb.sv index de3e388a4..55fdfacdf 100644 --- a/tb/alu_tb.sv +++ b/tb/alu_tb.sv @@ -26,14 +26,15 @@ module alu_tb; alu dut ( + .trans_id_i ( ), + .alu_valid_i ( ), .operator_i ( fu_op'(alu_if.operator) ), .operand_a_i ( alu_if.operand_a ), .operand_b_i ( alu_if.operand_b ), .result_o ( alu_if.result ), - .comparison_result_o ( alu_if.comparison_result ), - .is_equal_result_o ( ), - .adder_result_ext_o ( ), - .adder_result_o ( ) + .alu_valid_o ( ), + .alu_ready_o ( ), + .alu_trans_id_o ( ) ); initial begin From bbe39b8ba2ef612f81220e0c9b6d93ed5eb260d4 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Tue, 16 May 2017 12:35:30 +0200 Subject: [PATCH 40/43] Only resolve branch if it was indeed a branch inst --- src/ariane.sv | 6 ++++-- src/branch_engine.sv | 14 ++++++++++---- src/ex_stage.sv | 23 ++++++++++++----------- src/id_stage.sv | 13 +++++++++---- 4 files changed, 35 insertions(+), 21 deletions(-) diff --git a/src/ariane.sv b/src/ariane.sv index 324792d32..ba435b821 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -128,6 +128,7 @@ module ariane // Branches and Jumps logic branch_valid_id_ex; branchpredict_sbe branch_predict_id_ex; + logic resolve_branch_ex_id; // LSU logic [TRANS_ID_BITS-1:0] lsu_trans_id_ex_id; logic lsu_valid_id_ex; @@ -277,7 +278,7 @@ module ariane // Branches and Jumps .branch_valid_o ( branch_valid_id_ex ), // branch is valid .branch_predict_o ( branch_predict_id_ex ), // branch 
predict to ex - .resolved_branch_i ( resolved_branch ), // in order to resolve the branch + .resolve_branch_i ( resolve_branch_ex_id ), // in order to resolve the branch // LSU .lsu_ready_i ( lsu_ready_ex_id ), .lsu_valid_o ( lsu_valid_id_ex ), @@ -323,8 +324,9 @@ module ariane .alu_exception_o ( alu_exception_ex_id ), // Branches and Jumps .branch_valid_i ( branch_valid_id_ex ), - .branch_predict_i ( branch_predict_id_ex ), // branch predict to ex + .branch_predict_i ( branch_predict_id_ex ), // branch predict to ex .resolved_branch_o ( resolved_branch ), + .resolve_branch_o ( resolve_branch_ex_id ), // LSU .lsu_ready_o ( lsu_ready_ex_id ), .lsu_valid_i ( lsu_valid_id_ex ), diff --git a/src/branch_engine.sv b/src/branch_engine.sv index a896b2b66..006bd2949 100644 --- a/src/branch_engine.sv +++ b/src/branch_engine.sv @@ -32,6 +32,8 @@ module branch_engine ( input branchpredict_sbe branch_predict_i, // this is the address we predicted output branchpredict resolved_branch_o, // this is the actual address we are targeting + output logic resolve_branch_o, // to ID to clear that we resolved the branch and we can + // accept new entries to the scoreboard output exception branch_ex_o // branch exception out ); logic [63:0] target_address; @@ -67,6 +69,7 @@ module branch_engine ( resolved_branch_o.valid = valid_i; resolved_branch_o.is_mispredict = 1'b0; resolved_branch_o.is_lower_16 = 1'b0; + resolved_branch_o = 1'b0; // calculate next PC, depending on whether the instruction is compressed or not this may be different next_pc = pc_i + ((is_compressed_instr_i) ? 
64'h2 : 64'h4); // calculate target address simple 64 bit addition @@ -94,14 +97,17 @@ module branch_engine ( // we mis-predicted e.g.: the predicted address is unequal to the actual address if (target_address[0] == 1'b0) begin // TODO in case of branch which is not taken it is not necessary to check for the address - if ( target_address != branch_predict_i.predict_address // we mis-predicted the address of the branch - || branch_predict_i.predict_taken != comparison_result // we mis-predicted the outcome of the branch - || branch_predict_i.valid == 1'b0 // this means branch-prediction thought it was no branch but in reality it was one + if (target_address != branch_predict_i.predict_address // we mis-predicted the address of the branch + || branch_predict_i.predict_taken != comparison_result // we mis-predicted the outcome of the branch + || branch_predict_i.valid == 1'b0 // this means branch-prediction thought it was no + // branch but in reality it was one ) begin resolved_branch_o.is_mispredict = 1'b1; end end - // the other case would be that this instruction was no branch but branchprediction thought that it was one + // to resolve the branch in ID -> only do this if this was indeed a branch (hence vald_i is asserted) + resolved_branch_o = 1'b1; + // the other case would be that this instruction was no branch but branch prediction thought that it was one // this is essentially also a mis-predict end else if (fu_valid_i && branch_predict_i.valid) begin // re-set the branch to the next PC diff --git a/src/ex_stage.sv b/src/ex_stage.sv index 5fad63dbc..d65454fda 100644 --- a/src/ex_stage.sv +++ b/src/ex_stage.sv @@ -32,24 +32,25 @@ module ex_stage #( input logic [63:0] operand_c_i, input logic [63:0] imm_i, input logic [TRANS_ID_BITS-1:0] trans_id_i, - input logic [63:0] pc_i, // PC of current instruction + input logic [63:0] pc_i, // PC of current instruction input logic is_compressed_instr_i, // we need to know if this was a compressed instruction // in order 
to calculate the next PC on a mis-predict // ALU 1 - output logic alu_ready_o, // FU is ready - input logic alu_valid_i, // Output is valid - output logic alu_valid_o, // ALU result is valid + output logic alu_ready_o, // FU is ready + input logic alu_valid_i, // Output is valid + output logic alu_valid_o, // ALU result is valid output logic [63:0] alu_result_o, - output logic [TRANS_ID_BITS-1:0] alu_trans_id_o, // ID of scoreboard entry at which to write back + output logic [TRANS_ID_BITS-1:0] alu_trans_id_o, // ID of scoreboard entry at which to write back output exception alu_exception_o, // Branches and Jumps - input logic branch_valid_i, // we are using the branch unit - input branchpredict_sbe branch_predict_i, // branch prediction in - output branchpredict resolved_branch_o, // the branch engine uses the write back from the ALU + input logic branch_valid_i, // we are using the branch unit + input branchpredict_sbe branch_predict_i, // branch prediction in + output branchpredict resolved_branch_o, // the branch engine uses the write back from the ALU + output logic resolve_branch_o, // to ID signaling that we resolved the branch // LSU - output logic lsu_ready_o, // FU is ready - input logic lsu_valid_i, // Input is valid - output logic lsu_valid_o, // Output is valid + output logic lsu_ready_o, // FU is ready + input logic lsu_valid_i, // Input is valid + output logic lsu_valid_o, // Output is valid output logic [63:0] lsu_result_o, output logic [TRANS_ID_BITS-1:0] lsu_trans_id_o, input logic lsu_commit_i, diff --git a/src/id_stage.sv b/src/id_stage.sv index 0a649cdcc..f5ba700e0 100644 --- a/src/id_stage.sv +++ b/src/id_stage.sv @@ -50,7 +50,7 @@ module id_stage #( output logic alu_valid_o, output logic branch_valid_o, // use branch prediction unit // ex just resolved our predicted branch, we are ready to accept new requests - input branchpredict resolved_branch_i, + input logic resolve_branch_i, input logic lsu_ready_i, output logic lsu_valid_o, @@ -116,17 
+116,22 @@ module id_stage #( always_comb begin : unresolved_branch unresolved_branch_n = unresolved_branch_q; // we just resolved the branch - if (resolved_branch_i.valid) begin + if (resolve_branch_i) begin unresolved_branch_n = 1'b0; end // if the instruction is valid and it is a control flow instruction - if (fetch_entry_valid_i && is_control_flow_instr && ~flush_unissued_instr_i) begin + if (fetch_entry_valid_i && is_control_flow_instr) begin unresolved_branch_n = 1'b1; end + // if we are requested to flush also flush the unresolved branch flag because either the flush + // was requested by a branch or an exception. In any case: any unresolved branch will get evicted + if (flush_unissued_instr_i || flush_i) begin + unresolved_branch_n = 1'b0; + end end // we are ready if we are not full and don't have any unresolved branches, but it can be // the case that we have an unresolved branch which is cleared in that cycle (resolved_branch_i.valid == 1) - assign ready_o = ~full && (~unresolved_branch_q || resolved_branch_i.valid); + assign ready_o = ~full && (~unresolved_branch_q || resolve_branch_i); decoder decoder_i ( .pc_i ( fetch_entry_i.address ), From 2e1047dc02837cbaf620723a2a11a4ce4ebb98f8 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Tue, 16 May 2017 15:19:42 +0200 Subject: [PATCH 41/43] :bug: Fixes in instruction aligner --- src/branch_engine.sv | 4 +- src/commit_stage.sv | 25 +++++++- src/decoder.sv | 4 +- src/fetch_fifo.sv | 142 ++++++++++++++++++++++++----------------- src/prefetch_buffer.sv | 22 +++---- test/add_test.S | 11 +--- 6 files changed, 126 insertions(+), 82 deletions(-) diff --git a/src/branch_engine.sv b/src/branch_engine.sv index 006bd2949..fcf7467f9 100644 --- a/src/branch_engine.sv +++ b/src/branch_engine.sv @@ -69,7 +69,7 @@ module branch_engine ( resolved_branch_o.valid = valid_i; resolved_branch_o.is_mispredict = 1'b0; resolved_branch_o.is_lower_16 = 1'b0; - resolved_branch_o = 1'b0; + resolve_branch_o = 1'b0; // calculate next 
PC, depending on whether the instruction is compressed or not this may be different next_pc = pc_i + ((is_compressed_instr_i) ? 64'h2 : 64'h4); // calculate target address simple 64 bit addition @@ -106,7 +106,7 @@ module branch_engine ( end end // to resolve the branch in ID -> only do this if this was indeed a branch (hence vald_i is asserted) - resolved_branch_o = 1'b1; + resolve_branch_o = 1'b1; // the other case would be that this instruction was no branch but branch prediction thought that it was one // this is essentially also a mis-predict end else if (fu_valid_i && branch_predict_i.valid) begin diff --git a/src/commit_stage.sv b/src/commit_stage.sv index 1ddd18703..420cca5ed 100644 --- a/src/commit_stage.sv +++ b/src/commit_stage.sv @@ -115,4 +115,27 @@ module commit_stage ( end end -endmodule \ No newline at end of file + `ifndef SYNTHESIS + always_ff @(posedge clk_i) begin : exception_displayer + string cause; + // we encountered an exception + // format cause + if (exception_o.valid) begin + case (exception_o.cause) + INSTR_ADDR_MISALIGNED: cause = "Instruction Address Misaligned"; + INSTR_ACCESS_FAULT: cause = "Instruction Access Fault"; + ILLEGAL_INSTR: cause = "Illegal Instruction"; + BREAKPOINT: cause = "Breakpoint"; + LD_ADDR_MISALIGNED: cause = "Load Address Misaligned"; + LD_ACCESS_FAULT: cause = "Load Access Fault"; + ST_ADDR_MISALIGNED: cause = "Store Address Misaligned"; + ST_ACCESS_FAULT: cause = "Store Access Fault"; + ENV_CALL_UMODE: cause = "Environment Call UMode"; + ENV_CALL_SMODE: cause = "Environment Call SMode"; + ENV_CALL_MMODE: cause = "Environment Call MMode"; + endcase + $display("Exception @%t, PC: %0h, TVal: %0h, Cause: %s", $time, commit_instr_i.pc, exception_o.tval, cause); + end + end + `endif +endmodule diff --git a/src/decoder.sv b/src/decoder.sv index 7523c13c7..920d52411 100644 --- a/src/decoder.sv +++ b/src/decoder.sv @@ -404,8 +404,8 @@ module decoder ( // Exception handling // -------------------------------- 
always_comb begin : exception_handling - instruction_o.ex = ex_i; - instruction_o.valid = 1'b0; + instruction_o.ex = ex_i; + instruction_o.valid = 1'b0; // look if we didn't already get an exception in any previous // stage - we should not overwrite it as we retain order regarding the exception if (~ex_i.valid && illegal_instr) begin diff --git a/src/fetch_fifo.sv b/src/fetch_fifo.sv index d0d39b4a4..232f050e5 100644 --- a/src/fetch_fifo.sv +++ b/src/fetch_fifo.sv @@ -122,33 +122,89 @@ module fetch_fifo // --------------------------------- // Input port & Instruction Aligner // --------------------------------- - if (in_valid_q && !unaligned_q) begin - // we got a valid instruction so we can satisfy the unaligned instruction - unaligned_n = 1'b0; - // check if the instruction is compressed - if (in_rdata_q[1:0] != 2'b11) begin - // it is compressed + // do we actually want the first instruction or was the address a half word access? + if (in_valid_q && in_addr_q[1] == 1'b0) begin + if (!unaligned_q) begin + // we got a valid instruction so we can satisfy the unaligned instruction + unaligned_n = 1'b0; + // check if the instruction is compressed + if (in_rdata_q[1:0] != 2'b11) begin + // it is compressed + mem_n[write_pointer_q] = { + branch_predict_q, in_addr_q, decompressed_instruction[0], 1'b1, is_illegal[0] + }; + + status_cnt++; + write_pointer++; + + // is the second instruction also compressed, like: + // _____________________________________________ + // | compressed 2 [31:16] | compressed 1[15:0] | + // |____________________________________________ + // check if the lower compressed instruction was no branch otherwise we will need to squash this instruction + // but only if we predicted it to be taken, the predict was on the lower 16 bit compressed instruction + if (in_rdata_q[17:16] != 2'b11 && !(branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16)) begin + + mem_n[write_pointer_q + 1'b1] = { + branch_predict_q, 
{in_addr_q[63:2], 2'b10}, decompressed_instruction[1], 1'b1, is_illegal[1] + }; + + status_cnt++; + write_pointer++; + // $display("Instruction: [ c | c ] @ %t", $time); + // or is it an unaligned 32 bit instruction like + // ____________________________________________________ + // |instr [15:0] | instr [31:16] | compressed 1[15:0] | + // |____________________________________________________ + end else if (!(branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16)) begin + // save the lower 16 bit + unaligned_instr_n = in_rdata_q[31:16]; + // and that it was unaligned + unaligned_n = 1'b1; + // save the address as well + unaligned_address_n = {in_addr_q[63:2], 2'b10}; + // $display("Instruction: [ i0 | c ] @ %t", $time); + // this does not consume space in the FIFO + end + end else begin + // this is a full 32 bit instruction like + // _______________________ + // | instruction [31:0] | + // |______________________ + mem_n[write_pointer_q] = { + branch_predict_q, in_addr_q, in_rdata_q, 1'b0, 1'b0 + }; + status_cnt++; + write_pointer++; + // $display("Instruction: [ i ] @ %t", $time); + end + end + // we have an outstanding unaligned instruction + if (in_valid_q && unaligned_q) begin + mem_n[write_pointer_q] = { - branch_predict_q, in_addr_q, decompressed_instruction[0], 1'b1, is_illegal[0] + branch_predict_q, unaligned_address_q, {in_rdata_q[15:0], unaligned_instr_q}, 1'b0, 1'b0 }; status_cnt++; write_pointer++; + // whats up with the other upper 16 bit of this instruction // is the second instruction also compressed, like: // _____________________________________________ - // | compressed 2 [31:16] | compressed 1[15:0] | + // | compressed 2 [31:16] | unaligned[31:16] | // |____________________________________________ // check if the lower compressed instruction was no branch otherwise we will need to squash this instruction // but only if we predicted it to be taken, the predict was on the lower 16 bit compressed instruction - if 
(in_rdata_q[17:16] != 2'b11 && !(branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16)) begin - - mem_n[write_pointer_q + 1'b1] = { + if (in_rdata_q[17:16] != 2'b11 && !(branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16)) begin + mem_n[write_pointer_q + 1'b1] = { branch_predict_q, {in_addr_q[63:2], 2'b10}, decompressed_instruction[1], 1'b1, is_illegal[1] }; status_cnt++; write_pointer++; - // $display("Instruction: [ c | c ] @ %t", $time); + // unaligned access served + unaligned_n = 1'b0; + // $display("Instruction: [ c | i1 ] @ %t", $time); // or is it an unaligned 32 bit instruction like // ____________________________________________________ // |instr [15:0] | instr [31:16] | compressed 1[15:0] | @@ -160,66 +216,36 @@ module fetch_fifo unaligned_n = 1'b1; // save the address as well unaligned_address_n = {in_addr_q[63:2], 2'b10}; - // $display("Instruction: [ i0 | c ] @ %t", $time); + // $display("Instruction: [ i0 | i1 ] @ %t", $time); // this does not consume space in the FIFO + // we've got a predicted taken branch we need to clear the unaligned flag if it was decoded as a lower 16 instruction + end else if (branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16) begin + // the next fetch will start from a 4 byte boundary again + unaligned_n = 1'b0; end - end else begin - // this is a full 32 bit instruction like - // _______________________ - // | instruction [31:0] | - // |______________________ - mem_n[write_pointer_q] = { - branch_predict_q, in_addr_q, in_rdata_q, 1'b0, 1'b0 - }; - status_cnt++; - write_pointer++; - // $display("Instruction: [ i ] @ %t", $time); end - end - // we have an outstanding unaligned instruction - if (in_valid_q && unaligned_q) begin - - mem_n[write_pointer_q] = { - branch_predict_q, unaligned_address_q, {in_rdata_q[15:0], unaligned_instr_q}, 1'b0, 1'b0 - }; - - status_cnt++; - write_pointer++; - // whats up with the 
other upper 16 bit of this instruction - // is the second instruction also compressed, like: - // _____________________________________________ - // | compressed 2 [31:16] | unaligned[31:16] | - // |____________________________________________ - // check if the lower compressed instruction was no branch otherwise we will need to squash this instruction - // but only if we predicted it to be taken, the predict was on the lower 16 bit compressed instruction - if (in_rdata_q[17:16] != 2'b11 && !(branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16)) begin - mem_n[write_pointer_q + 1'b1] = { - branch_predict_q, {in_addr_q[63:2], 2'b10}, decompressed_instruction[1], 1'b1, is_illegal[1] + end else if (in_valid_q && in_addr_q[1] == 1'b1) begin // address was a half word access + // reset the unaligned flag as this is a completely new fetch (because consecutive fetches only happen on a word basis) + unaligned_n = 1'b0; + // this is a compressed instruction + if (in_rdata_q[17:16] != 2'b11) begin + // it is compressed + mem_n[write_pointer_q] = { + branch_predict_q, in_addr_q, decompressed_instruction[1], 1'b1, is_illegal[1] }; status_cnt++; write_pointer++; - // unaligned access served - unaligned_n = 1'b0; - // $display("Instruction: [ c | i1 ] @ %t", $time); - // or is it an unaligned 32 bit instruction like - // ____________________________________________________ - // |instr [15:0] | instr [31:16] | compressed 1[15:0] | - // |____________________________________________________ - end else if (!(branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16)) begin - // save the lower 16 bit + end else begin // this is the first part of a 32 bit unaligned instruction + // save the lower 16 bit unaligned_instr_n = in_rdata_q[31:16]; // and that it was unaligned unaligned_n = 1'b1; // save the address as well unaligned_address_n = {in_addr_q[63:2], 2'b10}; - // $display("Instruction: [ i0 | i1 ] @ %t", $time); 
// this does not consume space in the FIFO - // we've got a predicted taken branch we need to clear the unaligned flag if it was decoded as a lower 16 instruction - end else if (branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16) begin - // the next fetch will start from a 4 byte boundary again - unaligned_n = 1'b0; end + // there can never be a whole 32 bit instruction on a half word access end // ------------- diff --git a/src/prefetch_buffer.sv b/src/prefetch_buffer.sv index afd905d35..75b92f0fd 100644 --- a/src/prefetch_buffer.sv +++ b/src/prefetch_buffer.sv @@ -51,6 +51,7 @@ module prefetch_buffer enum logic [1:0] {IDLE, WAIT_GNT, WAIT_RVALID, WAIT_ABORTED } CS, NS; + logic [63:0] fetch_address; logic addr_valid; logic [63:0] instr_addr_q; logic fifo_valid; @@ -63,7 +64,7 @@ module prefetch_buffer // we are busy if we are either waiting for a grant // or if the fifo is full assign busy_o = (CS inside {WAIT_GNT, WAIT_ABORTED} && !instr_req_o) || !fifo_ready; - + assign fetch_address = {fetch_address_i[63:2], 2'b0}; //--------------------------------- // Fetch FIFO // consumes addresses and rdata @@ -84,18 +85,16 @@ module prefetch_buffer // Instruction fetch FSM // deals with instruction memory / instruction cache //-------------------------------------------------- - - always_comb - begin + always_comb begin instr_req_o = 1'b0; - instr_addr_o = fetch_address_i; + instr_addr_o = fetch_address; fifo_valid = 1'b0; NS = CS; unique case(CS) // default state, not waiting for requested data IDLE: begin - instr_addr_o = fetch_address_i; + instr_addr_o = fetch_address; instr_req_o = 1'b0; // make a new request @@ -118,7 +117,7 @@ module prefetch_buffer // we sent a request but did not yet get a grant WAIT_GNT: begin - instr_addr_o = instr_addr_q; + instr_addr_o = {instr_addr_q[63:2], 2'b0}; instr_req_o = 1'b1; if(instr_gnt_i) @@ -133,7 +132,7 @@ module prefetch_buffer // we wait for rvalid, after that we are ready to serve a new 
request WAIT_RVALID: begin - instr_addr_o = fetch_address_i; + instr_addr_o = fetch_address; // prepare for next request if (fifo_ready && fetch_valid_i) begin // wait for the valid signal @@ -172,9 +171,10 @@ module prefetch_buffer // our last request was aborted, but we didn't yet get a rvalid and // there was no new request sent yet we assume that req_i is set to high WAIT_ABORTED: begin - instr_addr_o = fetch_address_i; - + instr_addr_o = {fetch_address_i[63:2], 2'b0}; if (instr_rvalid_i) begin + // we are aborting this instruction so don't tell the FIFO it is valid + fifo_valid = 1'b0; instr_req_o = 1'b1; if (instr_gnt_i) begin @@ -209,7 +209,7 @@ module prefetch_buffer end else begin CS <= NS; if (addr_valid) begin - instr_addr_q <= instr_addr_o; + instr_addr_q <= fetch_address_i; branchpredict_q <= branch_predict_i; end end diff --git a/test/add_test.S b/test/add_test.S index 19d368e3f..e72d31922 100755 --- a/test/add_test.S +++ b/test/add_test.S @@ -13,17 +13,12 @@ add x9, x7, x8 csrr x1, mstatus nop -L0: nop - nop - nop - jal L1 - nop - nop -L1: nop +L0: jal x0, L1 nop nop nop - jal L0 + nop +L1: jal x0, L0 nop nop addi x1, x0, 55 \ No newline at end of file From d601cef6c67d800e6e5409fd60fc608fb9e7318e Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Tue, 16 May 2017 15:21:43 +0200 Subject: [PATCH 42/43] Retrigger CI build From cc421d8fa56cee98d7f4577bf1432d2a22360f6b Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Tue, 16 May 2017 18:34:30 +0200 Subject: [PATCH 43/43] Add very preliminary instruction tracer --- Makefile | 8 +- src/ariane.sv | 28 +++ src/util/instruction_trace_item.svh | 311 ++++++++++++++++++++++++ src/util/instruction_tracer.svh | 78 ++++++ src/util/instruction_tracer_defines.svh | 73 ++++++ src/util/instruction_tracer_if.sv | 48 ++++ src/util/instruction_tracer_pkg.sv | 25 ++ 7 files changed, 569 insertions(+), 2 deletions(-) create mode 100755 src/util/instruction_trace_item.svh create mode 100755 
src/util/instruction_tracer.svh create mode 100755 src/util/instruction_tracer_defines.svh create mode 100755 src/util/instruction_tracer_if.sv create mode 100755 src/util/instruction_tracer_pkg.sv diff --git a/Makefile b/Makefile index 8af73bebc..9c14cf42b 100644 --- a/Makefile +++ b/Makefile @@ -7,6 +7,9 @@ library = work # Top level module to compile top_level = core_tb test_top_level = core_tb + +# utility modules +util = $(wildcard src/util/*.sv) # test targets tests = alu scoreboard fifo mem_arbiter store_queue lsu core fetch_fifo # UVM agents @@ -21,9 +24,9 @@ sequences = $(wildcard tb/sequences/*/*.sv) test_pkg = $(wildcard tb/test/*/*sequence_pkg.sv) $(wildcard tb/test/*/*_pkg.sv) # this list contains the standalone components -src = $(wildcard src/util/*.sv) $(wildcard src/*.sv) +src = $(wildcard src/*.sv) # look for testbenches -tbs = $(wildcard tb/*_tb.sv) +tbs = $(wildcard tb/*_tb.sv) # Search here for include files (e.g.: non-standalone components) incdir = ./includes @@ -46,6 +49,7 @@ $(library): # Build the TB and module using QuestaSim build: $(library) build-agents build-interfaces # Suppress message that always_latch may not be checked thoroughly by QuestaSim. + vlog${questa_version} ${compile_flag} -incr ${util} ${list_incdir} -suppress 2583 # Compile agents, interfaces and environments vlog${questa_version} ${compile_flag} -incr ${envs} ${sequences} ${test_pkg} ${list_incdir} -suppress 2583 # Compile source files diff --git a/src/ariane.sv b/src/ariane.sv index ba435b821..6c5a115a5 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -18,6 +18,9 @@ // University of Bologna. 
// import ariane_pkg::*; +`ifndef SYNTHESIS +import instruction_tracer_pkg::*; +`endif module ariane #( @@ -428,7 +431,32 @@ module ariane .resolved_branch_i ( resolved_branch ), .* ); + // ------------------- + // Instruction Tracer + // ------------------- + `ifndef SYNTHESIS + instruction_tracer_if tracer_if (clk_i); + // assign instruction tracer interface + assign tracer_if.rstn = rst_ni; + assign tracer_if.commit_instr = commit_instr_id_commit; + assign tracer_if.commit_ack = commit_ack_commit_id; + assign tracer_if.fetch = fetch_entry_if_id; + assign tracer_if.fetch_valid = fetch_valid_if_id; + assign tracer_if.fetch_ack = decode_ack_id_if; + assign tracer_if.waddr = waddr_a_commit_id; + assign tracer_if.wdata = wdata_a_commit_id; + assign tracer_if.we = we_a_commit_id; + program instr_tracer (instruction_tracer_if tracer_if); + instruction_tracer it = new (tracer_if); + + initial begin + it.trace(); + end + endprogram + + instr_tracer instr_tracer_i (tracer_if); + `endif always_ff @(posedge clk_i or negedge rst_ni) begin if(~rst_ni) begin diff --git a/src/util/instruction_trace_item.svh b/src/util/instruction_trace_item.svh new file mode 100755 index 000000000..c28bb5188 --- /dev/null +++ b/src/util/instruction_trace_item.svh @@ -0,0 +1,311 @@ +class instruction_trace_item; + time simtime; + integer cycles; + logic [31:0] pc; + logic [31:0] instr; + string str; + + function new (); + + endfunction + + function string regAddrToStr(logic [5:0] addr); + return $sformatf("x%0d", addr); + endfunction + + function string printInstr(logic [63:0] instr); + string s; + + casex (instr) + // Aliases + 32'h00_00_00_13: s = this.printMnemonic("nop"); + // Regular opcodes + INSTR_LUI: s = this.printUInstr("lui"); + INSTR_AUIPC: s = this.printUInstr("auipc"); + INSTR_JAL: s = this.printUJInstr("jal"); + INSTR_JALR: s = this.printIInstr("jalr"); + // BRANCH + INSTR_BEQ: s = this.printSBInstr("beq"); + INSTR_BNE: s = this.printSBInstr("bne"); + INSTR_BLT: s = 
this.printSBInstr("blt"); + INSTR_BGE: s = this.printSBInstr("bge"); + INSTR_BLTU: s = this.printSBInstr("bltu"); + INSTR_BGEU: s = this.printSBInstr("bgeu"); + // OPIMM + INSTR_ADDI: s = this.printIInstr("addi"); + INSTR_SLTI: s = this.printIInstr("slti"); + INSTR_SLTIU: s = this.printIInstr("sltiu"); + INSTR_XORI: s = this.printIInstr("xori"); + INSTR_ORI: s = this.printIInstr("ori"); + INSTR_ANDI: s = this.printIInstr("andi"); + INSTR_SLLI: s = this.printIuInstr("slli"); + INSTR_SRLI: s = this.printIuInstr("srli"); + INSTR_SRAI: s = this.printIuInstr("srai"); + // OP + INSTR_ADD: s = this.printRInstr("add"); + INSTR_SUB: s = this.printRInstr("sub"); + INSTR_SLL: s = this.printRInstr("sll"); + INSTR_SLT: s = this.printRInstr("slt"); + INSTR_SLTU: s = this.printRInstr("sltu"); + INSTR_XOR: s = this.printRInstr("xor"); + INSTR_SRL: s = this.printRInstr("srl"); + INSTR_SRA: s = this.printRInstr("sra"); + INSTR_OR: s = this.printRInstr("or"); + INSTR_AND: s = this.printRInstr("and"); + // FENCE + INSTR_FENCE: s = this.printMnemonic("fence"); + INSTR_FENCEI: s = this.printMnemonic("fencei"); + // SYSTEM (CSR man ipulation) + INSTR_CSRRW: s = this.printCSRInstr("csrrw"); + INSTR_CSRRS: s = this.printCSRInstr("csrrs"); + INSTR_CSRRC: s = this.printCSRInstr("csrrc"); + INSTR_CSRRWI: s = this.printCSRInstr("csrrwi"); + INSTR_CSRRSI: s = this.printCSRInstr("csrrsi"); + INSTR_CSRRCI: s = this.printCSRInstr("csrrci"); + // SYSTEM (others) + INSTR_ECALL: s = this.printMnemonic("ecall"); + INSTR_EBREAK: s = this.printMnemonic("ebreak"); + INSTR_ERET: s = this.printMnemonic("eret"); + INSTR_WFI: s = this.printMnemonic("wfi"); + // opcodes with custom decoding + {25'b?, OPCODE_LOAD}: s = this.printLoadInstr(); + {25'b?, OPCODE_STORE}: s = this.printStoreInstr(); + default: s = this.printMnemonic("INVALID"); + endcase + + return s; + // $fwrite(f, "%t %15d %h %h %-36s", simtime, + // cycles, + // pc, + // instr, + // str); + + // foreach(regs_write[i]) begin + // if 
(regs_write[i].addr != 0) + // $fwrite(f, " %s=%08x", regAddrToStr(regs_write[i].addr), regs_write[i].value); + // end + + // foreach(regs_read[i]) begin + // if (regs_read[i].addr != 0) + // $fwrite(f, " %s:%08x", regAddrToStr(regs_read[i].addr), regs_read[i].value); + // end + + // if (mem_access.size() > 0) begin + // mem_acc = mem_access.pop_front(); + + // $fwrite(f, " PA:%08x", mem_acc.addr); + // end + + // $fwrite(f, "\n"); + endfunction + + function string printMnemonic(input string mnemonic); + return mnemonic; + endfunction // printMnemonic + + function string printRInstr(input string mnemonic); + // return $sformatf("%-16s x%0d, x%0d, x%0d", mnemonic, rd, rs1, rs2); + return mnemonic; + endfunction // printRInstr + + function string printIInstr(input string mnemonic); + // begin + // regs_read.push_back('{rs1, rs1_value}); + // regs_write.push_back('{rd, 'x}); + // str = $sformatf("%-16s x%0d, x%0d, %0d", mnemonic, rd, rs1, $signed(imm_i_type)); + // end + return mnemonic; + endfunction // printIInstr + + function string printIuInstr(input string mnemonic); + // begin + // regs_read.push_back('{rs1, rs1_value}); + // regs_write.push_back('{rd, 'x}); + // str = $sformatf("%-16s x%0d, x%0d, 0x%0x", mnemonic, rd, rs1, imm_i_type); + // end + return mnemonic; + endfunction // printIuInstr + + function string printSBInstr(input string mnemonic); + // begin + // regs_read.push_back('{rs1, rs1_value}); + // regs_write.push_back('{rd, 'x}); + // str = $sformatf("%-16s x%0d, x%0d, 0x%0x", mnemonic, rd, rs1, imm_i_type); + // end + return mnemonic; + endfunction // printIuInstr + + function string printUInstr(input string mnemonic); + // begin + // regs_write.push_back('{rd, 'x}); + // str = $sformatf("%-16s x%0d, 0x%0h", mnemonic, rd, {imm_u_type[31:12], 12'h000}); + // end + return mnemonic; + endfunction // printUInstr + + function string printUJInstr(input string mnemonic); + // begin + // regs_write.push_back('{rd, 'x}); + // str = $sformatf("%-16s x%0d, 
%0d", mnemonic, rd, $signed(imm_uj_type));
+    // end
+        return mnemonic;
+    endfunction // printUJInstr
+
+    function string printCSRInstr(input string mnemonic); // stub: operand formatting below is commented out, the mnemonic is returned unchanged
+    //   logic [11:0] csr;
+    // begin
+    //   csr = instr[31:20];
+
+    //   regs_write.push_back('{rd, 'x});
+
+    //   if (instr[14] == 1'b0) begin
+    //     regs_read.push_back('{rs1, rs1_value});
+    //     str = $sformatf("%-16s x%0d, x%0d, 0x%h", mnemonic, rd, rs1, csr);
+    //   end else begin
+    //     str = $sformatf("%-16s x%0d, 0x%h, 0x%h", mnemonic, rd, imm_z_type, csr);
+    //   end
+    // end
+        return mnemonic;
+    endfunction // printCSRInstr
+
+    function string printLoadInstr(); // stub: the load decoding below is commented out, always returns ""
+    //   string mnemonic;
+    //   logic [2:0] size;
+    // begin
+    //   // detect reg-reg load and find size
+    //   size = instr[14:12];
+    //   if (instr[14:12] == 3'b111)
+    //     size = instr[30:28];
+
+    //   case (size)
+    //     3'b000: mnemonic = "lb";
+    //     3'b001: mnemonic = "lh";
+    //     3'b010: mnemonic = "lw";
+    //     3'b100: mnemonic = "lbu";
+    //     3'b101: mnemonic = "lhu";
+    //     3'b110: mnemonic = "p.elw";
+    //     3'b011,
+    //     3'b111: begin
+    //       printMnemonic("INVALID");
+    //       return;
+    //     end
+    //   endcase
+
+    //   regs_write.push_back('{rd, 'x});
+
+    //   if (instr[14:12] != 3'b111) begin
+    //     // regular load
+    //     if (instr[6:0] != OPCODE_LOAD_POST) begin
+    //       regs_read.push_back('{rs1, rs1_value});
+    //       str = $sformatf("%-16s x%0d, %0d(x%0d)", mnemonic, rd, $signed(imm_i_type), rs1);
+    //     end else begin
+    //       regs_read.push_back('{rs1, rs1_value});
+    //       regs_write.push_back('{rs1, 'x});
+    //       str = $sformatf("p.%-14s x%0d, %0d(x%0d!)", mnemonic, rd, $signed(imm_i_type), rs1);
+    //     end
+    //   end else begin
+    //     // reg-reg load
+    //     if (instr[6:0] != OPCODE_LOAD_POST) begin
+    //       regs_read.push_back('{rs2, rs2_value});
+    //       regs_read.push_back('{rs1, rs1_value});
+    //       str = $sformatf("%-16s x%0d, x%0d(x%0d)", mnemonic, rd, rs2, rs1);
+    //     end else begin
+    //       regs_read.push_back('{rs2, rs2_value});
+    //       regs_read.push_back('{rs1, rs1_value});
+    //       regs_write.push_back('{rs1, 'x});
+    //       str = $sformatf("p.%-14s x%0d, x%0d(x%0d!)", mnemonic, rd, rs2, rs1);
+    //     end
+    //   end
+    // end
+        return "";
+    endfunction // printLoadInstr
+
+    function string printStoreInstr(); // stub: the store decoding below is commented out, always returns ""
+    //   string mnemonic;
+    // begin
+
+    //   case (instr[13:12])
+    //     2'b00: mnemonic = "sb";
+    //     2'b01: mnemonic = "sh";
+    //     2'b10: mnemonic = "sw";
+    //     2'b11: begin
+    //       printMnemonic("INVALID");
+    //       return;
+    //     end
+    //   endcase
+
+    //   if (instr[14] == 1'b0) begin
+    //     // regular store
+    //     if (instr[6:0] != OPCODE_STORE_POST) begin
+    //       regs_read.push_back('{rs2, rs2_value});
+    //       regs_read.push_back('{rs1, rs1_value});
+    //       str = $sformatf("%-16s x%0d, %0d(x%0d)", mnemonic, rs2, $signed(imm_s_type), rs1);
+    //     end else begin
+    //       regs_read.push_back('{rs2, rs2_value});
+    //       regs_read.push_back('{rs1, rs1_value});
+    //       regs_write.push_back('{rs1, 'x});
+    //       str = $sformatf("p.%-14s x%0d, %0d(x%0d!)", mnemonic, rs2, $signed(imm_s_type), rs1);
+    //     end
+    //   end else begin
+    //     // reg-reg store
+    //     if (instr[6:0] != OPCODE_STORE_POST) begin
+    //       regs_read.push_back('{rs2, rs2_value});
+    //       regs_read.push_back('{rs3, rs3_value});
+    //       regs_read.push_back('{rs1, rs1_value});
+    //       str = $sformatf("p.%-14s x%0d, x%0d(x%0d)", mnemonic, rs2, rs3, rs1);
+    //     end else begin
+    //       regs_read.push_back('{rs2, rs2_value});
+    //       regs_read.push_back('{rs3, rs3_value});
+    //       regs_read.push_back('{rs1, rs1_value});
+    //       regs_write.push_back('{rs1, 'x});
+    //       str = $sformatf("p.%-14s x%0d, x%0d(x%0d!)", mnemonic, rs2, rs3, rs1);
+    //     end
+    //   end
+    // end
+        return "";
+    endfunction // printStoreInstr
+
+    function string printMulInstr(); // stub: the multiply decoding below is commented out, always returns ""
+    //   string mnemonic;
+    //   string str_suf;
+    //   string str_imm;
+    //   string str_asm;
+    // begin
+
+    //   // always read rs1 and rs2 and write rd
+    //   regs_read.push_back('{rs1, rs1_value});
+    //   regs_read.push_back('{rs2, rs2_value});
+    //   regs_write.push_back('{rd, 'x});
+
+    //   if (instr[12])
+    //     regs_read.push_back('{rd, rs3_value});
+
+    //   case ({instr[31:30], instr[14]})
+    //     3'b000: str_suf = "u";
+    //     3'b001: str_suf = "uR";
+    //     3'b010: str_suf = "hhu";
+    //     3'b011: str_suf = "hhuR";
+    //     3'b100: str_suf = "s";
+    //     3'b101: str_suf = "sR";
+    //     3'b110: str_suf = "hhs";
+    //     3'b111: str_suf = "hhsR";
+    //   endcase
+
+    //   if (instr[12])
+    //     mnemonic = "p.mac";
+    //   else
+    //     mnemonic = "p.mul";
+
+    //   if (imm_s3_type[4:0] != 5'b00000)
+    //     str_asm = $sformatf("%s%sN", mnemonic, str_suf);
+    //   else
+    //     str_asm = $sformatf("%s%s", mnemonic, str_suf);
+
+    //   if (instr[29:25] != 5'b00000)
+    //     str = $sformatf("%-16s x%0d, x%0d, x%0d, %0d", str_asm, rd, rs1, rs2, $unsigned(imm_s3_type[4:0]));
+    //   else
+    //     str = $sformatf("%-16s x%0d, x%0d, x%0d", str_asm, rd, rs1, rs2);
+    // end
+        return "";
+    endfunction // printMulInstr
+endclass
\ No newline at end of file
diff --git a/src/util/instruction_tracer.svh b/src/util/instruction_tracer.svh
new file mode 100755
index 000000000..3f428470e
--- /dev/null
+++ b/src/util/instruction_tracer.svh
@@ -0,0 +1,78 @@
+// Author: Florian Zaruba, ETH Zurich
+// Date: 16.05.2017
+// Description: Instruction Tracer Main Class
+//
+// Copyright (C) 2017 ETH Zurich, University of Bologna
+// All rights reserved.
+//
+// This code is under development and not yet released to the public.
+// Until it is released, the code is under the copyright of ETH Zurich and
+// the University of Bologna, and may contain confidential and/or unpublished
+// work. Any reuse/redistribution is strictly forbidden without written
+// permission from ETH Zurich.
+//
+// Bug fixes and contributions will eventually be released under the
+// SolderPad open hardware license in the context of the PULP platform
+// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
+// University of Bologna.
+//
+
+class instruction_tracer; // passive monitor: samples the core once per clock through tracer_if.pck and never drives it
+
+    // interface to the core
+    virtual instruction_tracer_if tracer_if;
+
+    // keep the decoded instructions in a queue (NOTE(review): pushed in trace() but never popped in this class)
+    fetch_entry decode_queue [$];
+    // shadow copy of the register file: one entry per value of the 5-bit waddr (0..31); entry 0 is never written
+    logic [63:0] reg_file [32];
+    // 64 bit clock tick count
+    longint unsigned clk_ticks;
+
+    function new(virtual instruction_tracer_if tracer_if); // bind the tracer to the interface it observes
+        this.tracer_if = tracer_if;
+    endfunction : new
+
+    task trace(); // endless monitor loop (forever): one iteration per sampled clock edge, never returns
+        fetch_entry issue_instruction;
+        forever begin
+            // new cycle, we are only interested if reset is de-asserted
+            @(tracer_if.pck iff tracer_if.pck.rstn);
+            clk_ticks++;
+
+            // We are decoding an instruction: fetch entry is valid and acknowledged in the same cycle
+            if (tracer_if.pck.fetch_valid && tracer_if.pck.fetch_ack) begin
+                decode_queue.push_back(tracer_if.pck.fetch);
+                issue_instruction = fetch_entry'(tracer_if.pck.fetch);
+                printInstr(issue_instruction.instruction);
+            end
+            // we are committing an instruction (commit tracing is still disabled)
+
+            // if (tracer_if.pck.commit_instr.valid) begin
+            //     $display("Committing: %0h", tracer_if.pck.commit_instr);
+            // end
+
+            // write back: mirror the register write into the shadow copy, writes to address 0 are skipped
+            if (tracer_if.pck.we && tracer_if.pck.waddr != 5'b0) begin
+                reg_file[tracer_if.pck.waddr] = tracer_if.pck.wdata;
+            end
+
+        end
+
+    endtask : trace
+
+    function void flushIssue (); // placeholder, intentionally empty for now
+
+    endfunction : flushIssue
+
+    function void flush (); // placeholder, intentionally empty for now
+
+    endfunction : flush
+
+    function void printInstr(logic [63:0] instr); // print the string a fresh instruction_trace_item produces for instr
+        instruction_trace_item iti = new;
+        $display(iti.printInstr(instr));
+
+    endfunction : printInstr
+
+endclass : instruction_tracer
\ No newline at end of file
diff --git a/src/util/instruction_tracer_defines.svh b/src/util/instruction_tracer_defines.svh
new file mode 100755
index 000000000..90f966573
--- /dev/null
+++ b/src/util/instruction_tracer_defines.svh
@@ -0,0 +1,73 @@
+// Author: Florian Zaruba, ETH Zurich
+// Date: 16.05.2017
+// Description: Instruction Tracer Defines
+//
+// Copyright (C) 2017 ETH Zurich, University of Bologna
+// All rights reserved.
+//
+// This code is under development and not yet released to the public.
+// Until it is released, the code is under the copyright of ETH Zurich and
+// the University of Bologna, and may contain confidential and/or unpublished
+// work. Any reuse/redistribution is strictly forbidden without written
+// permission from ETH Zurich.
+//
+// Bug fixes and contributions will eventually be released under the
+// SolderPad open hardware license in the context of the PULP platform
+// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
+// University of Bologna.
+//
+
+parameter INSTR_LUI     = { 25'b?, OPCODE_LUI };                 // '?' bits are don't-cares; presumably matched with casez / ==? by the trace item - TODO confirm
+parameter INSTR_AUIPC   = { 25'b?, OPCODE_AUIPC };
+parameter INSTR_JAL     = { 25'b?, OPCODE_JAL };
+parameter INSTR_JALR    = { 17'b?, 3'b000, 5'b?, OPCODE_JALR };
+// BRANCH (the fixed 3-bit field selects the comparison)
+parameter INSTR_BEQ     = { 17'b?, 3'b000, 5'b?, OPCODE_BRANCH };
+parameter INSTR_BNE     = { 17'b?, 3'b001, 5'b?, OPCODE_BRANCH };
+parameter INSTR_BLT     = { 17'b?, 3'b100, 5'b?, OPCODE_BRANCH };
+parameter INSTR_BGE     = { 17'b?, 3'b101, 5'b?, OPCODE_BRANCH };
+parameter INSTR_BLTU    = { 17'b?, 3'b110, 5'b?, OPCODE_BRANCH };
+parameter INSTR_BGEU    = { 17'b?, 3'b111, 5'b?, OPCODE_BRANCH };
+
+// OPIMM (shifts fix only the upper 6 bits: on this 64-bit core the shift amount is 6 bits wide, so bit 25 must stay a wildcard)
+parameter INSTR_ADDI    = { 17'b?, 3'b000, 5'b?, OPCODE_OPIMM };
+parameter INSTR_SLTI    = { 17'b?, 3'b010, 5'b?, OPCODE_OPIMM };
+parameter INSTR_SLTIU   = { 17'b?, 3'b011, 5'b?, OPCODE_OPIMM };
+parameter INSTR_XORI    = { 17'b?, 3'b100, 5'b?, OPCODE_OPIMM };
+parameter INSTR_ORI     = { 17'b?, 3'b110, 5'b?, OPCODE_OPIMM };
+parameter INSTR_ANDI    = { 17'b?, 3'b111, 5'b?, OPCODE_OPIMM };
+parameter INSTR_SLLI    = { 6'b000000, 11'b?, 3'b001, 5'b?, OPCODE_OPIMM };
+parameter INSTR_SRLI    = { 6'b000000, 11'b?, 3'b101, 5'b?, OPCODE_OPIMM };
+parameter INSTR_SRAI    = { 6'b010000, 11'b?, 3'b101, 5'b?, OPCODE_OPIMM };
+// OP (register-register: leading 7-bit field and 3-bit field are both fixed)
+parameter INSTR_ADD     = { 7'b0000000, 10'b?, 3'b000, 5'b?, OPCODE_OP };
+parameter INSTR_SUB     = { 7'b0100000, 10'b?, 3'b000, 5'b?, OPCODE_OP };
+parameter INSTR_SLL     = { 7'b0000000, 10'b?, 3'b001, 5'b?, OPCODE_OP };
+parameter INSTR_SLT     = { 7'b0000000, 10'b?, 3'b010, 5'b?, OPCODE_OP };
+parameter INSTR_SLTU    = { 7'b0000000, 10'b?, 3'b011, 5'b?, OPCODE_OP };
+parameter INSTR_XOR     = { 7'b0000000, 10'b?, 3'b100, 5'b?, OPCODE_OP };
+parameter INSTR_SRL     = { 7'b0000000, 10'b?, 3'b101, 5'b?, OPCODE_OP };
+parameter INSTR_SRA     = { 7'b0100000, 10'b?, 3'b101, 5'b?, OPCODE_OP };
+parameter INSTR_OR      = { 7'b0000000, 10'b?, 3'b110, 5'b?, OPCODE_OP };
+parameter INSTR_AND     = { 7'b0000000, 10'b?, 3'b111, 5'b?, OPCODE_OP };
+// FENCE
+parameter INSTR_FENCE   = { 4'b0, 8'b?, 13'b0, OPCODE_FENCE };
+parameter INSTR_FENCEI  = { 17'b0, 3'b001, 5'b0, OPCODE_FENCE };
+// SYSTEM (NOTE(review): the ERET/WFI immediates 0x100/0x102 look like older privileged-spec encodings - confirm against the spec version the core targets)
+parameter INSTR_CSRRW   = { 17'b?, 3'b001, 5'b?, OPCODE_SYSTEM };
+parameter INSTR_CSRRS   = { 17'b?, 3'b010, 5'b?, OPCODE_SYSTEM };
+parameter INSTR_CSRRC   = { 17'b?, 3'b011, 5'b?, OPCODE_SYSTEM };
+parameter INSTR_CSRRWI  = { 17'b?, 3'b101, 5'b?, OPCODE_SYSTEM };
+parameter INSTR_CSRRSI  = { 17'b?, 3'b110, 5'b?, OPCODE_SYSTEM };
+parameter INSTR_CSRRCI  = { 17'b?, 3'b111, 5'b?, OPCODE_SYSTEM };
+parameter INSTR_ECALL   = { 12'b000000000000, 13'b0, OPCODE_SYSTEM };
+parameter INSTR_EBREAK  = { 12'b000000000001, 13'b0, OPCODE_SYSTEM };
+parameter INSTR_ERET    = { 12'b000100000000, 13'b0, OPCODE_SYSTEM };
+parameter INSTR_WFI     = { 12'b000100000010, 13'b0, OPCODE_SYSTEM };
+
+// RV32M (only the 3-bit codes 000 and 100..111 are listed; 001..011 have no pattern here)
+parameter INSTR_PMUL    = { 7'b0000001, 10'b?, 3'b000, 5'b?, OPCODE_OP };
+parameter INSTR_DIV     = { 7'b0000001, 10'b?, 3'b100, 5'b?, OPCODE_OP };
+parameter INSTR_DIVU    = { 7'b0000001, 10'b?, 3'b101, 5'b?, OPCODE_OP };
+parameter INSTR_REM     = { 7'b0000001, 10'b?, 3'b110, 5'b?, OPCODE_OP };
+parameter INSTR_REMU    = { 7'b0000001, 10'b?, 3'b111, 5'b?, OPCODE_OP };
\ No newline at end of file
diff --git a/src/util/instruction_tracer_if.sv b/src/util/instruction_tracer_if.sv
new file mode 100755
index 000000000..0c6219e4b
--- /dev/null
+++ b/src/util/instruction_tracer_if.sv
@@ -0,0 +1,48 @@
+// Author: Florian Zaruba, ETH Zurich
+// Date: 16.05.2017
+// Description: Instruction Tracer Interface
+//
+// Copyright (C) 2017 ETH
Zurich, University of Bologna
+// All rights reserved.
+//
+// This code is under development and not yet released to the public.
+// Until it is released, the code is under the copyright of ETH Zurich and
+// the University of Bologna, and may contain confidential and/or unpublished
+// work. Any reuse/redistribution is strictly forbidden without written
+// permission from ETH Zurich.
+//
+// Bug fixes and contributions will eventually be released under the
+// SolderPad open hardware license in the context of the PULP platform
+// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
+// University of Bologna.
+//
+import ariane_pkg::*;
+`ifndef INSTR_TRACER_IF_SV
+`define INSTR_TRACER_IF_SV
+interface instruction_tracer_if ( // signal bundle the instruction tracer observes; all signals are sampled through clocking block pck
+        input clk
+    );
+    logic            rstn;         // the tracer only evaluates cycles in which this is high (reset de-asserted)
+    logic            flush_issue;  // not read by the tracer class yet
+    logic            flush;        // not read by the tracer class yet
+    // decode: an entry is traced in a cycle where fetch_valid and fetch_ack are both high
+    fetch_entry      fetch;
+    logic            fetch_valid;
+    logic            fetch_ack;
+
+    // WB stage: register write port mirrored by the tracer (5-bit address, 64-bit data, write enable)
+    logic [4:0]      waddr;
+    logic [63:0]     wdata;
+    logic            we;
+
+    // commit stage
+    scoreboard_entry commit_instr; // commit instruction
+    logic            commit_ack;
+
+    // the tracer has a purely passive interface: every signal is a clocking-block input, nothing is driven through it
+    clocking pck @(posedge clk);
+        input rstn, flush_issue, flush, fetch, fetch_valid, fetch_ack, waddr, wdata, we, commit_instr, commit_ack;
+    endclocking
+
+endinterface
+`endif
\ No newline at end of file
diff --git a/src/util/instruction_tracer_pkg.sv b/src/util/instruction_tracer_pkg.sv
new file mode 100755
index 000000000..4add98817
--- /dev/null
+++ b/src/util/instruction_tracer_pkg.sv
@@ -0,0 +1,25 @@
+// Author: Florian Zaruba, ETH Zurich
+// Date: 16.05.2017
+// Description: Instruction Tracer Package
+//
+// Copyright (C) 2017 ETH Zurich, University of Bologna
+// All rights reserved.
+//
+// This code is under development and not yet released to the public.
+// Until it is released, the code is under the copyright of ETH Zurich and
+// the University of Bologna, and may contain confidential and/or unpublished
+// work.
Any reuse/redistribution is strictly forbidden without written
+// permission from ETH Zurich.
+//
+// Bug fixes and contributions will eventually be released under the
+// SolderPad open hardware license in the context of the PULP platform
+// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
+// University of Bologna.
+//
+package instruction_tracer_pkg; // bundles the tracer sources into one package; the include order below matters
+    import ariane_pkg::*; // the included files reference OPCODE_*, fetch_entry and scoreboard_entry, presumably declared here - TODO confirm
+
+    `include "instruction_tracer_defines.svh" // INSTR_* bit patterns; kept first (presumably used by the trace item - TODO confirm)
+    `include "instruction_trace_item.svh"     // trace item class; must precede the tracer, which instantiates instruction_trace_item
+    `include "instruction_tracer.svh"         // instruction_tracer main class
+endpackage
\ No newline at end of file