Patch: single-output barrel shifter / two-bit ALU result select, and store data carried
through the load-store unit instead of the write-back forwarding interface.

What the hunks below do:
  * alu_unit, barrel_shifter, pre_decode, taiga_types:
      the left/right shifter outputs and the alu_inputs.op result mux are replaced by one
      shifter output selected with alu_inputs.shifter_path / alu_inputs.slt_path.
  * csr_regs:
      inst_ret_inc becomes the sum of instruction_complete and instruction_issued_no_rd;
      selected_csr_r keeps its value when read_regs is low (the "else ... <= 0" is removed).
  * decode_and_issue, load_store_unit, write_back:
      store data is passed as ls_inputs.rs2; the store_forwarding interface and the
      store_issued_with_data/store_data ports are removed; the load-store unit forwards
      from prev_load (final_load_data of the last completed load).
  * register_file, tools/taiga_compile_order:
      id_inuse is replaced by reg_inuse, retire is qualified by in_use_match.
  * gc_unit, taiga_config, tools/Makefile: single-line changes, see hunks.

Reviewer notes:
  * NOTE(review): this patch had been flattened (newlines and indentation lost). Line breaks
    were rebuilt from the hunk line counts; indentation and the position of blank context
    lines are best-effort guesses. TODO: regenerate with "git diff" from the two commits
    (see the index lines) before applying, or apply with whitespace-tolerant options and
    check the result.
  * Changes made to added lines while rebuilding (logic is equivalent):
      - alu_unit: result mux uses XLEN instead of the literals 31/31:0 (same for XLEN=32).
      - decode_and_issue: load_store_forwarding_possible is declared before its first use
        (hunk line counts/offsets adjusted accordingly); redundant rs2_conflict term dropped
        from load_store_operands_ready; "inside" expressions parenthesised.
      - load_store_unit: store_ready reduced to store & (~forward | ~load_attributes.valid).
      - barrel_shifter: explanatory comment reworded to match the code.
  * NOTE(review): taiga_config flips the ENABLE_M_MODE default from 1 to 0 - confirm this is
    intended and not a local debug setting.
  * NOTE(review): gc_unit comments out the is_i_fence branch of gc_fetch_pc, so that case
    now falls through to csr_mepc - confirm fence.i no longer depends on it.
  * NOTE(review): alu_inputs_t keeps the field "op" although its assignment in
    decode_and_issue is removed - presumably dead now; verify and drop if unused.

diff --git a/core/alu_unit.sv b/core/alu_unit.sv
index 00633b8..b2a938e 100755
--- a/core/alu_unit.sv
+++ b/core/alu_unit.sv
@@ -33,8 +33,7 @@ module alu_unit(
 
     logic[XLEN:0] add_sub_result;
     logic add_sub_carry_in;
-    logic[XLEN-1:0] rshift_result;
-    logic[XLEN-1:0] lshift_result;
+    logic[XLEN-1:0] shift_result;
 
     logic[XLEN:0] adder_in1;
     logic[XLEN:0] adder_in2;
@@ -66,18 +65,13 @@ module alu_unit(
             .shift_amount(alu_inputs.shift_amount),
             .arith(alu_inputs.arith),
             .lshift(alu_inputs.lshift),
-            .shifted_resultr(rshift_result),
-            .shifted_resultl(lshift_result)
+            .shifted_result(shift_result)
         );
 
-    //Result mux
     always_comb begin
-        case (alu_inputs.op)
-            ALU_ADD_SUB : result = add_sub_result[XLEN-1:0];
-            ALU_SLT : result = {31'b0, add_sub_result[XLEN]};
-            ALU_RSHIFT : result = rshift_result;
-            ALU_LSHIFT : result = lshift_result;
-        endcase
+        result = (alu_inputs.shifter_path ? shift_result : add_sub_result[XLEN-1:0]);//Result mux: shifter output or adder output
+        result[XLEN-1:1] &= {(XLEN-1){~alu_inputs.slt_path}};//slt_path: upper bits forced to zero
+        result[0] = alu_inputs.slt_path ? add_sub_result[XLEN] : result[0];//slt_path: bit 0 is bit XLEN of the adder output
     end
 
     ////////////////////////////////////////////////////
diff --git a/core/barrel_shifter.sv b/core/barrel_shifter.sv
index a38da6b..fba48f0 100755
--- a/core/barrel_shifter.sv
+++ b/core/barrel_shifter.sv
@@ -1,5 +1,5 @@
 /*
- * Copyright © 2017 Eric Matthews, Lesley Shannon
+ * Copyright © 2017-2019 Eric Matthews, Lesley Shannon
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,55 +24,21 @@ import taiga_config::*;
 import taiga_types::*;
 
 module barrel_shifter (
-        input logic[XLEN-1:0] shifter_input,
+        input logic[31:0] shifter_input,
         input logic[4:0] shift_amount,
         input logic arith,
         input logic lshift,
-        output logic[XLEN-1:0] shifted_resultr,
-        output logic[XLEN-1:0] shifted_resultl
+        output logic[31:0] shifted_result
         );
 
-    logic[XLEN-1:0] shiftx8, shiftx2, shiftx1;
-    logic[XLEN-1:0] preshifted_input;
-    //Bit flipping shared shifter
-    always_comb begin
-        foreach (shifter_input[i])
-            preshifted_input[i] = shifter_input[31-i];
-    end
-
-    always_comb begin//2
-        case ({lshift, shift_amount[0]})
-            0: shiftx1 = shifter_input[31:0];
-            1: shiftx1 = {{1{arith}},shifter_input[31:1]};
-            2: shiftx1 = preshifted_input[31:0];
-            3: shiftx1 = {{1{arith}},preshifted_input[31:1]};
-        endcase
-    end
-
-    always_comb begin//2
-        case (shift_amount[2:1])
-            0: shiftx2 = shiftx1[31:0];
-            1: shiftx2 = {{2{arith}},shiftx1[31:2]};
-            2: shiftx2 = {{4{arith}},shiftx1[31:4]};
-            3: shiftx2 = {{6{arith}},shiftx1[31:6]};
-        endcase
-    end
-
-    always_comb begin//8
-        case (shift_amount[4:3])
-            0: shiftx8 = shiftx2[31:0];
-            1: shiftx8 = {{8{arith}},shiftx2[31:8]};
-            2: shiftx8 = {{16{arith}},shiftx2[31:16]};
-            3: shiftx8 = {{24{arith}},shiftx2[31:24]};
-        endcase
-    end
-    assign shifted_resultr = shiftx8;
-
-    always_comb begin
-        foreach (shifter_input[i])
-            shifted_resultl[i] = shiftx8[31-i];
-    end
-
+    logic [62:0] shift_in;
+    logic [4:0] adjusted_shift_amount;
+    ////////////////////////////////////////////////////
+    //Implementation
+    //Performs one 63-bit logical right shift and keeps the low 32 bits of the result
+    //Right shift: the input sits in the low 32 bits with the upper 31 bits filled with arith
+    //Left shift: the input is pre-shifted left by 31 and then right shifted by ~shift_amount (31 - shift_amount)
+    assign shift_in = lshift ? {shifter_input, 31'b0} : {{31{arith}}, shifter_input};
+    assign adjusted_shift_amount = shift_amount ^ {5{lshift}};
+    assign shifted_result = 32'(shift_in >> adjusted_shift_amount);
 endmodule
-
-
diff --git a/core/csr_regs.sv b/core/csr_regs.sv
index 54a86ec..51d2c2b 100755
--- a/core/csr_regs.sv
+++ b/core/csr_regs.sv
@@ -115,7 +115,8 @@ module csr_regs (
     logic[COUNTER_W-1:0] mcycle;
     logic[COUNTER_W-1:0] mtime;
     logic[COUNTER_W-1:0] minst_ret;
-    logic [1:0] inst_ret_inc;
+    localparam INST_RET_INC_W = 2;
+    logic [INST_RET_INC_W-1:0] inst_ret_inc;
 
     //write_logic
     logic supervisor_write;
@@ -452,16 +453,10 @@ endgenerate
     //Timers and Counters
     //Register increment for instructions completed
     always_ff @(posedge clk) begin
-        if (rst) begin
+        if (rst)
             inst_ret_inc <= 0;
-        end else begin
-            if (instruction_complete & instruction_issued_no_rd)
-                inst_ret_inc <= 2;
-            else if (instruction_complete | instruction_issued_no_rd)
-                inst_ret_inc <= 1;
-            else
-                inst_ret_inc <= 0;
-        end
+        else
+            inst_ret_inc <= INST_RET_INC_W'(instruction_complete) + INST_RET_INC_W'(instruction_issued_no_rd);
     end
 
     always_ff @(posedge clk) begin
@@ -533,8 +528,6 @@ endgenerate
     always_ff @(posedge clk) begin
         if (read_regs)
             selected_csr_r <= selected_csr;
-        else
-            selected_csr_r <= 0;
     end
 
     assign wb_csr = selected_csr_r;
diff --git a/core/decode_and_issue.sv b/core/decode_and_issue.sv
index 105470e..e29ec00 100755
--- a/core/decode_and_issue.sv
+++ b/core/decode_and_issue.sv
@@ -49,8 +49,6 @@ module decode_and_issue (
         output logic gc_flush_required,
 
         output logic load_store_issue,
-        output logic store_issued_with_data,
-        output logic [31:0] store_data,
 
         output logic instruction_issued,
         output logic instruction_issued_no_rd,
@@ -188,7 +186,8 @@ module decode_and_issue (
     assign issue_valid = fb_valid & ti.id_available & ~gc_issue_hold & ~gc_fetch_flush;
 
     assign operands_ready = ~rf_decode.rs1_conflict & ~rf_decode.rs2_conflict;
-    assign load_store_operands_ready = ~rf_decode.rs1_conflict & (~rf_decode.rs2_conflict | (rf_decode.rs2_conflict & (opcode_trim == STORE_T)));
+    logic load_store_forwarding_possible;//declared ahead of its first use; driven in the load-store inputs section
+    assign load_store_operands_ready = ~rf_decode.rs1_conflict & (~rf_decode.rs2_conflict | ((opcode_trim == STORE_T) & load_store_forwarding_possible));
 
     //All units share the same operand ready logic except load-store which has an internal forwarding path
     always_comb begin
@@ -227,7 +226,7 @@ module decode_and_issue (
         endcase
     end
 
-    assign alu_inputs.in1 = {(alu_rs1_data[XLEN-1] & ~fn3[0]), alu_rs1_data};//(fn3[0] is SLTU_fn3);
+    assign alu_inputs.in1 = {(rf_decode.rs1_data[XLEN-1] & ~fn3[0]), alu_rs1_data};//(fn3[0] is SLTU_fn3);
     assign alu_inputs.in2 = {(alu_rs2_data[XLEN-1] & ~fn3[0]), alu_rs2_data};
     assign alu_inputs.shifter_in = rf_decode.rs1_data;
     assign alu_inputs.shift_amount = opcode[5] ? rf_decode.rs2_data[4:0] : rs2_addr;
@@ -235,7 +234,8 @@ module decode_and_issue (
     assign alu_inputs.arith = alu_rs1_data[XLEN-1] & fb.instruction[30];//shift in bit
     assign alu_inputs.lshift = ~fn3[2];
     assign alu_inputs.logic_op = fb.alu_logic_op;
-    assign alu_inputs.op = fb.alu_op;
+    assign alu_inputs.shifter_path = ~(opcode[2] | (fn3 inside {SLT_fn3, SLTU_fn3, XOR_fn3, OR_fn3, AND_fn3, ADD_SUB_fn3})); //opcode[2] LUI AUIPC JAL JALR
+    assign alu_inputs.slt_path = ~opcode[2] & (fn3 inside {SLT_fn3, SLTU_fn3});
 
     ////////////////////////////////////////////////////
     //Load Store unit inputs
@@ -247,6 +247,9 @@ module decode_and_issue (
     logic load_reserve;
     logic [4:0] amo_type;
 
+    logic [31:0] last_use_was_load;
+    logic [4:0] last_load_rd;
+
     assign amo_op = USE_AMO ? (opcode_trim == AMO_T) : 1'b0;
     assign amo_type = fb.instruction[31:27];
     assign store_conditional = (amo_type == AMO_SC);
@@ -267,19 +270,29 @@ module decode_and_issue (
     assign is_store = (opcode_trim == STORE_T) || (amo_op && store_conditional);//Used for LS unit and for ID tracking
 
     assign ls_offset = opcode[5] ? {fb.instruction[31:25], fb.instruction[11:7]} : fb.instruction[31:20];
+
+    always_ff @(posedge clk) begin//per future_rd_addr: was the instruction just issued a load
+        if (instruction_issued)
+            last_use_was_load[future_rd_addr] <= unit_needed[LS_UNIT_WB_ID] & is_load;
+    end
+
+    always_ff @(posedge clk) begin//future_rd_addr of the most recent load-store unit issue
+        if (issue[LS_UNIT_WB_ID])
+            last_load_rd <= future_rd_addr;
+    end
+
+    assign load_store_forwarding_possible = last_use_was_load[rs2_addr] && (last_load_rd == rs2_addr);
+
     assign ls_inputs.rs1 = rf_decode.rs1_data;
+    assign ls_inputs.rs2 = rf_decode.rs2_data;
     assign ls_inputs.offset = ls_offset;
     assign ls_inputs.pc = fb.pc;
     assign ls_inputs.fn3 = amo_op ? LS_W_fn3 : fn3;
     assign ls_inputs.load = is_load;
     assign ls_inputs.store = is_store;
-    assign ls_inputs.load_store_forward = rf_decode.rs2_conflict;
+    assign ls_inputs.load_store_forward = rf_decode.rs2_conflict & load_store_forwarding_possible;
     assign ls_inputs.store_forward_id = rf_decode.rs2_id;
 
-    //Store data to commit/store buffer
-    assign store_issued_with_data = ~ls_inputs.load_store_forward & issue[LS_UNIT_WB_ID];
-    assign store_data = rf_decode.rs2_data;
-
     ////////////////////////////////////////////////////
     //Branch unit inputs
     assign branch_inputs.rs1 = rf_decode.rs1_data;
@@ -337,11 +350,8 @@ module decode_and_issue (
             logic [4:0] prev_div_rs1_addr;
             logic [4:0] prev_div_rs2_addr;
             logic prev_div_result_valid;
-            logic prev_div_result_valid_r;
-            //If a subsequent div request uses the same inputs then
-            //don't rerun div operation
-            logic div_rd_overwrites_rs1_or_rs2;
-            logic rd_overwrites_previously_saved_rs1_or_rs2;
+            logic set_prev_div_result_valid;
+            logic clear_prev_div_result_valid;
             logic current_op_resuses_rs1_rs2;
 
             always_ff @(posedge clk) begin
@@ -351,29 +361,23 @@ module decode_and_issue (
                 end
             end
 
-            assign div_rd_overwrites_rs1_or_rs2 = (future_rd_addr == rs1_addr || future_rd_addr == rs2_addr);
-            assign rd_overwrites_previously_saved_rs1_or_rs2 = (future_rd_addr == prev_div_rs1_addr || future_rd_addr == prev_div_rs2_addr);
             assign current_op_resuses_rs1_rs2 = (prev_div_rs1_addr == rs1_addr) && (prev_div_rs2_addr == rs2_addr);
+            assign set_prev_div_result_valid = unit_needed[DIV_UNIT_WB_ID];
 
-            always_comb begin
-                prev_div_result_valid = prev_div_result_valid_r;
-                if ((unit_needed[DIV_UNIT_WB_ID] & ~div_rd_overwrites_rs1_or_rs2))
-                    prev_div_result_valid = 1;
-                else if ((unit_needed[DIV_UNIT_WB_ID] & div_rd_overwrites_rs1_or_rs2) | (uses_rd & rd_overwrites_previously_saved_rs1_or_rs2))
-                    prev_div_result_valid = 0;
-            end
+            //If current div operation overwrites an input register OR any other instruction overwrites the last div operations input registers
+            assign clear_prev_div_result_valid = uses_rd & ((future_rd_addr == (unit_needed[DIV_UNIT_WB_ID] ? rs1_addr : prev_div_rs1_addr)) || (future_rd_addr == (unit_needed[DIV_UNIT_WB_ID] ? rs2_addr : prev_div_rs2_addr)));
 
             always_ff @(posedge clk) begin
                 if (rst)
-                    prev_div_result_valid_r <= 0;
+                    prev_div_result_valid <= 0;
                 else if (instruction_issued)
-                    prev_div_result_valid_r <= prev_div_result_valid;
+                    prev_div_result_valid <= (set_prev_div_result_valid | prev_div_result_valid) & ~clear_prev_div_result_valid;
             end
 
             assign div_inputs.rs1 = rf_decode.rs1_data;
             assign div_inputs.rs2 = rf_decode.rs2_data;
             assign div_inputs.op = fn3[1:0];
-            assign div_inputs.reuse_result = prev_div_result_valid_r & current_op_resuses_rs1_rs2;
+            assign div_inputs.reuse_result = prev_div_result_valid & current_op_resuses_rs1_rs2;
         end
     endgenerate
 
diff --git a/core/gc_unit.sv b/core/gc_unit.sv
index 655773c..f423c83 100644
--- a/core/gc_unit.sv
+++ b/core/gc_unit.sv
@@ -247,7 +247,7 @@ module gc_unit(
         second_cycle_flush <= gc_flush_required;
         gc_fetch_pc_override <= gc_flush_required | second_cycle_flush | ls_exception_first_cycle;
         gc_fetch_pc <= ls_exception_second_cycle ? trap_pc :
-            stage1.is_i_fence ? stage1.pc + 4 : //Could stall on dec_pc valid and use instead of another adder
+            //stage1.is_i_fence ? stage1.pc + 4 : //Could stall on dec_pc valid and use instead of another adder
             csr_mepc;// gc_inputs.is_ret
     end
 
diff --git a/core/load_store_unit.sv b/core/load_store_unit.sv
index c76d425..a892ef8 100755
--- a/core/load_store_unit.sv
+++ b/core/load_store_unit.sv
@@ -51,8 +51,6 @@ module load_store_unit (
         output instruction_id_t store_done_id,
         output logic store_complete,
 
-        post_issue_forwarding_interface.unit store_forwarding,
-
         input logic[31:0] csr_rd,
         input instruction_id_t csr_id,
         input logic csr_done,
@@ -83,6 +81,8 @@ module load_store_unit (
     logic issue_request;
     logic load_complete;
 
+    logic [31:0] prev_load;//final_load_data of the last completed load, source for store forwarding
+
     logic [31:0] virtual_address;
     logic [3:0] be;
 
@@ -105,6 +105,7 @@ module load_store_unit (
 
     typedef struct packed{
         logic [31:0] virtual_address;
+        logic [31:0] store_data;
         logic [2:0] fn3;
         logic load;
         logic store;
@@ -139,6 +140,7 @@ module load_store_unit (
         ls_input_fifo (.fifo(input_fifo), .*);
 
     assign fifo_inputs.virtual_address = ls_inputs.rs1 + 32'(signed'(ls_inputs.offset));
+    assign fifo_inputs.store_data = ls_inputs.rs2;
     assign fifo_inputs.fn3 = ls_inputs.fn3;
     assign fifo_inputs.load = ls_inputs.load;
     assign fifo_inputs.store = ls_inputs.store;
@@ -175,7 +177,7 @@ module load_store_unit (
 
     //When switching units, ensure no outstanding loads so that there can be no timing collisions with results
     assign unit_stall = (current_unit != last_unit) && load_attributes.valid;
-    assign store_ready = stage1.store & store_forwarding.data_valid;
+    assign store_ready = stage1.store & (~stage1.load_store_forward | ~load_attributes.valid);//a forwarded store waits until load_attributes.valid is low
     assign issue_request = input_fifo.valid & units_ready & ~unit_stall & ~unaligned_addr & (~stage1.store | store_ready);
 
     ////////////////////////////////////////////////////
@@ -237,9 +239,8 @@ module load_store_unit (
     assign shared_inputs.be = be;
     assign shared_inputs.fn3 = stage1.fn3;
 
-    //Store forwarding request
-    assign store_forwarding.id = stage1.load_store_forward ? stage1.store_forward_id : stage1.instruction_id;
-    assign stage1_raw_data = store_forwarding.data;
+    //Store forwarding
+    assign stage1_raw_data = stage1.load_store_forward ? prev_load : stage1.store_data;
 
     //Input: ABCD
     //Assuming aligned requests,
@@ -340,6 +341,11 @@ module load_store_unit (
         endcase
     end
 
+    always_ff @ (posedge clk) begin
+        if (load_complete)
+            prev_load <= final_load_data;
+    end
+
     ////////////////////////////////////////////////////
     //Output bank
     assign wb.rd = ls_done ? final_load_data : csr_rd;
diff --git a/core/pre_decode.sv b/core/pre_decode.sv
index 894f890..ffc3d20 100755
--- a/core/pre_decode.sv
+++ b/core/pre_decode.sv
@@ -164,21 +164,6 @@ module pre_decode
         data_in.alu_logic_op = opcode[2] ? ALU_LOGIC_ADD : data_in.alu_logic_op;
     end
 
-    always_comb begin
-        case (fn3)
-            SLT_fn3 : data_in.alu_op = ALU_SLT;
-            SLTU_fn3 : data_in.alu_op = ALU_SLT;
-            SLL_fn3 : data_in.alu_op = ALU_LSHIFT;
-            XOR_fn3 : data_in.alu_op = ALU_ADD_SUB;
-            OR_fn3 : data_in.alu_op = ALU_ADD_SUB;
-            AND_fn3 : data_in.alu_op = ALU_ADD_SUB;
-            SRA_fn3 : data_in.alu_op = ALU_RSHIFT;
-            ADD_SUB_fn3 : data_in.alu_op = ALU_ADD_SUB;
-        endcase
-        //put LUI, AUIPC, JAL and JALR through adder path
-        data_in.alu_op = opcode[2] ? ALU_ADD_SUB : data_in.alu_op;
-    end
-
     logic non_mul_div_arith_op;
     assign non_mul_div_arith_op = ((opcode_trimmed == ARITH_T) && ~pre_decode_instruction[25]);//pre_decode_instruction[25] denotes multiply/divide instructions
     assign data_in.alu_request = non_mul_div_arith_op || (opcode_trimmed inside {ARITH_IMM_T, AUIPC_T, LUI_T, JAL_T, JALR_T});
diff --git a/core/register_file.sv b/core/register_file.sv
index e239523..b3157da 100755
--- a/core/register_file.sv
+++ b/core/register_file.sv
@@ -64,12 +64,14 @@ module register_file(
             register[rf_wb.rd_addr] <= rf_wb.rd_data;
     end
 
-    id_inuse inuse_mem (.*,
+    assign in_use_match = (rf_wb.id == in_use_by[rf_wb.rd_addr]) && valid_write;
+
+    reg_inuse inuse (.*,
+            .clr(1'b0),
             .rs1_addr(rf_decode.rs1_addr),.rs2_addr(rf_decode.rs2_addr), .issued_rd_addr(rf_decode.future_rd_addr),
+            .retired_rd_addr(rf_wb.rd_addr),
             .issued(rf_decode.instruction_issued),
-            .issue_id(rf_decode.id),
-            .retired_id(rf_wb.id),
-            .retired(valid_write),
+            .retired(in_use_match),
             .rs1_inuse(rs1_inuse),
             .rs2_inuse(rs2_inuse)
         );
diff --git a/core/taiga_config.sv b/core/taiga_config.sv
index 4f548a4..7252e4f 100755
--- a/core/taiga_config.sv
+++ b/core/taiga_config.sv
@@ -31,7 +31,7 @@ package taiga_config;
     //Privileged ISA Options
 
     //Enable Machine level privilege spec
-    parameter ENABLE_M_MODE = 1;
+    parameter ENABLE_M_MODE = 0;
     //Enable Supervisor level privilege spec
     parameter ENABLE_S_MODE = 0;
 
diff --git a/core/taiga_types.sv b/core/taiga_types.sv
index 818ffd9..22cfdf2 100755
--- a/core/taiga_types.sv
+++ b/core/taiga_types.sv
@@ -286,7 +286,6 @@ package taiga_types;
         logic [BRANCH_PREDICTOR_WAYS-1:0] bp_update_way;
         logic alu_sub;
         logic [1:0] alu_logic_op;
-        logic [1:0] alu_op;
         logic alu_request;
         alu_rs1_op_t alu_rs1_sel;
         alu_rs2_op_t alu_rs2_sel;
@@ -308,6 +307,8 @@ package taiga_types;
         logic lshift;
         logic [1:0] logic_op;
         logic [1:0] op;
+        logic shifter_path;
+        logic slt_path;
     } alu_inputs_t;
 
     typedef struct packed {
@@ -368,6 +369,7 @@ package taiga_types;
 
     typedef struct packed{
         logic [XLEN-1:0] rs1;
+        logic [XLEN-1:0] rs2;
         logic [11:0] offset;
         logic [2:0] fn3;
         logic load;
diff --git a/core/write_back.sv b/core/write_back.sv
index e59fda2..bf64a1d 100755
--- a/core/write_back.sv
+++ b/core/write_back.sv
@@ -39,10 +39,6 @@ module write_back(
 
         input instruction_id_t store_done_id,
         input logic store_complete,
-        post_issue_forwarding_interface.wb store_forwarding,
-
-        input logic store_issued_with_data,
-        input logic [31:0] store_data,
 
         //Trace signals
         output logic tr_wb_mux_contention
@@ -55,8 +51,7 @@ module write_back(
     //aliases for write-back-interface signals
     instruction_id_t unit_instruction_id [NUM_WB_UNITS-1:0];
     logic [NUM_WB_UNITS-1:0] unit_done;
-    //Force usage of f7 muxes
-    (* keep = "true" *) logic [XLEN-1:0] unit_rd [2*NUM_WB_UNITS-1:0];
+    logic [XLEN-1:0] unit_rd [NUM_WB_UNITS-1:0];
     //Per-ID muxes for commit buffer
     logic [$clog2(NUM_WB_UNITS)-1:0] id_unit_select [MAX_INFLIGHT_COUNT-1:0];
     logic [$clog2(NUM_WB_UNITS)-1:0] id_unit_select_r [MAX_INFLIGHT_COUNT-1:0];
@@ -87,11 +82,6 @@ module write_back(
             assign unit_done[i] = unit_wb[i].done;
             assign unit_rd[i] = unit_wb[i].rd;
         end
-        for (i=NUM_WB_UNITS; i< 2*NUM_WB_UNITS; i++) begin
-            assign unit_rd[i] = store_data;
-        end
-
-
     endgenerate
 
     ////////////////////////////////////////////////////
@@ -113,7 +103,6 @@ module write_back(
     //Set unit_ID for each ID as they are issued
     //If ID is not in use, use the current issue_unit_id value
     //This is used to support single cycle units, such as the ALU
-    //Stores are not tracked for id_inuse as their data is placed in the buffer at issue time
     always_comb begin
         id_issued_one_hot = 0;
         id_issued_one_hot[ti.issue_id] = ti.issued & ~ti.inflight_packet.is_store;
@@ -131,16 +120,10 @@ module write_back(
     //Writeback Buffer
     //Mux outputs of units based on IDs
     //If ID is done write result to buffer
-    logic [MAX_INFLIGHT_COUNT-1:0] store_mux;
-    always_comb begin
-        store_mux = 0;
-        store_mux[ti.issue_id] = store_issued_with_data;
-    end
-
     generate for (i=0; i< MAX_INFLIGHT_COUNT; i++) begin
         always_ff @ (posedge clk) begin
-            if (id_writing_to_buffer[i] |store_mux[i])
-                results_by_id[i] <= unit_rd[{store_mux[i],id_unit_select[i]}];
+            if (id_writing_to_buffer[i])
+                results_by_id[i] <= unit_rd[id_unit_select[i]];
         end
     end
     endgenerate
@@ -154,11 +137,6 @@ module write_back(
             id_inuse <= (id_issued_one_hot | id_inuse) & ~id_writing_to_buffer;
     end
 
-    //As IDs are freed for reuse in repeating order, the results will not be overwritten before the instruction
-    //needing them has itself completed
-    assign store_forwarding.data_valid = ~id_inuse[store_forwarding.id];
-    assign store_forwarding.data = results_by_id[store_forwarding.id];
-
     ////////////////////////////////////////////////////
     //ID Tracking
     //Provides ordering of IDs, ID for issue and oldest ID for committing to register file
diff --git a/tools/Makefile b/tools/Makefile
index 6333854..9de8690 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -96,7 +96,7 @@ build_coremark:
 
 .PHONY: run_coremark_verilator
 run_coremark_verilator :
-	./build_taiga_sim/Vtaiga_local_mem "/dev/null" "/dev/null" $(TAIGA_DIR)/tools/coremark.hw_init $(VERILATOR_TRACE_FILE) >> $@
+	./build_taiga_sim/Vtaiga_local_mem "/dev/null" "/dev/null" $(TAIGA_DIR)/tools/coremark.hw_init $(VERILATOR_TRACE_FILE) > $@
 
 
 #Benchmarks already built
diff --git a/tools/taiga_compile_order b/tools/taiga_compile_order
index f52a00f..6a25c5d 100644
--- a/tools/taiga_compile_order
+++ b/tools/taiga_compile_order
@@ -94,6 +94,7 @@
 ../core/decode_and_issue.sv
 
 ../core/id_inuse.sv
+../core/reg_inuse.sv
 ../core/register_file.sv
 
 ../core/id_tracking.sv