barrel shifter changes, Store forwarding changed to be internal to L/S unit, various minor cleanups

2025-04-23 21:47:15 -04:00 · 2019-12-13 12:54:32 -08:00 · 2019-12-13 12:54:32 -08:00 · 928282ade9
commit 928282ade9
parent 3abc83c503
13 changed files with 82 additions and 151 deletions
--- a/core/alu_unit.sv
+++ b/core/alu_unit.sv
@ -33,8 +33,7 @@ module alu_unit(

    logic[XLEN:0] add_sub_result;
    logic add_sub_carry_in;
-    logic[XLEN-1:0] rshift_result;
-    logic[XLEN-1:0] lshift_result;
+    logic[XLEN-1:0] shift_result;

    logic[XLEN:0] adder_in1;
    logic[XLEN:0] adder_in2;
@ -66,18 +65,13 @@ module alu_unit(
            .shift_amount(alu_inputs.shift_amount),
            .arith(alu_inputs.arith),
            .lshift(alu_inputs.lshift),
-            .shifted_resultr(rshift_result),
-            .shifted_resultl(lshift_result)
+            .shifted_result(shift_result)
        );

-    //Result mux
    always_comb begin
-        case (alu_inputs.op)
-            ALU_ADD_SUB : result = add_sub_result[XLEN-1:0];
-            ALU_SLT : result = {31'b0, add_sub_result[XLEN]};
-            ALU_RSHIFT : result = rshift_result;
-            ALU_LSHIFT : result = lshift_result;
-        endcase
+        result = (alu_inputs.shifter_path ? shift_result : add_sub_result[31:0]);
+        result[31:1] &= {31{~alu_inputs.slt_path}};
+        result[0] = alu_inputs.slt_path ? add_sub_result[XLEN] : result[0];
    end

    ////////////////////////////////////////////////////
--- a/core/barrel_shifter.sv
+++ b/core/barrel_shifter.sv
@ -1,5 +1,5 @@
 /*
- * Copyright © 2017 Eric Matthews,  Lesley Shannon
+ * Copyright © 2017-2019 Eric Matthews,  Lesley Shannon
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -24,55 +24,21 @@ import taiga_config::*;
 import taiga_types::*;

 module barrel_shifter (
-        input logic[XLEN-1:0] shifter_input,
+        input logic[31:0] shifter_input,
        input logic[4:0] shift_amount,
        input logic arith,
        input logic lshift,
-        output logic[XLEN-1:0] shifted_resultr,
-        output logic[XLEN-1:0] shifted_resultl
+        output logic[31:0] shifted_result
        );

-    logic[XLEN-1:0] shiftx8, shiftx2, shiftx1;
-    logic[XLEN-1:0] preshifted_input;
-    //Bit flipping shared shifter
-    always_comb begin
-        foreach (shifter_input[i])
-            preshifted_input[i] = shifter_input[31-i];
-    end
-
-    always_comb begin//2
-        case ({lshift, shift_amount[0]})
-            0: shiftx1 = shifter_input[31:0];
-            1: shiftx1 = {{1{arith}},shifter_input[31:1]};
-            2: shiftx1 = preshifted_input[31:0];
-            3: shiftx1 = {{1{arith}},preshifted_input[31:1]};
-        endcase
-    end
-
-        always_comb begin//2
-        case (shift_amount[2:1])
-            0: shiftx2 = shiftx1[31:0];
-            1: shiftx2 = {{2{arith}},shiftx1[31:2]};
-            2: shiftx2 = {{4{arith}},shiftx1[31:4]};
-            3: shiftx2 = {{6{arith}},shiftx1[31:6]};
-        endcase
-    end
-
-    always_comb begin//8
-        case (shift_amount[4:3])
-            0: shiftx8 = shiftx2[31:0];
-            1: shiftx8 = {{8{arith}},shiftx2[31:8]};
-            2: shiftx8 = {{16{arith}},shiftx2[31:16]};
-            3: shiftx8 = {{24{arith}},shiftx2[31:24]};
-        endcase
-    end
-    assign shifted_resultr = shiftx8;
-
-    always_comb begin
-        foreach (shifter_input[i])
-            shifted_resultl[i] = shiftx8[31-i];
-    end
-
+    logic [62:0] shift_in;
+    logic [4:0] adjusted_shift_amount;
+    ////////////////////////////////////////////////////
+    //Implementation
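+    //Performs a 63-bit right shift and keeps the lower 32 bits of the result
+    //A left shift by N is handled by placing the input in the upper portion of the 63-bit value and right shifting; with 32 zero bits below the input this would require a shift of (~shift_amount + 1), i.e. 32 - N
+    //Only 31 zero bits are placed below the input (the value is pre-shifted by one), so only the complement of the shift_amount (31 - N) is needed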
+    assign shift_in = lshift ? {shifter_input, 31'b0} : {{31{arith}}, shifter_input};
+    assign adjusted_shift_amount = shift_amount ^ {5{lshift}};
+    assign shifted_result = 32'(shift_in >> adjusted_shift_amount);
 endmodule
-
-
--- a/core/csr_regs.sv
+++ b/core/csr_regs.sv
@ -115,7 +115,8 @@ module csr_regs (
    logic[COUNTER_W-1:0] mcycle;
    logic[COUNTER_W-1:0] mtime;
    logic[COUNTER_W-1:0] minst_ret;
-    logic [1:0] inst_ret_inc;
+    localparam INST_RET_INC_W = 2;
+    logic [INST_RET_INC_W-1:0] inst_ret_inc;

    //write_logic
    logic supervisor_write;
@ -452,16 +453,10 @@ endgenerate
    //Timers and Counters
    //Register increment for instructions completed
    always_ff @(posedge clk) begin
-        if (rst) begin
+        if (rst)
            inst_ret_inc <= 0;
-        end else begin
-            if (instruction_complete & instruction_issued_no_rd)
-                inst_ret_inc <= 2;
-            else if (instruction_complete | instruction_issued_no_rd)
-                inst_ret_inc <= 1;
-            else
-                inst_ret_inc <= 0;
-        end
+        else
+            inst_ret_inc <= INST_RET_INC_W'(instruction_complete) + INST_RET_INC_W'(instruction_issued_no_rd);
    end

    always_ff @(posedge clk) begin
@ -533,8 +528,6 @@ endgenerate
    always_ff @(posedge clk) begin
        if (read_regs)
            selected_csr_r <= selected_csr;
-        else
-            selected_csr_r <= 0;
    end

    assign wb_csr = selected_csr_r;
--- a/core/decode_and_issue.sv
+++ b/core/decode_and_issue.sv
@ -49,8 +49,6 @@ module decode_and_issue (
        output logic gc_flush_required,

        output logic load_store_issue,
-        output logic store_issued_with_data,
-        output logic [31:0] store_data,

        output logic instruction_issued,
        output logic instruction_issued_no_rd,
@ -188,7 +186,7 @@ module decode_and_issue (
    assign issue_valid = fb_valid & ti.id_available & ~gc_issue_hold & ~gc_fetch_flush;

    assign operands_ready = ~rf_decode.rs1_conflict & ~rf_decode.rs2_conflict;
-    assign load_store_operands_ready = ~rf_decode.rs1_conflict & (~rf_decode.rs2_conflict | (rf_decode.rs2_conflict & (opcode_trim == STORE_T)));
+    assign load_store_operands_ready = ~rf_decode.rs1_conflict & (~rf_decode.rs2_conflict | (rf_decode.rs2_conflict & (opcode_trim == STORE_T) & load_store_forwarding_possible));

    //All units share the same operand ready logic except load-store which has an internal forwarding path
    always_comb begin
@ -227,7 +225,7 @@ module decode_and_issue (
        endcase
    end

-    assign alu_inputs.in1 = {(alu_rs1_data[XLEN-1] & ~fn3[0]), alu_rs1_data};//(fn3[0]  is SLTU_fn3);
+    assign alu_inputs.in1 = {(rf_decode.rs1_data[XLEN-1] & ~fn3[0]), alu_rs1_data};//(fn3[0]  is SLTU_fn3);
    assign alu_inputs.in2 = {(alu_rs2_data[XLEN-1] & ~fn3[0]), alu_rs2_data};
    assign alu_inputs.shifter_in = rf_decode.rs1_data;
    assign alu_inputs.shift_amount = opcode[5] ? rf_decode.rs2_data[4:0] : rs2_addr;
@ -235,7 +233,8 @@ module decode_and_issue (
    assign alu_inputs.arith = alu_rs1_data[XLEN-1] & fb.instruction[30];//shift in bit
    assign alu_inputs.lshift = ~fn3[2];
    assign alu_inputs.logic_op = fb.alu_logic_op;
-    assign alu_inputs.op = fb.alu_op;
+    assign alu_inputs.shifter_path = ~(opcode[2] | fn3 inside {SLT_fn3, SLTU_fn3, XOR_fn3, OR_fn3, AND_fn3, ADD_SUB_fn3}); //opcode[2] LUI AUIPC JAL JALR
+    assign alu_inputs.slt_path = ~opcode[2] & fn3 inside {SLT_fn3, SLTU_fn3};

    ////////////////////////////////////////////////////
    //Load Store unit inputs
@ -247,6 +246,10 @@ module decode_and_issue (
    logic load_reserve;
    logic [4:0] amo_type;

+    logic load_store_forwarding_possible;
+    logic [31:0] last_use_was_load;
+    logic [4:0] last_load_rd;
+
    assign amo_op =  USE_AMO ? (opcode_trim == AMO_T) : 1'b0;
    assign amo_type = fb.instruction[31:27];
    assign store_conditional = (amo_type == AMO_SC);
@ -267,19 +270,29 @@ module decode_and_issue (
    assign is_store = (opcode_trim == STORE_T) || (amo_op && store_conditional);//Used for LS unit and for ID tracking
    assign ls_offset = opcode[5] ? {fb.instruction[31:25], fb.instruction[11:7]} : fb.instruction[31:20];

+
+    always_ff @(posedge clk) begin
+        if (instruction_issued)
+            last_use_was_load[future_rd_addr] <= unit_needed[LS_UNIT_WB_ID] & is_load;
+    end
+
+    always_ff @(posedge clk) begin
+        if (issue[LS_UNIT_WB_ID])
+            last_load_rd <= future_rd_addr;
+    end
+
+    assign load_store_forwarding_possible = last_use_was_load[rs2_addr] && (last_load_rd == rs2_addr);
+
    assign ls_inputs.rs1 = rf_decode.rs1_data;
+    assign ls_inputs.rs2 = rf_decode.rs2_data;
    assign ls_inputs.offset = ls_offset;
    assign ls_inputs.pc = fb.pc;
    assign ls_inputs.fn3 = amo_op ? LS_W_fn3 : fn3;
    assign ls_inputs.load = is_load;
    assign ls_inputs.store = is_store;
-    assign ls_inputs.load_store_forward = rf_decode.rs2_conflict;
+    assign ls_inputs.load_store_forward = rf_decode.rs2_conflict & load_store_forwarding_possible;
    assign ls_inputs.store_forward_id = rf_decode.rs2_id;

-    //Store data to commit/store buffer
-    assign store_issued_with_data = ~ls_inputs.load_store_forward & issue[LS_UNIT_WB_ID];
-    assign store_data = rf_decode.rs2_data;
-
    ////////////////////////////////////////////////////
    //Branch unit inputs
    assign branch_inputs.rs1 = rf_decode.rs1_data;
@ -337,11 +350,8 @@ module decode_and_issue (
            logic [4:0] prev_div_rs1_addr;
            logic [4:0] prev_div_rs2_addr;
            logic prev_div_result_valid;
-            logic prev_div_result_valid_r;
-            //If a subsequent div request uses the same inputs then
-            //don't rerun div operation
-            logic div_rd_overwrites_rs1_or_rs2;
-            logic rd_overwrites_previously_saved_rs1_or_rs2;
+            logic set_prev_div_result_valid;
+            logic clear_prev_div_result_valid;
            logic current_op_resuses_rs1_rs2;

            always_ff @(posedge clk) begin
@ -351,29 +361,23 @@ module decode_and_issue (
                end
            end

-            assign div_rd_overwrites_rs1_or_rs2 = (future_rd_addr == rs1_addr || future_rd_addr == rs2_addr);
-            assign rd_overwrites_previously_saved_rs1_or_rs2 = (future_rd_addr == prev_div_rs1_addr || future_rd_addr == prev_div_rs2_addr);
            assign current_op_resuses_rs1_rs2 = (prev_div_rs1_addr == rs1_addr) && (prev_div_rs2_addr == rs2_addr);
+            assign set_prev_div_result_valid = unit_needed[DIV_UNIT_WB_ID];

-            always_comb begin
-                prev_div_result_valid = prev_div_result_valid_r;
-                if ((unit_needed[DIV_UNIT_WB_ID] & ~div_rd_overwrites_rs1_or_rs2))
-                    prev_div_result_valid = 1;
-                else if ((unit_needed[DIV_UNIT_WB_ID] & div_rd_overwrites_rs1_or_rs2) | (uses_rd & rd_overwrites_previously_saved_rs1_or_rs2))
-                    prev_div_result_valid = 0;
-            end
+            //If the current div operation overwrites one of its own input registers OR any other instruction overwrites the last div operation's input registers
+            assign clear_prev_div_result_valid = uses_rd & ((future_rd_addr == (unit_needed[DIV_UNIT_WB_ID] ? rs1_addr : prev_div_rs1_addr)) || (future_rd_addr == (unit_needed[DIV_UNIT_WB_ID] ? rs2_addr : prev_div_rs2_addr)));

            always_ff @(posedge clk) begin
                if (rst)
-                    prev_div_result_valid_r <= 0;
+                    prev_div_result_valid <= 0;
                else if (instruction_issued)
-                    prev_div_result_valid_r <= prev_div_result_valid;
+                    prev_div_result_valid <= (set_prev_div_result_valid | prev_div_result_valid) & ~clear_prev_div_result_valid;
            end

            assign div_inputs.rs1 = rf_decode.rs1_data;
            assign div_inputs.rs2 = rf_decode.rs2_data;
            assign div_inputs.op = fn3[1:0];
-            assign div_inputs.reuse_result = prev_div_result_valid_r & current_op_resuses_rs1_rs2;
+            assign div_inputs.reuse_result = prev_div_result_valid & current_op_resuses_rs1_rs2;
        end
    endgenerate

--- a/core/gc_unit.sv
+++ b/core/gc_unit.sv
@ -247,7 +247,7 @@ module gc_unit(
        second_cycle_flush <= gc_flush_required;
        gc_fetch_pc_override <= gc_flush_required | second_cycle_flush | ls_exception_first_cycle;
        gc_fetch_pc <= ls_exception_second_cycle ? trap_pc :
-            stage1.is_i_fence ? stage1.pc + 4 : //Could stall on dec_pc valid and use instead of another adder
+            //stage1.is_i_fence ? stage1.pc + 4 : //Could stall on dec_pc valid and use instead of another adder
            csr_mepc;// gc_inputs.is_ret
    end

--- a/core/load_store_unit.sv
+++ b/core/load_store_unit.sv
@ -51,8 +51,6 @@ module load_store_unit (
        output instruction_id_t store_done_id,
        output logic store_complete,

-        post_issue_forwarding_interface.unit store_forwarding,
-
        input logic[31:0] csr_rd,
        input instruction_id_t csr_id,
        input logic csr_done,
@ -83,6 +81,8 @@ module load_store_unit (
    logic issue_request;
    logic load_complete;

+    logic [31:0] prev_load;
+
    logic [31:0] virtual_address;
    logic [3:0] be;

@ -105,6 +105,7 @@ module load_store_unit (

    typedef struct packed{
        logic [31:0] virtual_address;
+        logic [31:0] store_data;
        logic [2:0] fn3;
        logic load;
        logic store;
@ -139,6 +140,7 @@ module load_store_unit (
        ls_input_fifo (.fifo(input_fifo), .*);

    assign fifo_inputs.virtual_address = ls_inputs.rs1 + 32'(signed'(ls_inputs.offset));
+    assign fifo_inputs.store_data = ls_inputs.rs2;
    assign fifo_inputs.fn3 = ls_inputs.fn3;
    assign fifo_inputs.load = ls_inputs.load;
    assign fifo_inputs.store = ls_inputs.store;
@ -175,7 +177,7 @@ module load_store_unit (

    //When switching units, ensure no outstanding loads so that there can be no timing collisions with results
    assign unit_stall = (current_unit != last_unit) && load_attributes.valid;
-    assign store_ready = stage1.store & store_forwarding.data_valid;
+    assign store_ready = stage1.store & ((stage1.load_store_forward & ~load_attributes.valid) | ~stage1.load_store_forward);
    assign issue_request = input_fifo.valid & units_ready & ~unit_stall & ~unaligned_addr & (~stage1.store | store_ready);

    ////////////////////////////////////////////////////
@ -237,9 +239,8 @@ module load_store_unit (
    assign shared_inputs.be = be;
    assign shared_inputs.fn3 = stage1.fn3;

-    //Store forwarding request
-    assign store_forwarding.id = stage1.load_store_forward ? stage1.store_forward_id : stage1.instruction_id;
-    assign stage1_raw_data = store_forwarding.data;
+    //Store forwarding
+    assign stage1_raw_data = stage1.load_store_forward ? prev_load : stage1.store_data;

    //Input: ABCD
    //Assuming aligned requests,
@ -340,6 +341,11 @@ module load_store_unit (
        endcase
    end

+    always_ff @ (posedge clk) begin
+        if (load_complete)
+            prev_load <= final_load_data;
+    end
+
    ////////////////////////////////////////////////////
    //Output bank
    assign wb.rd = ls_done ? final_load_data : csr_rd;
--- a/core/pre_decode.sv
+++ b/core/pre_decode.sv
@ -164,21 +164,6 @@ module pre_decode
        data_in.alu_logic_op = opcode[2] ? ALU_LOGIC_ADD : data_in.alu_logic_op;
    end

-    always_comb begin
-        case (fn3)
-            SLT_fn3 : data_in.alu_op = ALU_SLT;
-            SLTU_fn3 : data_in.alu_op = ALU_SLT;
-            SLL_fn3 : data_in.alu_op = ALU_LSHIFT;
-            XOR_fn3 : data_in.alu_op = ALU_ADD_SUB;
-            OR_fn3 : data_in.alu_op = ALU_ADD_SUB;
-            AND_fn3 : data_in.alu_op = ALU_ADD_SUB;
-            SRA_fn3 : data_in.alu_op = ALU_RSHIFT;
-            ADD_SUB_fn3 : data_in.alu_op = ALU_ADD_SUB;
-        endcase
-        //put LUI, AUIPC, JAL and JALR through adder path
-        data_in.alu_op = opcode[2] ? ALU_ADD_SUB : data_in.alu_op;
-    end
-
    logic non_mul_div_arith_op;
    assign non_mul_div_arith_op = ((opcode_trimmed == ARITH_T) && ~pre_decode_instruction[25]);//pre_decode_instruction[25] denotes multiply/divide instructions
    assign data_in.alu_request = non_mul_div_arith_op || (opcode_trimmed inside {ARITH_IMM_T, AUIPC_T, LUI_T, JAL_T, JALR_T});
--- a/core/register_file.sv
+++ b/core/register_file.sv
@ -64,12 +64,14 @@ module register_file(
            register[rf_wb.rd_addr] <= rf_wb.rd_data;
    end

-    id_inuse inuse_mem (.*,
+    assign in_use_match = (rf_wb.id == in_use_by[rf_wb.rd_addr]) && valid_write;
+
+    reg_inuse inuse (.*,
+            .clr(1'b0),
            .rs1_addr(rf_decode.rs1_addr),.rs2_addr(rf_decode.rs2_addr), .issued_rd_addr(rf_decode.future_rd_addr),
+            .retired_rd_addr(rf_wb.rd_addr),
            .issued(rf_decode.instruction_issued),
-            .issue_id(rf_decode.id),
-            .retired_id(rf_wb.id),
-            .retired(valid_write),
+            .retired(in_use_match),
            .rs1_inuse(rs1_inuse),
            .rs2_inuse(rs2_inuse)
            );
--- a/core/taiga_config.sv
+++ b/core/taiga_config.sv
@ -31,7 +31,7 @@ package taiga_config;
    //Privileged ISA Options

    //Enable Machine level privilege spec
-    parameter ENABLE_M_MODE = 1;
+    parameter ENABLE_M_MODE = 0;
    //Enable Supervisor level privilege spec
    parameter ENABLE_S_MODE = 0;

--- a/core/taiga_types.sv
+++ b/core/taiga_types.sv
@ -286,7 +286,6 @@ package taiga_types;
        logic [BRANCH_PREDICTOR_WAYS-1:0] bp_update_way;
        logic alu_sub;
        logic [1:0] alu_logic_op;
-        logic [1:0] alu_op;
        logic alu_request;
        alu_rs1_op_t alu_rs1_sel;
        alu_rs2_op_t alu_rs2_sel;
@ -308,6 +307,8 @@ package taiga_types;
        logic lshift;
        logic [1:0] logic_op;
        logic [1:0] op;
+        logic shifter_path;
+        logic slt_path;
    } alu_inputs_t;

    typedef struct packed {
@ -368,6 +369,7 @@ package taiga_types;

    typedef struct packed{
        logic [XLEN-1:0] rs1;
+        logic [XLEN-1:0] rs2;
        logic [11:0] offset;
        logic [2:0] fn3;
        logic load;
--- a/core/write_back.sv
+++ b/core/write_back.sv
@ -39,10 +39,6 @@ module write_back(

        input instruction_id_t store_done_id,
        input logic store_complete,
-        post_issue_forwarding_interface.wb store_forwarding,
-
-        input logic store_issued_with_data,
-        input logic [31:0] store_data,

        //Trace signals
        output logic tr_wb_mux_contention
@ -55,8 +51,7 @@ module write_back(
    //aliases for write-back-interface signals
    instruction_id_t unit_instruction_id [NUM_WB_UNITS-1:0];
    logic [NUM_WB_UNITS-1:0] unit_done;
-    //Force usage of f7 muxes
-    (* keep = "true" *) logic [XLEN-1:0] unit_rd [2*NUM_WB_UNITS-1:0];
+    logic [XLEN-1:0] unit_rd [NUM_WB_UNITS-1:0];
    //Per-ID muxes for commit buffer
    logic [$clog2(NUM_WB_UNITS)-1:0] id_unit_select [MAX_INFLIGHT_COUNT-1:0];
    logic [$clog2(NUM_WB_UNITS)-1:0] id_unit_select_r [MAX_INFLIGHT_COUNT-1:0];
@ -87,11 +82,6 @@ module write_back(
            assign unit_done[i] = unit_wb[i].done;
            assign unit_rd[i] = unit_wb[i].rd;
        end
-        for (i=NUM_WB_UNITS; i< 2*NUM_WB_UNITS; i++) begin
-            assign unit_rd[i] = store_data;
-        end
-
-
    endgenerate

    ////////////////////////////////////////////////////
@ -113,7 +103,6 @@ module write_back(
    //Set unit_ID for each ID as they are issued
    //If ID is not in use, use the current issue_unit_id value
    //This is used to support single cycle units, such as the ALU
-    //Stores are not tracked for id_inuse as their data is placed in the buffer at issue time
    always_comb begin
        id_issued_one_hot = 0;
        id_issued_one_hot[ti.issue_id] = ti.issued & ~ti.inflight_packet.is_store;
@ -131,16 +120,10 @@ module write_back(
    //Writeback Buffer
    //Mux outputs of units based on IDs
    //If ID is done write result to buffer
-    logic [MAX_INFLIGHT_COUNT-1:0] store_mux;
-    always_comb begin
-        store_mux = 0;
-        store_mux[ti.issue_id] = store_issued_with_data;
-    end
-
    generate for (i=0; i< MAX_INFLIGHT_COUNT; i++) begin
        always_ff @ (posedge clk) begin
-            if (id_writing_to_buffer[i] |store_mux[i])
-                results_by_id[i] <= unit_rd[{store_mux[i],id_unit_select[i]}];
+            if (id_writing_to_buffer[i])
+                results_by_id[i] <= unit_rd[id_unit_select[i]];
        end
    end endgenerate

@ -154,11 +137,6 @@ module write_back(
            id_inuse <= (id_issued_one_hot | id_inuse) & ~id_writing_to_buffer;
    end

-    //As IDs are freed for reuse in repeating order, the results will not be overwritten before the instruction
-    //needing them has itself completed
-    assign store_forwarding.data_valid = ~id_inuse[store_forwarding.id];
-    assign store_forwarding.data = results_by_id[store_forwarding.id];
-
    ////////////////////////////////////////////////////
    //ID Tracking
    //Provides ordering of IDs, ID for issue and oldest ID for committing to register file
--- a/tools/Makefile
+++ b/tools/Makefile
@ -96,7 +96,7 @@ build_coremark:

 .PHONY: run_coremark_verilator
 run_coremark_verilator :
-	./build_taiga_sim/Vtaiga_local_mem "/dev/null" "/dev/null" $(TAIGA_DIR)/tools/coremark.hw_init $(VERILATOR_TRACE_FILE) >> $@
+	./build_taiga_sim/Vtaiga_local_mem "/dev/null" "/dev/null" $(TAIGA_DIR)/tools/coremark.hw_init $(VERILATOR_TRACE_FILE) > $@


 #Benchmarks already built
--- a/tools/taiga_compile_order
+++ b/tools/taiga_compile_order
@ -94,6 +94,7 @@
 ../core/decode_and_issue.sv

 ../core/id_inuse.sv
+../core/reg_inuse.sv
 ../core/register_file.sv  

 ../core/id_tracking.sv