Barrel shifter changes, store forwarding changed to be internal to the L/S unit, various minor cleanups

This commit is contained in:
Eric Matthews 2019-12-13 12:54:32 -08:00
parent 3abc83c503
commit 928282ade9
13 changed files with 82 additions and 151 deletions

View file

@ -33,8 +33,7 @@ module alu_unit(
logic[XLEN:0] add_sub_result;
logic add_sub_carry_in;
logic[XLEN-1:0] rshift_result;
logic[XLEN-1:0] lshift_result;
logic[XLEN-1:0] shift_result;
logic[XLEN:0] adder_in1;
logic[XLEN:0] adder_in2;
@ -66,18 +65,13 @@ module alu_unit(
.shift_amount(alu_inputs.shift_amount),
.arith(alu_inputs.arith),
.lshift(alu_inputs.lshift),
.shifted_resultr(rshift_result),
.shifted_resultl(lshift_result)
.shifted_result(shift_result)
);
//Result mux
always_comb begin
case (alu_inputs.op)
ALU_ADD_SUB : result = add_sub_result[XLEN-1:0];
ALU_SLT : result = {31'b0, add_sub_result[XLEN]};
ALU_RSHIFT : result = rshift_result;
ALU_LSHIFT : result = lshift_result;
endcase
result = (alu_inputs.shifter_path ? shift_result : add_sub_result[31:0]);
result[31:1] &= {31{~alu_inputs.slt_path}};
result[0] = alu_inputs.slt_path ? add_sub_result[XLEN] : result[0];
end
////////////////////////////////////////////////////

View file

@ -1,5 +1,5 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
* Copyright © 2017-2019 Eric Matthews, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -24,55 +24,21 @@ import taiga_config::*;
import taiga_types::*;
module barrel_shifter (
input logic[XLEN-1:0] shifter_input,
input logic[31:0] shifter_input,
input logic[4:0] shift_amount,
input logic arith,
input logic lshift,
output logic[XLEN-1:0] shifted_resultr,
output logic[XLEN-1:0] shifted_resultl
output logic[31:0] shifted_result
);
logic[XLEN-1:0] shiftx8, shiftx2, shiftx1;
logic[XLEN-1:0] preshifted_input;
//Bit flipping shared shifter
always_comb begin
foreach (shifter_input[i])
preshifted_input[i] = shifter_input[31-i];
end
always_comb begin//2
case ({lshift, shift_amount[0]})
0: shiftx1 = shifter_input[31:0];
1: shiftx1 = {{1{arith}},shifter_input[31:1]};
2: shiftx1 = preshifted_input[31:0];
3: shiftx1 = {{1{arith}},preshifted_input[31:1]};
endcase
end
always_comb begin//2
case (shift_amount[2:1])
0: shiftx2 = shiftx1[31:0];
1: shiftx2 = {{2{arith}},shiftx1[31:2]};
2: shiftx2 = {{4{arith}},shiftx1[31:4]};
3: shiftx2 = {{6{arith}},shiftx1[31:6]};
endcase
end
always_comb begin//8
case (shift_amount[4:3])
0: shiftx8 = shiftx2[31:0];
1: shiftx8 = {{8{arith}},shiftx2[31:8]};
2: shiftx8 = {{16{arith}},shiftx2[31:16]};
3: shiftx8 = {{24{arith}},shiftx2[31:24]};
endcase
end
assign shifted_resultr = shiftx8;
always_comb begin
foreach (shifter_input[i])
shifted_resultl[i] = shiftx8[31-i];
end
logic [62:0] shift_in;
logic [4:0] adjusted_shift_amount;
////////////////////////////////////////////////////
//Implementation
//Performs a 63-bit right shift
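//Left shifts reuse the right shifter: the input is placed in the upper portion of shift_in (above 31 zeros)
//and is right shifted by (31 - shift_amount), which is the bitwise complement of the 5-bit shift_amount.
//Placing the input a full 32 bits up would instead require a shift by (~shift_amount + 1);
//with the value initially shifted by one, only the complement of the shift_amount is needed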
assign shift_in = lshift ? {shifter_input, 31'b0} : {{31{arith}}, shifter_input};
assign adjusted_shift_amount = shift_amount ^ {5{lshift}};
assign shifted_result = 32'(shift_in >> adjusted_shift_amount);
endmodule

View file

@ -115,7 +115,8 @@ module csr_regs (
logic[COUNTER_W-1:0] mcycle;
logic[COUNTER_W-1:0] mtime;
logic[COUNTER_W-1:0] minst_ret;
logic [1:0] inst_ret_inc;
localparam INST_RET_INC_W = 2;
logic [INST_RET_INC_W-1:0] inst_ret_inc;
//write_logic
logic supervisor_write;
@ -452,16 +453,10 @@ endgenerate
//Timers and Counters
//Register increment for instructions completed
always_ff @(posedge clk) begin
if (rst) begin
if (rst)
inst_ret_inc <= 0;
end else begin
if (instruction_complete & instruction_issued_no_rd)
inst_ret_inc <= 2;
else if (instruction_complete | instruction_issued_no_rd)
inst_ret_inc <= 1;
else
inst_ret_inc <= 0;
end
else
inst_ret_inc <= INST_RET_INC_W'(instruction_complete) + INST_RET_INC_W'(instruction_issued_no_rd);
end
always_ff @(posedge clk) begin
@ -533,8 +528,6 @@ endgenerate
always_ff @(posedge clk) begin
if (read_regs)
selected_csr_r <= selected_csr;
else
selected_csr_r <= 0;
end
assign wb_csr = selected_csr_r;

View file

@ -49,8 +49,6 @@ module decode_and_issue (
output logic gc_flush_required,
output logic load_store_issue,
output logic store_issued_with_data,
output logic [31:0] store_data,
output logic instruction_issued,
output logic instruction_issued_no_rd,
@ -188,7 +186,7 @@ module decode_and_issue (
assign issue_valid = fb_valid & ti.id_available & ~gc_issue_hold & ~gc_fetch_flush;
assign operands_ready = ~rf_decode.rs1_conflict & ~rf_decode.rs2_conflict;
assign load_store_operands_ready = ~rf_decode.rs1_conflict & (~rf_decode.rs2_conflict | (rf_decode.rs2_conflict & (opcode_trim == STORE_T)));
assign load_store_operands_ready = ~rf_decode.rs1_conflict & (~rf_decode.rs2_conflict | (rf_decode.rs2_conflict & (opcode_trim == STORE_T) & load_store_forwarding_possible));
//All units share the same operand ready logic except load-store which has an internal forwarding path
always_comb begin
@ -227,7 +225,7 @@ module decode_and_issue (
endcase
end
assign alu_inputs.in1 = {(alu_rs1_data[XLEN-1] & ~fn3[0]), alu_rs1_data};//(fn3[0] is SLTU_fn3);
assign alu_inputs.in1 = {(rf_decode.rs1_data[XLEN-1] & ~fn3[0]), alu_rs1_data};//(fn3[0] is SLTU_fn3);
assign alu_inputs.in2 = {(alu_rs2_data[XLEN-1] & ~fn3[0]), alu_rs2_data};
assign alu_inputs.shifter_in = rf_decode.rs1_data;
assign alu_inputs.shift_amount = opcode[5] ? rf_decode.rs2_data[4:0] : rs2_addr;
@ -235,7 +233,8 @@ module decode_and_issue (
assign alu_inputs.arith = alu_rs1_data[XLEN-1] & fb.instruction[30];//shift in bit
assign alu_inputs.lshift = ~fn3[2];
assign alu_inputs.logic_op = fb.alu_logic_op;
assign alu_inputs.op = fb.alu_op;
assign alu_inputs.shifter_path = ~(opcode[2] | fn3 inside {SLT_fn3, SLTU_fn3, XOR_fn3, OR_fn3, AND_fn3, ADD_SUB_fn3}); //opcode[2] LUI AUIPC JAL JALR
assign alu_inputs.slt_path = ~opcode[2] & fn3 inside {SLT_fn3, SLTU_fn3};
////////////////////////////////////////////////////
//Load Store unit inputs
@ -247,6 +246,10 @@ module decode_and_issue (
logic load_reserve;
logic [4:0] amo_type;
logic load_store_forwarding_possible;
logic [31:0] last_use_was_load;
logic [4:0] last_load_rd;
assign amo_op = USE_AMO ? (opcode_trim == AMO_T) : 1'b0;
assign amo_type = fb.instruction[31:27];
assign store_conditional = (amo_type == AMO_SC);
@ -267,19 +270,29 @@ module decode_and_issue (
assign is_store = (opcode_trim == STORE_T) || (amo_op && store_conditional);//Used for LS unit and for ID tracking
assign ls_offset = opcode[5] ? {fb.instruction[31:25], fb.instruction[11:7]} : fb.instruction[31:20];
always_ff @(posedge clk) begin
if (instruction_issued)
last_use_was_load[future_rd_addr] <= unit_needed[LS_UNIT_WB_ID] & is_load;
end
always_ff @(posedge clk) begin
if (issue[LS_UNIT_WB_ID])
last_load_rd <= future_rd_addr;
end
assign load_store_forwarding_possible = last_use_was_load[rs2_addr] && (last_load_rd == rs2_addr);
assign ls_inputs.rs1 = rf_decode.rs1_data;
assign ls_inputs.rs2 = rf_decode.rs2_data;
assign ls_inputs.offset = ls_offset;
assign ls_inputs.pc = fb.pc;
assign ls_inputs.fn3 = amo_op ? LS_W_fn3 : fn3;
assign ls_inputs.load = is_load;
assign ls_inputs.store = is_store;
assign ls_inputs.load_store_forward = rf_decode.rs2_conflict;
assign ls_inputs.load_store_forward = rf_decode.rs2_conflict & load_store_forwarding_possible;
assign ls_inputs.store_forward_id = rf_decode.rs2_id;
//Store data to commit/store buffer
assign store_issued_with_data = ~ls_inputs.load_store_forward & issue[LS_UNIT_WB_ID];
assign store_data = rf_decode.rs2_data;
////////////////////////////////////////////////////
//Branch unit inputs
assign branch_inputs.rs1 = rf_decode.rs1_data;
@ -337,11 +350,8 @@ module decode_and_issue (
logic [4:0] prev_div_rs1_addr;
logic [4:0] prev_div_rs2_addr;
logic prev_div_result_valid;
logic prev_div_result_valid_r;
//If a subsequent div request uses the same inputs then
//don't rerun div operation
logic div_rd_overwrites_rs1_or_rs2;
logic rd_overwrites_previously_saved_rs1_or_rs2;
logic set_prev_div_result_valid;
logic clear_prev_div_result_valid;
logic current_op_resuses_rs1_rs2;
always_ff @(posedge clk) begin
@ -351,29 +361,23 @@ module decode_and_issue (
end
end
assign div_rd_overwrites_rs1_or_rs2 = (future_rd_addr == rs1_addr || future_rd_addr == rs2_addr);
assign rd_overwrites_previously_saved_rs1_or_rs2 = (future_rd_addr == prev_div_rs1_addr || future_rd_addr == prev_div_rs2_addr);
assign current_op_resuses_rs1_rs2 = (prev_div_rs1_addr == rs1_addr) && (prev_div_rs2_addr == rs2_addr);
assign set_prev_div_result_valid = unit_needed[DIV_UNIT_WB_ID];
always_comb begin
prev_div_result_valid = prev_div_result_valid_r;
if ((unit_needed[DIV_UNIT_WB_ID] & ~div_rd_overwrites_rs1_or_rs2))
prev_div_result_valid = 1;
else if ((unit_needed[DIV_UNIT_WB_ID] & div_rd_overwrites_rs1_or_rs2) | (uses_rd & rd_overwrites_previously_saved_rs1_or_rs2))
prev_div_result_valid = 0;
end
//If the current div operation overwrites one of its own input registers OR any other instruction overwrites the last div operation's input registers
assign clear_prev_div_result_valid = uses_rd & ((future_rd_addr == (unit_needed[DIV_UNIT_WB_ID] ? rs1_addr : prev_div_rs1_addr)) || (future_rd_addr == (unit_needed[DIV_UNIT_WB_ID] ? rs2_addr : prev_div_rs2_addr)));
always_ff @(posedge clk) begin
if (rst)
prev_div_result_valid_r <= 0;
prev_div_result_valid <= 0;
else if (instruction_issued)
prev_div_result_valid_r <= prev_div_result_valid;
prev_div_result_valid <= (set_prev_div_result_valid | prev_div_result_valid) & ~clear_prev_div_result_valid;
end
assign div_inputs.rs1 = rf_decode.rs1_data;
assign div_inputs.rs2 = rf_decode.rs2_data;
assign div_inputs.op = fn3[1:0];
assign div_inputs.reuse_result = prev_div_result_valid_r & current_op_resuses_rs1_rs2;
assign div_inputs.reuse_result = prev_div_result_valid & current_op_resuses_rs1_rs2;
end
endgenerate

View file

@ -247,7 +247,7 @@ module gc_unit(
second_cycle_flush <= gc_flush_required;
gc_fetch_pc_override <= gc_flush_required | second_cycle_flush | ls_exception_first_cycle;
gc_fetch_pc <= ls_exception_second_cycle ? trap_pc :
stage1.is_i_fence ? stage1.pc + 4 : //Could stall on dec_pc valid and use instead of another adder
//stage1.is_i_fence ? stage1.pc + 4 : //Could stall on dec_pc valid and use instead of another adder
csr_mepc;// gc_inputs.is_ret
end

View file

@ -51,8 +51,6 @@ module load_store_unit (
output instruction_id_t store_done_id,
output logic store_complete,
post_issue_forwarding_interface.unit store_forwarding,
input logic[31:0] csr_rd,
input instruction_id_t csr_id,
input logic csr_done,
@ -83,6 +81,8 @@ module load_store_unit (
logic issue_request;
logic load_complete;
logic [31:0] prev_load;
logic [31:0] virtual_address;
logic [3:0] be;
@ -105,6 +105,7 @@ module load_store_unit (
typedef struct packed{
logic [31:0] virtual_address;
logic [31:0] store_data;
logic [2:0] fn3;
logic load;
logic store;
@ -139,6 +140,7 @@ module load_store_unit (
ls_input_fifo (.fifo(input_fifo), .*);
assign fifo_inputs.virtual_address = ls_inputs.rs1 + 32'(signed'(ls_inputs.offset));
assign fifo_inputs.store_data = ls_inputs.rs2;
assign fifo_inputs.fn3 = ls_inputs.fn3;
assign fifo_inputs.load = ls_inputs.load;
assign fifo_inputs.store = ls_inputs.store;
@ -175,7 +177,7 @@ module load_store_unit (
//When switching units, ensure no outstanding loads so that there can be no timing collisions with results
assign unit_stall = (current_unit != last_unit) && load_attributes.valid;
assign store_ready = stage1.store & store_forwarding.data_valid;
assign store_ready = stage1.store & ((stage1.load_store_forward & ~load_attributes.valid) | ~stage1.load_store_forward);
assign issue_request = input_fifo.valid & units_ready & ~unit_stall & ~unaligned_addr & (~stage1.store | store_ready);
////////////////////////////////////////////////////
@ -237,9 +239,8 @@ module load_store_unit (
assign shared_inputs.be = be;
assign shared_inputs.fn3 = stage1.fn3;
//Store forwarding request
assign store_forwarding.id = stage1.load_store_forward ? stage1.store_forward_id : stage1.instruction_id;
assign stage1_raw_data = store_forwarding.data;
//Store forwarding
assign stage1_raw_data = stage1.load_store_forward ? prev_load : stage1.store_data;
//Input: ABCD
//Assuming aligned requests,
@ -340,6 +341,11 @@ module load_store_unit (
endcase
end
always_ff @ (posedge clk) begin
if (load_complete)
prev_load <= final_load_data;
end
////////////////////////////////////////////////////
//Output bank
assign wb.rd = ls_done ? final_load_data : csr_rd;

View file

@ -164,21 +164,6 @@ module pre_decode
data_in.alu_logic_op = opcode[2] ? ALU_LOGIC_ADD : data_in.alu_logic_op;
end
always_comb begin
case (fn3)
SLT_fn3 : data_in.alu_op = ALU_SLT;
SLTU_fn3 : data_in.alu_op = ALU_SLT;
SLL_fn3 : data_in.alu_op = ALU_LSHIFT;
XOR_fn3 : data_in.alu_op = ALU_ADD_SUB;
OR_fn3 : data_in.alu_op = ALU_ADD_SUB;
AND_fn3 : data_in.alu_op = ALU_ADD_SUB;
SRA_fn3 : data_in.alu_op = ALU_RSHIFT;
ADD_SUB_fn3 : data_in.alu_op = ALU_ADD_SUB;
endcase
//put LUI, AUIPC, JAL and JALR through adder path
data_in.alu_op = opcode[2] ? ALU_ADD_SUB : data_in.alu_op;
end
logic non_mul_div_arith_op;
assign non_mul_div_arith_op = ((opcode_trimmed == ARITH_T) && ~pre_decode_instruction[25]);//pre_decode_instruction[25] denotes multiply/divide instructions
assign data_in.alu_request = non_mul_div_arith_op || (opcode_trimmed inside {ARITH_IMM_T, AUIPC_T, LUI_T, JAL_T, JALR_T});

View file

@ -64,12 +64,14 @@ module register_file(
register[rf_wb.rd_addr] <= rf_wb.rd_data;
end
id_inuse inuse_mem (.*,
assign in_use_match = (rf_wb.id == in_use_by[rf_wb.rd_addr]) && valid_write;
reg_inuse inuse (.*,
.clr(1'b0),
.rs1_addr(rf_decode.rs1_addr),.rs2_addr(rf_decode.rs2_addr), .issued_rd_addr(rf_decode.future_rd_addr),
.retired_rd_addr(rf_wb.rd_addr),
.issued(rf_decode.instruction_issued),
.issue_id(rf_decode.id),
.retired_id(rf_wb.id),
.retired(valid_write),
.retired(in_use_match),
.rs1_inuse(rs1_inuse),
.rs2_inuse(rs2_inuse)
);

View file

@ -31,7 +31,7 @@ package taiga_config;
//Privileged ISA Options
//Enable Machine level privilege spec
parameter ENABLE_M_MODE = 1;
parameter ENABLE_M_MODE = 0;
//Enable Supervisor level privilege spec
parameter ENABLE_S_MODE = 0;

View file

@ -286,7 +286,6 @@ package taiga_types;
logic [BRANCH_PREDICTOR_WAYS-1:0] bp_update_way;
logic alu_sub;
logic [1:0] alu_logic_op;
logic [1:0] alu_op;
logic alu_request;
alu_rs1_op_t alu_rs1_sel;
alu_rs2_op_t alu_rs2_sel;
@ -308,6 +307,8 @@ package taiga_types;
logic lshift;
logic [1:0] logic_op;
logic [1:0] op;
logic shifter_path;
logic slt_path;
} alu_inputs_t;
typedef struct packed {
@ -368,6 +369,7 @@ package taiga_types;
typedef struct packed{
logic [XLEN-1:0] rs1;
logic [XLEN-1:0] rs2;
logic [11:0] offset;
logic [2:0] fn3;
logic load;

View file

@ -39,10 +39,6 @@ module write_back(
input instruction_id_t store_done_id,
input logic store_complete,
post_issue_forwarding_interface.wb store_forwarding,
input logic store_issued_with_data,
input logic [31:0] store_data,
//Trace signals
output logic tr_wb_mux_contention
@ -55,8 +51,7 @@ module write_back(
//aliases for write-back-interface signals
instruction_id_t unit_instruction_id [NUM_WB_UNITS-1:0];
logic [NUM_WB_UNITS-1:0] unit_done;
//Force usage of f7 muxes
(* keep = "true" *) logic [XLEN-1:0] unit_rd [2*NUM_WB_UNITS-1:0];
logic [XLEN-1:0] unit_rd [NUM_WB_UNITS-1:0];
//Per-ID muxes for commit buffer
logic [$clog2(NUM_WB_UNITS)-1:0] id_unit_select [MAX_INFLIGHT_COUNT-1:0];
logic [$clog2(NUM_WB_UNITS)-1:0] id_unit_select_r [MAX_INFLIGHT_COUNT-1:0];
@ -87,11 +82,6 @@ module write_back(
assign unit_done[i] = unit_wb[i].done;
assign unit_rd[i] = unit_wb[i].rd;
end
for (i=NUM_WB_UNITS; i< 2*NUM_WB_UNITS; i++) begin
assign unit_rd[i] = store_data;
end
endgenerate
////////////////////////////////////////////////////
@ -113,7 +103,6 @@ module write_back(
//Set unit_ID for each ID as they are issued
//If ID is not in use, use the current issue_unit_id value
//This is used to support single cycle units, such as the ALU
//Stores are not tracked for id_inuse as their data is placed in the buffer at issue time
always_comb begin
id_issued_one_hot = 0;
id_issued_one_hot[ti.issue_id] = ti.issued & ~ti.inflight_packet.is_store;
@ -131,16 +120,10 @@ module write_back(
//Writeback Buffer
//Mux outputs of units based on IDs
//If ID is done write result to buffer
logic [MAX_INFLIGHT_COUNT-1:0] store_mux;
always_comb begin
store_mux = 0;
store_mux[ti.issue_id] = store_issued_with_data;
end
generate for (i=0; i< MAX_INFLIGHT_COUNT; i++) begin
always_ff @ (posedge clk) begin
if (id_writing_to_buffer[i] |store_mux[i])
results_by_id[i] <= unit_rd[{store_mux[i],id_unit_select[i]}];
if (id_writing_to_buffer[i])
results_by_id[i] <= unit_rd[id_unit_select[i]];
end
end endgenerate
@ -154,11 +137,6 @@ module write_back(
id_inuse <= (id_issued_one_hot | id_inuse) & ~id_writing_to_buffer;
end
//As IDs are freed for reuse in repeating order, the results will not be overwritten before the instruction
//needing them has itself completed
assign store_forwarding.data_valid = ~id_inuse[store_forwarding.id];
assign store_forwarding.data = results_by_id[store_forwarding.id];
////////////////////////////////////////////////////
//ID Tracking
//Provides ordering of IDs, ID for issue and oldest ID for committing to register file

View file

@ -96,7 +96,7 @@ build_coremark:
.PHONY: run_coremark_verilator
run_coremark_verilator :
./build_taiga_sim/Vtaiga_local_mem "/dev/null" "/dev/null" $(TAIGA_DIR)/tools/coremark.hw_init $(VERILATOR_TRACE_FILE) >> $@
./build_taiga_sim/Vtaiga_local_mem "/dev/null" "/dev/null" $(TAIGA_DIR)/tools/coremark.hw_init $(VERILATOR_TRACE_FILE) > $@
#Benchmarks already built

View file

@ -94,6 +94,7 @@
../core/decode_and_issue.sv
../core/id_inuse.sv
../core/reg_inuse.sv
../core/register_file.sv
../core/id_tracking.sv