Merge branch 'id-rework' into 'master'

Id rework

See merge request sfu-rcl/Taiga-dev!5
This commit is contained in:
Eric Matthews 2020-06-16 23:59:29 +00:00
commit 7d35b3de94
49 changed files with 1767 additions and 2939 deletions

View file

@ -1,5 +1,5 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
* Copyright © 2017-2020 Eric Matthews, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -29,7 +29,7 @@ module alu_unit(
input logic rst,
unit_issue_interface.unit issue,
input alu_inputs_t alu_inputs,
output unit_writeback_t wb
unit_writeback_interface.unit wb
);
logic[XLEN:0] add_sub_result;
@ -79,8 +79,8 @@ module alu_unit(
//Output
assign issue.ready = 1;
assign wb.rd = result;
assign wb.done = issue.new_request;
assign wb.id = issue.instruction_id;
assign wb.done = issue.possible_issue;
assign wb.id = issue.id;
////////////////////////////////////////////////////
//Assertions

View file

@ -29,6 +29,7 @@ module branch_comparator(
input logic less_than,
input logic [31:0] a,
input logic [31:0] b,
input logic xor_result,
output logic result
);
@ -45,6 +46,7 @@ module branch_comparator(
logic [15:0] sub_eq_a;
logic [15:0] sub_toss;
logic carry_out;
logic eq_carry_in;
logic ls_carry_in;
@ -87,6 +89,11 @@ module branch_comparator(
sub_eq_a[i] = (eq_a[2*i] | eq_b[2*i]) & (eq_a[2*i + 1] | eq_b[2*i + 1]); //bits are equal
end
//branch_inputs.fn3[0] is xor_result and selects the inverse result
//i.e. (not equal, greater than or equal). Included here to reduce the number of inputs
//in the branch target adder
sub_ls_a[15] ^= xor_result;
sub_eq_a[15] ^= xor_result;
end

View file

@ -28,6 +28,8 @@ module branch_predictor (
input logic clk,
input logic rst,
branch_predictor_interface.branch_predictor bp,
output branch_metadata_t branch_metadata_if,
input branch_metadata_t branch_metadata_ex,
input branch_results_t br_results
);
@ -54,7 +56,9 @@ module branch_predictor (
typedef struct packed {
logic valid;
logic [BTAG_W-1:0] tag;
logic use_ras;
logic is_branch;
logic is_return;
logic is_call;
branch_predictor_metadata_t metadata;
} branch_table_entry_t;
@ -62,24 +66,25 @@ module branch_predictor (
branch_table_entry_t if_entry [BRANCH_PREDICTOR_WAYS-1:0];
branch_table_entry_t ex_entry;
logic branch_predictor_direction_changed;
logic [31:0] new_jump_addr;
logic [31:0] predicted_pc [BRANCH_PREDICTOR_WAYS-1:0];
logic [BRANCH_PREDICTOR_WAYS-1:0] tag_matches;
logic [BRANCH_PREDICTOR_WAYS-1:0] replacement_way;
logic [BRANCH_PREDICTOR_WAYS-1:0] update_way;
logic [BRANCH_PREDICTOR_WAYS-1:0] tag_update_way;
logic [BRANCH_PREDICTOR_WAYS-1:0] target_update_way;
logic [$clog2(BRANCH_PREDICTOR_WAYS > 1 ? BRANCH_PREDICTOR_WAYS : 2)-1:0] hit_way;
logic tag_match;
logic use_predicted_pc;
/////////////////////////////////////////
cycler #(BRANCH_PREDICTOR_WAYS) replacement_policy (.*, .en(1'b1), .one_hot(replacement_way));
genvar i;
generate if (USE_BRANCH_PREDICTOR)
for (i=0; i<BRANCH_PREDICTOR_WAYS; i++) begin : branch_tag_banks
branch_predictor_ram #(.C_DATA_WIDTH($bits(branch_table_entry_t)), .C_DEPTH(BRANCH_TABLE_ENTRIES))
tag_bank (.*,
.write_addr(br_results.pc_ex[2 +: BRANCH_ADDR_W]), .write_en(update_way[i]), .write_data(ex_entry),
.write_addr(br_results.pc_ex[2 +: BRANCH_ADDR_W]), .write_en(tag_update_way[i]), .write_data(ex_entry),
.read_addr(bp.next_pc[2 +: BRANCH_ADDR_W]), .read_en(bp.new_mem_request), .read_data(if_entry[i]));
end
endgenerate
@ -88,7 +93,7 @@ module branch_predictor (
for (i=0; i<BRANCH_PREDICTOR_WAYS; i++) begin : branch_table_banks
branch_predictor_ram #(.C_DATA_WIDTH(32), .C_DEPTH(BRANCH_TABLE_ENTRIES))
addr_table (.*,
.write_addr(br_results.pc_ex[2 +: BRANCH_ADDR_W]), .write_en(update_way[i]), .write_data(new_jump_addr),
.write_addr(br_results.pc_ex[2 +: BRANCH_ADDR_W]), .write_en(target_update_way[i]), .write_data(br_results.new_pc),
.read_addr(bp.next_pc[2 +: BRANCH_ADDR_W]), .read_en(bp.new_mem_request), .read_data(predicted_pc[i]));
end
endgenerate
@ -99,6 +104,8 @@ module branch_predictor (
end
endgenerate
////////////////////////////////////////////////////
//Instruction Fetch Response
generate if (BRANCH_PREDICTOR_WAYS > 1)
one_hot_to_integer #(BRANCH_PREDICTOR_WAYS) hit_way_conv (.*, .one_hot(tag_matches), .int_out(hit_way));
else
@ -106,38 +113,51 @@ module branch_predictor (
endgenerate
assign tag_match = |tag_matches;
assign bp.predicted_pc = predicted_pc[hit_way];
assign bp.metadata = if_entry[hit_way].metadata;
assign bp.use_ras = if_entry[hit_way].use_ras;
assign bp.update_way = tag_matches;
assign use_predicted_pc = USE_BRANCH_PREDICTOR & tag_match;
//Predict next branch to same location/direction as current
//Predicted PC and whether the prediction is valid
assign bp.predicted_pc = predicted_pc[hit_way];
assign bp.use_prediction = use_predicted_pc;
assign bp.is_branch = if_entry[hit_way].is_branch;
assign bp.is_return = if_entry[hit_way].is_return;
assign bp.is_call = if_entry[hit_way].is_call;
////////////////////////////////////////////////////
//Execution stage update
assign ex_entry.valid = 1;
assign ex_entry.tag = get_tag(br_results.pc_ex);
assign ex_entry.use_ras = br_results.is_return_ex;
assign ex_entry.is_branch = br_results.is_branch_ex;
assign ex_entry.is_return = br_results.is_return_ex;
assign ex_entry.is_call = br_results.is_call_ex;
assign new_jump_addr = ex_entry.metadata[1] ? br_results.jump_pc : br_results.njump_pc;
//2-bit saturating counter
always_comb begin
case(br_results.branch_ex_metadata)
case(branch_metadata_ex.branch_predictor_metadata)
2'b00 : ex_entry.metadata = br_results.branch_taken ? 2'b01 : 2'b00;
2'b01 : ex_entry.metadata = br_results.branch_taken ? 2'b10 : 2'b00;
2'b10 : ex_entry.metadata = br_results.branch_taken ? 2'b11 : 2'b01;
2'b11 : ex_entry.metadata = br_results.branch_taken ? 2'b11 : 2'b10;
endcase
if (~br_results.branch_prediction_used)
if (~branch_metadata_ex.branch_prediction_used)
ex_entry.metadata = br_results.branch_taken ? 2'b11 : 2'b00;
end
assign branch_predictor_direction_changed =
(~branch_metadata_ex.branch_prediction_used) |
(branch_metadata_ex.branch_predictor_metadata[1] ^ ex_entry.metadata[1]);
assign update_way = {BRANCH_PREDICTOR_WAYS{br_results.branch_ex}} & (br_results.branch_prediction_used ? br_results.bp_update_way : replacement_way);
assign tag_update_way = {BRANCH_PREDICTOR_WAYS{br_results.branch_ex}} & (branch_metadata_ex.branch_predictor_update_way);
assign target_update_way = {BRANCH_PREDICTOR_WAYS{branch_predictor_direction_changed}} & tag_update_way;
////////////////////////////////////////////////////
//Target PC if branch flush occurred
assign bp.branch_flush_pc = br_results.new_pc;
assign bp.branch_flush_pc = br_results.branch_taken ? br_results.jump_pc : br_results.njump_pc;
////////////////////////////////////////////////////
//Instruction Fetch metadata
cycler #(BRANCH_PREDICTOR_WAYS) replacement_policy (.*, .en(1'b1), .one_hot(replacement_way));
generate if (USE_BRANCH_PREDICTOR) begin
assign bp.use_prediction = tag_match;
end else begin
assign bp.use_prediction = 0;
end endgenerate
assign branch_metadata_if.branch_predictor_metadata = if_entry[hit_way].metadata;
assign branch_metadata_if.branch_prediction_used = use_predicted_pc;
assign branch_metadata_if.branch_predictor_update_way = tag_match ? tag_matches : replacement_way;
endmodule

View file

@ -37,16 +37,20 @@ module branch_predictor_ram
input logic [C_DATA_WIDTH-1:0] write_data,
output logic [C_DATA_WIDTH-1:0] read_data
);
(* ram_style = "block", rw_addr_collision = "no" *)logic [C_DATA_WIDTH-1:0] branch_ram [C_DEPTH-1:0];
(* ram_style = "block" *)logic [C_DATA_WIDTH-1:0] branch_ram [C_DEPTH-1:0];
//implementation
////////////////////////////////////////////////////
//Write-first RAM needed to handle the following potential collision:
//An update from a miss occurs on the same cycle as a subsequent fetch to the same instruction
initial branch_ram = '{default: 0};
always_ff @(posedge clk) begin
if (write_en)
branch_ram[write_addr] <= write_data;
end
always_ff @(posedge clk) begin
// synthesis translate_off
if (write_en)
branch_ram[write_addr] = write_data;//Forcing write first behaviour for simulation
// synthesis translate_on
if (read_en)
read_data <= branch_ram[read_addr];
end

View file

@ -34,6 +34,10 @@ module branch_unit(
ras_interface.branch_unit ras,
output logic branch_flush,
output logic branch_complete,
output id_t branch_id,
input branch_metadata_t branch_metadata_ex,
output logic potential_branch_exception,
output logic branch_exception_is_jump,
output exception_packet_t br_exception,
@ -47,41 +51,23 @@ module branch_unit(
logic branch_issued_r;
logic[19:0] jal_imm;
logic[11:0] jalr_imm;
logic[11:0] br_imm;
logic [31:0] pc_offset;
logic [31:0] jump_base;
logic [31:0] jump_pc_dec;
logic result;
logic result_ex;
logic [2:0] fn3_ex;
logic jump_ex;
//Branch Predictor
logic branch_taken;
logic branch_correctly_taken;
logic branch_correclty_not_taken;
logic miss_predict;
logic branch_taken_ex;
id_t id_ex;
logic [31:0] new_pc;
logic [31:0] new_pc_ex;
logic [31:0] pc_ex;
logic [31:0] jump_pc;
logic [31:0] njump_pc;
logic [1:0] branch_metadata;
logic branch_prediction_used;
logic [BRANCH_PREDICTOR_WAYS-1:0] bp_update_way;
logic instruction_is_completing;
//RAS
logic is_call;
logic is_return;
//implementation
logic jal_jalr_ex;
////////////////////////////////////////////////////
//Implementation
//Only stall condition is if the following instruction is not valid for pc comparisons.
//If the next instruction isn't valid, no instruction can be issued anyways, so it
//is safe to hardcode this to one.
@ -91,122 +77,93 @@ module branch_unit(
set_clr_reg_with_rst #(.SET_OVER_CLR(1), .WIDTH(1), .RST_VALUE(0)) branch_issued_m (
.clk, .rst,
.set(issue.new_request),
.clr(branch_inputs.dec_pc_valid | br_exception.valid),
.clr(branch_inputs.issue_pc_valid | br_exception.valid),
.result(branch_issued_r)
);
assign instruction_is_completing = branch_issued_r & branch_inputs.dec_pc_valid;
//To determine if the branch was predicted correctly we need to wait until the
//subsequent instruction has reached the issue stage
assign instruction_is_completing = branch_issued_r & branch_inputs.issue_pc_valid;
////////////////////////////////////////////////////
//Branch/Jump target determination
//Branch comparison and final address calculation
//are performed in the issue stage
branch_comparator bc (
.use_signed(branch_inputs.use_signed),
.less_than(branch_inputs.fn3[2]),
.a(branch_inputs.rs1),
.b(branch_inputs.rs2),
.xor_result(branch_inputs.fn3[0]),
.result(result)
);
assign branch_taken = (~jump_ex & (result_ex ^ fn3_ex[0])) | jump_ex;
assign branch_taken = result | branch_inputs.jalr | branch_inputs.jal;
assign jal_imm = {branch_inputs.instruction[31], branch_inputs.instruction[19:12], branch_inputs.instruction[20], branch_inputs.instruction[30:21]};
assign jalr_imm = branch_inputs.instruction[31:20];
assign br_imm = {branch_inputs.instruction[31], branch_inputs.instruction[7], branch_inputs.instruction[30:25], branch_inputs.instruction[11:8]};
always_comb begin
unique if (branch_inputs.jalr)
pc_offset = 32'(signed'(jalr_imm));
else if (branch_inputs.jal)
pc_offset = 32'(signed'({jal_imm, 1'b0}));
else
pc_offset = 32'(signed'({br_imm, 1'b0}));
end
always_comb begin
if (branch_inputs.jalr)
jump_base = branch_inputs.rs1;
else
jump_base = branch_inputs.dec_pc;
end
assign jump_pc_dec = jump_base + pc_offset;
assign jump_base = branch_inputs.jalr ? branch_inputs.rs1 : branch_inputs.issue_pc;
assign new_pc = jump_base + (branch_taken ? 32'(signed'(branch_inputs.pc_offset)) : 4);
always_ff @(posedge clk) begin
if (instruction_is_completing | ~branch_issued_r) begin
fn3_ex <= branch_inputs.fn3;
result_ex <= result;
jump_ex <= (branch_inputs.jal | branch_inputs.jalr);
branch_taken_ex <= branch_taken;
new_pc_ex[31:1] <= new_pc[31:1];
new_pc_ex[0] <= new_pc[0] & ~branch_inputs.jalr;
id_ex <= issue.id;
jal_jalr_ex <= branch_inputs.jal | branch_inputs.jalr;
end
end
////////////////////////////////////////////////////
//Exception support
instruction_id_t jmp_instruction_id;
id_t jmp_id;
generate if (ENABLE_M_MODE) begin
always_ff @(posedge clk) begin
if (instruction_is_completing | ~branch_issued_r)
jmp_instruction_id <= issue.instruction_id;
if (instruction_is_completing | ~branch_issued_r) begin
jmp_id <= issue.id;
branch_exception_is_jump <= (branch_inputs.jal | branch_inputs.jalr);
end
end
assign potential_branch_exception = jump_pc_dec[1] & issue.new_request;
assign br_exception.valid = (jump_pc[1] & branch_taken) & branch_issued_r;
assign potential_branch_exception = new_pc[1] & issue.new_request;
assign br_exception.valid = new_pc_ex[1] & branch_taken_ex & branch_issued_r;
assign br_exception.code = INST_ADDR_MISSALIGNED;
assign br_exception.pc = pc_ex;
assign br_exception.tval = jump_pc;
assign br_exception.id = jmp_instruction_id;
assign branch_exception_is_jump = (branch_inputs.jal | branch_inputs.jalr);
assign br_exception.tval = new_pc_ex;
assign br_exception.id = jmp_id;
end
endgenerate
////////////////////////////////////////////////////
//ID Management
assign branch_complete = instruction_is_completing & (~jal_jalr_ex) & (~br_exception.valid);
assign branch_id = id_ex;
////////////////////////////////////////////////////
//RAS support
assign ras.branch_retired = branch_complete & branch_metadata_ex.branch_prediction_used;
////////////////////////////////////////////////////
//Predictor support
logic is_return;
logic is_call;
always_ff @(posedge clk) begin
if (instruction_is_completing | ~branch_issued_r) begin
pc_ex <= branch_inputs.dec_pc;
jump_pc <= {jump_pc_dec[31:1], 1'b0};
njump_pc <= branch_inputs.dec_pc + 4;
branch_metadata <= branch_inputs.branch_metadata;
branch_prediction_used <= branch_inputs.branch_prediction_used;
bp_update_way <= branch_inputs.bp_update_way;
is_return <= branch_inputs.is_return;
is_call <= branch_inputs.is_call;
pc_ex <= branch_inputs.issue_pc;
end
end
assign br_results.pc_ex = pc_ex;
assign br_results.jump_pc = jump_pc;
assign br_results.njump_pc = njump_pc;
assign br_results.branch_ex_metadata = branch_metadata;
assign br_results.branch_taken = branch_taken;
assign br_results.new_pc = new_pc_ex;
assign br_results.branch_taken = branch_taken_ex;
assign br_results.branch_ex = instruction_is_completing;
assign br_results.is_branch_ex = ~jal_jalr_ex;
assign br_results.is_return_ex = is_return;
assign br_results.branch_prediction_used = branch_prediction_used;
assign br_results.bp_update_way = bp_update_way;
assign br_results.is_call_ex = is_call;
assign branch_flush = instruction_is_completing && (branch_inputs.issue_pc[31:1] != new_pc_ex[31:1]);
assign branch_correctly_taken = {branch_taken, branch_inputs.dec_pc[31:1]} == {1'b1, jump_pc[31:1]};
assign branch_correclty_not_taken = {branch_taken, branch_inputs.dec_pc[31:1]} == {1'b0, njump_pc[31:1]};
assign miss_predict = ~(branch_correctly_taken | branch_correclty_not_taken);
assign branch_flush = USE_BRANCH_PREDICTOR ?
instruction_is_completing & miss_predict:
instruction_is_completing & branch_taken;
////////////////////////////////////////////////////
//RAS support
generate if (USE_BRANCH_PREDICTOR) begin
always_ff @(posedge clk) begin
if (instruction_is_completing | ~branch_issued_r) begin
is_call <= branch_inputs.is_call;
is_return <= branch_inputs.is_return;
end
end
assign ras.push = instruction_is_completing & is_call;
assign ras.pop = instruction_is_completing & is_return;
assign ras.new_addr = njump_pc;
end
endgenerate
////////////////////////////////////////////////////
//End of Implementation
////////////////////////////////////////////////////
@ -217,10 +174,10 @@ module branch_unit(
////////////////////////////////////////////////////
//Trace Interface
generate if (ENABLE_TRACE_INTERFACE) begin
assign tr_branch_correct = instruction_is_completing & ~is_return & ~miss_predict;
assign tr_branch_misspredict = instruction_is_completing & ~is_return & miss_predict;
assign tr_return_correct = instruction_is_completing & is_return & ~miss_predict;
assign tr_return_misspredict = instruction_is_completing & is_return & miss_predict;
assign tr_branch_correct = instruction_is_completing & ~is_return & ~branch_flush;
assign tr_branch_misspredict = instruction_is_completing & ~is_return & branch_flush;
assign tr_return_correct = instruction_is_completing & is_return & ~branch_flush;
assign tr_return_misspredict = instruction_is_completing & is_return & branch_flush;
end
endgenerate

View file

@ -39,10 +39,7 @@ module csr_regs
input exception_packet_t gc_exception,
output exception_packet_t csr_exception,
output logic [1:0] current_privilege,
input logic gc_supress_writeback,
//Decode
input logic instruction_issued_no_rd,
input logic [31:0] exception_pc,
//exception_control
input logic mret,
@ -57,8 +54,7 @@ module csr_regs
mmu_interface.csr dmmu,
//WB
input logic instruction_complete,
input logic [$clog2(MAX_COMPLETE_COUNT)-1:0] retire_inc,
//External
input logic interrupt,
@ -126,14 +122,13 @@ module csr_regs
//convert addr into packed struct form
assign csr_addr = csr_inputs.csr_addr;
assign supervisor_write = commit && (csr_addr.rw_bits != CSR_READ_ONLY && csr_addr.privilege == SUPERVISOR_PRIVILEGE);
assign machine_write = commit && (csr_addr.rw_bits != CSR_READ_ONLY && csr_addr.privilege == MACHINE_PRIVILEGE);
////////////////////////////////////////////////////
//Exception Check
assign privilege_exception = new_request & (csr_addr.privilege > privilege_level);
assign supervisor_write = commit && !privilege_exception && (csr_addr.rw_bits != CSR_READ_ONLY && csr_addr.privilege == SUPERVISOR_PRIVILEGE);
assign machine_write = commit && !privilege_exception && (csr_addr.rw_bits != CSR_READ_ONLY && csr_addr.privilege == MACHINE_PRIVILEGE);
logic illegal_instruction;
assign illegal_instruction = invalid_addr | privilege_exception;
assign csr_exception.valid = new_request & illegal_instruction;
assign csr_exception.valid = new_request & (invalid_addr | privilege_exception);
always_comb begin
case (csr_inputs.csr_op)
@ -382,7 +377,7 @@ generate if (ENABLE_M_MODE) begin
always_ff @(posedge clk) begin
mepc[1:0] <= '0;
if (mwrite_decoder[MEPC[7:0]] | gc_exception.valid)
mepc[XLEN-1:2] <= gc_exception.valid ? gc_exception.pc[XLEN-1:2] : updated_csr[XLEN-1:2];
mepc[XLEN-1:2] <= gc_exception.valid ? exception_pc[XLEN-1:2] : updated_csr[XLEN-1:2];
end
assign csr_mepc = mepc;
@ -490,22 +485,13 @@ endgenerate
////////////////////////////////////////////////////
//Timers and Counters
//Register increment for instructions completed
logic instruction_retired;
assign instruction_retired = instruction_complete & ~gc_supress_writeback;
always_ff @(posedge clk) begin
if (rst)
inst_ret_inc <= 0;
else
inst_ret_inc <= INST_RET_INC_W'(instruction_retired) + INST_RET_INC_W'(instruction_issued_no_rd);
end
always_ff @(posedge clk) begin
if (rst) begin
mcycle <= 0;
minst_ret <= 0;
end else begin
mcycle <= mcycle + 1;
minst_ret <= minst_ret + COUNTER_W'(inst_ret_inc);
minst_ret <= minst_ret + COUNTER_W'(retire_inc);
end
end

View file

@ -28,12 +28,12 @@ module decode_and_issue (
input logic clk,
input logic rst,
output logic pre_decode_pop,
input logic fb_valid,
input fetch_buffer_packet_t fb,
//ID Management
input decode_packet_t decode,
output logic decode_advance,
tracking_interface.decode ti,
register_file_issue_interface.issue rf_issue,
output issue_packet_t issue,
input logic [31:0] rs_data [REGFILE_READ_PORTS],
output alu_inputs_t alu_inputs,
output load_store_inputs_t ls_inputs,
@ -43,17 +43,20 @@ module decode_and_issue (
output div_inputs_t div_inputs,
unit_issue_interface.decode unit_issue [NUM_UNITS-1:0],
input logic potential_branch_exception,
output logic alu_issued,
input logic gc_issue_hold,
input logic gc_fetch_flush,
input logic gc_issue_flush,
output logic gc_flush_required,
output logic load_store_issue,
//ID Management
input logic rs_inuse [REGFILE_READ_PORTS],
input id_t rs_id [REGFILE_READ_PORTS],
input logic rs_id_inuse [REGFILE_READ_PORTS],
output logic instruction_issued,
output logic instruction_issued_no_rd,
output logic instruction_issued_with_rd,
output logic illegal_instruction,
//Trace signals
@ -87,13 +90,13 @@ module decode_and_issue (
logic uses_rs1;
logic uses_rs2;
logic uses_rd;
logic rd_zero;
logic [4:0] rs1_addr;
logic [4:0] rs2_addr;
logic [4:0] rd_addr;
logic nop;
logic csr_imm_op;
logic environment_op;
logic issue_valid;
logic operands_ready;
@ -103,141 +106,202 @@ module decode_and_issue (
logic [NUM_WB_UNITS-1:0] unit_needed_for_id_gen;
logic [WB_UNITS_WIDTH-1:0] unit_needed_for_id_gen_int;
logic [NUM_UNITS-1:0] unit_needed;
logic [NUM_UNITS-1:0] unit_needed_issue_stage;
logic [NUM_UNITS-1:0] unit_ready;
logic [NUM_UNITS-1:0] issue_ready;
logic [NUM_UNITS-1:0] issue;
logic [NUM_UNITS-1:0] issue_to;
logic illegal_instruction_pattern;
logic issue_stage_ready;
logic rs1_conflict;
logic rs2_conflict;
genvar i;
////////////////////////////////////////////////////
//Implementation
////////////////////////////////////////////////////
//Instruction Buffer
assign pre_decode_pop = instruction_issued;
assign uses_rs1 = fb.uses_rs1;
assign uses_rs2 = fb.uses_rs2;
assign uses_rd = fb.uses_rd;
assign issue_stage_ready = (~issue.stage_valid) | instruction_issued;
assign decode_advance = decode.valid & issue_stage_ready;
//Instruction aliases
assign opcode = fb.instruction[6:0];
assign opcode = decode.instruction[6:0];
assign opcode_trim = opcode[6:2];
assign fn3 = fb.instruction[14:12];
assign rs1_addr = fb.instruction[19:15];
assign rs2_addr = fb.instruction[24:20];
assign rd_addr = fb.instruction[11:7];
assign fn3 = decode.instruction[14:12];
assign rs1_addr = decode.instruction[19:15];
assign rs2_addr = decode.instruction[24:20];
assign rd_addr = decode.instruction[11:7];
assign rd_zero = ~|rd_addr;
assign nop = (opcode_trim inside {LUI_T, AUIPC_T, ARITH_T, ARITH_IMM_T} && rd_zero);
assign csr_imm_op = (opcode_trim == SYSTEM_T) && fn3[2];
assign environment_op = (opcode_trim == SYSTEM_T) && (fn3 == 0);
////////////////////////////////////////////////////
//Register File interface inputs
assign rf_issue.rs1_addr = rs1_addr;
assign rf_issue.rs2_addr = rs2_addr;
assign rf_issue.rd_addr = rd_addr;
assign rf_issue.instruction_issued = instruction_issued_with_rd & ~rd_zero;
assign rf_issue.id = ti.issue_id;
assign rf_issue.uses_rs1 = uses_rs1;
assign rf_issue.uses_rs2 = uses_rs2;
//Register File Support
assign uses_rs1 = !(opcode_trim inside {LUI_T, AUIPC_T, JAL_T, FENCE_T} || csr_imm_op || environment_op);
assign uses_rs2 = opcode_trim inside {BRANCH_T, STORE_T, ARITH_T, AMO_T};
assign uses_rd = !(opcode_trim inside {BRANCH_T, STORE_T, FENCE_T} || environment_op);
////////////////////////////////////////////////////
//Tracking Interface
//CSR results are passed through the load/store output
always_comb begin
unit_needed_for_id_gen = unit_needed[NUM_WB_UNITS-1:0];
unit_needed_for_id_gen[LS_UNIT_WB_ID] |= (unit_needed[GC_UNIT_ID] & is_csr);
always_ff @(posedge clk) begin
if (rst | gc_fetch_flush)
issue.stage_valid <= 0;
else if (issue_stage_ready)
issue.stage_valid <= decode.valid;
end
always_ff @(posedge clk) begin
if (issue_stage_ready) begin
issue.pc <= decode.pc;
issue.instruction <= decode.instruction;
issue.fn3 <= fn3;
issue.opcode <= opcode;
issue.rs_addr[RS1] <= rs1_addr;
issue.rs_addr[RS2] <= rs2_addr;
issue.rd_addr <= rd_addr;
issue.id <= decode.id;
issue.uses_rs1 <= uses_rs1;
issue.uses_rs2 <= uses_rs2;
issue.uses_rd <= uses_rd;
end
end
one_hot_to_integer #(NUM_WB_UNITS) unit_id_gen (.*, .one_hot(unit_needed_for_id_gen), .int_out(unit_needed_for_id_gen_int));
assign ti.inflight_packet.rd_addr = rd_addr;
assign ti.inflight_packet.is_store = is_store;
assign ti.issued = instruction_issued & (uses_rd | unit_needed[LS_UNIT_WB_ID]);
assign ti.issue_unit_id = unit_needed_for_id_gen_int;
assign ti.exception_possible = opcode_trim inside {LOAD_T, STORE_T, AMO_T};
////////////////////////////////////////////////////
//Unit Determination
assign mult_div_op = fb.instruction[25];
assign unit_needed[BRANCH_UNIT_ID] = opcode_trim inside {BRANCH_T, JAL_T, JALR_T};
assign unit_needed[ALU_UNIT_WB_ID] = fb.alu_request;
assign unit_needed[ALU_UNIT_WB_ID] = ((opcode_trim == ARITH_T) && ~decode.instruction[25]) || (opcode_trim inside {ARITH_IMM_T, AUIPC_T, LUI_T, JAL_T, JALR_T});
assign unit_needed[LS_UNIT_WB_ID] = opcode_trim inside {LOAD_T, STORE_T, AMO_T};
assign unit_needed[GC_UNIT_ID] = opcode_trim inside {SYSTEM_T, FENCE_T};
assign mult_div_op = (opcode_trim == ARITH_T) && decode.instruction[25];
generate if (USE_MUL)
assign unit_needed[MUL_UNIT_WB_ID] = (opcode_trim == ARITH_T) && mult_div_op && ~fn3[2];
assign unit_needed[MUL_UNIT_WB_ID] = mult_div_op && ~fn3[2];
endgenerate
generate if (USE_DIV)
assign unit_needed[DIV_UNIT_WB_ID] = (opcode_trim == ARITH_T) && mult_div_op && fn3[2];
assign unit_needed[DIV_UNIT_WB_ID] = mult_div_op && fn3[2];
endgenerate
always_ff @(posedge clk) begin
if (issue_stage_ready)
unit_needed_issue_stage <= unit_needed;
end
////////////////////////////////////////////////////
//Unit ready
generate
for (i=0; i<NUM_UNITS; i++) begin
assign unit_ready[i] = unit_issue[i].ready;
end
endgenerate
generate for (i=0; i<NUM_UNITS; i++) begin
assign unit_ready[i] = unit_issue[i].ready;
end endgenerate
////////////////////////////////////////////////////
//Issue Determination
assign issue_valid = fb_valid & ti.id_available & ~gc_issue_hold & ~gc_fetch_flush & ~illegal_instruction_pattern;
assign issue_valid = issue.stage_valid & ~gc_issue_hold & ~gc_fetch_flush;
assign operands_ready = ~rf_issue.rs1_conflict & ~rf_issue.rs2_conflict;
assign rs1_conflict = rs_inuse[RS1] & rs_id_inuse[RS1] & issue.uses_rs1;
assign rs2_conflict = rs_inuse[RS2] & rs_id_inuse[RS2] & issue.uses_rs2;
assign operands_ready = ~rs1_conflict & ~rs2_conflict;
//All units share the same operand ready logic except load-store which has an internal forwarding path
always_comb begin
unit_operands_ready = {NUM_UNITS{operands_ready}};
unit_operands_ready[LS_UNIT_WB_ID] = ~rf_issue.rs1_conflict;
unit_operands_ready[LS_UNIT_WB_ID] = ~rs1_conflict;
end
assign issue_ready = unit_needed & unit_ready;
assign issue = {NUM_UNITS{issue_valid}} & unit_operands_ready & issue_ready;
assign issue_ready = unit_needed_issue_stage & unit_ready;
assign issue_to = {NUM_UNITS{issue_valid}} & unit_operands_ready & issue_ready;
assign instruction_issued = issue_valid & |(unit_operands_ready & issue_ready);
assign instruction_issued_no_rd = instruction_issued & ~uses_rd;
assign instruction_issued_with_rd = instruction_issued & uses_rd;
//Decode outputs
assign load_store_issue = issue[LS_UNIT_WB_ID];
////////////////////////////////////////////////////
//ALU unit inputs
logic [XLEN-1:0] alu_rs1_data;
logic [XLEN-1:0] alu_rs2_data;
logic [31:0] pre_alu_rs2;
logic [31:0] pre_alu_rs2_r;
logic [31:0] pre_alu_rs1_r;
logic rs1_use_regfile;
logic rs2_use_regfile;
always_comb begin
case(fb.alu_rs1_sel)
ALU_RS1_ZERO : alu_rs1_data = '0;
ALU_RS1_PC : alu_rs1_data = fb.pc;
default : alu_rs1_data = rf_issue.rs1_data; //ALU_RS1_RF
endcase
case(fb.alu_rs2_sel)
ALU_RS2_LUI_AUIPC : alu_rs2_data = {fb.instruction[31:12], 12'b0};
ALU_RS2_ARITH_IMM : alu_rs2_data = 32'(signed'(fb.instruction[31:20]));
ALU_RS2_JAL_JALR : alu_rs2_data = 4;
ALU_RS2_RF : alu_rs2_data = rf_issue.rs2_data;
endcase
if (opcode_trim inside {LUI_T, AUIPC_T}) //LUI or AUIPC
pre_alu_rs2 = {decode.instruction[31:12], 12'b0};
else if (opcode_trim == ARITH_IMM_T) //ARITH_IMM
pre_alu_rs2 = 32'(signed'(decode.instruction[31:20]));
else //JAL JALR
pre_alu_rs2 = 4;
end
assign alu_inputs.in1 = {(rf_issue.rs1_data[XLEN-1] & ~fn3[0]), alu_rs1_data};//(fn3[0] is SLTU_fn3);
assign alu_inputs.in2 = {(alu_rs2_data[XLEN-1] & ~fn3[0]), alu_rs2_data};
assign alu_inputs.shifter_in = rf_issue.rs1_data;
assign alu_inputs.shift_amount = opcode[5] ? rf_issue.rs2_data[4:0] : rs2_addr;
assign alu_inputs.subtract = fb.alu_sub;
assign alu_inputs.arith = alu_rs1_data[XLEN-1] & fb.instruction[30];//shift in bit
assign alu_inputs.lshift = ~fn3[2];
assign alu_inputs.logic_op = fb.alu_logic_op;
assign alu_inputs.shifter_path = ~(opcode[2] | fn3 inside {SLT_fn3, SLTU_fn3, XOR_fn3, OR_fn3, AND_fn3, ADD_SUB_fn3}); //opcode[2] LUI AUIPC JAL JALR
assign alu_inputs.slt_path = ~opcode[2] & fn3 inside {SLT_fn3, SLTU_fn3};
always_ff @(posedge clk) begin
if (issue_stage_ready) begin
if (opcode_trim inside {LUI_T})
pre_alu_rs1_r <= '0;
else
pre_alu_rs1_r <= decode.pc;
end
end
always_ff @(posedge clk) begin
if (issue_stage_ready) begin
pre_alu_rs2_r <= pre_alu_rs2;
rs1_use_regfile <= !(opcode_trim inside {LUI_T, AUIPC_T, JAL_T, JALR_T});
rs2_use_regfile <= !(opcode_trim inside {LUI_T, AUIPC_T, JAL_T, JALR_T, ARITH_IMM_T});
end
end
//Add cases: JAL, JALR, LUI, AUIPC, ADD[I], all logic ops
//Sub cases: SUB, SLT[U][I]
logic sub_instruction;
assign sub_instruction = (fn3 == ADD_SUB_fn3) && decode.instruction[30] && opcode[5];//If ARITH instruction
alu_logic_op_t alu_logic_op;
always_comb begin
case (fn3)
SLT_fn3 : alu_logic_op = ALU_LOGIC_ADD;
SLTU_fn3 : alu_logic_op = ALU_LOGIC_ADD;
SLL_fn3 : alu_logic_op = ALU_LOGIC_ADD;
XOR_fn3 : alu_logic_op = ALU_LOGIC_XOR;
OR_fn3 : alu_logic_op = ALU_LOGIC_OR;
AND_fn3 : alu_logic_op = ALU_LOGIC_AND;
SRA_fn3 : alu_logic_op = ALU_LOGIC_ADD;
ADD_SUB_fn3 : alu_logic_op = ALU_LOGIC_ADD;
endcase
//put LUI, AUIPC, JAL and JALR through adder path
alu_logic_op = opcode[2] ? ALU_LOGIC_ADD : alu_logic_op;
end
alu_logic_op_t alu_logic_op_r;
logic alu_subtract;
logic alu_lshift;
logic alu_shifter_path;
logic alu_slt_path;
always_ff @(posedge clk) begin
if (issue_stage_ready) begin
alu_logic_op_r <= alu_logic_op;
alu_subtract <= ~opcode[2] & (fn3 inside {SLTU_fn3, SLT_fn3} || sub_instruction);//opcode[2] covers LUI,AUIPC,JAL,JALR
alu_lshift <= ~fn3[2];
alu_shifter_path <= ~(opcode[2] | fn3 inside {SLT_fn3, SLTU_fn3, XOR_fn3, OR_fn3, AND_fn3, ADD_SUB_fn3}); //opcode[2] LUI AUIPC JAL JALR
alu_slt_path <= ~opcode[2] & fn3 inside {SLT_fn3, SLTU_fn3};
end
end
assign alu_inputs.logic_op = alu_logic_op_r;
assign alu_inputs.subtract = alu_subtract;
assign alu_inputs.arith = alu_rs1_data[XLEN-1] & issue.instruction[30];//shift in bit
assign alu_inputs.lshift = alu_lshift;
assign alu_inputs.shifter_path = alu_shifter_path;
assign alu_inputs.slt_path = alu_slt_path;
assign alu_rs1_data = rs1_use_regfile ? rs_data[RS1] : pre_alu_rs1_r;
assign alu_rs2_data = rs2_use_regfile ? rs_data[RS2] : pre_alu_rs2_r;
assign alu_inputs.in1 = {(rs_data[RS1][XLEN-1] & ~issue.fn3[0]), alu_rs1_data};//(fn3[0] is SLTU_fn3);
assign alu_inputs.in2 = {(alu_rs2_data[XLEN-1] & ~issue.fn3[0]), alu_rs2_data};
assign alu_inputs.shifter_in = rs_data[RS1];
assign alu_inputs.shift_amount = issue.opcode[5] ? rs_data[RS2][4:0] : issue.rs_addr[RS2];
assign alu_issued = issue_to[ALU_UNIT_WB_ID] & ~potential_branch_exception;
////////////////////////////////////////////////////
//Load Store unit inputs
logic [11:0] ls_offset;
logic is_load;
logic is_store;
logic amo_op;
@ -246,7 +310,7 @@ module decode_and_issue (
logic [4:0] amo_type;
assign amo_op = USE_AMO ? (opcode_trim == AMO_T) : 1'b0;
assign amo_type = fb.instruction[31:27];
assign amo_type = decode.instruction[31:27];
assign store_conditional = (amo_type == AMO_SC);
assign load_reserve = (amo_type == AMO_LR);
@ -263,129 +327,203 @@ module decode_and_issue (
assign is_load = (opcode_trim inside {LOAD_T, AMO_T}) && !(amo_op & store_conditional); //LR and AMO_ops perform a read operation as well
assign is_store = (opcode_trim == STORE_T) || (amo_op && store_conditional);//Used for LS unit and for ID tracking
assign ls_offset = opcode[5] ? {fb.instruction[31:25], fb.instruction[11:7]} : fb.instruction[31:20];
assign ls_inputs.rs1 = rf_issue.rs1_data;
assign ls_inputs.rs2 = rf_issue.rs2_data;
logic [11:0] ls_offset;
logic is_load_r;
logic is_store_r;
always_ff @(posedge clk) begin
if (issue_stage_ready) begin
ls_offset <= opcode[5] ? {decode.instruction[31:25], decode.instruction[11:7]} : decode.instruction[31:20];
is_load_r <= is_load;
is_store_r <= is_store;
end
end
assign ls_inputs.offset = ls_offset;
assign ls_inputs.pc = fb.pc;
assign ls_inputs.fn3 = amo_op ? LS_W_fn3 : fn3;
assign ls_inputs.load = is_load;
assign ls_inputs.store = is_store;
assign ls_inputs.forwarded_store = rf_issue.rs2_conflict;
assign ls_inputs.store_forward_id = rf_issue.rs2_id;
assign ls_inputs.load = is_load_r;
assign ls_inputs.store = is_store_r;
assign ls_inputs.fn3 = amo_op ? LS_W_fn3 : issue.fn3;
assign ls_inputs.rs1 = rs_data[RS1];
assign ls_inputs.rs2 = rs_data[RS2];
assign ls_inputs.forwarded_store = rs2_conflict;
assign ls_inputs.store_forward_id = rs_id[RS2];
////////////////////////////////////////////////////
//Branch unit inputs
assign branch_inputs.rs1 = rf_issue.rs1_data;
assign branch_inputs.rs2 = rf_issue.rs2_data;
assign branch_inputs.fn3 = fn3;
assign branch_inputs.dec_pc = fb.pc;
assign branch_inputs.dec_pc_valid = fb_valid;
assign branch_inputs.use_signed = !(fn3 inside {BLTU_fn3, BGEU_fn3});
assign branch_inputs.jal = opcode[3];//(opcode == JAL);
assign branch_inputs.jalr = ~opcode[3] & opcode[2];//(opcode == JALR);
assign branch_inputs.is_call = fb.is_call;
assign branch_inputs.is_return = fb.is_return;
assign branch_inputs.instruction = fb.instruction;
assign branch_inputs.branch_metadata = fb.branch_metadata;
assign branch_inputs.branch_prediction_used = fb.branch_prediction_used;
assign branch_inputs.bp_update_way = fb.bp_update_way;
////////////////////////////////////////////////////
//RAS Support
logic rs1_link;
logic rd_link;
logic rs1_eq_rd;
logic is_return;
logic is_call;
assign rs1_link = (rs1_addr inside {1,5});
assign rd_link = (rd_addr inside {1,5});
assign rs1_eq_rd = (rs1_addr == rd_addr);
logic br_use_signed;
always_ff @(posedge clk) begin
if (issue_stage_ready) begin
is_return <= (opcode_trim == JALR_T) && ((rs1_link & ~rd_link) | (rs1_link & rd_link & ~rs1_eq_rd));
is_call <= (opcode_trim inside {JAL_T, JALR_T}) && rd_link;
br_use_signed <= !(fn3 inside {BLTU_fn3, BGEU_fn3});
end
end
logic[19:0] jal_imm;
logic[11:0] jalr_imm;
logic[11:0] br_imm;
logic [20:0] pc_offset;
logic [20:0] pc_offset_r;
assign jal_imm = {decode.instruction[31], decode.instruction[19:12], decode.instruction[20], decode.instruction[30:21]};
assign jalr_imm = decode.instruction[31:20];
assign br_imm = {decode.instruction[31], decode.instruction[7], decode.instruction[30:25], decode.instruction[11:8]};
always_comb begin
unique if (~opcode[3] & opcode[2])
pc_offset = 21'(signed'(jalr_imm));
else if (opcode[3])
pc_offset = 21'(signed'({jal_imm, 1'b0}));
else
pc_offset = 21'(signed'({br_imm, 1'b0}));
end
always_ff @(posedge clk) begin
if (issue_stage_ready)
pc_offset_r <= pc_offset;
end
assign branch_inputs.is_return = is_return;
assign branch_inputs.is_call = is_call;
assign branch_inputs.fn3 = issue.fn3;
assign branch_inputs.pc_offset = pc_offset_r;
assign branch_inputs.use_signed = br_use_signed;
assign branch_inputs.jal = issue.opcode[3];//(opcode == JAL);
assign branch_inputs.jalr = ~issue.opcode[3] & issue.opcode[2];//(opcode == JALR);
assign branch_inputs.issue_pc = issue.pc;
assign branch_inputs.issue_pc_valid = issue.stage_valid;
assign branch_inputs.rs1 = rs_data[RS1];
assign branch_inputs.rs2 = rs_data[RS2];
////////////////////////////////////////////////////
//Global Control unit inputs
logic sfence;
logic ifence;
logic environment_op;
logic is_csr;
assign sfence = fb.instruction[25];
logic is_csr_r;
logic potential_flush;
assign sfence = decode.instruction[25];
assign ifence = (opcode_trim == FENCE_T) && fn3[0];
assign environment_op = (opcode_trim == SYSTEM_T) && (fn3 == 0);
assign is_csr = (opcode_trim == SYSTEM_T) && (fn3 != 0);
assign gc_inputs.pc = fb.pc;
assign gc_inputs.instruction = fb.instruction;
assign gc_inputs.rs1 = rf_issue.rs1_data;
assign gc_inputs.rs2 = rf_issue.rs2_data;
assign gc_inputs.is_fence = ENABLE_M_MODE && (opcode_trim == FENCE_T) && ~fn3[0];
assign gc_inputs.is_i_fence = ENABLE_M_MODE & issue[GC_UNIT_ID] & ifence;
assign gc_inputs.is_csr = is_csr;
logic is_ecall;
logic is_ebreak;
logic is_ret;
logic is_fence;
logic is_ifence_r;
assign gc_inputs.is_ecall = ENABLE_M_MODE && environment_op && (fb.instruction[21:20] == 0);
assign gc_inputs.is_ebreak = ENABLE_M_MODE && environment_op && (fb.instruction[21:20] == 2'b01);
assign gc_inputs.is_ret = ENABLE_M_MODE && environment_op && (fb.instruction[21:20] == 2'b10);
always_ff @(posedge clk) begin
if (issue_stage_ready) begin
is_csr_r <= is_csr;
is_ecall <= ENABLE_M_MODE && environment_op && (decode.instruction[21:20] == 0);
is_ebreak <= ENABLE_M_MODE && environment_op && (decode.instruction[21:20] == 2'b01);
is_ret <= ENABLE_M_MODE && environment_op && (decode.instruction[21:20] == 2'b10);
is_fence <= ENABLE_M_MODE && (opcode_trim == FENCE_T) && ~fn3[0];
is_ifence_r <= ifence;
potential_flush <= (environment_op | ifence);
end
end
assign gc_flush_required = ENABLE_M_MODE && issue[GC_UNIT_ID] && (environment_op | ifence);
assign gc_inputs.is_ecall = is_ecall;
assign gc_inputs.is_ebreak = is_ebreak;
assign gc_inputs.is_ret = is_ret;
assign gc_inputs.pc = issue.pc;
assign gc_inputs.instruction = issue.instruction;
assign gc_inputs.is_csr = is_csr_r;
assign gc_inputs.is_fence = is_fence;
assign gc_inputs.is_i_fence = ENABLE_M_MODE & issue_to[GC_UNIT_ID] & is_ifence_r;
assign gc_inputs.rs1 = rs_data[RS1];
assign gc_inputs.rs2 = rs_data[RS2];
assign gc_flush_required = ENABLE_M_MODE && issue_to[GC_UNIT_ID] && potential_flush;
////////////////////////////////////////////////////
//Mul unit inputs
generate if (USE_MUL) begin
assign mul_inputs.rs1 = rf_issue.rs1_data;
assign mul_inputs.rs2 = rf_issue.rs2_data;
assign mul_inputs.op = fn3[1:0];
end
endgenerate
assign mul_inputs.rs1 = rs_data[RS1];
assign mul_inputs.rs2 = rs_data[RS2];
assign mul_inputs.op = issue.fn3[1:0];
end endgenerate
////////////////////////////////////////////////////
//Div unit inputs
generate if (USE_DIV) begin
logic [4:0] prev_div_rs1_addr;
logic [4:0] prev_div_rs2_addr;
logic prev_div_result_valid;
logic set_prev_div_result_valid;
logic clear_prev_div_result_valid;
logic current_op_resuses_rs1_rs2;
logic [4:0] prev_div_rs1_addr;
logic [4:0] prev_div_rs2_addr;
logic prev_div_result_valid;
logic set_prev_div_result_valid;
logic clear_prev_div_result_valid;
logic current_op_resuses_rs1_rs2;
always_ff @(posedge clk) begin
if (issue[DIV_UNIT_WB_ID]) begin
prev_div_rs1_addr <= rs1_addr;
prev_div_rs2_addr <= rs2_addr;
end
always_ff @(posedge clk) begin
if (issue_to[DIV_UNIT_WB_ID]) begin
prev_div_rs1_addr <= rs1_addr;
prev_div_rs2_addr <= rs2_addr;
end
assign current_op_resuses_rs1_rs2 = (prev_div_rs1_addr == rs1_addr) && (prev_div_rs2_addr == rs2_addr);
assign set_prev_div_result_valid = unit_needed[DIV_UNIT_WB_ID];
//If current div operation overwrites an input register OR any other instruction overwrites the last div operations input registers
assign clear_prev_div_result_valid = uses_rd & ((rd_addr == (unit_needed[DIV_UNIT_WB_ID] ? rs1_addr : prev_div_rs1_addr)) || (rd_addr == (unit_needed[DIV_UNIT_WB_ID] ? rs2_addr : prev_div_rs2_addr)));
set_clr_reg_with_rst #(.SET_OVER_CLR(0), .WIDTH(1), .RST_VALUE(0)) prev_div_result_valid_m (
.clk, .rst,
.set(instruction_issued & set_prev_div_result_valid),
.clr(instruction_issued & clear_prev_div_result_valid),
.result(prev_div_result_valid)
);
assign div_inputs.rs1 = rf_issue.rs1_data;
assign div_inputs.rs2 = rf_issue.rs2_data;
assign div_inputs.op = fn3[1:0];
assign div_inputs.reuse_result = prev_div_result_valid & current_op_resuses_rs1_rs2;
end
endgenerate
assign current_op_resuses_rs1_rs2 = (prev_div_rs1_addr == issue.rs_addr[RS1]) && (prev_div_rs2_addr == issue.rs_addr[RS2]);
assign set_prev_div_result_valid = unit_needed_issue_stage[DIV_UNIT_WB_ID];
//If current div operation overwrites an input register OR any other instruction overwrites the last div operations input registers
assign clear_prev_div_result_valid = issue.uses_rd & ((issue.rd_addr == (unit_needed_issue_stage[DIV_UNIT_WB_ID] ? issue.rs_addr[RS1] : prev_div_rs1_addr)) || (issue.rd_addr == (unit_needed_issue_stage[DIV_UNIT_WB_ID] ? issue.rs_addr[RS2] : prev_div_rs2_addr)));
set_clr_reg_with_rst #(.SET_OVER_CLR(0), .WIDTH(1), .RST_VALUE(0)) prev_div_result_valid_m (
.clk, .rst,
.set(instruction_issued & set_prev_div_result_valid),
.clr(instruction_issued & clear_prev_div_result_valid),
.result(prev_div_result_valid)
);
assign div_inputs.rs1 = rs_data[RS1];
assign div_inputs.rs2 = rs_data[RS2];
assign div_inputs.op = issue.fn3[1:0];
assign div_inputs.reuse_result = prev_div_result_valid & current_op_resuses_rs1_rs2;
end endgenerate
////////////////////////////////////////////////////
//Unit EX signals
generate
for (i = 0; i < NUM_UNITS; i++) begin
assign unit_issue[i].possible_issue = unit_needed[i] & unit_operands_ready[i] & fb_valid & ti.id_available & ~gc_issue_hold;
assign unit_issue[i].new_request = issue[i];
assign unit_issue[i].instruction_id = ti.issue_id;
always_ff @(posedge clk) begin
unit_issue[i].new_request_r <= issue[i];
end
generate for (i = 0; i < NUM_UNITS; i++) begin
assign unit_issue[i].possible_issue = issue.stage_valid & unit_needed_issue_stage[i] & unit_ready[i];
assign unit_issue[i].new_request = issue_to[i];
assign unit_issue[i].id = issue.id;
always_ff @(posedge clk) begin
unit_issue[i].new_request_r <= issue_to[i];
end
endgenerate
end endgenerate
////////////////////////////////////////////////////
//Illegal Instruction check
logic illegal_instruction_pattern_r;
generate if (ENABLE_M_MODE) begin
illegal_instruction_checker illegal_op_check (
.instruction(fb.instruction), .illegal_instruction(illegal_instruction_pattern)
.instruction(decode.instruction), .illegal_instruction(illegal_instruction_pattern)
);
always_ff @(posedge clk) begin
if (rst)
illegal_instruction_pattern_r <= 0;
else if (issue_stage_ready)
illegal_instruction_pattern_r <= illegal_instruction_pattern;
end
//Illegal instruction if the instruction is invalid, but could otherwise be issued
assign illegal_instruction = illegal_instruction_pattern & fb_valid & ti.id_available & ~gc_issue_hold & ~gc_fetch_flush;
assign illegal_instruction = illegal_instruction_pattern_r & issue.stage_valid & ~gc_issue_hold & ~gc_fetch_flush;
end endgenerate
////////////////////////////////////////////////////
//End of Implementation
@ -397,29 +535,32 @@ module decode_and_issue (
////////////////////////////////////////////////////
//Trace Interface
generate if (ENABLE_TRACE_INTERFACE) begin
assign tr_operand_stall = |(unit_needed & unit_ready) & issue_valid & ~|(unit_operands_ready & issue_ready);
assign tr_unit_stall = ~|(unit_needed & unit_ready) & issue_valid & |(unit_operands_ready & issue_ready);
assign tr_no_id_stall = |(unit_needed & unit_ready) & (fb_valid & ~ti.id_available & ~gc_issue_hold & ~gc_fetch_flush) & |(unit_operands_ready & issue_ready);
assign tr_no_instruction_stall = ~fb_valid | gc_fetch_flush;
assign tr_other_stall = fb_valid & ~instruction_issued & ~(tr_operand_stall | tr_unit_stall | tr_no_id_stall | tr_no_instruction_stall);
assign tr_branch_operand_stall = tr_operand_stall & unit_needed[BRANCH_UNIT_ID];
assign tr_alu_operand_stall = tr_operand_stall & unit_needed[ALU_UNIT_WB_ID] & ~unit_needed[BRANCH_UNIT_ID];
assign tr_ls_operand_stall = tr_operand_stall & unit_needed[LS_UNIT_WB_ID];
assign tr_div_operand_stall = tr_operand_stall & unit_needed[DIV_UNIT_WB_ID];
assign tr_operand_stall = |(unit_needed_issue_stage & unit_ready) & issue_valid & ~|(unit_operands_ready & unit_needed_issue_stage);
assign tr_unit_stall = ~|(unit_needed_issue_stage & unit_ready) & issue_valid & |(unit_operands_ready & unit_needed_issue_stage);
assign tr_no_id_stall = 0;
assign tr_no_instruction_stall = ~issue.stage_valid | gc_fetch_flush;
assign tr_other_stall = issue.stage_valid & ~instruction_issued & ~(tr_operand_stall | tr_unit_stall | tr_no_id_stall | tr_no_instruction_stall);
assign tr_branch_operand_stall = tr_operand_stall & unit_needed_issue_stage[BRANCH_UNIT_ID];
assign tr_alu_operand_stall = tr_operand_stall & unit_needed_issue_stage[ALU_UNIT_WB_ID] & ~unit_needed_issue_stage[BRANCH_UNIT_ID];
assign tr_ls_operand_stall = tr_operand_stall & unit_needed_issue_stage[LS_UNIT_WB_ID];
assign tr_div_operand_stall = tr_operand_stall & unit_needed_issue_stage[DIV_UNIT_WB_ID];
//Instruction Mix
assign tr_alu_op = instruction_issued && (opcode_trim inside {ARITH_T, ARITH_IMM_T, AUIPC_T, LUI_T} && ~tr_mul_op && ~tr_div_op);
assign tr_branch_or_jump_op = instruction_issued && (opcode_trim inside {JAL_T, JALR_T, BRANCH_T});
assign tr_load_op = instruction_issued && (opcode_trim inside {LOAD_T, AMO_T});
assign tr_store_op = instruction_issued && (opcode_trim inside {STORE_T});
assign tr_mul_op = instruction_issued && unit_needed[MUL_UNIT_WB_ID];
assign tr_div_op = instruction_issued && unit_needed[DIV_UNIT_WB_ID];
assign tr_misc_op = instruction_issued & ~(tr_alu_op | tr_branch_or_jump_op | tr_load_op | tr_store_op | tr_mul_op | tr_div_op);
always_ff @(posedge clk) begin
if (issue_stage_ready) begin
tr_alu_op <= instruction_issued && (opcode_trim inside {ARITH_T, ARITH_IMM_T, AUIPC_T, LUI_T} && ~tr_mul_op && ~tr_div_op);
tr_branch_or_jump_op <= instruction_issued && (opcode_trim inside {JAL_T, JALR_T, BRANCH_T});
tr_load_op <= instruction_issued && (opcode_trim inside {LOAD_T, AMO_T});
tr_store_op <= instruction_issued && (opcode_trim inside {STORE_T});
tr_mul_op <= instruction_issued && unit_needed_issue_stage[MUL_UNIT_WB_ID];
tr_div_op <= instruction_issued && unit_needed_issue_stage[DIV_UNIT_WB_ID];
tr_misc_op <= instruction_issued & ~(tr_alu_op | tr_branch_or_jump_op | tr_load_op | tr_store_op | tr_mul_op | tr_div_op);
end
end
assign tr_instruction_issued_dec = instruction_issued;
assign tr_instruction_pc_dec = fb.pc;
assign tr_instruction_data_dec = fb.instruction;
end
endgenerate
assign tr_instruction_pc_dec = issue.pc;
assign tr_instruction_data_dec = issue.instruction;
end endgenerate
endmodule

View file

@ -34,19 +34,7 @@ module div_algorithm
generate
case(DIV_ALGORITHM)
RADIX_2 : div_radix2 div_block (.*);
RADIX_2_EARLY_TERMINATE : div_radix2_ET div_block (.*);
RADIX_2_EARLY_TERMINATE_FULL : div_radix2_ET_full div_block (.*);
RADIX_4 : div_radix4 div_block (.*);
RADIX_4_EARLY_TERMINATE : div_radix4_ET div_block (.*);
RADIX_4_EARLY_TERMINATE_FULL: div_radix4_ET_full div_block (.*);
RADIX_8 : div_radix8 div_block (.*);
RADIX_8_EARLY_TERMINATE : div_radix8_ET div_block (.*);
RADIX_16 : div_radix16 div_block (.*);
QUICK_NAIVE : div_quick_naive div_block (.*);
QUICK_CLZ : div_quick_clz div_block (.*);
QUICK_CLZ_MK2 : div_quick_clz_mk2 div_block (.*);
QUICK_RADIX_4 : div_quick_radix_4 div_block (.*);
default : $error("invalid div selection");
endcase
endgenerate

View file

@ -31,6 +31,7 @@ module div_quick_clz
logic running;
logic terminate;
logic [div.DATA_WIDTH-1:0] divisor_r;
logic [div.DATA_WIDTH-1:0] normalized_divisor;
@ -52,6 +53,7 @@ module div_quick_clz
logic [CLZ_W-1:0] divisor_CLZ;
logic [CLZ_W-1:0] divisor_CLZ_r;
logic [CLZ_W-1:0] CLZ_delta;
logic divisor_is_zero_first_cycle;
////////////////////////////////////////////////////
//Implementation
clz remainder_clz_block (.clz_input(div.remainder), .clz(remainder_CLZ));
@ -59,28 +61,35 @@ module div_quick_clz
////////////////////////////////////////////////////
//Control Signals
assign div.divisor_is_zero = (&divisor_CLZ) & ~div.divisor[0];
assign divisor_is_zero_first_cycle = (&divisor_CLZ) & ~div.divisor[0];
always @ (posedge clk) begin
if (div.start)
div.divisor_is_zero <= divisor_is_zero_first_cycle;
end
always_ff @ (posedge clk) begin
if (rst)
running <= 0;
else if (div.start & ~div.divisor_is_zero)
else if (div.start & ~divisor_is_zero_first_cycle)
running <= 1;
else if (terminate)
running <= 0;
end
always_ff @ (posedge clk) begin
div.done <= (running & terminate) | (div.start & div.divisor_is_zero);
div.done <= (running & terminate) | (div.start & divisor_is_zero_first_cycle);
end
assign terminate = div.remainder < div.divisor;
assign terminate = div.remainder < divisor_r;
////////////////////////////////////////////////////
//Divisor Pre-processing
always_ff @ (posedge clk) begin
divisor_CLZ_r <= divisor_CLZ;
normalized_divisor <= div.divisor << divisor_CLZ;
if (div.start) begin
divisor_r <= div.divisor;
divisor_CLZ_r <= divisor_CLZ;
normalized_divisor <= div.divisor << divisor_CLZ;
end
end
////////////////////////////////////////////////////

View file

@ -1,133 +0,0 @@
/*
* Copyright © 2017-2019 Eric Matthews, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
* Alec Lu <alec_lu@sfu.ca>
*/
//Iterative unsigned divider driven through unsigned_division_interface.
//Each active cycle it aligns the divisor against the current remainder using
//the two clz results, then subtracts 0.5x, 1x or 1.5x of the aligned divisor
//and ORs the matching bit(s) into the quotient.
//  clk, rst : clock and synchronous reset (rst only clears `running`)
//  div      : divider-side interface; reads start/dividend/divisor, drives
//             quotient/remainder/done/divisor_is_zero
//NOTE(review): the `clz` helper is not visible here; it is presumed to return
//the leading-zero count of its input, saturating at all-ones for a zero input.
module div_quick_clz_mk2
    (
        input logic clk,
        input logic rst,
        unsigned_division_interface.divider div
    );

    logic running;
    logic terminate;

    //A0/A1 carry one extra MSB so that bit [DATA_WIDTH] flags a borrow
    logic [div.DATA_WIDTH:0] A0;
    logic [div.DATA_WIDTH:0] A1;
    logic [div.DATA_WIDTH-1:0] A2;

    logic [div.DATA_WIDTH-1:0] new_R;
    logic [div.DATA_WIDTH-1:0] new_Q_bit;
    logic [div.DATA_WIDTH-1:0] new_R2; //declared by the original design, never driven or read

    logic [div.DATA_WIDTH-1:0] Q_bit1;
    logic [div.DATA_WIDTH-1:0] Q_bit2;

    logic [div.DATA_WIDTH-1:0] B1;
    logic [div.DATA_WIDTH-1:0] B2;

    localparam CLZ_W = $clog2(div.DATA_WIDTH);
    logic [CLZ_W-1:0] R_CLZ;
    logic [CLZ_W-1:0] B_CLZ;
    logic [CLZ_W-1:0] B_CLZ_r;
    logic [CLZ_W-1:0] CLZ_delta;

    logic [div.DATA_WIDTH-1:0] shiftedB;

    //////////////////////////////////////////
    //CLZ of the running remainder and of the divisor input
    clz clz_r (.clz_input(div.remainder), .clz(R_CLZ));
    clz clz_b (.clz_input(div.divisor), .clz(B_CLZ));

    //Divisor pre-processing: registered every cycle from the live divisor input
    //(not gated by div.start)
    always_ff @ (posedge clk) begin
        B_CLZ_r <= B_CLZ;
        shiftedB <= div.divisor << B_CLZ;
    end

    //Bit position of the quotient bit produced by subtracting B1
    assign CLZ_delta = B_CLZ_r - R_CLZ;

    //Q_bit1: one-hot at CLZ_delta, Q_bit2: the bit immediately below it
    always_comb begin
        Q_bit1 = 0;
        Q_bit1[CLZ_delta] = 1;
    end
    assign Q_bit2 = {1'b0, Q_bit1[div.DATA_WIDTH-1:1]};

    //Quotient contribution, selected with the same conditions as new_R below
    always_comb begin
        if (A1[div.DATA_WIDTH])
            new_Q_bit = Q_bit2;
        else if (A0[div.DATA_WIDTH] || CLZ_delta == 0)
            new_Q_bit = Q_bit1;
        else
            new_Q_bit = (Q_bit1 | Q_bit2);
    end

    //B1: pre-shifted divisor moved back down by the remainder's clz
    //B2: half of B1
    assign B1 = shiftedB >> R_CLZ;
    assign A1 = div.remainder - B1;
    assign B2 = {1'b0, B1[div.DATA_WIDTH-1:1]};
    assign A2 = div.remainder - B2;
    assign A0 = div.remainder - (B1 + B2);

    //Next remainder:
    //  remainder - B1 borrows                      -> subtract B2 only
    //  remainder - 1.5*B1 borrows, or CLZ_delta==0 -> subtract B1
    //  otherwise                                   -> subtract B1 + B2
    always_comb begin
        if (A1[div.DATA_WIDTH])
            new_R = A2[div.DATA_WIDTH-1:0];
        else if (A0[div.DATA_WIDTH] || CLZ_delta == 0)
            new_R = A1[div.DATA_WIDTH-1:0];
        else
            new_R = A0[div.DATA_WIDTH-1:0];
    end

    //Zero divisor: clz saturated (all ones) and the LSB is clear.
    //The reduction-AND tracks CLZ_W, so the check stays correct for any
    //DATA_WIDTH instead of only for the 5-bit (32-bit data) case.
    assign div.divisor_is_zero = (&B_CLZ) & ~div.divisor[0];

    always_ff @ (posedge clk) begin
        if (rst)
            running <= 0;
        else if (div.start & ~div.divisor_is_zero)
            running <= 1;
        else if (terminate)
            running <= 0;
    end

    //done is registered: it is raised the cycle after the terminating
    //iteration, or the cycle after a start with a zero divisor
    always_ff @ (posedge clk) begin
        div.done <= (running & terminate) | (div.start & div.divisor_is_zero);
    end

    //Compares against the live divisor input
    //NOTE(review): presumably the divisor is held stable while running - confirm
    assign terminate = div.remainder < div.divisor;

    always_ff @ (posedge clk) begin
        if (div.start)
            div.quotient <= '0;
        else if (~terminate & running)
            div.quotient <= div.quotient | new_Q_bit;
    end

    //remainder uses a plain always block because it is also given an initial value
    initial begin
        div.remainder = 0;
    end
    always @ (posedge clk) begin
        if (div.start)
            div.remainder <= div.dividend;
        else if (~terminate & running)
            div.remainder <= new_R;
    end

endmodule

View file

@ -1,106 +0,0 @@
/*
* Copyright © 2017-2019 Eric Matthews, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
* Alec Lu <alec_lu@sfu.ca>
*/
//Iterative unsigned divider driven through unsigned_division_interface.
//Every active cycle the divisor is shifted left by the difference of the two
//msb_naive results (remainder vs. registered divisor) and subtracted from the
//remainder; when that subtraction borrows, half of the shifted divisor is
//subtracted instead. The matching one-hot bit is ORed into the quotient.
module div_quick_naive
    (
        input logic clk,
        input logic rst,
        unsigned_division_interface.divider div
    );

    localparam MSB_W = $clog2(div.DATA_WIDTH);

    ////////////////////////////////////////////////////
    //Control
    logic active;    //an operation is in flight
    logic stop;      //remainder has dropped below the divisor
    logic begin_op;  //start request with a non-zero divisor
    logic finish;    //condition that raises div.done on the next clock edge

    ////////////////////////////////////////////////////
    //MSB tracking
    logic [MSB_W-1:0] rem_msb;
    logic [MSB_W-1:0] dvsr_msb;
    logic [MSB_W-1:0] dvsr_msb_r;
    logic [MSB_W-1:0] align_shift;

    ////////////////////////////////////////////////////
    //Datapath
    logic [div.DATA_WIDTH-1:0] dvsr_full;   //divisor << align_shift
    logic [div.DATA_WIDTH-1:0] dvsr_half;   //dvsr_full / 2
    logic [div.DATA_WIDTH:0]   diff_full;   //extra MSB holds the borrow
    logic [div.DATA_WIDTH-1:0] diff_half;
    logic overshoot;                        //remainder - dvsr_full borrowed
    logic [div.DATA_WIDTH-1:0] qbit_full;
    logic [div.DATA_WIDTH-1:0] qbit_half;
    logic [div.DATA_WIDTH-1:0] qbit_sel;
    logic [div.DATA_WIDTH-1:0] next_quotient;
    logic [div.DATA_WIDTH-1:0] next_remainder;

    msb_naive msb_r (.msb_input(div.remainder), .msb(rem_msb));
    msb_naive msb_b (.msb_input(div.divisor), .msb(dvsr_msb));

    //Registered every cycle from the live divisor input
    always_ff @ (posedge clk) begin
        dvsr_msb_r <= dvsr_msb;
    end

    assign align_shift = rem_msb - dvsr_msb_r;

    //Candidate subtrahends and their differences
    assign dvsr_full = div.divisor << align_shift;
    assign dvsr_half = dvsr_full >> 1;
    assign diff_full = div.remainder - dvsr_full;
    assign diff_half = div.remainder - dvsr_half;
    assign overshoot = diff_full[div.DATA_WIDTH];

    //Candidate quotient bits
    assign qbit_full = 2**align_shift;
    assign qbit_half = qbit_full >> 1;
    assign qbit_sel = overshoot ? qbit_half : qbit_full;

    assign next_quotient = div.quotient | qbit_sel;
    assign next_remainder = overshoot ? diff_half : diff_full[div.DATA_WIDTH-1:0];

    ////////////////////////////////////////////////////
    //Status
    assign div.divisor_is_zero = ((dvsr_msb == 0) && ~div.divisor[0]);
    assign stop = (div.divisor > div.remainder);
    assign begin_op = div.start & ~div.divisor_is_zero;
    assign finish = (active & stop) | (div.start & div.divisor_is_zero);

    //active is set by an accepted start and cleared on stop;
    //done is a single-cycle pulse (it clears itself the cycle after it is set)
    always_ff @ (posedge clk) begin
        if (rst) begin
            active <= 0;
            div.done <= 0;
        end
        else begin
            if (begin_op)
                active <= 1;
            else if (stop)
                active <= 0;

            if (div.done)
                div.done <= 0;
            else if (finish)
                div.done <= 1;
        end
    end

    ////////////////////////////////////////////////////
    //Result registers
    always_ff @ (posedge clk) begin
        if (div.start) begin
            div.quotient <= 0;
            div.remainder <= div.dividend;
        end
        else if (active & ~stop) begin
            div.quotient <= next_quotient;
            div.remainder <= next_remainder;
        end
    end

endmodule

View file

@ -1,196 +0,0 @@
/*
* Copyright © 2017-2019 Eric Matthews, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
* Alec Lu <alec_lu@sfu.ca>
*/
//Radix-4 unsigned divider with early termination, driven through
//unsigned_division_interface. Two quotient bits are resolved per iteration by
//subtracting 1x, 2x and 3x of the shifted divisor from the partial remainder.
//NOTE(review): several constants below (shift_count[14:0]/[15], the literal 32,
//the 6/7-bit shift amounts) are fixed values; they look sized for
//div.DATA_WIDTH == 32 - confirm before using another width.
module div_quick_radix_4
(
input logic clk,
input logic rst,
unsigned_division_interface.divider div
);
logic terminate;
//Iteration tracker: a single bit shifted up once per cycle (see below)
logic [div.DATA_WIDTH/2-1:0] shift_count;
//Partial remainder and the three trial subtractions (one extra sign bit each)
logic [div.DATA_WIDTH+1:0] PR;
logic [2:0] new_PR_sign;
logic [div.DATA_WIDTH+2:0] new_PR_1;
logic [div.DATA_WIDTH+2:0] new_PR_2;
logic [div.DATA_WIDTH+2:0] new_PR_3;
//Registered 1x, 2x and 3x multiples of the shifted divisor
logic [div.DATA_WIDTH+1:0] B_1;
logic [div.DATA_WIDTH+1:0] B_2;
logic [div.DATA_WIDTH+1:0] B_3;
//Remaining (not yet consumed) dividend bits and the quotient accumulator
logic [div.DATA_WIDTH-1:0] AR_r;
logic [div.DATA_WIDTH-1:0] Q_temp;
//Shift amounts used to extract the remainder and to align an early-terminated quotient
logic [5:0] shift_num_R;
logic [6:0] shift_num_R_normalized;
logic [5:0] shift_num_Q;
logic [div.DATA_WIDTH*2+1:0] combined;
logic [div.DATA_WIDTH*2+1:0] combined_normalized;
logic terminate_early;
localparam CLZ_W = $clog2(div.DATA_WIDTH);
logic [CLZ_W-1:0] A_CLZ;
logic [CLZ_W-1:0] B_CLZ;
logic [CLZ_W-1:0] A_CLZ_r;
//B_CLZ_r is written on start but not read anywhere in this module;
//CLZ_delta is declared but never driven or read
logic [CLZ_W-1:0] B_CLZ_r;
logic [CLZ_W-1:0] CLZ_delta;
logic firstCycle;
logic greaterDivisor;
logic [div.DATA_WIDTH-1:0] A_shifted;
logic [div.DATA_WIDTH-1:0] B_shifted;
logic [div.DATA_WIDTH-1:0] R_shifted;
logic [div.DATA_WIDTH-1:0] B_shifted_r;
//Implementation
////////////////////////////////////////////////////
//clz of the dividend (A) and divisor (B) inputs
//NOTE(review): clz is defined elsewhere; presumably a leading-zero count - confirm
clz clz_r (.clz_input(div.dividend), .clz(A_CLZ));
clz clz_b (.clz_input(div.divisor), .clz(B_CLZ));
//Start-cycle capture. firstCycle is high for the one cycle following div.start.
//Both operands are shifted left by A_CLZ (the dividend's clz result);
//greaterDivisor records divisor > dividend and is not reset.
always_ff @ (posedge clk) begin
if (rst) begin
firstCycle <= 0;
A_CLZ_r <= 0;
B_CLZ_r <= 0;
A_shifted <= 0;
B_shifted <= 0;
end else begin
if (div.start) begin
firstCycle <= 1;
A_CLZ_r <= A_CLZ;
B_CLZ_r <= B_CLZ;
greaterDivisor <= div.divisor > div.dividend;
A_shifted <= div.dividend << A_CLZ;
B_shifted <= div.divisor << A_CLZ;
end else begin
firstCycle <= 0;
end
end
end
//Trial subtractions PR - {1,2,3}*B; the top bit of each result is its sign/borrow
assign new_PR_1 = {1'b0, PR} - {1'b0, B_1};
assign new_PR_2 = {1'b0, PR} - {1'b0, B_2};
assign new_PR_3 = {1'b0, PR} - {1'b0, B_3};
assign new_PR_sign = {new_PR_3[div.DATA_WIDTH+2], new_PR_2[div.DATA_WIDTH+2], new_PR_1[div.DATA_WIDTH+2]};
//Shift register tracking the iteration count: loaded with 1 on firstCycle,
//cleared on terminate, otherwise shifted up by one each cycle.
//Bit 15 is used below as the "all iterations done" flag.
always_ff @ (posedge clk) begin
if (rst) begin
shift_count <= 0;
end else if (firstCycle) begin
shift_count <= 1;
end else if (terminate) begin
shift_count <= 0;
end else begin
shift_count <= {shift_count[14:0], firstCycle};
end
end
//Shift-amount bookkeeping: R amounts grow by 2 and the Q amount shrinks by 2
//per iteration; the normalized R amount additionally includes A_CLZ_r.
always_ff @ (posedge clk) begin
if (firstCycle) begin
shift_num_R <= 2;
shift_num_R_normalized <= 2 + {2'b0, A_CLZ_r};
shift_num_Q <= 32;
end
else if (~terminate & ~terminate_early) begin
shift_num_R <= shift_num_R + 2;
shift_num_R_normalized <= shift_num_R_normalized + 2;
shift_num_Q <= shift_num_Q - 2;
end
end
//Remainder output: {PR, AR_r} shifted down by the normalized amount
assign combined_normalized = {PR, AR_r} >> shift_num_R_normalized;
assign div.remainder = combined_normalized[div.DATA_WIDTH-1:0];
//R_shifted: same extraction without the A_CLZ_r term, compared against the shifted divisor
assign combined = {PR, AR_r} >> shift_num_R;
assign R_shifted = combined[div.DATA_WIDTH-1:0];
//Early termination: shifted divisor exceeds R_shifted, or divisor > dividend at start
assign terminate_early = ~firstCycle & ((B_shifted_r > R_shifted) | greaterDivisor);
//On early termination the accumulated quotient digits are shifted up by shift_num_Q
assign div.quotient = terminate_early ? (Q_temp << shift_num_Q) : Q_temp;
//Datapath registers. firstCycle loads PR with the top two bits of the shifted
//dividend and registers the divisor multiples; each following iteration shifts
//two more dividend bits into PR and appends one radix-4 quotient digit.
always_ff @ (posedge clk) begin
if (firstCycle) begin
PR <= {{(div.DATA_WIDTH){1'b0}}, A_shifted[div.DATA_WIDTH-1:div.DATA_WIDTH-2]};
Q_temp <= '0;
AR_r <= {A_shifted[div.DATA_WIDTH-3:0], 2'b00};
B_shifted_r <= B_shifted;
B_1 <= {2'b0, B_shifted}; //1xB
B_2 <= {1'b0, B_shifted, 1'b0}; //2xB
B_3 <= {1'b0, B_shifted, 1'b0} + {2'b0, B_shifted}; //3xB
end else if (~terminate & ~terminate_early) begin
AR_r <= {AR_r[div.DATA_WIDTH-3:0], 2'b00};
//new_PR_sign = {PR-3B negative, PR-2B negative, PR-1B negative}
case (new_PR_sign)
//all three subtractions negative: digit 0, PR unchanged
3'b111 : begin
PR <= {PR[div.DATA_WIDTH-1:0], AR_r[div.DATA_WIDTH-1:div.DATA_WIDTH-2]};
Q_temp <= {div.quotient[div.DATA_WIDTH-3:0], 2'b00};
end
//only PR - 1B non-negative: digit 1
3'b110 : begin
PR <= {new_PR_1[div.DATA_WIDTH-1:0], AR_r[div.DATA_WIDTH-1:div.DATA_WIDTH-2]};
Q_temp <= {div.quotient[div.DATA_WIDTH-3:0], 2'b01};
end
//PR - 2B non-negative, PR - 3B negative: digit 2
3'b100 : begin
PR <= {new_PR_2[div.DATA_WIDTH-1:0], AR_r[div.DATA_WIDTH-1:div.DATA_WIDTH-2]};
Q_temp <= {div.quotient[div.DATA_WIDTH-3:0], 2'b10};
end
//remaining patterns (expected 3'b000, PR - 3B non-negative): digit 3
default : begin //3'b000 : begin
PR <= {new_PR_3[div.DATA_WIDTH-1:0], AR_r[div.DATA_WIDTH-1:div.DATA_WIDTH-2]};
Q_temp <= {div.quotient[div.DATA_WIDTH-3:0], 2'b11};
end
endcase
end
end
//divisor_is_zero: loaded on start from an all-zero divisor check, then kept
//set only while none of the trial subtractions goes negative
always_ff @ (posedge clk) begin
if (div.start)
div.divisor_is_zero <= ~(|div.divisor);
else if (~terminate & ~terminate_early)
div.divisor_is_zero <= div.divisor_is_zero & ~(|new_PR_sign);
end
//terminate: sticky flag set after the last iteration or on early termination,
//cleared by reset or by the next firstCycle
always_ff @ (posedge clk) begin
if (rst)
terminate <= 0;
else begin
if (firstCycle)
terminate <= 0;
else if (shift_count[15] | terminate_early)
terminate <= 1;
end
end
//done: single-cycle pulse raised when the termination condition is first seen
//(terminate not yet set), then cleared on the following cycle
always_ff @ (posedge clk) begin
if (rst)
div.done <= 0;
else begin
if (firstCycle)
div.done <= 0;
else if ((shift_count[15] | terminate_early) & ~div.done & ~terminate)
div.done <= 1;
else if (div.done)
div.done <= 0;
end
end
endmodule

View file

@ -1,146 +0,0 @@
/*
* Copyright © 2017-2019 Eric Matthews, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
* Alec Lu <alec_lu@sfu.ca>
*/
//Iterative unsigned divider that retires four quotient bits (one radix-16 digit)
//per clock.
//
//Handshake (through the unsigned_division_interface "divider" modport):
//  div.start           loads dividend and precomputes the divisor multiples
//  div.done            pulses once the last digit has been shifted in
//  div.quotient        shift register: remaining dividend bits on top, quotient
//                      digits entering at the bottom
//  div.remainder       upper DATA_WIDTH bits of the partial remainder register
//  div.divisor_is_zero set when the divisor presented at start was all zeros
//
//div.divisor is read combinationally on every iteration (only the 6x/10x/12x/14x
//multiples are registered).
//NOTE(review): this presumably requires the requester to hold div.divisor stable
//until done - confirm against the caller.
module div_radix16
    (
        input logic clk,
        input logic rst,
        unsigned_division_interface.divider div
    );

    logic terminate;
    logic [div.DATA_WIDTH-1:0] shift_count;

    logic [div.DATA_WIDTH+3:0] PR;
    logic [div.DATA_WIDTH+3:0] PR_lower;
    logic [div.DATA_WIDTH+3:0] PR_upper;

    logic [div.DATA_WIDTH-1:0] Q_lower;
    logic [div.DATA_WIDTH-1:0] Q_upper;

    logic [6:0] new_PR_sign;
    logic [div.DATA_WIDTH+4:0] new_PR_8;
    logic [div.DATA_WIDTH+4:0] new_PR [6:0];

    logic [div.DATA_WIDTH+3:0] B_6;
    logic [div.DATA_WIDTH+3:0] B_10;
    logic [div.DATA_WIDTH+3:0] B_12;
    logic [div.DATA_WIDTH+3:0] B_14;

    //Shift register tracking the iteration count: the start pulse enters at bit 0
    //and advances one position per clock.
    always_ff @ (posedge clk) begin
        shift_count[0] <= div.start;
        shift_count[div.DATA_WIDTH-1:1] <= shift_count[div.DATA_WIDTH-2:0];
    end

    //Trial subtraction of 8x the divisor. Its sign selects which half of the digit
    //range (0-7 or 8-15) the remaining seven subtractors evaluate.
    assign new_PR_8 = {1'b0, PR} - {1'b0, {1'b0, div.divisor, 3'b000}};

    //new_PR[k] is PR - (k+1)*divisor when new_PR_8 is negative,
    //and PR - (k+9)*divisor otherwise.
    assign new_PR[0] = new_PR_8[div.DATA_WIDTH+4] ? {1'b0, PR} - {1'b0, {4'b0000, div.divisor}} : {1'b0, PR} - {1'b0, {1'b0, div.divisor, 3'b000}} - {4'b0000, div.divisor};
    assign new_PR[1] = new_PR_8[div.DATA_WIDTH+4] ? {1'b0, PR} - {1'b0, {3'b000, div.divisor, 1'b0}} : {1'b0, PR} - {1'b0, B_10};
    assign new_PR[2] = new_PR_8[div.DATA_WIDTH+4] ? {1'b0, PR} - {1'b0, {3'b000, div.divisor, 1'b0}} - {4'b0000, div.divisor} : {1'b0, PR} - {1'b0, B_10} - {4'b0000, div.divisor};
    assign new_PR[3] = new_PR_8[div.DATA_WIDTH+4] ? {1'b0, PR} - {1'b0, {2'b00, div.divisor, 2'b00}} : {1'b0, PR} - {1'b0, B_12};
    assign new_PR[4] = new_PR_8[div.DATA_WIDTH+4] ? {1'b0, PR} - {1'b0, {2'b00, div.divisor, 2'b00}} - {4'b0000, div.divisor} : {1'b0, PR} - {1'b0, B_12} - {4'b0000, div.divisor};
    assign new_PR[5] = new_PR_8[div.DATA_WIDTH+4] ? {1'b0, PR} - {1'b0, B_6} : {1'b0, PR} - {1'b0, B_14};
    assign new_PR[6] = new_PR_8[div.DATA_WIDTH+4] ? {1'b0, PR} - {1'b0, B_6} - {4'b0000, div.divisor} : {1'b0, PR} - {1'b0, B_14} - {4'b0000, div.divisor};

    //Sign (borrow) bit of each trial subtraction; 1 means the subtraction went negative
    assign new_PR_sign = {new_PR[6][div.DATA_WIDTH+4], new_PR[5][div.DATA_WIDTH+4], new_PR[4][div.DATA_WIDTH+4],
                          new_PR[3][div.DATA_WIDTH+4], new_PR[2][div.DATA_WIDTH+4], new_PR[1][div.DATA_WIDTH+4],
                          new_PR[0][div.DATA_WIDTH+4]};

    //AND-OR selection of the next partial remainder / quotient value.
    //The "lower" pair covers digits 0-7, the "upper" pair covers digits 8-15.
    //Each term is masked by the condition "subtraction i-1 succeeded and
    //subtraction i failed", so exactly one term is non-zero.
    always_comb begin
        PR_lower = ({PR[div.DATA_WIDTH-1:0], div.quotient[div.DATA_WIDTH-1:div.DATA_WIDTH-4]} & {(div.DATA_WIDTH+4){(new_PR_sign[0])}});
        Q_lower = ({div.quotient[div.DATA_WIDTH-5:0], 4'b0000} & {div.DATA_WIDTH{(new_PR_sign[0])}});
        for (int i = 1; i < 7; i = i+1) begin
            PR_lower = PR_lower | ({new_PR[i-1][div.DATA_WIDTH-1:0], div.quotient[div.DATA_WIDTH-1:div.DATA_WIDTH-4]} & {(div.DATA_WIDTH+4){(~new_PR_sign[i-1] & new_PR_sign[i])}});
            Q_lower = Q_lower | ({div.quotient[div.DATA_WIDTH-5:0], i[3:0]} & {div.DATA_WIDTH{(~new_PR_sign[i-1] & new_PR_sign[i])}});
        end
        PR_lower = PR_lower | ({new_PR[6][div.DATA_WIDTH-1:0], div.quotient[div.DATA_WIDTH-1:div.DATA_WIDTH-4]} & {(div.DATA_WIDTH+4){(~new_PR_sign[6])}});
        Q_lower = Q_lower | ({div.quotient[div.DATA_WIDTH-5:0], 4'b0111} & {div.DATA_WIDTH{(~new_PR_sign[6])}});

        PR_upper = {new_PR_8[div.DATA_WIDTH-1:0], div.quotient[div.DATA_WIDTH-1:div.DATA_WIDTH-4]} & {(div.DATA_WIDTH+4){new_PR_sign[0]}};
        Q_upper = {div.quotient[div.DATA_WIDTH-5:0], 4'b1000} & {div.DATA_WIDTH{new_PR_sign[0]}};
        for (int i = 1; i < 7; i = i+1) begin
            PR_upper = PR_upper | ({new_PR[i-1][div.DATA_WIDTH-1:0], div.quotient[div.DATA_WIDTH-1:div.DATA_WIDTH-4]} & {(div.DATA_WIDTH+4){(~new_PR_sign[i-1] & new_PR_sign[i])}});
            Q_upper = Q_upper | ({div.quotient[div.DATA_WIDTH-5:0], (i[3:0] | 4'b1000)} & {div.DATA_WIDTH{(~new_PR_sign[i-1] & new_PR_sign[i])}});
        end
        PR_upper = PR_upper | ({new_PR[6][div.DATA_WIDTH-1:0], div.quotient[div.DATA_WIDTH-1:div.DATA_WIDTH-4]} & {(div.DATA_WIDTH+4){(~new_PR_sign[6])}});
        Q_upper = Q_upper | ({div.quotient[div.DATA_WIDTH-5:0], 4'b1111} & {div.DATA_WIDTH{(~new_PR_sign[6])}});
    end

    //Operand load on start, otherwise one digit per clock until terminate
    always_ff @ (posedge clk) begin
        if (div.start) begin
            B_6 <= {2'b00, div.divisor, 2'b00} + {3'b000, div.divisor, 1'b0};
            B_10 <= {1'b0, div.divisor, 3'b000} + {3'b000, div.divisor, 1'b0};
            B_12 <= {1'b0, div.divisor, 3'b000} + {2'b00, div.divisor, 2'b00};
            B_14 <= {1'b0, div.divisor, 3'b000} + {2'b00, div.divisor, 2'b00} + {3'b000, div.divisor, 1'b0};

            PR <= {{(div.DATA_WIDTH){1'b0}}, div.dividend[div.DATA_WIDTH-1:div.DATA_WIDTH-4]};
            div.quotient <= {div.dividend[div.DATA_WIDTH-5:0], 4'b0000};
        end
        else if (~terminate) begin
            case (new_PR_8[div.DATA_WIDTH+4])
                1'b1 : begin //PR < 8x divisor: digit is in 0-7
                    PR <= PR_lower;
                    div.quotient <= Q_lower;
                end
                1'b0 : begin //PR >= 8x divisor: digit is in 8-15
                    PR <= PR_upper;
                    div.quotient <= Q_upper;
                end
            endcase
        end
    end

    assign div.remainder = PR[div.DATA_WIDTH+3:4];

    //Divide-by-zero flag.
    //Seeded from a full zero-compare of the divisor. Seeding from ~divisor[0] and
    //relying on the sign bits to clear it is not sufficient for this radix: a
    //quotient digit of 7 also leaves every new_PR_sign bit clear, so an even,
    //non-zero divisor whose quotient digits are all 7/F (e.g. 0xEEEEEEEE / 2)
    //would finish with the flag still set.
    always_ff @ (posedge clk) begin
        if (div.start)
            div.divisor_is_zero <= ~(|div.divisor);
        else if (~terminate)
            div.divisor_is_zero <= div.divisor_is_zero & ~(|new_PR_sign);
    end

    //Stop iterating after DATA_WIDTH/4 digits. The second "if" is deliberately
    //not an "else": the terminate condition overrides start when both are set.
    always_ff @ (posedge clk) begin
        if (rst)
            terminate <= 0;
        else begin
            if (div.start)
                terminate <= 0;
            if (shift_count[div.DATA_WIDTH/4-1])
                terminate <= 1;
        end
    end

    //Completion pulse, raised in the cycle after the last digit is produced
    always_ff @ (posedge clk) begin
        if (rst)
            div.done <= 0;
        else begin
            if (shift_count[div.DATA_WIDTH/4-1])
                div.done <= 1;
            else if (div.done)
                div.done <= 0;
        end
    end

endmodule

View file

@ -31,6 +31,7 @@ module div_radix2
logic terminate;
logic [div.DATA_WIDTH-1:0] divisor_r;
logic [div.DATA_WIDTH:0] new_PR;
logic [div.DATA_WIDTH:0] PR;
logic [div.DATA_WIDTH-1:0] shift_count;
@ -38,7 +39,7 @@ module div_radix2
//implementation
////////////////////////////////////////////////////
assign new_PR = PR - {1'b0, div.divisor};
assign new_PR = PR - {1'b0, divisor_r};
assign negative_sub_rst = new_PR[div.DATA_WIDTH];
//Shift reg for
@ -48,6 +49,7 @@ module div_radix2
always_ff @ (posedge clk) begin
if (div.start) begin
divisor_r <= div.divisor;
PR <= {(div.DATA_WIDTH)'(1'b0), div.dividend[div.DATA_WIDTH-1]};
div.quotient <= {div.dividend[div.DATA_WIDTH-2:0], 1'b0};
end

View file

@ -1,104 +0,0 @@
/*
* Copyright © 2017-2019 Eric Matthews, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
* Alec Lu <alec_lu@sfu.ca>
*/
//Radix-2 (one quotient bit per clock) unsigned divider with a single
//early-termination check at start: when divisor > dividend the result is known
//immediately (quotient 0, remainder = dividend) and done is raised in the next cycle.
//
//div.divisor is read combinationally on every iteration.
//NOTE(review): this presumably requires the requester to hold div.divisor stable
//until done - confirm against the caller.
module div_radix2_ET
    (
        input logic clk,
        input logic rst,
        unsigned_division_interface.divider div
    );

    logic terminate;
    logic terminate_early;

    logic [div.DATA_WIDTH:0] new_PR;
    logic [div.DATA_WIDTH:0] PR;

    logic [div.DATA_WIDTH-1:0] shift_count;

    logic negative_sub_rst;

    //implementation
    ////////////////////////////////////////////////////
    //Trial subtraction; the top bit is the borrow (1 = divisor does not fit)
    assign new_PR = PR - {1'b0, div.divisor};
    assign negative_sub_rst = new_PR[div.DATA_WIDTH];

    //Shift register tracking the iteration count. No bit is inserted for an
    //early-terminated request, so no second completion is generated for it.
    always_ff @ (posedge clk) begin
        shift_count <= {shift_count[div.DATA_WIDTH-2:0], div.start & ~terminate_early};
    end

    assign terminate_early = div.divisor > div.dividend;

    always_ff @ (posedge clk) begin
        if (div.start) begin
            if (terminate_early) begin
                //Pre-positioned so that div.remainder (PR[DATA_WIDTH:1]) reads back the dividend
                PR <= {div.dividend, 1'b0};
                div.quotient <= '0;
            end else begin
                PR <= {(div.DATA_WIDTH)'(1'b0), div.dividend[div.DATA_WIDTH-1]};
                div.quotient <= {div.dividend[div.DATA_WIDTH-2:0], 1'b0};
            end
        end else if (~terminate) begin
            PR <= negative_sub_rst ? {PR[div.DATA_WIDTH-1:0], div.quotient[div.DATA_WIDTH-1]} : {new_PR[div.DATA_WIDTH-1:0], div.quotient[div.DATA_WIDTH-1]};
            div.quotient <= {div.quotient[div.DATA_WIDTH-2:0], ~negative_sub_rst};
        end
    end

    assign div.remainder = PR[div.DATA_WIDTH:1];

    //Divide-by-zero flag.
    //Seeded from a full zero-compare of the divisor. With a ~divisor[0] seed the
    //flag relied on later iterations to clear it, but an early-terminated request
    //sets terminate in the start cycle and never iterates, so every even non-zero
    //divisor larger than the dividend (e.g. 1 / 2) reported divide-by-zero.
    always_ff @ (posedge clk) begin
        if (div.start)
            div.divisor_is_zero <= ~(|div.divisor);
        else if (~terminate)
            div.divisor_is_zero <= div.divisor_is_zero & ~negative_sub_rst;
    end

    //terminate freezes PR/quotient. The final "if" is deliberately not an "else":
    //reaching the last iteration overrides whatever start selected.
    always_ff @ (posedge clk) begin
        if (rst)
            terminate <= 0;
        else begin
            if (div.start) begin
                if (terminate_early) begin
                    terminate <= 1;
                end else begin
                    terminate <= 0;
                end
            end
            if (shift_count[div.DATA_WIDTH-1])
                terminate <= 1;
        end
    end

    //Completion pulse: either the last iteration finished, or the request
    //terminated early in its start cycle.
    always_ff @ (posedge clk) begin
        if (rst)
            div.done <= 0;
        else begin
            if (div.done)
                div.done <= 0;
            else if ((~div.start & (shift_count[div.DATA_WIDTH-1])) | (div.start & terminate_early))
                div.done <= 1;
        end
    end

endmodule

View file

@ -1,123 +0,0 @@
/*
* Copyright © 2017-2019 Eric Matthews, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
* Alec Lu <alec_lu@sfu.ca>
*/
//Radix-2 unsigned divider with continuous early termination.
//
//Every cycle the "remainder so far" (partial remainder joined with the dividend
//bits not yet consumed) is compared against the divisor. Once the divisor is
//larger, iteration stops, the quotient bits collected so far are shifted up
//into position and done is raised.
module div_radix2_ET_full
    (
        input logic clk,
        input logic rst,
        unsigned_division_interface.divider div
    );

    //Control
    logic terminate;
    logic terminate_early;
    logic advance;
    logic finish_now;
    logic [div.DATA_WIDTH-1:0] shift_count;

    //Datapath
    logic [div.DATA_WIDTH:0] new_PR;
    logic [div.DATA_WIDTH:0] PR;
    logic negative_sub_rst;
    logic [div.DATA_WIDTH-1:0] AR_r;    //dividend bits still to be consumed, MSB first
    logic [div.DATA_WIDTH-1:0] Q_temp;  //quotient bits collected so far, right aligned
    logic [5:0] shift_num_R;            //right shift that aligns {PR, AR_r} as a remainder
    logic [5:0] shift_num_Q;            //left shift that aligns Q_temp on early exit
    logic [div.DATA_WIDTH*2:0] combined;

    //implementation
    ////////////////////////////////////////////////////
    //Trial subtraction; the top bit is the borrow
    assign new_PR = PR - {1'b0, div.divisor};
    assign negative_sub_rst = new_PR[div.DATA_WIDTH];

    //Remainder / quotient views of the working registers
    assign combined = {PR, AR_r} >> shift_num_R;
    assign div.remainder = combined[div.DATA_WIDTH-1:0];
    assign terminate_early = div.divisor > div.remainder;
    assign div.quotient = terminate_early ? (Q_temp << shift_num_Q) : Q_temp;

    //An iteration is performed only while neither stop condition holds
    assign advance = ~terminate & ~terminate_early;
    //Single-cycle condition that raises done
    assign finish_now = ~div.start & (shift_count[31] | terminate_early) & ~div.done & ~terminate;

    //Iteration tracker: bit 0 is set in the cycle after start and walks upward
    always_ff @ (posedge clk) begin
        if (div.start)
            shift_count <= 32'd1;
        else
            shift_count <= {shift_count[30:0], div.start};
    end

    //Working registers and their alignment counters share one load/advance schedule
    always_ff @ (posedge clk) begin
        if (div.start) begin
            shift_num_R <= 1;
            shift_num_Q <= 32;
            PR <= {(div.DATA_WIDTH)'(1'b0), div.dividend[div.DATA_WIDTH-1]};
            Q_temp <= '0;
            AR_r <= {div.dividend[div.DATA_WIDTH-2:0], 1'b0};
        end
        else if (advance) begin
            shift_num_R <= shift_num_R + 1;
            shift_num_Q <= shift_num_Q - 1;
            PR <= negative_sub_rst ? {PR[div.DATA_WIDTH-1:0], AR_r[div.DATA_WIDTH-1]} :
                                     {new_PR[div.DATA_WIDTH-1:0], AR_r[div.DATA_WIDTH-1]};
            Q_temp <= {Q_temp[div.DATA_WIDTH-2:0], ~negative_sub_rst};
            AR_r <= {AR_r[div.DATA_WIDTH-2:0], 1'b0};
        end
    end

    //Divide-by-zero flag: seeded from the divisor LSB, cleared by the first
    //trial subtraction that goes negative
    always_ff @ (posedge clk) begin
        if (div.start)
            div.divisor_is_zero <= ~div.divisor[0];
        else if (~terminate)
            div.divisor_is_zero <= div.divisor_is_zero & ~negative_sub_rst;
    end

    //Latched stop flag; cleared by reset or a new request
    always_ff @ (posedge clk) begin
        if (rst | div.start)
            terminate <= 0;
        else if (shift_count[31] | terminate_early)
            terminate <= 1;
    end

    //Completion pulse
    always_ff @ (posedge clk) begin
        if (rst)
            div.done <= 0;
        else if (finish_now)
            div.done <= 1;
        else if (div.done)
            div.done <= 0;
    end

endmodule

View file

@ -1,117 +0,0 @@
/*
* Copyright © 2017-2019 Eric Matthews, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
* Alec Lu <alec_lu@sfu.ca>
*/
//Radix-4 unsigned divider: two quotient bits per clock, DATA_WIDTH/2 iterations.
//
//div.start loads the dividend and registers 1x/2x/3x the divisor. Each following
//cycle three trial subtractions run in parallel and their sign bits select the
//quotient digit. div.done pulses after the last digit.
module div_radix4
    (
        input logic clk,
        input logic rst,
        unsigned_division_interface.divider div
    );

    logic terminate;

    logic [div.DATA_WIDTH/2-1:0] shift_count;

    logic [div.DATA_WIDTH+1:0] PR;
    logic [2:0] new_PR_sign;
    logic [div.DATA_WIDTH+2:0] new_PR_1;
    logic [div.DATA_WIDTH+2:0] new_PR_2;
    logic [div.DATA_WIDTH+2:0] new_PR_3;
    logic [div.DATA_WIDTH+1:0] B_1;
    logic [div.DATA_WIDTH+1:0] B_2;
    logic [div.DATA_WIDTH+1:0] B_3;

    //implementation
    ////////////////////////////////////////////////////
    //Trial subtractions of 1x, 2x and 3x the divisor; the top bit of each is the borrow
    assign new_PR_1 = {1'b0, PR} - {1'b0, B_1};
    assign new_PR_2 = {1'b0, PR} - {1'b0, B_2};
    assign new_PR_3 = {1'b0, PR} - {1'b0, B_3};
    assign new_PR_sign = {new_PR_3[div.DATA_WIDTH+2], new_PR_2[div.DATA_WIDTH+2], new_PR_1[div.DATA_WIDTH+2]};

    //Shift register tracking the iteration count.
    //Indices are derived from DATA_WIDTH so they always match the declared width
    //of shift_count (previously fixed at 14/15, i.e. only valid for 32 bits).
    always_ff @ (posedge clk) begin
        shift_count <= {shift_count[div.DATA_WIDTH/2-2:0], div.start};
    end

    always_ff @ (posedge clk) begin
        if (div.start) begin
            PR <= {{(div.DATA_WIDTH){1'b0}}, div.dividend[div.DATA_WIDTH-1:div.DATA_WIDTH-2]};
            div.quotient <= {div.dividend[div.DATA_WIDTH-3:0], 2'b00};

            B_1 <= {2'b0, div.divisor};                               //1xB
            B_2 <= {1'b0, div.divisor, 1'b0};                         //2xB
            B_3 <= {1'b0, div.divisor, 1'b0} + {2'b0, div.divisor};   //3xB
        end else if (~terminate) begin
            case (new_PR_sign)
                3'b111 : begin //even 1xB does not fit: digit 0
                    PR <= {PR[div.DATA_WIDTH-1:0], div.quotient[div.DATA_WIDTH-1:div.DATA_WIDTH-2]};
                    div.quotient <= {div.quotient[div.DATA_WIDTH-3:0], 2'b00};
                end
                3'b110 : begin
                    PR <= {new_PR_1[div.DATA_WIDTH-1:0], div.quotient[div.DATA_WIDTH-1:div.DATA_WIDTH-2]};
                    div.quotient <= {div.quotient[div.DATA_WIDTH-3:0], 2'b01};
                end
                3'b100 : begin
                    PR <= {new_PR_2[div.DATA_WIDTH-1:0], div.quotient[div.DATA_WIDTH-1:div.DATA_WIDTH-2]};
                    div.quotient <= {div.quotient[div.DATA_WIDTH-3:0], 2'b10};
                end
                default: begin //3'b000 : begin
                    PR <= {new_PR_3[div.DATA_WIDTH-1:0], div.quotient[div.DATA_WIDTH-1:div.DATA_WIDTH-2]};
                    div.quotient <= {div.quotient[div.DATA_WIDTH-3:0], 2'b11};
                end
            endcase
        end
    end

    assign div.remainder = PR[div.DATA_WIDTH+1:2];

    //Divide-by-zero flag: seeded from the divisor LSB and cleared by the first
    //iteration in which any trial subtraction goes negative
    always_ff @ (posedge clk) begin
        if (div.start)
            div.divisor_is_zero <= ~div.divisor[0];
        else if (~terminate)
            div.divisor_is_zero <= div.divisor_is_zero & ~(|new_PR_sign);
    end

    //Stop iterating after the last digit. The second "if" is deliberately not an
    //"else": the terminate condition overrides start when both are set.
    always_ff @ (posedge clk) begin
        if (rst)
            terminate <= 0;
        else begin
            if (div.start)
                terminate <= 0;
            if (shift_count[div.DATA_WIDTH/2-1])
                terminate <= 1;
        end
    end

    //Completion pulse
    always_ff @ (posedge clk) begin
        if (rst)
            div.done <= 0;
        else begin
            if (shift_count[div.DATA_WIDTH/2-1])
                div.done <= 1;
            else if (div.done)
                div.done <= 0;
        end
    end

endmodule

View file

@ -1,130 +0,0 @@
/*
* Copyright © 2017-2019 Eric Matthews, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
* Alec Lu <alec_lu@sfu.ca>
*/
//Radix-4 unsigned divider (two quotient bits per clock) with a single
//early-termination check at start: when divisor > dividend the result is known
//immediately (quotient 0, remainder = dividend) and done is raised in the next cycle.
module div_radix4_ET
    (
        input logic clk,
        input logic rst,
        unsigned_division_interface.divider div
    );

    logic terminate;
    logic terminate_early;

    logic [div.DATA_WIDTH/2-1:0] shift_count;

    logic [div.DATA_WIDTH+1:0] PR;
    logic [2:0] new_PR_sign;
    logic [div.DATA_WIDTH+2:0] new_PR_1;
    logic [div.DATA_WIDTH+2:0] new_PR_2;
    logic [div.DATA_WIDTH+2:0] new_PR_3;
    logic [div.DATA_WIDTH+1:0] B_1;
    logic [div.DATA_WIDTH+1:0] B_2;
    logic [div.DATA_WIDTH+1:0] B_3;

    //implementation
    ////////////////////////////////////////////////////
    //Trial subtractions of 1x, 2x and 3x the divisor; the top bit of each is the borrow
    assign new_PR_1 = {1'b0, PR} - {1'b0, B_1};
    assign new_PR_2 = {1'b0, PR} - {1'b0, B_2};
    assign new_PR_3 = {1'b0, PR} - {1'b0, B_3};
    assign new_PR_sign = {new_PR_3[div.DATA_WIDTH+2], new_PR_2[div.DATA_WIDTH+2], new_PR_1[div.DATA_WIDTH+2]};

    //Shift register tracking the iteration count. No bit is inserted for an
    //early-terminated request. Indices are derived from DATA_WIDTH so they match
    //the declared width of shift_count.
    always_ff @ (posedge clk) begin
        shift_count <= {shift_count[div.DATA_WIDTH/2-2:0], div.start & ~terminate_early};
    end

    assign terminate_early = div.divisor > div.dividend;

    always_ff @ (posedge clk) begin
        if (div.start) begin
            if (terminate_early) begin
                //Pre-positioned so that div.remainder (PR[DATA_WIDTH+1:2]) reads back the dividend
                PR <= {div.dividend, 2'b00};
                div.quotient <= '0;
            end
            else begin
                PR <= {{(div.DATA_WIDTH){1'b0}}, div.dividend[div.DATA_WIDTH-1:div.DATA_WIDTH-2]};
                div.quotient <= {div.dividend[div.DATA_WIDTH-3:0], 2'b00};
            end
            B_1 <= {2'b0, div.divisor};                               //1xB
            B_2 <= {1'b0, div.divisor, 1'b0};                         //2xB
            B_3 <= {1'b0, div.divisor, 1'b0} + {2'b0, div.divisor};   //3xB
        end
        else if (~terminate) begin
            case (new_PR_sign)
                3'b111 : begin //even 1xB does not fit: digit 0
                    PR <= {PR[div.DATA_WIDTH-1:0], div.quotient[div.DATA_WIDTH-1:div.DATA_WIDTH-2]};
                    div.quotient <= {div.quotient[div.DATA_WIDTH-3:0], 2'b00};
                end
                3'b110 : begin
                    PR <= {new_PR_1[div.DATA_WIDTH-1:0], div.quotient[div.DATA_WIDTH-1:div.DATA_WIDTH-2]};
                    div.quotient <= {div.quotient[div.DATA_WIDTH-3:0], 2'b01};
                end
                3'b100 : begin
                    PR <= {new_PR_2[div.DATA_WIDTH-1:0], div.quotient[div.DATA_WIDTH-1:div.DATA_WIDTH-2]};
                    div.quotient <= {div.quotient[div.DATA_WIDTH-3:0], 2'b10};
                end
                default: begin //3'b000 : begin
                    PR <= {new_PR_3[div.DATA_WIDTH-1:0], div.quotient[div.DATA_WIDTH-1:div.DATA_WIDTH-2]};
                    div.quotient <= {div.quotient[div.DATA_WIDTH-3:0], 2'b11};
                end
            endcase
        end
    end

    assign div.remainder = PR[div.DATA_WIDTH+1:2];

    //Divide-by-zero flag.
    //Seeded from a full zero-compare of the divisor. With a ~divisor[0] seed the
    //flag relied on later iterations to clear it, but an early-terminated request
    //sets terminate in the start cycle and never iterates, so every even non-zero
    //divisor larger than the dividend (e.g. 1 / 2) reported divide-by-zero.
    always_ff @ (posedge clk) begin
        if (div.start)
            div.divisor_is_zero <= ~(|div.divisor);
        else if (~terminate)
            div.divisor_is_zero <= div.divisor_is_zero & ~(|new_PR_sign);
    end

    //terminate freezes PR/quotient. The final "if" is deliberately not an "else":
    //reaching the last iteration overrides whatever start selected.
    always_ff @ (posedge clk) begin
        if (rst)
            terminate <= 0;
        else begin
            if (div.start) begin
                if (terminate_early) begin
                    terminate <= 1;
                end else begin
                    terminate <= 0;
                end
            end
            if (shift_count[div.DATA_WIDTH/2-1])
                terminate <= 1;
        end
    end

    //Completion pulse: either the last iteration finished, or the request
    //terminated early in its start cycle.
    always_ff @ (posedge clk) begin
        if (rst)
            div.done <= 0;
        else begin
            if (div.done)
                div.done <= 0;
            else if ((~div.start & (shift_count[div.DATA_WIDTH/2-1])) | (div.start & terminate_early))
                div.done <= 1;
        end
    end

endmodule

View file

@ -1,144 +0,0 @@
/*
* Copyright © 2017-2019 Eric Matthews, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
* Alec Lu <alec_lu@sfu.ca>
*/
//Radix-4 unsigned divider (two quotient bits per clock) with continuous early
//termination.
//
//Every cycle the "remainder so far" (partial remainder joined with the dividend
//bits not yet consumed) is compared against the registered divisor. Once the
//divisor is larger, iteration stops, the quotient digits collected so far are
//shifted up into position and done is raised.
module div_radix4_ET_full
    (
        input logic clk,
        input logic rst,
        unsigned_division_interface.divider div
    );

    logic terminate;

    logic [div.DATA_WIDTH/2-1:0] shift_count;

    logic [div.DATA_WIDTH+1:0] PR;
    logic [2:0] new_PR_sign;
    logic [div.DATA_WIDTH+2:0] new_PR_1;
    logic [div.DATA_WIDTH+2:0] new_PR_2;
    logic [div.DATA_WIDTH+2:0] new_PR_3;
    logic [div.DATA_WIDTH+1:0] B_1;
    logic [div.DATA_WIDTH+1:0] B_2;
    logic [div.DATA_WIDTH+1:0] B_3;

    logic [div.DATA_WIDTH-1:0] B_r;     //divisor captured at start
    logic [div.DATA_WIDTH-1:0] AR_r;    //dividend bits still to be consumed, MSB first
    logic [div.DATA_WIDTH-1:0] Q_temp;  //quotient digits collected so far, right aligned
    logic [5:0] shift_num_R;            //right shift that aligns {PR, AR_r} as a remainder
    logic [5:0] shift_num_Q;            //left shift that aligns Q_temp on early exit
    //One bit wider than 2*DATA_WIDTH+1 so that the whole of PR fits (see below)
    logic [div.DATA_WIDTH*2+1:0] combined;
    logic terminate_early;

    //implementation
    ////////////////////////////////////////////////////
    //Trial subtractions of 1x, 2x and 3x the divisor; the top bit of each is the borrow
    assign new_PR_1 = {1'b0, PR} - {1'b0, B_1};
    assign new_PR_2 = {1'b0, PR} - {1'b0, B_2};
    assign new_PR_3 = {1'b0, PR} - {1'b0, B_3};
    assign new_PR_sign = {new_PR_3[div.DATA_WIDTH+2], new_PR_2[div.DATA_WIDTH+2], new_PR_1[div.DATA_WIDTH+2]};

    //Iteration tracker: bit 0 is set in the cycle after start and walks upward
    always_ff @ (posedge clk) begin
        if (div.start)
            shift_count <= 1;
        else
            shift_count <= {shift_count[14:0], div.start};
    end

    always_ff @ (posedge clk) begin
        if (div.start) begin
            shift_num_R <= 2;
            shift_num_Q <= 32;
        end
        else if (~terminate & ~terminate_early) begin
            shift_num_R <= shift_num_R + 2;
            shift_num_Q <= shift_num_Q - 2;
        end
    end

    //The full PR must take part in the shift. After the final iteration
    //shift_num_R equals DATA_WIDTH+2, so the remainder is PR[DATA_WIDTH+1:2].
    //Concatenating only PR[DATA_WIDTH:0] dropped the remainder's top bit, which
    //is visible when every iteration runs with a zero divisor and the dividend
    //has its MSB set.
    assign combined = {PR, AR_r} >> shift_num_R;
    assign div.remainder = combined[div.DATA_WIDTH-1:0];
    assign terminate_early = B_r > div.remainder;
    assign div.quotient = terminate_early ? (Q_temp << shift_num_Q) : Q_temp;

    always_ff @ (posedge clk) begin
        if (div.start) begin
            PR <= {{(div.DATA_WIDTH){1'b0}}, div.dividend[div.DATA_WIDTH-1:div.DATA_WIDTH-2]};
            Q_temp <= '0;
            AR_r <= {div.dividend[div.DATA_WIDTH-3:0], 2'b00};
            B_r <= div.divisor;
            B_1 <= {2'b0, div.divisor};                               //1xB
            B_2 <= {1'b0, div.divisor, 1'b0};                         //2xB
            B_3 <= {1'b0, div.divisor, 1'b0} + {2'b0, div.divisor};   //3xB
        end else if (~terminate & ~terminate_early) begin
            AR_r <= {AR_r[div.DATA_WIDTH-3:0], 2'b00};
            //div.quotient equals Q_temp in this branch because terminate_early is low
            case (new_PR_sign)
                3'b111 : begin //even 1xB does not fit: digit 0
                    PR <= {PR[div.DATA_WIDTH-1:0], AR_r[div.DATA_WIDTH-1:div.DATA_WIDTH-2]};
                    Q_temp <= {div.quotient[div.DATA_WIDTH-3:0], 2'b00};
                end
                3'b110 : begin
                    PR <= {new_PR_1[div.DATA_WIDTH-1:0], AR_r[div.DATA_WIDTH-1:div.DATA_WIDTH-2]};
                    Q_temp <= {div.quotient[div.DATA_WIDTH-3:0], 2'b01};
                end
                3'b100 : begin
                    PR <= {new_PR_2[div.DATA_WIDTH-1:0], AR_r[div.DATA_WIDTH-1:div.DATA_WIDTH-2]};
                    Q_temp <= {div.quotient[div.DATA_WIDTH-3:0], 2'b10};
                end
                default: begin //3'b000 : begin
                    PR <= {new_PR_3[div.DATA_WIDTH-1:0], AR_r[div.DATA_WIDTH-1:div.DATA_WIDTH-2]};
                    Q_temp <= {div.quotient[div.DATA_WIDTH-3:0], 2'b11};
                end
            endcase
        end
    end

    //Divide-by-zero flag: seeded from the divisor LSB and cleared by the first
    //iteration in which any trial subtraction goes negative
    always_ff @ (posedge clk) begin
        if (div.start)
            div.divisor_is_zero <= ~div.divisor[0];
        else if (~terminate)
            div.divisor_is_zero <= div.divisor_is_zero & ~(|new_PR_sign);
    end

    //Latched stop flag; cleared by reset or a new request
    always_ff @ (posedge clk) begin
        if (rst)
            terminate <= 0;
        else begin
            if (div.start)
                terminate <= 0;
            else if (shift_count[15] | terminate_early)
                terminate <= 1;
        end
    end

    //Completion pulse
    always_ff @ (posedge clk) begin
        if (rst)
            div.done <= 0;
        else begin
            if (~div.start & (shift_count[15] | terminate_early) & ~div.done & ~terminate)
                div.done <= 1;
            else if (div.done)
                div.done <= 0;
        end
    end

endmodule

View file

@ -1,162 +0,0 @@
/*
* Copyright © 2017-2019 Eric Matthews, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
* Alec Lu <alec_lu@sfu.ca>
*/
//Radix-8 unsigned divider: three quotient bits per clock, eleven iterations.
//
//div.start loads the dividend and registers 1x..7x the divisor. Each following
//cycle seven trial subtractions run in parallel; their sign bits pick the
//quotient digit and the matching next partial remainder. div.done pulses after
//the last digit.
module div_radix8
    (
        input logic clk,
        input logic rst,
        unsigned_division_interface.divider div
    );

    //Control
    logic terminate;
    logic [10:0] shift_count;

    //Working registers
    logic [div.DATA_WIDTH+2:0] PR;
    logic [div.DATA_WIDTH:0] Q_33;

    //Registered divisor multiples (B_k = k x divisor)
    logic [div.DATA_WIDTH+2:0] B_1;
    logic [div.DATA_WIDTH+2:0] B_2;
    logic [div.DATA_WIDTH+2:0] B_3;
    logic [div.DATA_WIDTH+2:0] B_4;
    logic [div.DATA_WIDTH+2:0] B_5;
    logic [div.DATA_WIDTH+2:0] B_6;
    logic [div.DATA_WIDTH+2:0] B_7;

    //Trial subtraction results (one extra bit on top for the borrow)
    logic [div.DATA_WIDTH+3:0] new_PR_1;
    logic [div.DATA_WIDTH+3:0] new_PR_2;
    logic [div.DATA_WIDTH+3:0] new_PR_3;
    logic [div.DATA_WIDTH+3:0] new_PR_4;
    logic [div.DATA_WIDTH+3:0] new_PR_5;
    logic [div.DATA_WIDTH+3:0] new_PR_6;
    logic [div.DATA_WIDTH+3:0] new_PR_7;
    logic [6:0] new_PR_sign;

    //Outcome of the digit selection for the current iteration
    logic [div.DATA_WIDTH-1:0] selected_PR;
    logic [2:0] selected_digit;

    //implementation
    ////////////////////////////////////////////////////
    assign new_PR_1 = {1'b0, PR} - B_1;
    assign new_PR_2 = {1'b0, PR} - B_2;
    assign new_PR_3 = {1'b0, PR} - B_3;
    assign new_PR_4 = {1'b0, PR} - B_4;
    assign new_PR_5 = {1'b0, PR} - B_5;
    assign new_PR_6 = {1'b0, PR} - B_6;
    assign new_PR_7 = {1'b0, PR} - B_7;

    assign new_PR_sign = {new_PR_7[div.DATA_WIDTH+3], new_PR_6[div.DATA_WIDTH+3], new_PR_5[div.DATA_WIDTH+3],
                          new_PR_4[div.DATA_WIDTH+3], new_PR_3[div.DATA_WIDTH+3], new_PR_2[div.DATA_WIDTH+3],
                          new_PR_1[div.DATA_WIDTH+3]};

    //Digit selection: the number of non-negative subtractions is the digit, and
    //the largest non-negative result becomes the new partial remainder.
    //Any sign pattern not listed falls into the digit-7 branch.
    always_comb begin
        case (new_PR_sign)
            7'b1111111 : begin
                selected_PR = PR[div.DATA_WIDTH-1:0];
                selected_digit = 3'b000;
            end
            7'b1111110 : begin
                selected_PR = new_PR_1[div.DATA_WIDTH-1:0];
                selected_digit = 3'b001;
            end
            7'b1111100 : begin
                selected_PR = new_PR_2[div.DATA_WIDTH-1:0];
                selected_digit = 3'b010;
            end
            7'b1111000 : begin
                selected_PR = new_PR_3[div.DATA_WIDTH-1:0];
                selected_digit = 3'b011;
            end
            7'b1110000 : begin
                selected_PR = new_PR_4[div.DATA_WIDTH-1:0];
                selected_digit = 3'b100;
            end
            7'b1100000 : begin
                selected_PR = new_PR_5[div.DATA_WIDTH-1:0];
                selected_digit = 3'b101;
            end
            7'b1000000 : begin
                selected_PR = new_PR_6[div.DATA_WIDTH-1:0];
                selected_digit = 3'b110;
            end
            default : begin //7'b0000000
                selected_PR = new_PR_7[div.DATA_WIDTH-1:0];
                selected_digit = 3'b111;
            end
        endcase
    end

    //Iteration tracker: the start pulse enters at bit 0 and advances every clock
    always_ff @ (posedge clk) begin
        shift_count <= {shift_count[9:0], div.start};
    end

    //Operand load on start, otherwise one digit per clock until terminate
    always_ff @ (posedge clk) begin
        if (div.start) begin
            PR <= {{(div.DATA_WIDTH){1'b0}}, 1'b0, div.dividend[div.DATA_WIDTH-1:div.DATA_WIDTH-2]};
            Q_33 <= {div.dividend[div.DATA_WIDTH-3:0], 3'b000};

            B_1 <= {3'b000, div.divisor};
            B_2 <= {2'b00, div.divisor, 1'b0};
            B_3 <= {2'b00, div.divisor, 1'b0} + {3'b0, div.divisor};
            B_4 <= {1'b0, div.divisor, 2'b00};
            B_5 <= {1'b0, div.divisor, 2'b00} + {3'b0, div.divisor};
            B_6 <= {1'b0, div.divisor, 2'b00} + {2'b00, div.divisor, 1'b0};
            B_7 <= {1'b0, div.divisor, 2'b00} + {2'b00, div.divisor, 1'b0} + {3'b0, div.divisor};
        end
        else if (~terminate) begin
            PR <= {selected_PR, Q_33[div.DATA_WIDTH:div.DATA_WIDTH-2]};
            Q_33 <= {Q_33[div.DATA_WIDTH-3:0], selected_digit};
        end
    end

    assign div.remainder = PR[div.DATA_WIDTH+2:3];
    assign div.quotient = Q_33[div.DATA_WIDTH-1:0];

    //Divide-by-zero flag: seeded from the divisor LSB and cleared by the first
    //iteration in which any trial subtraction goes negative
    always_ff @ (posedge clk) begin
        if (div.start)
            div.divisor_is_zero <= ~div.divisor[0];
        else if (~terminate)
            div.divisor_is_zero <= div.divisor_is_zero & ~(|new_PR_sign);
    end

    //Latched stop flag; reaching the last iteration takes priority over start
    always_ff @ (posedge clk) begin
        if (rst)
            terminate <= 0;
        else if (shift_count[10])
            terminate <= 1;
        else if (div.start)
            terminate <= 0;
    end

    //Completion pulse
    always_ff @ (posedge clk) begin
        if (rst)
            div.done <= 0;
        else if (shift_count[10])
            div.done <= 1;
        else if (div.done)
            div.done <= 0;
    end

endmodule

View file

@ -1,152 +0,0 @@
/*
* Copyright © 2017-2019 Eric Matthews, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
* Alec Lu <alec_lu@sfu.ca>
*/
//Radix-8 unsigned divider (three quotient bits per clock) with a single
//early-termination check at start: when divisor > dividend the result is known
//immediately (quotient 0, remainder = dividend) and done is raised in the next cycle.
//
//The divisor multiples are formed combinationally from div.divisor on every
//iteration; nothing is latched at start.
//NOTE(review): this presumably requires the requester to hold div.divisor stable
//until done - confirm against the caller.
module div_radix8_ET
    (
        input logic clk,
        input logic rst,
        unsigned_division_interface.divider div
    );

    logic terminate;
    logic terminate_early;

    logic [10:0] shift_count;

    logic [div.DATA_WIDTH+2:0] PR;
    logic [div.DATA_WIDTH:0] Q_33;

    logic [6:0] new_PR_sign;
    logic [div.DATA_WIDTH+3:0] new_PR_1;
    logic [div.DATA_WIDTH+3:0] new_PR_2;
    logic [div.DATA_WIDTH+3:0] new_PR_3;
    logic [div.DATA_WIDTH+3:0] new_PR_4;
    logic [div.DATA_WIDTH+3:0] new_PR_5;
    logic [div.DATA_WIDTH+3:0] new_PR_6;
    logic [div.DATA_WIDTH+3:0] new_PR_7;

    //implementation
    ////////////////////////////////////////////////////
    //Trial subtractions of 1x..7x the divisor; the top bit of each is the borrow
    assign new_PR_1 = {1'b0, PR} - {4'b0, div.divisor};
    assign new_PR_2 = {1'b0, PR} - {3'b0, div.divisor, 1'b0};
    assign new_PR_3 = {1'b0, PR} - {3'b0, div.divisor, 1'b0} - {4'b0, div.divisor};
    assign new_PR_4 = {1'b0, PR} - {2'b0, div.divisor, 2'b0};
    assign new_PR_5 = {1'b0, PR} - {2'b0, div.divisor, 2'b0} - {4'b0, div.divisor};
    assign new_PR_6 = {1'b0, PR} - {2'b0, div.divisor, 2'b0} - {3'b0, div.divisor, 1'b0};
    assign new_PR_7 = {1'b0, PR} - {2'b0, div.divisor, 2'b0} - {3'b0, div.divisor, 1'b0} - {4'b0, div.divisor};

    assign new_PR_sign = {new_PR_7[div.DATA_WIDTH+3], new_PR_6[div.DATA_WIDTH+3], new_PR_5[div.DATA_WIDTH+3],
                          new_PR_4[div.DATA_WIDTH+3], new_PR_3[div.DATA_WIDTH+3], new_PR_2[div.DATA_WIDTH+3],
                          new_PR_1[div.DATA_WIDTH+3]};

    //Shift register tracking the iteration count. No bit is inserted for an
    //early-terminated request.
    always_ff @ (posedge clk) begin
        shift_count <= {shift_count[9:0], div.start & ~terminate_early};
    end

    assign terminate_early = div.divisor > div.dividend;

    always_ff @ (posedge clk) begin
        if (div.start) begin
            if (terminate_early) begin
                //Pre-positioned so that div.remainder (PR[DATA_WIDTH+2:3]) reads back the dividend
                PR <= {div.dividend, 3'b000};
                Q_33 <= '0;
            end else begin
                PR <= {{(div.DATA_WIDTH){1'b0}}, 1'b0, div.dividend[div.DATA_WIDTH-1:div.DATA_WIDTH-2]};
                Q_33 <= {div.dividend[div.DATA_WIDTH-3:0], 3'b000};
            end
        end
        else if (~terminate) begin
            //The number of non-negative subtractions is the quotient digit.
            //Any sign pattern not listed falls into the digit-7 branch.
            case (new_PR_sign)
                7'b1111111 : begin
                    PR <= {PR[div.DATA_WIDTH-1:0], Q_33[div.DATA_WIDTH:div.DATA_WIDTH-2]};
                    Q_33 <= {Q_33[div.DATA_WIDTH-3:0], 3'b000};
                end
                7'b1111110 : begin
                    PR <= {new_PR_1[div.DATA_WIDTH-1:0], Q_33[div.DATA_WIDTH:div.DATA_WIDTH-2]};
                    Q_33 <= {Q_33[div.DATA_WIDTH-3:0], 3'b001};
                end
                7'b1111100 : begin
                    PR <= {new_PR_2[div.DATA_WIDTH-1:0], Q_33[div.DATA_WIDTH:div.DATA_WIDTH-2]};
                    Q_33 <= {Q_33[div.DATA_WIDTH-3:0], 3'b010};
                end
                7'b1111000 : begin
                    PR <= {new_PR_3[div.DATA_WIDTH-1:0], Q_33[div.DATA_WIDTH:div.DATA_WIDTH-2]};
                    Q_33 <= {Q_33[div.DATA_WIDTH-3:0], 3'b011};
                end
                7'b1110000 : begin
                    PR <= {new_PR_4[div.DATA_WIDTH-1:0], Q_33[div.DATA_WIDTH:div.DATA_WIDTH-2]};
                    Q_33 <= {Q_33[div.DATA_WIDTH-3:0], 3'b100};
                end
                7'b1100000 : begin
                    PR <= {new_PR_5[div.DATA_WIDTH-1:0], Q_33[div.DATA_WIDTH:div.DATA_WIDTH-2]};
                    Q_33 <= {Q_33[div.DATA_WIDTH-3:0], 3'b101};
                end
                7'b1000000 : begin
                    PR <= {new_PR_6[div.DATA_WIDTH-1:0], Q_33[div.DATA_WIDTH:div.DATA_WIDTH-2]};
                    Q_33 <= {Q_33[div.DATA_WIDTH-3:0], 3'b110};
                end
                default: begin //7'b0000000 : begin
                    PR <= {new_PR_7[div.DATA_WIDTH-1:0], Q_33[div.DATA_WIDTH:div.DATA_WIDTH-2]};
                    Q_33 <= {Q_33[div.DATA_WIDTH-3:0], 3'b111};
                end
            endcase
        end
    end

    assign div.remainder = PR[div.DATA_WIDTH+2:3];
    assign div.quotient = Q_33[div.DATA_WIDTH-1:0];

    //Divide-by-zero flag.
    //Seeded from a full zero-compare of the divisor. With a ~divisor[0] seed the
    //flag relied on later iterations to clear it, but an early-terminated request
    //sets terminate in the start cycle and never iterates, so every even non-zero
    //divisor larger than the dividend (e.g. 1 / 2) reported divide-by-zero.
    always_ff @ (posedge clk) begin
        if (div.start)
            div.divisor_is_zero <= ~(|div.divisor);
        else if (~terminate)
            div.divisor_is_zero <= div.divisor_is_zero & ~(|new_PR_sign);
    end

    //terminate freezes PR/Q_33. The final "if" is deliberately not an "else":
    //reaching the last iteration overrides whatever start selected.
    always_ff @ (posedge clk) begin
        if (rst)
            terminate <= 0;
        else begin
            if (div.start) begin
                if (terminate_early) begin
                    terminate <= 1;
                end else begin
                    terminate <= 0;
                end
            end
            if (shift_count[10])
                terminate <= 1;
        end
    end

    //Completion pulse: either the last iteration finished, or the request
    //terminated early in its start cycle.
    always_ff @ (posedge clk) begin
        if (rst)
            div.done <= 0;
        else begin
            if (div.done)
                div.done <= 0;
            else if ((~div.start & (shift_count[10])) | (div.start & terminate_early))
                div.done <= 1;
        end
    end

endmodule

View file

@ -33,7 +33,7 @@ module div_unit
input div_inputs_t div_inputs,
unit_issue_interface.unit issue,
output unit_writeback_t wb
unit_writeback_interface.unit wb
);
logic signed_divop;
@ -47,17 +47,22 @@ module div_unit
logic remainder_op;
typedef struct packed{
logic [XLEN-1:0] unsigned_dividend;
logic [XLEN-1:0] unsigned_divisor;
logic remainder_op;
logic negate_quotient;
logic negate_remainder;
logic reuse_result;
instruction_id_t instruction_id;
id_t id;
} div_attributes_t;
typedef struct packed{
logic [XLEN-1:0] unsigned_dividend;
logic [XLEN-1:0] unsigned_divisor;
div_attributes_t attr;
} div_fifo_inputs_t;
div_fifo_inputs_t fifo_inputs;
div_fifo_inputs_t div_op;
div_attributes_t in_progress_attr;
unsigned_division_interface #(.DATA_WIDTH(32)) div_core();
@ -90,36 +95,40 @@ module div_unit
assign fifo_inputs.unsigned_dividend = unsigned_dividend;
assign fifo_inputs.unsigned_divisor = unsigned_divisor;
assign fifo_inputs.remainder_op = div_inputs.op[1];
assign fifo_inputs.negate_quotient = negate_quotient;
assign fifo_inputs.negate_remainder = negate_remainder;
assign fifo_inputs.reuse_result = div_inputs.reuse_result;
assign fifo_inputs.instruction_id = issue.instruction_id;
assign fifo_inputs.attr.remainder_op = div_inputs.op[1];
assign fifo_inputs.attr.negate_quotient = negate_quotient;
assign fifo_inputs.attr.negate_remainder = negate_remainder;
assign fifo_inputs.attr.reuse_result = div_inputs.reuse_result;
assign fifo_inputs.attr.id = issue.id;
////////////////////////////////////////////////////
//Input FIFO
taiga_fifo #(.DATA_WIDTH($bits(div_fifo_inputs_t)), .FIFO_DEPTH(MAX_INFLIGHT_COUNT))
taiga_fifo #(.DATA_WIDTH($bits(div_fifo_inputs_t)), .FIFO_DEPTH(1))
div_input_fifo (.fifo(input_fifo), .*);
assign input_fifo.data_in = fifo_inputs;
assign input_fifo.push = issue.possible_issue;
assign input_fifo.supress_push = gc_fetch_flush;
assign issue.ready = 1; //As FIFO depth is the same as MAX_INFLIGHT_COUNT
assign input_fifo.pop = div_done;
assign input_fifo.push = issue.new_request;
assign input_fifo.potential_push = issue.possible_issue;
assign issue.ready = ~input_fifo.full | input_fifo.pop; //As FIFO depth is the same as MAX_INFLIGHT_COUNT
assign input_fifo.pop = input_fifo.valid & (~in_progress);//wb.done & wb.ack;
assign div_op = input_fifo.data_out;
////////////////////////////////////////////////////
//Control Signals
assign div_core.start = input_fifo.valid & (~in_progress) & ~div_op.reuse_result;
assign div_done = div_core.done | (input_fifo.valid & div_op.reuse_result);
assign div_core.start = input_fifo.valid & (~in_progress) & ~div_op.attr.reuse_result;
assign div_done = div_core.done | (in_progress & in_progress_attr.reuse_result);
//If more than one cycle, set in_progress so that multiple div.start signals are not sent to the div unit.
set_clr_reg_with_rst #(.SET_OVER_CLR(1), .WIDTH(1), .RST_VALUE('0)) in_progress_m (
.clk, .rst,
.set(div_core.start),
.clr(div_core.done),
.set(input_fifo.valid & (~in_progress)),
.clr(wb.ack),
.result(in_progress)
);
always_ff @ (posedge clk) begin
if (input_fifo.pop)
in_progress_attr <= div_op.attr;
end
////////////////////////////////////////////////////
//Div core
@ -129,10 +138,18 @@ module div_unit
////////////////////////////////////////////////////
//Output
assign negate_result = div_op.remainder_op ? div_op.negate_remainder : (~div_core.divisor_is_zero & div_op.negate_quotient);
assign wb.rd = negate_if (div_op.remainder_op ? div_core.remainder : ({32{div_core.divisor_is_zero}} | div_core.quotient), negate_result);
assign wb.done = div_done;
assign wb.id = div_op.instruction_id;
logic done_r;
assign negate_result = in_progress_attr.remainder_op ? in_progress_attr.negate_remainder : (~div_core.divisor_is_zero & in_progress_attr.negate_quotient);
assign wb.rd = negate_if (in_progress_attr.remainder_op ? div_core.remainder : ({32{div_core.divisor_is_zero}} | div_core.quotient), negate_result);
always_ff @ (posedge clk) begin
if (wb.ack)
done_r <= 0;
else if (div_done)
done_r <= 1;
end
assign wb.done = div_done | done_r;
assign wb.id = in_progress_attr.id;
////////////////////////////////////////////////////
//Assertions

View file

@ -34,23 +34,23 @@ module fetch(
input logic exception,
input logic [31:0] gc_fetch_pc,
//ID Support
input logic pc_id_available,
output logic pc_id_assigned,
output logic fetch_complete,
branch_predictor_interface.fetch bp,
ras_interface.fetch ras,
//Instruction Metadata
output logic [31:0] if_pc,
output logic [31:0] fetch_instruction,
tlb_interface.mem tlb,
local_memory_interface.master instruction_bram,
input logic icache_on,
l1_arbiter_request_interface.master l1_request,
l1_arbiter_return_interface.master l1_response,
input logic pre_decode_pop,
output logic [31:0] pre_decode_instruction,
output logic [31:0] pre_decode_pc,
output branch_predictor_metadata_t branch_metadata,
output logic branch_prediction_used,
output logic [BRANCH_PREDICTOR_WAYS-1:0] bp_update_way,
output logic pre_decode_push
l1_arbiter_return_interface.master l1_response
);
localparam NUM_SUB_UNITS = USE_I_SCRATCH_MEM + USE_ICACHE;
@ -59,7 +59,6 @@ module fetch(
localparam BRAM_ID = 0;
localparam ICACHE_ID = USE_I_SCRATCH_MEM;
localparam FETCH_BUFFER_DEPTH_W = $clog2(FETCH_BUFFER_DEPTH);
localparam NEXT_ID_DEPTH = USE_ICACHE ? 2 : 1;
//Subunit signals
@ -76,9 +75,8 @@ module fetch(
logic [31:0] pc;
logic flush_or_rst;
logic [FETCH_BUFFER_DEPTH_W:0] inflight_count;
fifo_interface #(.DATA_WIDTH(NUM_SUB_UNITS_W)) next_unit();
logic space_in_inst_buffer;
logic new_mem_request;
//Cache related
@ -96,22 +94,28 @@ module fetch(
pc <= {next_pc[31:2], 2'b0};
end
logic [31:0] pc_plus_4;
assign pc_plus_4 = pc + 4;
always_comb begin
if (branch_flush)
next_pc = bp.branch_flush_pc;
else if (gc_fetch_pc_override)
if (gc_fetch_pc_override)
next_pc = gc_fetch_pc;
else if (branch_flush)
next_pc = bp.branch_flush_pc;
else if (bp.use_prediction)
next_pc = (bp.use_ras & ras.valid) ? ras.addr : bp.predicted_pc;
next_pc = bp.is_return ? ras.addr : bp.predicted_pc;
else
next_pc = pc + 4;
next_pc = pc_plus_4;
end
assign bp.new_mem_request = new_mem_request | gc_fetch_flush;
assign bp.next_pc = next_pc;
assign bp.if_pc = pc;
assign ras.pop = bp.use_prediction & bp.is_return & ~branch_flush & ~gc_fetch_pc_override & new_mem_request;
assign ras.push = bp.use_prediction & bp.is_call & ~branch_flush & ~gc_fetch_pc_override & new_mem_request;
assign ras.new_addr = pc_plus_4;
assign ras.branch_fetched = bp.use_prediction & bp.is_branch & new_mem_request; //flush not needed as FIFO resets inside of RAS
////////////////////////////////////////////////////
//TLB
assign tlb.virtual_address = pc;
@ -127,19 +131,13 @@ module fetch(
//Issue Control Signals
assign flush_or_rst = (rst | gc_fetch_flush);
always_ff @(posedge clk) begin
if (flush_or_rst)
inflight_count <= '1;
else
inflight_count <= inflight_count - (FETCH_BUFFER_DEPTH_W+1)'(new_mem_request) + (FETCH_BUFFER_DEPTH_W+1)'(pre_decode_pop);
end
assign space_in_inst_buffer = inflight_count[FETCH_BUFFER_DEPTH_W];
assign new_mem_request = tlb.complete & space_in_inst_buffer & units_ready;
assign new_mem_request = tlb.complete & pc_id_available & units_ready;
assign pc_id_assigned = new_mem_request & ~gc_fetch_flush;
//////////////////////////////////////////////
//Subunit Tracking
assign next_unit.push = new_mem_request;
assign next_unit.potential_push = new_mem_request;
assign next_unit.pop = units_data_valid;
one_hot_to_integer #(NUM_SUB_UNITS) hit_way_conv (.*, .one_hot(sub_unit_address_match), .int_out(next_unit.data_in));
taiga_fifo #(.DATA_WIDTH(NUM_SUB_UNITS_W), .FIFO_DEPTH(NEXT_ID_DEPTH))
@ -147,6 +145,9 @@ module fetch(
////////////////////////////////////////////////////
//Subunit Interfaces
//In the case of a gc_fetch_flush, a request may already be in progress
//for any sub unit. That request can either be completed or aborted.
//In either case, data_valid must NOT be asserted.
logic cache_address_match;
generate
for (i = 0; i < NUM_SUB_UNITS; i++) begin
@ -175,17 +176,9 @@ module fetch(
endgenerate
////////////////////////////////////////////////////
//Pre-Decode Output
assign pre_decode_instruction = unit_data_array[next_unit.data_out];
assign pre_decode_pc = stage2_phys_address;
assign pre_decode_push = units_data_valid;//FIFO is cleared on gc_fetch_flush
always_ff @(posedge clk) begin
if (new_mem_request) begin
branch_metadata <= bp.metadata;
branch_prediction_used <= bp.use_prediction;
bp_update_way <= bp.update_way;
end
end
//Instruction metadata updates
assign if_pc = pc;
assign fetch_instruction = unit_data_array[next_unit.data_out];
assign fetch_complete = units_data_valid;
endmodule

View file

@ -33,20 +33,19 @@ module gc_unit(
//Decode
unit_issue_interface.unit issue,
input gc_inputs_t gc_inputs,
input logic instruction_issued_no_rd,
input logic gc_flush_required,
//Branch mispredict
input logic branch_flush,
//Instruction misalignment
input logic potential_branch_exception,
input exception_packet_t br_exception,
input logic branch_exception_is_jump,
input exception_packet_t br_exception,
//Illegal instruction
input logic illegal_instruction,
//Load Store Unit
input exception_packet_t ls_exception,
input logic ls_exception_valid,
input logic ls_exception_is_store,
//TLBs
output logic tlb_on,
@ -56,10 +55,17 @@ module gc_unit(
mmu_interface.csr immu,
mmu_interface.csr dmmu,
//ID Management
output logic system_op_or_exception_complete,
output logic exception_with_rd_complete,
output id_t system_op_or_exception_id,
//Exception
input logic [31:0] exception_pc,
//WB
input logic instruction_complete,
input logic instruction_queue_empty,
input instruction_id_t oldest_id,
input logic [$clog2(MAX_COMPLETE_COUNT)-1:0] retire_inc,
input logic instruction_retired,
//unit_writeback_interface.unit gc_wb,
//External
@ -73,14 +79,13 @@ module gc_unit(
output logic gc_fetch_pc_override,
output logic gc_supress_writeback,
output logic ls_exception_ack,
output logic [31:0] gc_fetch_pc,
//Write-back to Load-Store Unit
output logic[31:0] csr_rd,
output instruction_id_t csr_id,
output logic csr_done
output id_t csr_id,
output logic csr_done,
input logic ls_is_idle
);
//Largest depth for TLBs
@ -100,42 +105,27 @@ module gc_unit(
//SFENCE
// flush and hold fetch, wait until L/S input FIFO empty, hold fetch until TLB update complete
//ECALL, EBREAK, SRET, MRET:
// flush fetch, update CSRs (could be illegal instruction exception as well)
// flush fetch, update to CSRs is pipelined
//Interrupt
//Hold issue, wait until IDLE state, flush fetch, take exception
//wait until issue/execute exceptions are no longer possible, flush fetch, take exception
//Fetch Exception
//flush fetch, wait until IDLE state, take exception. If decode stage or later exception occurs first, exception is overridden
//Fetch Exception (TLB and MMU) (fetch stage)
//flush fetch, wait until issue/execute exceptions are no longer possible, take exception. If decode stage or later exception occurs first, exception is overridden
//Illegal opcode (decode stage)
//fetch flush, issue hold, wait until IDLE state, take exception. If execute or later exception occurs first, exception is overridden
//Illegal opcode (issue stage)
//fetch flush, take exception. If execute or later exception occurs first, exception is overridden
//CSR exceptions
//fetch flush, issue hold, capture ID/rd_non_zero and drain instruction queue, take exception.
//Branch exceptions (issue/execute stage)
//fetch flush, take exception.
//LS exceptions (miss-aligned, TLB and MMU)
//fetch flush, issue hold, capture ID/rd_non_zero and drain instruction queue, take exception.
//CSR exceptions (issue/execute stage)
//fetch flush, take exception.
//Instruction queue drain:
// Two possibilities:
// 1. Instruction stores to reg file. ID in instruction queue, wait until that ID is oldest (either find oldest valid, or for small cycle penalty just look at last entry and wait for ID and valid)
// 2. Instruction does not store to reg file. If IQ not empty, wait for previously issued ID to complete, if empty no waiting required.
//
// After all preceding instructions have been committed, continue popping instructions from queue but supress write-back operation until queue is drained.
//LS exceptions (misaligned, TLB and MMU) (issue stage)
//fetch flush, take exception. If execute or later exception occurs first, exception is overridden
//In-order mode:
// Turn on when an instruction in the execute phase could cause an interrupt (L/S or CSR)
// Turn off when exception can no-longer occur (after one cycle for CSR, when L/S input FIFO will be empty)
//*Complete issued instructions before exception
//*Drain L/S FIFO then Hold fetch/issue during TLB clear
//*Hold fetch until all stores committed
//*Turn on inorder mode when L/S issued, turn off when no instruction can cause interrupt
// *If in-order mode and inflight queue empty, disable zero cycle write-back (eg. ALU)
//*Hold fetch during potential fetch exception, when fetch buffer drained, if no other exceptions trigger exception
typedef enum {RST_STATE, IDLE_STATE, TLB_CLEAR_STATE, IQ_DRAIN, IQ_DISCARD} gc_state;
typedef enum {RST_STATE, IDLE_STATE, TLB_CLEAR_STATE, IQ_DRAIN} gc_state;
gc_state state;
gc_state next_state;
gc_state prev_state;
@ -152,6 +142,9 @@ module gc_unit(
logic [XLEN-1:0] wb_csr;
csr_inputs_t csr_inputs;
exception_packet_t gc_exception;
exception_packet_t gc_exception_r;
id_t exception_or_system_id;
exception_packet_t csr_exception;
logic [1:0] current_privilege;
logic [31:0] trap_pc;
@ -171,19 +164,27 @@ module gc_unit(
logic processing_csr;
logic csr_ready_to_complete;
logic csr_ready_to_complete_r;
instruction_id_t instruction_id;
id_t instruction_id;
instruction_id_t exception_id;
instruction_id_t exception_id_r;
//implementation
////////////////////////////////////////////////////
//Implementation
//Input registering
always_ff @(posedge clk) begin
if (issue.possible_issue) begin
if (issue.possible_issue & ~gc_issue_hold) begin
stage1 <= gc_inputs;
end
end
////////////////////////////////////////////////////
//ID Management
always_ff @(posedge clk) begin
system_op_or_exception_complete <=
(issue.new_request & (gc_inputs.is_ret | gc_inputs.is_fence | gc_inputs.is_i_fence)) |
gc_exception.valid;
system_op_or_exception_id <= exception_or_system_id;
exception_with_rd_complete <= (ls_exception.valid & ~ls_exception_is_store) | (br_exception.valid & branch_exception_is_jump);
end
//Instruction decode
assign opcode = stage1.instruction[6:0];
assign opcode_trim = opcode[6:2];
@ -196,15 +197,11 @@ module gc_unit(
assign gc_fetch_flush = branch_flush | gc_fetch_pc_override;
always_ff @ (posedge clk) begin
gc_issue_hold <= issue.new_request || processing_csr || (next_state inside {TLB_CLEAR_STATE, IQ_DRAIN, IQ_DISCARD}) || potential_branch_exception;
gc_issue_hold <= issue.new_request || second_cycle_flush || processing_csr || (next_state inside {TLB_CLEAR_STATE, IQ_DRAIN}) || potential_branch_exception;
end
always_ff @ (posedge clk) begin
gc_issue_flush <= (next_state == IQ_DISCARD);
end
always_ff @ (posedge clk) begin
gc_supress_writeback <= next_state inside {TLB_CLEAR_STATE, IQ_DISCARD} ? 1 : 0;
gc_supress_writeback <= next_state inside {TLB_CLEAR_STATE} ? 1 : 0;
end
////////////////////////////////////////////////////
@ -228,13 +225,12 @@ module gc_unit(
case (state)
RST_STATE : next_state = IDLE_STATE;
IDLE_STATE : begin
if (ls_exception.valid | (branch_exception_is_jump & potential_branch_exception)) begin
next_state = (exception_id == oldest_id) ? IQ_DISCARD : IQ_DRAIN;
if (ls_exception.valid | potential_branch_exception) begin
next_state = IQ_DRAIN;
end
end
TLB_CLEAR_STATE : if (tlb_clear_done) next_state = IDLE_STATE;
IQ_DRAIN : if (exception_id_r == oldest_id) next_state = IQ_DISCARD;
IQ_DISCARD : if (instruction_queue_empty) next_state = IDLE_STATE;
IQ_DRAIN : next_state = IDLE_STATE;
default : next_state = RST_STATE;
endcase
end
@ -250,7 +246,6 @@ module gc_unit(
end
////////////////////////////////////////////////////
//Exception handling
logic processing_ls_exception;
//The type of call instruction is dependent on the current privilege level
always_comb begin
@ -262,43 +257,27 @@ module gc_unit(
endcase
end
always_ff @(posedge clk) begin
if (gc_exception.valid)
processing_ls_exception <= ls_exception.valid;
end
assign ls_exception_ack = processing_ls_exception && (prev_state inside {IDLE_STATE, IQ_DRAIN}) && (state == IQ_DISCARD);
assign exception_id =
potential_branch_exception ? br_exception.id :
(ls_exception.valid ? ls_exception.id : issue.instruction_id);
always_ff @(posedge clk) begin
if (gc_exception.valid)
exception_id_r <= exception_id;
end
assign exception_or_system_id =
br_exception.valid ? br_exception.id :
(ls_exception.valid ? ls_exception.id : issue.id);
//TODO: check if possible to convert to unique if, verify potential for overlap
always_comb begin
//PC sourced from instruction metadata table
if (br_exception.valid) begin
gc_exception.code = br_exception.code;
gc_exception.pc = br_exception.pc;
gc_exception.tval = br_exception.tval;
end else if (illegal_instruction) begin
gc_exception.code = ILLEGAL_INST;
gc_exception.pc = gc_inputs.pc;
gc_exception.tval = gc_inputs.instruction;//optional, can be zero instead
end else if (ls_exception.valid) begin
gc_exception.code = ls_exception.code;
gc_exception.pc = ls_exception.pc;
gc_exception.tval = ls_exception.tval;
end else if (gc_inputs.is_ecall) begin
gc_exception.code = ecall_code;
gc_exception.pc = gc_inputs.pc;
gc_exception.tval = '0;
end else begin
gc_exception.code = BREAK;
gc_exception.pc = gc_inputs.pc;
gc_exception.tval = '0;
end
end
@ -310,6 +289,7 @@ module gc_unit(
//Two cycles: on first cycle the processor front end is flushed,
//on the second cycle the new PC is fetched
always_ff @ (posedge clk) begin
gc_exception_r <= gc_exception;
second_cycle_flush <= gc_flush_required;
gc_fetch_pc_override <= gc_flush_required | second_cycle_flush | ls_exception.valid | br_exception.valid;
if (gc_exception.valid | stage1.is_i_fence | (issue.new_request & gc_inputs.is_ret)) begin
@ -327,7 +307,30 @@ module gc_unit(
assign csr_inputs.rs1_is_zero = (rs1_addr == 0);
assign csr_inputs.rd_is_zero = (rd_addr == 0);
csr_regs csr_registers (.*, .new_request(stage1.is_csr), .read_regs(csr_ready_to_complete), .commit(csr_ready_to_complete_r));
csr_regs csr_registers (
.clk(clk), .rst(rst),
.csr_inputs(csr_inputs),
.new_request(stage1.is_csr),
.read_regs(csr_ready_to_complete),
.commit(csr_ready_to_complete_r),
.gc_exception(gc_exception_r),
.csr_exception(csr_exception),
.current_privilege(current_privilege),
.exception_pc(exception_pc),
.mret(mret),
.sret(sret),
.tlb_on(tlb_on),
.asid(asid),
.immu(immu),
.dmmu(dmmu),
.retire_inc(retire_inc),
.interrupt(interrupt),
.timer_interrupt(timer_interrupt),
.wb_csr(wb_csr),
.trap_pc(trap_pc),
.csr_mepc(csr_mepc),
.csr_sepc(csr_sepc)
);
////////////////////////////////////////////////////
//Decode / Write-back Handshaking
@ -343,12 +346,12 @@ module gc_unit(
.result(processing_csr)
);
assign csr_ready_to_complete = processing_csr && (oldest_id == instruction_id);
assign csr_ready_to_complete = processing_csr & ls_is_idle;
always_ff @(posedge clk) begin
csr_ready_to_complete_r <= csr_ready_to_complete;
csr_id <= instruction_id;
if (issue.new_request) begin
instruction_id <= issue.instruction_id;
instruction_id <= issue.id;
end
end

View file

@ -29,8 +29,8 @@ module id_inuse (
input logic [4:0] rs1_addr,
input logic [4:0] rs2_addr,
input logic [4:0] issued_rd_addr,
input instruction_id_t issue_id,
input instruction_id_t retired_id,
input id_t issue_id,
input id_t retired_id,
input logic issued,
input logic retired,
output logic rs1_inuse,

View file

@ -1,5 +1,5 @@
/*
* Copyright © 2019 Eric Matthews, Lesley Shannon
* Copyright © 2020 Eric Matthews, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -20,49 +20,67 @@
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
module id_tracking
(
import taiga_config::*;
import taiga_types::*;
(
input logic clk,
input logic rst,
input logic issued,
input logic retired,
input logic gc_fetch_flush,
//ID issuing
output id_t next_id,
output logic id_available,
output instruction_id_t oldest_id,
output instruction_id_t next_id,
output logic empty
);
input id_assigned,
// m
//Decode ID
input id_t decode_id,
input decode_issued,
output decode_id_valid,
//Issue stage
input id_t issue_id,
input instruction_issued,
);
//////////////////////////////////////////
localparam LOG2_MAX_INFLIGHT_COUNT = $clog2(MAX_INFLIGHT_COUNT);
logic [LOG2_MAX_INFLIGHT_COUNT:0] inflight_count;
localparam LOG2_MAX_IDS = $clog2(MAX_IDS);
fifo_interface #(.DATA_WIDTH($bits(id_t))) fetched_ids();
////////////////////////////////////////////////////
//Implementation
always_ff @ (posedge clk) begin
if (rst)
oldest_id <= 0;
else
oldest_id <= oldest_id + LOG2_MAX_INFLIGHT_COUNT'(retired);
end
always_ff @ (posedge clk) begin
if (rst)
next_id <= 0;
else
next_id <= next_id + LOG2_MAX_INFLIGHT_COUNT'(issued);
next_id <= next_id + LOG2_MAX_IDS'(id_assigned);
end
//Upper bit is id_available
assign fetched_ids.push = id_assigned;
assign fetched_ids.pop = decode_issued;
assign fetched_ids.data_in = next_id;
assign decode_id = fetched_ids.data_out;
assign decode_id_valid = fetched_ids.valid;
taiga_fifo #(.DATA_WIDTH($bits(id_t)), .FIFO_DEPTH(MAX_IDS))
fetched_ids_fifo (.fifo(fetched_ids), .rst(rst | gc_fetch_flush), .*);
always_ff @ (posedge clk) begin
if (rst)
inflight_count <= '1;
id_available <= 0;
else
inflight_count <= inflight_count + (LOG2_MAX_INFLIGHT_COUNT+1)'(retired) - (LOG2_MAX_INFLIGHT_COUNT+1)'(issued);
id_available <=
end
assign empty = &inflight_count;//all ones
assign id_available = inflight_count[LOG2_MAX_INFLIGHT_COUNT];
////////////////////////////////////////////////////
//End of Implementation
////////////////////////////////////////////////////
@ -70,7 +88,6 @@ module id_tracking
////////////////////////////////////////////////////
//Assertions
always_ff @ (posedge clk) begin
assert (rst | !(~rst & ~id_available & issued)) else $error("Issued without valid ID!");
assert (rst | !(~rst & empty & (retired & ~issued))) else $error("Retired without any instruction inflight!");
assert (rst | !(~rst & ~id_available & id_assigned)) else $error("Issued without valid ID!");
end
endmodule

View file

@ -0,0 +1,369 @@
/*
* Copyright © 2020 Eric Matthews, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
//Instruction metadata and ID management
//Assigns an ID to each fetch request (pc_id), records per-ID metadata
//(PC, branch-predictor metadata, instruction word) in tables indexed by ID,
//queues fetched IDs for the decode stage, and tracks through a set of
//toggle memories whether the next ID to hand out is still in use.
//It also maps destination registers to the ID of the most recently issued
//writer so the issue stage can detect operands that are still pending.
module instruction_metadata_and_id_management
import taiga_config::*;
import taiga_types::*;
(
input logic clk,
input logic rst,
input logic gc_fetch_flush,
//Fetch
//pc_id: ID given to the next fetch request, pc_id_available: that ID is free
//fetch_id: ID of the next fetch to complete
output id_t pc_id,
output logic pc_id_available,
input logic [31:0] if_pc,
input logic pc_id_assigned,
output id_t fetch_id,
input logic fetch_complete,
input logic [31:0] fetch_instruction,
//Decode ID
output decode_packet_t decode,
input logic decode_advance,
//Issue stage
input issue_packet_t issue,
input logic instruction_issued,
output id_t rs_id [REGFILE_READ_PORTS],
output logic rs_inuse [REGFILE_READ_PORTS],
output logic rs_id_inuse[REGFILE_READ_PORTS],
//Branch Predictor
//Metadata captured at fetch (if) is read back by branch_id (ex)
input branch_metadata_t branch_metadata_if,
output branch_metadata_t branch_metadata_ex,
//ID freeing
input logic store_complete,
input id_t store_id,
input logic branch_complete,
input id_t branch_id,
input logic system_op_or_exception_complete,
input logic exception_with_rd_complete,
input id_t system_op_or_exception_id,
output logic [$clog2(MAX_COMPLETE_COUNT)-1:0] retire_inc,
//Writeback/Register File
input id_t ids_retiring [COMMIT_PORTS],
input logic retired [COMMIT_PORTS],
output logic [4:0] retired_rd_addr [COMMIT_PORTS],
output id_t id_for_rd [COMMIT_PORTS],
//Exception
output logic [31:0] exception_pc
);
//////////////////////////////////////////
//Per-ID metadata tables, indexed by instruction ID
logic [31:0] pc_table [MAX_IDS];
logic [31:0] instruction_table [MAX_IDS];
logic [$bits(branch_metadata_t)-1:0] branch_metadata_table [MAX_IDS];
//NOTE(review): rd_table is declared but never referenced in this module
logic [31:0] rd_table [MAX_IDS];
//pc_id advanced by one if an ID is being assigned this cycle
id_t pc_id_i;
localparam LOG2_MAX_IDS = $clog2(MAX_IDS);
//FIFO to store IDs that have been fetched but not yet decoded
fifo_interface #(.DATA_WIDTH(LOG2_MAX_IDS)) fetch_fifo();
//Toggle memory results for tracking completion after issue
logic decoded_status;
logic decoded_issued_status;
logic issued_status;
logic issued_status_rs [REGFILE_READ_PORTS];
logic branch_complete_status;
logic store_complete_status;
logic system_op_or_exception_complete_status;
logic exception_with_rd_complete_status_rs [REGFILE_READ_PORTS];
logic [COMMIT_PORTS-1:0] retired_status;
logic [COMMIT_PORTS-1:0] retired_status_rs [REGFILE_READ_PORTS];
//Number of completion events seen this cycle (registered into retire_inc)
logic [$clog2(MAX_COMPLETE_COUNT)-1:0] complete_count;
//Writes to register file
//Maps each destination register to the ID of its most recently issued writer
id_t rd_to_id_table [32];
genvar i;
////////////////////////////////////////////////////
//Implementation
////////////////////////////////////////////////////
//Instruction Metadata
//pc table
//Written when an ID is assigned to a fetch request
always_ff @ (posedge clk) begin
if (pc_id_assigned)
pc_table[pc_id] <= if_pc;
end
//branch metadata table
//Written together with the pc table, using the same ID
always_ff @ (posedge clk) begin
if (pc_id_assigned)
branch_metadata_table[pc_id] <= branch_metadata_if;
end
//instruction table
//Written when the fetch completes, indexed by fetch_id rather than pc_id
always_ff @ (posedge clk) begin
if (fetch_complete)
instruction_table[fetch_id] <= fetch_instruction;
end
//Operand inuse determination
initial rd_to_id_table = '{default: 0};
always_ff @ (posedge clk) begin
if (instruction_issued & issue.uses_rd)//tracks most recently issued instruction that writes to the register file
rd_to_id_table[issue.rd_addr] <= issue.id;
end
////////////////////////////////////////////////////
//ID Management
//Next ID always increases, except on a fetch buffer flush.
//On a fetch buffer flush, the next ID is restored to the current decode ID.
//This prevents a stall in the case where all IDs are either in-flight or
//in the fetch buffer at the point of a fetch flush.
assign pc_id_i = pc_id + LOG2_MAX_IDS'(pc_id_assigned);
always_ff @ (posedge clk) begin
if (rst)
pc_id <= 0;
else if (gc_fetch_flush)
pc_id <= decode.valid ? decode.id : pc_id;
else
pc_id <= pc_id_i;
end
//fetch_id follows the same flush-restore rule as pc_id and otherwise
//advances by one for each completed fetch
always_ff @ (posedge clk) begin
if (rst)
fetch_id <= 0;
else if (gc_fetch_flush)
fetch_id <= decode.valid ? decode.id : pc_id;
else
fetch_id <= fetch_id + LOG2_MAX_IDS'(fetch_complete);
end
////////////////////////////////////////////////////
//Fetch buffer
//IDs are pushed as fetches complete and popped as decode advances.
//The FIFO is reset on rst or on a fetch flush.
assign fetch_fifo.data_in = fetch_id;
assign fetch_fifo.push = fetch_complete;
assign fetch_fifo.potential_push = fetch_complete;
assign fetch_fifo.pop = decode_advance;
taiga_fifo #(.DATA_WIDTH(LOG2_MAX_IDS), .FIFO_DEPTH(MAX_IDS)) fetch_fifo_block (
.fifo(fetch_fifo),
.rst(rst | gc_fetch_flush),
.clk
);
////////////////////////////////////////////////////
//Issue Tracking
//As there are multiple completion sources, each source toggles a bit in its own LUTRAM.
//All LUTRAMs are then xor-ed together to produce the status of the ID.
//TODO: support arbitrary rst assertion (clear signal from global control)
//Memories read with pc_id_i feed pc_id_available; memories read with
//rs_id[] feed rs_id_inuse.
//Instruction decoded and (issued or flushed) pair
toggle_memory decode_toggle_mem (
.clk, .rst,
.toggle(decode_advance & ~gc_fetch_flush),
.toggle_id(decode.id),
.read_id(pc_id_i),
.read_data(decoded_status)
);
toggle_memory decoded_issued_toggle_mem (
.clk, .rst,
.toggle(instruction_issued | (gc_fetch_flush & issue.stage_valid)),
.toggle_id(issue.id),
.read_id(pc_id_i),
.read_data(decoded_issued_status)
);
//Post issue status tracking
toggle_memory issued_toggle_mem (
.clk, .rst,
.toggle(instruction_issued),
.toggle_id(issue.id),
.read_id(pc_id_i),
.read_data(issued_status)
);
//Per read-port copy, toggled only for instructions that write a register
generate for (i = 0; i < REGFILE_READ_PORTS; i++) begin
toggle_memory issued_toggle_mem_rs (
.clk, .rst,
.toggle(instruction_issued & issue.uses_rd),
.toggle_id(issue.id),
.read_id(rs_id[i]),
.read_data(issued_status_rs[i])
);
end endgenerate
toggle_memory branch_toggle_mem (
.clk, .rst,
.toggle(branch_complete),
.toggle_id(branch_id),
.read_id(pc_id_i),
.read_data(branch_complete_status)
);
toggle_memory store_toggle_mem (
.clk, .rst,
.toggle(store_complete),
.toggle_id(store_id),
.read_id(pc_id_i),
.read_data(store_complete_status)
);
toggle_memory system_op_or_exception_complete_toggle_mem (
.clk, .rst,
.toggle(system_op_or_exception_complete),
.toggle_id(system_op_or_exception_id),
.read_id(pc_id_i),
.read_data(system_op_or_exception_complete_status)
);
//Per read-port copy, toggled by exception_with_rd_complete
generate for (i = 0; i < REGFILE_READ_PORTS; i++) begin
toggle_memory exception_complete_toggle_mem_rs (
.clk, .rst,
.toggle(exception_with_rd_complete),
.toggle_id(system_op_or_exception_id),
.read_id(rs_id[i]),
.read_data(exception_with_rd_complete_status_rs[i])
);
end endgenerate
//One memory per commit port
//plus one further copy per (commit port, register file read port) pair
genvar j;
generate for (i = 0; i < COMMIT_PORTS; i++) begin
toggle_memory retired_toggle_mem (
.clk, .rst,
.toggle(retired[i]),
.toggle_id(ids_retiring[i]),
.read_id(pc_id_i),
.read_data(retired_status[i])
);
for (j = 0; j < REGFILE_READ_PORTS; j++) begin
toggle_memory retired_toggle_mem_rs (
.clk, .rst,
.toggle(retired[i]),
.toggle_id(ids_retiring[i]),
.read_id(rs_id[j]),
.read_data(retired_status_rs[j][i])
);
end
end endgenerate
//Computed one cycle in advance using pc_id_i
logic id_not_in_decode_issue;
logic id_not_inflight;
//Decode and issue/flush toggles agree: the ID is not between decode and issue
assign id_not_in_decode_issue = ~(decoded_status ^ decoded_issued_status);
//Issue toggle is balanced by a completion toggle: the ID is not in flight
assign id_not_inflight =
~(issued_status ^
branch_complete_status ^
store_complete_status ^
system_op_or_exception_complete_status ^
(^retired_status)
);
////////////////////////////////////////////////////
//Outputs
//rs1/rs2 conflicts don't check branch or store memories as the only
//IDs stored in rd_to_id_table are instructions that write to the register file
always_comb begin
for (int i = 0; i < REGFILE_READ_PORTS; i++) begin
rs_id_inuse[i] = (
issued_status_rs[i] ^
exception_with_rd_complete_status_rs[i] ^
(^retired_status_rs[i])
);
end
end
//Registered availability of the next ID, resets to available
always_ff @ (posedge clk) begin
if (rst)
pc_id_available <= 1;
else
pc_id_available <= id_not_in_decode_issue & id_not_inflight;
end
localparam MCC_W = $clog2(MAX_COMPLETE_COUNT);
//Sum of all completion events in this cycle, truncated to MCC_W bits
always_comb begin
complete_count = MCC_W'(branch_complete) + MCC_W'(store_complete) + MCC_W'(system_op_or_exception_complete);
for (int i = 0; i < COMMIT_PORTS; i++) begin
complete_count += MCC_W'(retired[i]);
end
end
//retire_inc is the completion count delayed by one cycle (no reset)
always_ff @ (posedge clk) begin
retire_inc <= complete_count;
end
//Decode
//The head of the fetch FIFO selects the metadata presented to decode
assign decode.id = fetch_fifo.data_out;
assign decode.valid = fetch_fifo.valid;
assign decode.pc = pc_table[fetch_fifo.data_out];
assign decode.instruction = instruction_table[fetch_fifo.data_out];
//Branch Predictor
assign branch_metadata_ex = branch_metadata_table[branch_id];
//Issue
//rs_inuse requires a non-zero source register whose address matches the rd
//field of the instruction recorded for rs_id
always_comb begin
for (int i = 0; i < REGFILE_READ_PORTS; i++) begin
rs_id[i] = rd_to_id_table[issue.rs_addr[i]];
rs_inuse[i] = (|issue.rs_addr[i]) & (issue.rs_addr[i] == instruction_table[rs_id[i]][11:7]);//11:7 is rd_addr
end
end
//Writeback support
//id_for_rd is the most recently issued writer of the retiring instruction's rd
always_comb begin
for (int i = 0; i < COMMIT_PORTS; i++) begin
retired_rd_addr[i] = instruction_table[ids_retiring[i]][11:7];
id_for_rd[i] = rd_to_id_table[retired_rd_addr[i]];
end
end
//Exception Support
//NOTE(review): exception_pc is only driven when ENABLE_M_MODE is set;
//there is no else branch, so the output is otherwise left undriven - confirm intended
generate if (ENABLE_M_MODE) begin
assign exception_pc = pc_table[system_op_or_exception_id];
end endgenerate
////////////////////////////////////////////////////
//End of Implementation
////////////////////////////////////////////////////
////////////////////////////////////////////////////
//Assertions
//An ID must not be assigned while no ID is available
pc_id_assigned_without_pc_id_available_assertion:
assert property (@(posedge clk) disable iff (rst) !(~pc_id_available & pc_id_assigned))
else $error("ID assigned without any ID available");
//Decode must not advance while the fetch FIFO holds no valid ID
decode_advanced_without_id_assertion:
assert property (@(posedge clk) disable iff (rst) !(~decode.valid & decode_advance))
else $error("Decode advanced without ID");
endmodule

View file

@ -28,6 +28,7 @@ import l2_config_and_types::*;
interface branch_predictor_interface;
//Fetch signals
logic [31:0] if_pc;
id_t if_id;
logic new_mem_request;
logic [31:0] next_pc;
@ -35,17 +36,17 @@ interface branch_predictor_interface;
logic [31:0] branch_flush_pc;
logic [31:0] predicted_pc;
logic use_prediction;
logic [BRANCH_PREDICTOR_WAYS-1:0] update_way;
logic use_ras;
branch_predictor_metadata_t metadata;
logic is_return;
logic is_call;
logic is_branch;
modport branch_predictor (
input if_pc, new_mem_request, next_pc,
output branch_flush_pc, predicted_pc, use_prediction, update_way, use_ras, metadata
input if_pc, if_id, new_mem_request, next_pc,
output branch_flush_pc, predicted_pc, use_prediction, is_return, is_call, is_branch
);
modport fetch (
input branch_flush_pc, predicted_pc, use_prediction, update_way, use_ras, metadata,
output if_pc, new_mem_request, next_pc
input branch_flush_pc, predicted_pc, use_prediction, is_return, is_call, is_branch,
output if_pc, if_id, new_mem_request, next_pc
);
endinterface
@ -54,24 +55,43 @@ interface unit_issue_interface;
logic possible_issue;
logic new_request;
logic new_request_r;
instruction_id_t instruction_id;
id_t id;
logic ready;
modport decode (input ready, output possible_issue, new_request, new_request_r, instruction_id);
modport unit (output ready, input possible_issue, new_request, new_request_r, instruction_id);
modport decode (input ready, output possible_issue, new_request, new_request_r, id);
modport unit (output ready, input possible_issue, new_request, new_request_r, id);
endinterface
//Connection between an execution unit and the writeback stage.
//The unit side drives id/done/rd; the writeback side drives ack.
interface unit_writeback_interface;
//Driven by the wb modport back to the unit
//NOTE(review): presumably indicates that the unit's result was accepted - confirm against the writeback module
logic ack;
//ID accompanying the result
id_t id;
//Driven by the unit together with id and rd
logic done;
//Result data, XLEN bits wide
logic [XLEN-1:0] rd;
//Execution unit view: receives ack, provides id, done and rd
modport unit (
input ack,
output id, done, rd
);
//Writeback view: mirror of the unit modport
modport wb (
output ack,
input id, done, rd
);
endinterface
interface ras_interface;
logic push;
logic pop;
logic branch_fetched;
logic branch_retired;
logic [31:0] new_addr;
logic [31:0] addr;
logic valid;
modport branch_unit (output push, pop, new_addr);
modport self (input push, pop, new_addr, output addr, valid);
modport fetch (input addr, valid);
modport branch_unit (output branch_retired);
modport self (input push, pop, new_addr, branch_fetched, branch_retired, output addr);
modport fetch (input addr, output pop, push, new_addr, branch_fetched);
endinterface
interface csr_exception_interface;
@ -95,67 +115,12 @@ interface exception_interface;
exception_code_t code;
logic [31:0] pc;
logic [31:0] addr;
instruction_id_t id;
id_t id;
modport econtrol (output valid, code, pc, addr, id, input ack);
modport unit (input valid, code, pc, addr, id, output ack);
endinterface
//Signals exchanged between the issue logic and the register file.
//The issue side supplies register addresses, usage flags, the issuing ID and the issued strobe;
//the register file side returns operand data, per-operand conflict flags and rs2_id.
interface register_file_issue_interface;
logic[4:0] rd_addr; //if not a storing instruction required to be zero
logic[4:0] rs1_addr;
logic[XLEN-1:0] rs1_data;
logic[4:0] rs2_addr; //if not used required to be zero
logic[XLEN-1:0] rs2_data;
instruction_id_t id;
logic uses_rs1;
logic uses_rs2;
logic rs1_conflict;
logic rs2_conflict;
instruction_id_t rs2_id;
logic instruction_issued;
//Issue-side view
modport issue (output rd_addr, rs1_addr, rs2_addr, instruction_issued, id, uses_rs1, uses_rs2, input rs1_conflict, rs2_conflict, rs1_data, rs2_data, rs2_id);
//Register-file-side view (mirror of the issue modport)
modport rf (input rd_addr, rs1_addr, rs2_addr, instruction_issued, id, uses_rs1, uses_rs2, output rs1_conflict, rs2_conflict, rs1_data, rs2_data, rs2_id);
endinterface
//Signals exchanged between the writeback stage and the register file.
//Writeback supplies the retiring destination (rd_addr, rd_data, id, retiring, rd_nzero)
//plus rs1/rs2 data and valid flags; the register file supplies rs1_id and rs2_id.
interface register_file_writeback_interface;
logic[4:0] rd_addr;
logic retiring;
logic rd_nzero;
logic[XLEN-1:0] rd_data;
instruction_id_t id;
instruction_id_t rs1_id;
instruction_id_t rs2_id;
logic[XLEN-1:0] rs1_data;
logic[XLEN-1:0] rs2_data;
logic rs1_valid;
logic rs2_valid;
//Writeback-side view
modport writeback (output rd_addr, retiring, rd_nzero, rd_data, id, rs1_data, rs2_data, rs1_valid, rs2_valid, input rs1_id, rs2_id);
//Register-file-side view (mirror of the writeback modport)
modport rf (input rd_addr, retiring, rd_nzero, rd_data, id, rs1_data, rs2_data, rs1_valid, rs2_valid, output rs1_id, rs2_id);
endinterface
//Instruction tracking signals between decode and writeback.
//The wb side provides the ID to issue with and whether one is available;
//the decode side reports the issued instruction's packet, target unit and exception flag.
interface tracking_interface;
instruction_id_t issue_id;
logic id_available;
inflight_instruction_packet inflight_packet;
logic issued;
logic [WB_UNITS_WIDTH-1:0] issue_unit_id;
logic exception_possible;
//Decode-side view
modport decode (input issue_id, id_available, output inflight_packet, issued, issue_unit_id, exception_possible);
//Writeback-side view (mirror of the decode modport)
modport wb (output issue_id, id_available, input inflight_packet, issued, issue_unit_id, exception_possible);
endinterface
interface fifo_interface #(parameter DATA_WIDTH = 42);//#(parameter type data_type = logic[31:0]);
logic push;
logic pop;
@ -163,10 +128,10 @@ interface fifo_interface #(parameter DATA_WIDTH = 42);//#(parameter type data_ty
logic [DATA_WIDTH-1:0] data_out;
logic valid;
logic full;
logic supress_push;
modport enqueue (input full, output data_in, push, supress_push);
logic potential_push;
modport enqueue (input full, output data_in, push, potential_push);
modport dequeue (input valid, data_out, output pop);
modport structure(input push, pop, data_in, supress_push, output data_out, valid, full);
modport structure(input push, pop, data_in, potential_push, output data_out, valid, full);
endinterface
interface mmu_interface;
@ -218,36 +183,41 @@ interface load_store_queue_interface;
logic [3:0] be;
logic [2:0] fn3;
logic [31:0] data_in;
instruction_id_t id;
id_t id;
logic forwarded_store;
instruction_id_t data_id;
id_t data_id;
logic possible_issue;
logic new_issue;
logic ready;
instruction_id_t id_needed_by_store;
id_t id_needed_by_store;
data_access_shared_inputs_t transaction_out;
logic transaction_ready;
logic empty;
logic accepted;
modport queue (input addr, load, store, be, fn3, data_in, id, forwarded_store, data_id, possible_issue, new_issue, accepted, output ready, id_needed_by_store, transaction_out, transaction_ready);
modport ls (output addr, load, store, be, fn3, data_in, id, forwarded_store, data_id, possible_issue, new_issue, accepted, input ready, id_needed_by_store, transaction_out, transaction_ready);
modport queue (input addr, load, store, be, fn3, data_in, id, forwarded_store, data_id, possible_issue, new_issue, accepted, output ready, id_needed_by_store, transaction_out, transaction_ready, empty);
modport ls (output addr, load, store, be, fn3, data_in, id, forwarded_store, data_id, possible_issue, new_issue, accepted, input ready, id_needed_by_store, transaction_out, transaction_ready, empty);
endinterface
interface writeback_store_interface;
instruction_id_t id_needed_at_issue;
instruction_id_t id_needed_at_commit;
instruction_id_t commit_id;
logic commit;
logic [MAX_INFLIGHT_COUNT-1:0] hold_for_store_ids;
id_t id_needed;
logic possibly_waiting;
logic waiting;
logic ack;
logic forwarding_data_ready;
logic [31:0] forwarded_data;
logic id_done;
logic [31:0] data;
modport ls (input forwarding_data_ready, forwarded_data, output id_needed_at_issue, id_needed_at_commit, commit_id, commit, hold_for_store_ids);
modport wb (output forwarding_data_ready, forwarded_data, input id_needed_at_issue, id_needed_at_commit, commit_id, commit, hold_for_store_ids);
modport ls (
input id_done, data,
output id_needed, possibly_waiting ,waiting, ack
);
modport wb (
input id_needed, possibly_waiting, waiting, ack,
output id_done, data
);
endinterface
interface ls_sub_unit_interface #(parameter BASE_ADDR = 32'h00000000, parameter UPPER_BOUND = 32'hFFFFFFFF, parameter BIT_CHECK = 4);
@ -293,15 +263,3 @@ interface unsigned_division_interface #(parameter DATA_WIDTH = 32);
modport divider (output remainder, quotient, done, divisor_is_zero, input dividend, divisor, start);
endinterface
//Post-issue forwarding: the unit sets the ID of the instruction that will provide the data,
//and the wb side returns that data.
//data_valid is high when the data is valid
interface post_issue_forwarding_interface;
instruction_id_t id;
logic [31:0] data;
logic data_valid;
//Unit view: drives id, receives data and data_valid
modport unit (input data, data_valid, output id);
//Writeback view: mirror of the unit modport
modport wb (output data, data_valid, input id);
endinterface

View file

@ -33,16 +33,13 @@ module load_store_queue //ID-based input buffer for Load/Store Unit
input logic gc_issue_flush,
load_store_queue_interface.queue lsq,
output logic [MAX_INFLIGHT_COUNT-1:0] wb_hold_for_store_ids,
//Writeback data
input logic [31:0] writeback_data,
input logic writeback_valid
writeback_store_interface.ls wb_store,
output logic ready_for_forwarded_store
);
logic [MAX_INFLIGHT_COUNT-1:0] valid;
logic [$clog2(MAX_INFLIGHT_COUNT)-1:0] hold_for_store_ids [MAX_INFLIGHT_COUNT];
logic [$clog2(MAX_INFLIGHT_COUNT)-1:0] hold_for_store_ids_r [MAX_INFLIGHT_COUNT];
instruction_id_t oldest_id;
logic [MAX_IDS-1:0] valid;
id_t oldest_id;
typedef struct packed {
logic [31:0] addr;
@ -51,34 +48,36 @@ module load_store_queue //ID-based input buffer for Load/Store Unit
logic [3:0] be;
logic [2:0] fn3;
logic [31:0] data_in;
instruction_id_t id;
id_t id;
logic forwarded_store;
instruction_id_t data_id;
id_t data_id;
} lsq_entry_t;
lsq_entry_t new_lsq_entry;
logic [$bits(lsq_entry_t)-1:0] lsq_entries [MAX_INFLIGHT_COUNT];
logic [$bits(lsq_entry_t)-1:0] lsq_entries [MAX_IDS];
lsq_entry_t oldest_lsq_entry;
fifo_interface #(.DATA_WIDTH($bits(instruction_id_t))) oldest_fifo ();
fifo_interface #(.DATA_WIDTH($bits(id_t))) oldest_fifo ();
////////////////////////////////////////////////////
//Implementation
//Can accept an input so long as it is a load or as long as an update from writeback for an existing store is not in progress
//Can always buffer new requests
assign lsq.ready = 1;
//FIFO to store ordering of IDs
taiga_fifo #(.DATA_WIDTH($bits(instruction_id_t)), .FIFO_DEPTH(MAX_INFLIGHT_COUNT)) oldest_id_fifo (
taiga_fifo #(.DATA_WIDTH($bits(id_t)), .FIFO_DEPTH(MAX_IDS)) oldest_id_fifo (
.clk, .rst(rst | gc_issue_flush),
.fifo(oldest_fifo)
);
assign oldest_fifo.data_in = lsq.id;
assign oldest_fifo.push = lsq.possible_issue;
assign oldest_fifo.supress_push = gc_fetch_flush;
assign oldest_fifo.push = lsq.new_issue;
assign oldest_fifo.potential_push = lsq.possible_issue;
assign oldest_fifo.pop = lsq.accepted;
assign oldest_id = oldest_fifo.data_out;
assign lsq.empty = ~oldest_fifo.valid;
////////////////////////////////////////////////////
//Request attributes and input data (LUTRAMs)
always_comb begin
@ -120,41 +119,46 @@ module load_store_queue //ID-based input buffer for Load/Store Unit
// );
////////////////////////////////////////////////////
//Counters for determining if an existing ID's data is needed for a store
//As multiple stores could need the same ID, there is a counter for each ID.
//Store Forwarding Support
//Only a single store can be forwarded at any given time
//The needed result is registered at the writeback stage when the
//needed ID is retired.
logic possible_new_forwarded_store;//To help shorten logic path for registering results in the writeback stage
logic new_forwarded_store;
logic forwarded_store_complete;
id_t needed_id_r;
logic waiting_r;
assign possible_new_forwarded_store = lsq.possible_issue & lsq.forwarded_store;
assign new_forwarded_store = lsq.new_issue & lsq.forwarded_store;
assign forwarded_store_complete = lsq.accepted & oldest_lsq_entry.forwarded_store;
always_comb begin
hold_for_store_ids = hold_for_store_ids_r;
if (new_forwarded_store)
hold_for_store_ids[lsq.data_id] = hold_for_store_ids_r[lsq.data_id] + 1;
if (forwarded_store_complete)
hold_for_store_ids[oldest_lsq_entry.data_id] = hold_for_store_ids_r[oldest_lsq_entry.data_id] - 1;
always_ff @ (posedge clk) begin
if (rst)
waiting_r <= 0;
else
waiting_r <= new_forwarded_store | (waiting_r & ~wb_store.id_done);
end
assign wb_store.waiting = new_forwarded_store | waiting_r;
assign wb_store.possibly_waiting = possible_new_forwarded_store | waiting_r;
assign wb_store.ack = forwarded_store_complete;
assign ready_for_forwarded_store = ~(waiting_r | wb_store.id_done);
always_ff @ (posedge clk) begin
if (rst | gc_issue_flush)
hold_for_store_ids_r <= '{default: 0};
else
hold_for_store_ids_r <= hold_for_store_ids;
if (new_forwarded_store)
needed_id_r <= lsq.data_id;
end
assign wb_store.id_needed = waiting_r ? needed_id_r : lsq.data_id;
always_comb begin
foreach (hold_for_store_ids_r[i])
wb_hold_for_store_ids[i] = (hold_for_store_ids_r[i] != 0);
end
////////////////////////////////////////////////////
//Output
logic [31:0] data_for_alignment;
assign oldest_lsq_entry = lsq_entries[oldest_id];
assign lsq.transaction_ready = oldest_fifo.valid & (~oldest_lsq_entry.forwarded_store | writeback_valid);
assign lsq.id_needed_by_store = oldest_lsq_entry.data_id;
assign lsq.transaction_ready = oldest_fifo.valid & (~oldest_lsq_entry.forwarded_store | wb_store.id_done);
always_comb begin
lsq.transaction_out.addr = oldest_lsq_entry.addr;
@ -164,7 +168,7 @@ module load_store_queue //ID-based input buffer for Load/Store Unit
lsq.transaction_out.fn3 = oldest_lsq_entry.fn3;
lsq.transaction_out.id = oldest_id;
data_for_alignment = oldest_lsq_entry.forwarded_store ? writeback_data : oldest_lsq_entry.data_in;
data_for_alignment = oldest_lsq_entry.forwarded_store ? wb_store.data : oldest_lsq_entry.data_in;
//Input: ABCD
//Assuming aligned requests,
//Possible byte selections: (A/C/D, B/D, C/D, D)

View file

@ -48,23 +48,23 @@ module load_store_unit (
local_memory_interface.master data_bram,
//ID Management
output logic store_complete,
output id_t store_id,
//Writeback-Store Interface
writeback_store_interface.ls wb_store,
input instruction_id_t oldest_id,
output logic load_store_exception_clear,
output instruction_id_t load_store_exception_id,
input logic potential_exception,
//CSR support
input logic[31:0] csr_rd,
input instruction_id_t csr_id,
input id_t csr_id,
input logic csr_done,
output logic ls_is_idle,
output exception_packet_t ls_exception,
output logic ls_exception_valid,
input logic ls_exception_ack,
output logic ls_exception_is_store,
output unit_writeback_t wb
unit_writeback_interface.unit wb
);
localparam NUM_SUB_UNITS = USE_D_SCRATCH_MEM+USE_BUS+USE_DCACHE;
@ -108,7 +108,7 @@ module load_store_unit (
typedef struct packed{
logic [2:0] fn3;
logic [1:0] byte_addr;
instruction_id_t instruction_id;
id_t id;
logic [NUM_SUB_UNITS_W-1:0] subunit_id;
} load_attributes_t;
load_attributes_t load_attributes_in, stage2_attr;
@ -121,7 +121,7 @@ module load_store_unit (
logic [31:0] compare_addr;
logic address_conflict;
logic ready_for_forwarded_store;
////////////////////////////////////////////////////
//Implementation
////////////////////////////////////////////////////
@ -129,12 +129,7 @@ module load_store_unit (
////////////////////////////////////////////////////
//Alignment Exception
instruction_id_t exception_id;
logic exception_is_store;
generate if (ENABLE_M_MODE) begin
assign load_store_exception_clear = issue.new_request;
assign load_store_exception_id = issue.instruction_id;
always_comb begin
case(ls_inputs.fn3)
@ -144,19 +139,12 @@ generate if (ENABLE_M_MODE) begin
default : unaligned_addr = 0;
endcase
end
assign ls_exception_is_store = ls_inputs.store;
assign ls_exception.valid = unaligned_addr & issue.new_request;
assign ls_exception.code = ls_inputs.store ? STORE_AMO_ADDR_MISSALIGNED : LOAD_ADDR_MISSALIGNED;
assign ls_exception.pc = ls_inputs.pc;
assign ls_exception.tval = virtual_address;
assign ls_exception.id = issue.instruction_id;
assign ls_exception.id = issue.id;
always_ff @ (posedge clk) begin
if (ls_exception.valid) begin
exception_is_store <= ls_inputs.store;
exception_id <= issue.instruction_id;
end
end
end
endgenerate
////////////////////////////////////////////////////
@ -195,26 +183,24 @@ endgenerate
assign lsq.data_in = ls_inputs.rs2;
assign lsq.load = ls_inputs.load;
assign lsq.store = ls_inputs.store;
assign lsq.id = issue.instruction_id;
assign lsq.id = issue.id;
assign lsq.forwarded_store = ls_inputs.forwarded_store;
assign lsq.data_id = ls_inputs.store_forward_id;
assign lsq.possible_issue = issue.possible_issue & ~unaligned_addr;
assign lsq.possible_issue = issue.possible_issue;
assign lsq.new_issue = issue.new_request & ~unaligned_addr;
logic [MAX_INFLIGHT_COUNT-1:0] wb_hold_for_store_ids;
load_store_queue lsq_block (.*, .writeback_valid(wb_store.forwarding_data_ready), .writeback_data(wb_store.forwarded_data));
logic [MAX_IDS-1:0] wb_hold_for_store_ids;
load_store_queue lsq_block (.*);
assign shared_inputs = lsq.transaction_out;
assign lsq.accepted = lsq.transaction_ready & ready_for_issue;
////////////////////////////////////////////////////
//Writeback-Store interface
assign wb_store.id_needed_at_issue = ls_inputs.store_forward_id;
assign wb_store.id_needed_at_commit = lsq.id_needed_by_store;
assign wb_store.commit_id = ls_exception_ack ? exception_id : lsq.transaction_out.id;
assign wb_store.commit = (lsq.accepted & lsq.transaction_out.store) | (ls_exception_ack & exception_is_store);
assign wb_store.hold_for_store_ids = wb_hold_for_store_ids;
//ID Management
assign store_complete = lsq.accepted & lsq.transaction_out.store;
assign store_id = lsq.transaction_out.id;
////////////////////////////////////////////////////
//Unit tracking
assign current_unit = sub_unit_address_match;
@ -236,12 +222,14 @@ endgenerate
////////////////////////////////////////////////////
//Primary Control Signals
assign ls_is_idle = lsq.empty & (~load_attributes.valid);
assign units_ready = &unit_ready;
assign load_complete = |unit_data_valid;
assign ready_for_issue = units_ready & (~unit_switch_stall);
assign issue.ready = lsq.ready;
assign issue.ready = ls_inputs.forwarded_store ? lsq.ready & ready_for_forwarded_store : lsq.ready;
assign issue_request = lsq.accepted;
////////////////////////////////////////////////////
@ -250,12 +238,12 @@ endgenerate
taiga_fifo #(.DATA_WIDTH($bits(load_attributes_t)), .FIFO_DEPTH(ATTRIBUTES_DEPTH)) attributes_fifo (.fifo(load_attributes), .*);
assign load_attributes_in.fn3 = shared_inputs.fn3;
assign load_attributes_in.byte_addr = shared_inputs.addr[1:0];
assign load_attributes_in.instruction_id = shared_inputs.id;
assign load_attributes_in.id = shared_inputs.id;
assign load_attributes.data_in = load_attributes_in;
assign load_attributes.push = issue_request & shared_inputs.load;
assign load_attributes.potential_push = issue_request & shared_inputs.load;
assign load_attributes.pop = load_complete;
assign load_attributes.supress_push = 0;
assign stage2_attr = load_attributes.data_out;
@ -330,8 +318,8 @@ endgenerate
////////////////////////////////////////////////////
//Output bank
assign wb.rd = csr_done ? csr_rd : final_load_data;
assign wb.done = csr_done | load_complete | (ls_exception_ack & ~exception_is_store);
assign wb.id = csr_done ? csr_id : (ls_exception_ack ? exception_id : stage2_attr.instruction_id);
assign wb.done = csr_done | load_complete;
assign wb.id = csr_done ? csr_id : stage2_attr.id;
////////////////////////////////////////////////////
//End of Implementation

View file

@ -22,17 +22,18 @@
module lut_ram #(
parameter WIDTH = 32,
parameter DEPTH = 32
parameter DEPTH = 32,
parameter READ_PORTS = 2
)
(
input logic clk,
input logic[$clog2(DEPTH)-1:0] waddr,
input logic[$clog2(DEPTH)-1:0] raddr,
input logic[$clog2(DEPTH)-1:0] raddr [READ_PORTS],
input logic ram_write,
input logic[WIDTH-1:0] new_ram_data,
output logic[WIDTH-1:0] ram_data_out
output logic[WIDTH-1:0] ram_data_out [READ_PORTS]
);
@ -44,6 +45,10 @@ module lut_ram #(
ram[waddr] <= new_ram_data;
end
assign ram_data_out = ram[raddr];
always_comb begin
for (int i = 0; i < READ_PORTS; i++) begin
ram_data_out[i] = ram[raddr[i]];
end
end
endmodule

View file

@ -30,17 +30,20 @@ module mul_unit(
input mul_inputs_t mul_inputs,
unit_issue_interface.unit issue,
output unit_writeback_t wb
unit_writeback_interface.unit wb
);
logic signed [63:0] result;
logic mulh [2];
logic done [2];
instruction_id_t id [2];
id_t id [2];
logic rs1_signed, rs2_signed;
logic signed [32:0] rs1_ext, rs2_ext;
logic signed [32:0] rs1_r, rs2_r;
logic stage1_advance;
logic stage2_advance;
////////////////////////////////////////////////////
//Implementation
assign rs1_signed = mul_inputs.op[1:0] inside {MULH_fn3[1:0], MULHSU_fn3[1:0]};//MUL doesn't matter
@ -49,26 +52,36 @@ module mul_unit(
assign rs1_ext = signed'({mul_inputs.rs1[31] & rs1_signed, mul_inputs.rs1});
assign rs2_ext = signed'({mul_inputs.rs2[31] & rs2_signed, mul_inputs.rs2});
assign issue.ready = (~done[0] | ~done[1]);//stage1_advance;
assign stage1_advance = ~done[0] | stage2_advance;
assign stage2_advance = ~done[1] | wb.ack;
//Input and output registered Multiply
always_ff @ (posedge clk) begin
rs1_r <= rs1_ext;
rs2_r <= rs2_ext;
result <= 64'(rs1_r * rs2_r);
if (stage1_advance) begin
rs1_r <= rs1_ext;
rs2_r <= rs2_ext;
end
if (stage2_advance) begin
result <= 64'(rs1_r * rs2_r);
end
end
always_ff @ (posedge clk) begin
mulh[0] <= (mul_inputs.op[1:0] != MUL_fn3[1:0]);
id[0] <= issue.instruction_id;
done[0] <= issue.new_request;
mulh[1] <= mulh[0];
id[1] <= id[0];
done[1] <= done[0];
if (stage1_advance) begin
mulh[0] <= (mul_inputs.op[1:0] != MUL_fn3[1:0]);
id[0] <= issue.id;
done[0] <= issue.new_request;
end
if (stage2_advance) begin
mulh[1] <= mulh[0];
id[1] <= id[0];
done[1] <= done[0];
end
end
//Issue/write-back handshaking
////////////////////////////////////////////////////
assign issue.ready = 1;
assign wb.rd = mulh[1] ? result[63:32] : result[31:0];
assign wb.done = done[1];
assign wb.id = id[1];

View file

@ -58,8 +58,8 @@ module one_hot_to_integer
////////////////////////////////////////////////////
//Assertions
always_ff @ (posedge clk) begin
assert (rst || (~rst && $onehot0(one_hot))) else $error("One-hot signal has multiple bits set!");
end
// always_ff @ (posedge clk) begin
// assert (rst || (~rst && $onehot0(one_hot))) else $error("One-hot signal has multiple bits set!");
//end
endmodule

View file

@ -1,186 +0,0 @@
/*
* Copyright © 2017-2019 Eric Matthews, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import riscv_types::*;
import taiga_types::*;
//Fetch buffer with pre-decode.
//Incoming instructions (pre_decode_push) are annotated with pre-decoded control fields
//(register usage, RAS call/return hints, ALU control, branch predictor metadata) and
//buffered in a FIFO. The output register fb is loaded either directly from the input
//(bypass, when the FIFO is empty) or from the FIFO head, and is consumed on pre_decode_pop.
module pre_decode
(
input logic clk,
input logic rst,
//Fetch
input logic [31:0] pre_decode_instruction,
input logic [31:0] pre_decode_pc,
input branch_predictor_metadata_t branch_metadata,
input logic branch_prediction_used,
input logic [BRANCH_PREDICTOR_WAYS-1:0] bp_update_way,
input logic pre_decode_push,
//Global Control
input logic gc_fetch_flush,
//Decode
input logic pre_decode_pop,
output logic fb_valid,
output fetch_buffer_packet_t fb
);
logic buffer_reset;
logic [6:0] opcode;
logic [4:0] opcode_trimmed;
logic [4:0] rs1_addr;
logic [4:0] rs2_addr;
logic [4:0] rd_addr;
logic [2:0] fn3;
logic csr_imm_op;
logic sys_op;
logic rs1_link, rd_link, rs1_eq_rd, use_ras;
fetch_buffer_packet_t new_data;
fetch_buffer_packet_t data_in;
fetch_buffer_packet_t data_out;
fifo_interface #(.DATA_WIDTH($bits(fetch_buffer_packet_t))) fb_fifo();
////////////////////////////////////////////////////
//Implementation
//FIFO
//The FIFO and the fb_valid flag are both cleared on reset or a fetch flush
assign buffer_reset = rst | gc_fetch_flush;
assign fb_fifo.supress_push = 0;//Covered by resetting on gc_fetch_flush
//Only push into the FIFO when the new entry cannot go straight into the output register:
//either the output register stays occupied this cycle, or older entries are already queued
assign fb_fifo.push = pre_decode_push & ((fb_valid & ~pre_decode_pop) | fb_fifo.valid);
assign fb_fifo.pop = pre_decode_pop & fb_fifo.valid;
assign fb_fifo.data_in = data_in;
assign data_out = fb_fifo.data_out;
//Bypass overrides
//Queued entries take priority over the incoming entry
assign new_data = fb_fifo.valid ? data_out : data_in;
//Output register is reloaded whenever it is empty or being consumed
always_ff @ (posedge clk) begin
if (~fb_valid | pre_decode_pop)
fb <= new_data;
end
//fb_valid: set on any push, cleared when popped with nothing left in the FIFO (set has priority)
set_clr_reg_with_rst #(.SET_OVER_CLR(1), .WIDTH(1), .RST_VALUE(0)) fb_valid_m (
.clk, .rst(buffer_reset),
.set(pre_decode_push),
.clr(pre_decode_pop & ~fb_fifo.valid),
.result(fb_valid)
);
taiga_fifo #(
.DATA_WIDTH($bits(fetch_buffer_packet_t)),
.FIFO_DEPTH(FETCH_BUFFER_DEPTH)
) fb_fifo_block (.fifo(fb_fifo), .rst(buffer_reset), .*);
////////////////////////////////////////////////////
//Pre-Decode
assign data_in.instruction = pre_decode_instruction;
assign data_in.pc = pre_decode_pc;
//Instruction components
assign fn3 = pre_decode_instruction[14:12];
assign opcode = pre_decode_instruction[6:0];
//Upper five opcode bits; the two low bits are dropped
assign opcode_trimmed = opcode[6:2];
assign rs1_addr = pre_decode_instruction[19:15];
assign rs2_addr = pre_decode_instruction[24:20];
assign rd_addr = pre_decode_instruction[11:7];
//SYSTEM opcode with fn3[2] set
assign csr_imm_op = (opcode_trimmed == SYSTEM_T) && fn3[2];
//SYSTEM opcode with fn3 == 0
assign sys_op = (opcode_trimmed == SYSTEM_T) && (fn3 == 0);
////////////////////////////////////////////////////
//RAS Support
//Registers 1 and 5 are treated as link registers
assign rs1_link = (rs1_addr inside {1,5});
assign rd_link = (rd_addr inside {1,5});
assign rs1_eq_rd = (rs1_addr == rd_addr);
//JALR is a return when rs1 is a link register and rd is either not a link register
//or a different link register than rs1
assign use_ras = (opcode_trimmed == JALR_T) && ((rs1_link & ~rd_link) | (rs1_link & rd_link & ~rs1_eq_rd));
assign data_in.is_return = use_ras;
//JAL/JALR writing a link register is a call
assign data_in.is_call = (opcode_trimmed inside {JAL_T, JALR_T}) && rd_link;
////////////////////////////////////////////////////
//Register File Support
assign data_in.uses_rs1 = !(opcode_trimmed inside {LUI_T, AUIPC_T, JAL_T, FENCE_T} || csr_imm_op || sys_op);
assign data_in.uses_rs2 = opcode_trimmed inside {BRANCH_T, STORE_T, ARITH_T, AMO_T};
assign data_in.uses_rd = !(opcode_trimmed inside {BRANCH_T, STORE_T, FENCE_T} || sys_op);
////////////////////////////////////////////////////
//Branch Predictor support
//Passed through unchanged from fetch
assign data_in.branch_metadata = branch_metadata;
assign data_in.branch_prediction_used = branch_prediction_used;
assign data_in.bp_update_way = bp_update_way;
////////////////////////////////////////////////////
//ALU Control Signals
//Add cases: JAL, JALR, LUI, AUIPC, ADD[I], all logic ops
//sub cases: SUB, SLT[U][I]
logic sub_instruction;
assign sub_instruction = (fn3 == ADD_SUB_fn3) && pre_decode_instruction[30] && opcode[5];//If ARITH instruction
assign data_in.alu_sub = ~opcode[2] & (fn3 inside {SLTU_fn3, SLT_fn3} || sub_instruction);//opcode[2] covers LUI,AUIPC,JAL,JALR
//NOTE(review): the case below has no default arm; it relies on the eight listed fn3
//constants covering every 3-bit encoding - confirm against riscv_types
always_comb begin
case (fn3)
SLT_fn3 : data_in.alu_logic_op = ALU_LOGIC_ADD;
SLTU_fn3 : data_in.alu_logic_op = ALU_LOGIC_ADD;
SLL_fn3 : data_in.alu_logic_op = ALU_LOGIC_ADD;
XOR_fn3 : data_in.alu_logic_op = ALU_LOGIC_XOR;
OR_fn3 : data_in.alu_logic_op = ALU_LOGIC_OR;
AND_fn3 : data_in.alu_logic_op = ALU_LOGIC_AND;
SRA_fn3 : data_in.alu_logic_op = ALU_LOGIC_ADD;
ADD_SUB_fn3 : data_in.alu_logic_op = ALU_LOGIC_ADD;
endcase
//put LUI, AUIPC, JAL and JALR through adder path
data_in.alu_logic_op = opcode[2] ? ALU_LOGIC_ADD : data_in.alu_logic_op;
end
logic non_mul_div_arith_op;
assign non_mul_div_arith_op = ((opcode_trimmed == ARITH_T) && ~pre_decode_instruction[25]);//pre_decode_instruction[25] denotes multiply/divide instructions
assign data_in.alu_request = non_mul_div_arith_op || (opcode_trimmed inside {ARITH_IMM_T, AUIPC_T, LUI_T, JAL_T, JALR_T});
//First ALU operand source: register file, PC, or zero
always_comb begin
if (opcode_trimmed inside {ARITH_T, ARITH_IMM_T})
data_in.alu_rs1_sel = ALU_RS1_RF;
else if (opcode_trimmed inside {JAL_T, JALR_T, AUIPC_T})//AUIPC JAL JALR
data_in.alu_rs1_sel = ALU_RS1_PC;
else
data_in.alu_rs1_sel = ALU_RS1_ZERO;//LUI
end
//Second ALU operand source, selected by opcode; register file is the fallback
always_comb begin
if (opcode_trimmed inside {LUI_T, AUIPC_T}) //LUI or AUIPC
data_in.alu_rs2_sel = ALU_RS2_LUI_AUIPC;
else if (opcode_trimmed == ARITH_IMM_T) //ARITH_IMM
data_in.alu_rs2_sel = ALU_RS2_ARITH_IMM;
else if (opcode_trimmed inside {JAL_T, JALR_T} ) //JAL JALR
data_in.alu_rs2_sel = ALU_RS2_JAL_JALR;
else
data_in.alu_rs2_sel = ALU_RS2_RF;
end
////////////////////////////////////////////////////
//Assertions
endmodule

View file

@ -27,47 +27,44 @@ import taiga_types::*;
module ras (
input logic clk,
input logic rst,
input logic gc_fetch_flush,
ras_interface.self ras
);
(* ramstyle = "MLAB, no_rw_check" *) logic[31:0] lut_ram [RAS_DEPTH-1:0];
(* ramstyle = "MLAB, no_rw_check" *) logic[31:0] lut_ram [RAS_DEPTH];
localparam RAS_DEPTH_W = $clog2(RAS_DEPTH);
logic[RAS_DEPTH_W-1:0] read_index;
logic[RAS_DEPTH_W-1:0] write_index;
(* ramstyle = "MLAB, no_rw_check" *) logic valid_chain [RAS_DEPTH-1:0];
logic valid_chain_update;
logic [RAS_DEPTH_W-1:0] read_index;
logic [RAS_DEPTH_W-1:0] new_index;
fifo_interface #(.DATA_WIDTH(RAS_DEPTH_W)) ri_fifo();
///////////////////////////////////////////////////////
//For simulation purposes
initial lut_ram = '{default: 0};
initial valid_chain = '{default: 0};
///////////////////////////////////////////////////////
assign ras.addr = lut_ram[read_index];
assign ras.valid = valid_chain[read_index];
//On a speculative branch, save the current stack pointer
//Restored if branch is mispredicted (gc_fetch_flush)
taiga_fifo #(.DATA_WIDTH(RAS_DEPTH_W), .FIFO_DEPTH(MAX_IDS))
read_index_fifo (.clk, .rst(rst | gc_fetch_flush), .fifo(ri_fifo));
assign ri_fifo.data_in = read_index;
assign ri_fifo.push = ras.branch_fetched;
assign ri_fifo.potential_push = ras.branch_fetched;
assign ri_fifo.pop = ras.branch_retired;
always_ff @ (posedge clk) begin
if (ras.push)
lut_ram[write_index] <= ras.new_addr;
lut_ram[new_index] <= ras.new_addr;
end
//Rolls over when full, most recent calls will be correct, but calls greater than depth
//will be lost.
logic [RAS_DEPTH_W-1:0] new_index_base;
assign new_index_base = (gc_fetch_flush & ri_fifo.valid) ? ri_fifo.data_out : read_index;
assign new_index = new_index_base + RAS_DEPTH_W'(ras.push) - RAS_DEPTH_W'(ras.pop);
always_ff @ (posedge clk) begin
if (rst)
read_index <= 0;
else if (ras.push & ~ras.pop)
read_index <= write_index;
else if (ras.pop & ~ras.push)
read_index <= read_index - 1;
read_index <= new_index;
end
assign write_index = (ras.push & ~ras.pop) ? (read_index + RAS_DEPTH_W'(valid_chain[read_index])) : read_index;
assign valid_chain_update = ras.push | ras.pop;
always_ff @ (posedge clk) begin
if (valid_chain_update)
valid_chain[write_index] <= ras.push;
end
endmodule
endmodule

91
core/regfile_bank_sel.sv Normal file
View file

@ -0,0 +1,91 @@
/*
* Copyright © 2020 Eric Matthews, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
//Bank selection for a register file with multiple commit (write) ports.
//Each commit port owns a 32-entry table of port indices (sel_bank). The tables are
//encoded so that XORing the entries of all tables at a given register address yields
//the index of the commit port that last wrote that register; rs_sel reports that
//index for every read port. All tables start at zero, so every register initially
//selects port 0.
module regfile_bank_sel
import taiga_config::*;
import riscv_types::*;
import taiga_types::*;
(
input logic clk,
//rst is not referenced inside this module
input logic rst,
//Register file
input rs_addr_t [REGFILE_READ_PORTS-1:0] rs_addr,
output logic [LOG2_COMMIT_PORTS-1:0] rs_sel [REGFILE_READ_PORTS],
//Writeback
input logic[4:0] rd_addr [COMMIT_PORTS],
//No explicit data type given; one element per commit port, used as the write enable below
input rd_retired [COMMIT_PORTS]
);
//////////////////////////////////////////
//One table of port indices per commit port, indexed by register address
typedef logic [LOG2_COMMIT_PORTS-1:0] sel_bank_t [32] ;
sel_bank_t sel_bank [COMMIT_PORTS];
logic [LOG2_COMMIT_PORTS-1:0] new_bank_sel [COMMIT_PORTS];
genvar i;
////////////////////////////////////////////////////
//Implementation
////////////////////////////////////////////////////
//New Entry Determination
//The value port i stores is its own index XORed with the current entries of every
//other port's table at rd_addr[i]. After the write, the XOR across all tables at
//that address equals i.
//NOTE(review): this assumes two ports never retire to the same rd_addr in the same
//cycle; nothing in this module enforces that - confirm at the caller
always_comb begin
for (int i = 0; i < COMMIT_PORTS; i++) begin
new_bank_sel[i] = LOG2_COMMIT_PORTS'(i);
for (int j = 0; j < COMMIT_PORTS; j++) begin
if (j != i) new_bank_sel[i] ^= sel_bank[j][rd_addr[i]];
end
end
end
////////////////////////////////////////////////////
//Memory Blocks
//Each port's table has a single write port, enabled by that port's rd_retired
generate for (i = 0; i < COMMIT_PORTS; i++) begin
initial sel_bank[i] = '{default: 0};
always_ff @ (posedge clk) begin
if (rd_retired[i])
sel_bank[i][rd_addr[i]] <= new_bank_sel[i];
end
end endgenerate
////////////////////////////////////////////////////
//Outputs
//XOR of all tables at the read address recovers the last-writing port index
always_comb begin
for (int i = 0; i < REGFILE_READ_PORTS; i++) begin
rs_sel[i] = 0;
for (int j = 0; j < COMMIT_PORTS; j++) begin
rs_sel[i] ^= sel_bank[j][rs_addr[i]];
end
end
end
////////////////////////////////////////////////////
//End of Implementation
////////////////////////////////////////////////////
////////////////////////////////////////////////////
//Assertions
endmodule

115
core/register_file.sv Executable file → Normal file
View file

@ -20,108 +20,61 @@
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import riscv_types::*;
import taiga_types::*;
module register_file(
module register_file
import taiga_config::*;
import riscv_types::*;
import taiga_types::*;
#(
parameter NUM_READ_PORTS = 2
)
(
input logic clk,
input logic rst,
input logic gc_supress_writeback,
input logic instruction_issued,
register_file_writeback_interface.rf wb,
register_file_issue_interface.rf issue,
//Writeback
input logic [4:0] rd_addr,
input logic [31:0] new_data,
input logic commit,
//Trace signals
output logic tr_rs1_forwarding_needed,
output logic tr_rs2_forwarding_needed,
output logic tr_rs1_and_rs2_forwarding_needed
);
//Issue
input rs_addr_t [NUM_READ_PORTS-1:0] read_addr,
output logic [31:0] data [NUM_READ_PORTS]
);
(* ramstyle = "MLAB, no_rw_check" *) logic [XLEN-1:0] register [32];
(* ramstyle = "MLAB, no_rw_check" *) instruction_id_t in_use_by [32];
logic [31:0] register_file [32];
////////////////////////////////////////////////////
//Implementation
logic rs1_inuse;
logic rs2_inuse;
logic rs1_feedforward;
logic rs2_feedforward;
logic valid_write;
logic in_use_match;
//////////////////////////////////////////
////////////////////////////////////////////////////
//Register File
//Assign zero to r0 and initialize all registers to zero
initial register = '{default: 0};
initial in_use_by = '{default: 0};
//Writeback unit does not assert wb.commit when the target register is r0
initial register_file = '{default: 0};
always_ff @ (posedge clk) begin
if (~gc_supress_writeback & valid_write)
register[wb.rd_addr] <= wb.rd_data;
if (commit)
register_file[rd_addr] <= new_data;
end
assign in_use_match = (wb.id == in_use_by[wb.rd_addr]) && valid_write;
reg_inuse inuse (.*,
.clr(1'b0),
.rs1_addr(issue.rs1_addr),.rs2_addr(issue.rs2_addr), .issued_rd_addr(issue.rd_addr),
.retired_rd_addr(wb.rd_addr),
.issued(issue.instruction_issued),
.retired(in_use_match),
.rs1_inuse(rs1_inuse),
.rs2_inuse(rs2_inuse)
);
always_ff @ (posedge clk) begin
if (issue.instruction_issued)
in_use_by[issue.rd_addr] <= issue.id;
always_comb begin
foreach(read_addr[i])
data[i] = register_file[read_addr[i]];
end
assign wb.rs1_id = in_use_by[issue.rs1_addr];
assign wb.rs2_id = in_use_by[issue.rs2_addr];
assign issue.rs2_id = wb.rs2_id;
assign valid_write = wb.rd_nzero & wb.retiring;
assign rs1_feedforward = rs1_inuse;
assign rs2_feedforward = rs2_inuse;
assign issue.rs1_data = rs1_feedforward ? wb.rs1_data : register[issue.rs1_addr];
assign issue.rs2_data = rs2_feedforward ? wb.rs2_data : register[issue.rs2_addr];
assign issue.rs1_conflict = issue.uses_rs1 & rs1_inuse & ~wb.rs1_valid;
assign issue.rs2_conflict = issue.uses_rs2 & rs2_inuse & ~wb.rs2_valid;
////////////////////////////////////////////////////
//End of Implementation
////////////////////////////////////////////////////
////////////////////////////////////////////////////
//Assertions
always_ff @ (posedge clk) begin
assert (!(issue.instruction_issued && issue.rd_addr == 0)) else $error("Write to inuse for register x0 occured!");
end
write_to_zero_reg_assertion:
assert property (@(posedge clk) disable iff (rst) !(commit & rd_addr == 0))
else $error("Write to zero reg occured!");
////////////////////////////////////////////////////
//Simulation Only
// synthesis translate_off
//synthesis translate_off
logic [31:0][31:0] sim_registers_unamed;
simulation_named_regfile sim_register;
always_comb begin
foreach(register[i])
sim_registers_unamed[i] = register[i];
foreach(register_file[i])
sim_registers_unamed[i] = register_file[i];
sim_register = sim_registers_unamed;
end
// synthesis translate_on
////////////////////////////////////////////////////
//Trace Interface
generate if (ENABLE_TRACE_INTERFACE) begin
assign tr_rs1_forwarding_needed = instruction_issued & rs1_inuse & issue.uses_rs1 & ~tr_rs1_and_rs2_forwarding_needed;
assign tr_rs2_forwarding_needed = instruction_issued & rs2_inuse & issue.uses_rs2 & ~tr_rs1_and_rs2_forwarding_needed;
assign tr_rs1_and_rs2_forwarding_needed = instruction_issued & (rs1_inuse & issue.uses_rs1) & (rs2_inuse & issue.uses_rs2);
end
endgenerate
//synthesis translate_on
endmodule

View file

@ -0,0 +1,203 @@
/*
* Copyright © 2020 Eric Matthews, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
//Register file and writeback stage.
// - Arbitrates completed execution units onto COMMIT_PORTS commit ports
// - Holds one register file bank per commit port plus a bank-select table
//   (regfile_bank_sel) that picks, per read port, which bank to read from
// - Captures retiring results needed by a waiting store (wb_store interface)
module register_file_and_writeback
    import taiga_config::*;
    import riscv_types::*;
    import taiga_types::*;
    (
        input logic clk,
        input logic rst,

        //Issue interface
        input issue_packet_t issue,
        input logic alu_issued,
        output logic [31:0] rs_data [REGFILE_READ_PORTS],

        //ID Metadata
        output id_t ids_retiring [COMMIT_PORTS],
        output logic retired [COMMIT_PORTS],
        input logic [4:0] retired_rd_addr [COMMIT_PORTS],
        input id_t id_for_rd [COMMIT_PORTS],

        //Writeback
        unit_writeback_interface.wb unit_wb[NUM_WB_UNITS],
        writeback_store_interface.wb wb_store,

        //Trace signals
        output logic tr_rs1_forwarding_needed,
        output logic tr_rs2_forwarding_needed,
        output logic tr_rs1_and_rs2_forwarding_needed
    );

    //Register File
    typedef logic [XLEN-1:0] register_file_t [32];
    register_file_t register_file [COMMIT_PORTS];
    logic [LOG2_COMMIT_PORTS-1:0] rs_sel [REGFILE_READ_PORTS];
    //Per commit port: this retirement updates the bank-select table and its bank.
    //Declared here, ahead of the register file instances that reference it.
    logic update_lvt [COMMIT_PORTS];

    //Writeback
    logic alu_selected;
    logic unit_ack [NUM_WB_UNITS];
    //aliases for write-back-interface signals
    id_t unit_instruction_id [NUM_WB_UNITS];
    logic unit_done [NUM_WB_UNITS];
    logic [XLEN-1:0] unit_rd [NUM_WB_UNITS];

    //Per commit port: selected unit and its result
    logic [WB_UNITS_WIDTH-1:0] retiring_unit_select [COMMIT_PORTS];
    logic [31:0] retiring_data [COMMIT_PORTS];

    //Read data from every bank, for every read port
    typedef logic [31:0] rs_data_set_t [REGFILE_READ_PORTS];
    rs_data_set_t rs_data_set [COMMIT_PORTS];

    //Store forwarding support
    logic [31:0] commit_regs [COMMIT_PORTS];
    logic [LOG2_COMMIT_PORTS-1:0] store_reg_sel;
    logic [LOG2_COMMIT_PORTS-1:0] store_reg_sel_r;
    logic [COMMIT_PORTS-1:0] store_id_match;

    genvar i;
    ////////////////////////////////////////////////////
    //Implementation

    //Re-assigning interface inputs to array types so that they can be dynamically indexed
    generate for (i=0; i< NUM_WB_UNITS; i++) begin : wb_interfaces_to_arrays_g
        assign unit_instruction_id[i] = unit_wb[i].id;
        assign unit_done[i] = unit_wb[i].done;
        assign unit_rd[i] = unit_wb[i].rd;
        assign unit_wb[i].ack = unit_ack[i];
    end endgenerate

    ////////////////////////////////////////////////////
    //Unit select for register file
    //Iterating through all commit ports:
    //   Search for complete units (in fixed unit order)
    //   Assign to a commit port, mask that unit and commit port
    always_comb begin
        unit_ack = '{default: 0};
        retired = '{default: 0};

        for (int i = 0; i < COMMIT_PORTS; i++) begin
            //Default selection when no unit is done: unit index i
            retiring_unit_select[i] = WB_UNITS_WIDTH'(i);
            //Unit index i will always be handled by commit port i or lower, so it
            //can be skipped when checking higher commit port indices
            for (int j = i; j < NUM_WB_UNITS; j++) begin
                //First done unit not already acknowledged by a lower port wins this port
                if (unit_done[j] & ~unit_ack[j] & ~retired[i]) begin
                    retiring_unit_select[i] = WB_UNITS_WIDTH'(j);
                    unit_ack[j] = 1;
                    retired[i] = 1;
                end
            end

            //ID and data muxes
            ids_retiring[i] = unit_instruction_id[retiring_unit_select[i]];
            retiring_data[i] = unit_rd[retiring_unit_select[i]];
        end

        //Late cycle abort for when ALU is not issued to.
        //Note: unit_ack for the ALU is left asserted; only retired[0] is masked.
        alu_selected = (retiring_unit_select[0] == ALU_UNIT_WB_ID);
        if (alu_selected) retired[0] &= alu_issued;
    end

    ////////////////////////////////////////////////////
    //Register Files
    //Implemented in separate module as there is not universal tool support for inferring
    //arrays of memory blocks.
    generate for (i = 0; i < COMMIT_PORTS; i++) begin
        register_file #(.NUM_READ_PORTS(REGFILE_READ_PORTS)) register_file_blocks (
            .clk, .rst,
            .rd_addr(retired_rd_addr[i]),
            .new_data(retiring_data[i]),
            .commit(update_lvt[i] & (|retired_rd_addr[i])), //never write register zero
            .read_addr(issue.rs_addr),
            .data(rs_data_set[i])
        );
    end endgenerate

    ////////////////////////////////////////////////////
    //Register File LVT
    //Only update lvt if the retiring instruction is the most recently issued
    //write to a given register. This check allows multiple outstanding writes
    //to the same register. As instructions can complete out-of-order, only
    //the most recently issued write to any given register will be committed
    always_comb begin
        //Port 0: an ALU retirement is qualified by alu_issued instead of the ID compare
        update_lvt[0] = retired[0] & (alu_selected ? alu_issued : (id_for_rd[0] == ids_retiring[0]));
        //Other ports: additionally suppressed when the ALU retires on port 0 to the same rd
        //NOTE(review): presumably the ALU write is the newer one in that case - confirm
        for (int i = 1; i < COMMIT_PORTS; i++)
            update_lvt[i] = retired[i] & (id_for_rd[i] == ids_retiring[i])
                & ~(alu_selected & retired[0] & (retired_rd_addr[0] == retired_rd_addr[i]));
    end

    regfile_bank_sel regfile_lvt (
        .clk, .rst,
        .rs_addr(issue.rs_addr),
        .rs_sel,
        .rd_addr(retired_rd_addr),
        .rd_retired(update_lvt)
    );

    ////////////////////////////////////////////////////
    //Register File Muxing
    //Each read port takes its data from the bank chosen by the bank-select table
    always_comb begin
        for (int i = 0; i < REGFILE_READ_PORTS; i++) begin
            rs_data[i] = rs_data_set[rs_sel[i]][i];
        end
    end

    ////////////////////////////////////////////////////
    //Store Forwarding Support
    //Capture the retiring result on any commit port whose ID matches the one the store needs
    generate for (i = 0; i < COMMIT_PORTS; i++) begin
        always_ff @ (posedge clk) begin
            if (wb_store.possibly_waiting & retired[i] & (wb_store.id_needed == ids_retiring[i]))
                commit_regs[i] <= retiring_data[i];
        end
    end endgenerate

    always_comb begin
        //Per-port match, qualified by the store actually waiting
        store_id_match = 0;
        for (int i = 0; i < COMMIT_PORTS; i++) begin
            if (wb_store.waiting & retired[i] & (wb_store.id_needed == ids_retiring[i]))
                store_id_match[i] = 1;
        end

        //Index of the matching port; defaults to port 0, highest matching index wins
        store_reg_sel = 0;
        for (int i = 1; i < COMMIT_PORTS; i++) begin
            if (retired[i] & (wb_store.id_needed == ids_retiring[i]))
                store_reg_sel = LOG2_COMMIT_PORTS'(i);
        end
    end

    //Remember which commit register holds the store's data
    always_ff @ (posedge clk) begin
        if (|store_id_match)
            store_reg_sel_r <= store_reg_sel;
    end

    //id_done: set on a match, cleared by reset or acknowledge (clear has priority)
    always_ff @ (posedge clk) begin
        if (rst | wb_store.ack)
            wb_store.id_done <= 0;
        else if (|store_id_match)
            wb_store.id_done <= 1;
    end

    assign wb_store.data = commit_regs[store_reg_sel_r];

    ////////////////////////////////////////////////////
    //Trace Interface
    //Operands are only ever read from the register file banks in this module
    //(no forwarding path), so the forwarding events never occur. Drive constants
    //so the outputs are not left undriven (X in 4-state simulation).
    assign tr_rs1_forwarding_needed = 0;
    assign tr_rs2_forwarding_needed = 0;
    assign tr_rs1_and_rs2_forwarding_needed = 0;

    ////////////////////////////////////////////////////
    //End of Implementation
    ////////////////////////////////////////////////////

    ////////////////////////////////////////////////////
    //Assertions

endmodule

View file

@ -27,6 +27,8 @@ package riscv_types;
parameter PAGE_ADDR_W = 12;
parameter ECODE_W = 5;
typedef logic [4:0] rs_addr_t;
typedef struct packed {
logic [6:0] fn7;
logic [4:0] rs2_addr;

View file

@ -57,7 +57,9 @@ module taiga (
ras_interface ras();
register_file_issue_interface rf_issue();
issue_packet_t issue;
logic [31:0] rs_data [REGFILE_READ_PORTS];
alu_inputs_t alu_inputs;
load_store_inputs_t ls_inputs;
@ -66,14 +68,13 @@ module taiga (
div_inputs_t div_inputs;
gc_inputs_t gc_inputs;
unit_issue_interface unit_issue [NUM_UNITS-1:0]();
unit_issue_interface unit_issue [NUM_UNITS]();
logic alu_issued;
exception_packet_t ls_exception;
logic ls_exception_valid;
logic ls_exception_is_store;
tracking_interface ti();
unit_writeback_t unit_wb [NUM_WB_UNITS-1:0];
register_file_writeback_interface rf_wb();
unit_writeback_interface unit_wb [NUM_WB_UNITS]();
mmu_interface immu();
mmu_interface dmmu();
@ -83,16 +84,39 @@ module taiga (
logic tlb_on;
logic [ASIDLEN-1:0] asid;
//Pre-Decode
logic pre_decode_push;
logic pre_decode_pop;
logic [31:0] pre_decode_instruction;
logic [31:0] pre_decode_pc;
branch_predictor_metadata_t branch_metadata;
logic branch_prediction_used;
logic [BRANCH_PREDICTOR_WAYS-1:0] bp_update_way;
logic fb_valid;
fetch_buffer_packet_t fb;
//Instruction ID/Metadata
//ID issuing
id_t pc_id;
logic pc_id_available;
logic pc_id_assigned;
logic [31:0] if_pc;
//Fetch stage
id_t fetch_id;
logic fetch_complete;
logic [31:0] fetch_instruction;
//Decode stage
logic decode_advance;
decode_packet_t decode;
//Issue stage
id_t rs_id [REGFILE_READ_PORTS];
logic rs_inuse [REGFILE_READ_PORTS];
logic rs_id_inuse [REGFILE_READ_PORTS];
//Branch predictor
branch_metadata_t branch_metadata_if;
branch_metadata_t branch_metadata_ex;
//ID freeing
logic store_complete;
id_t store_id;
logic branch_complete;
id_t branch_id;
logic system_op_or_exception_complete;
logic exception_with_rd_complete;
id_t system_op_or_exception_id;
logic instruction_retired;
logic [$clog2(MAX_COMPLETE_COUNT)-1:0] retire_inc;
//Exception
id_t exception_id;
logic [31:0] exception_pc;
//Global Control
logic gc_issue_hold;
@ -100,31 +124,26 @@ module taiga (
logic gc_fetch_flush;
logic gc_fetch_pc_override;
logic gc_supress_writeback;
instruction_id_t oldest_id;
logic load_store_issue;
logic [31:0] gc_fetch_pc;
logic ls_exception_ack;
logic[31:0] csr_rd;
instruction_id_t csr_id;
id_t csr_id;
logic csr_done;
logic ls_is_idle;
//Decode Unit and Fetch Unit
logic illegal_instruction;
logic instruction_queue_empty;
logic instruction_issued;
logic instruction_issued_no_rd;
logic instruction_issued_with_rd;
logic instruction_complete;
logic gc_flush_required;
//LS
writeback_store_interface wb_store();
logic load_store_exception_clear;
instruction_id_t load_store_exception_id;
logic potential_exception;
//WB
id_t ids_retiring [COMMIT_PORTS];
logic retired [COMMIT_PORTS];
logic [4:0] retired_rd_addr [COMMIT_PORTS];
id_t id_for_rd [COMMIT_PORTS];
//Trace Interface Signals
logic tr_operand_stall;
@ -159,8 +178,8 @@ module taiga (
logic tr_rs1_and_rs2_forwarding_needed;
unit_id_t tr_num_instructions_completing;
instruction_id_t tr_num_instructions_in_flight;
instruction_id_t tr_num_of_instructions_pending_writeback;
id_t tr_num_instructions_in_flight;
id_t tr_num_of_instructions_pending_writeback;
////////////////////////////////////////////////////
//Implementation
@ -172,7 +191,11 @@ module taiga (
endgenerate
////////////////////////////////////////////////////
// Fetch and Pre-Decode
// ID support
instruction_metadata_and_id_management id_block (.*);
////////////////////////////////////////////////////
// Fetch
fetch fetch_block (.*, .icache_on('1), .tlb(itlb), .l1_request(l1_request[L1_ICACHE_ID]), .l1_response(l1_response[L1_ICACHE_ID]), .exception(1'b0));
branch_predictor bp_block (.*);
ras ras_block(.*);
@ -185,12 +208,14 @@ module taiga (
assign itlb.physical_address = itlb.virtual_address;
end
endgenerate
pre_decode pre_decode_block(.*);
////////////////////////////////////////////////////
//Decode/Issue
decode_and_issue decode_and_issue_block (.*);
register_file register_file_block (.*, .issue(rf_issue), .wb(rf_wb));
////////////////////////////////////////////////////
//Register File and Writeback
register_file_and_writeback register_file_and_writeback_block (.*);
////////////////////////////////////////////////////
//Execution Units
@ -215,11 +240,6 @@ module taiga (
div_unit div_unit_block (.*, .issue(unit_issue[DIV_UNIT_WB_ID]), .wb(unit_wb[DIV_UNIT_WB_ID]));
endgenerate
////////////////////////////////////////////////////
//Writeback Mux and Instruction Tracking
write_back write_back_mux (.*);
////////////////////////////////////////////////////
//End of Implementation
////////////////////////////////////////////////////

View file

@ -29,7 +29,7 @@ package taiga_config;
//Privileged ISA Options
//Enable Machine level privilege spec
parameter ENABLE_M_MODE = 0;
parameter ENABLE_M_MODE = 1;
//Enable Supervisor level privilege spec
parameter ENABLE_S_MODE = 0;
//Enable User level privilege spec
@ -51,19 +51,8 @@ package taiga_config;
//Division algorithm selection
typedef enum {
RADIX_2,
RADIX_2_EARLY_TERMINATE,
RADIX_2_EARLY_TERMINATE_FULL,
RADIX_4,
RADIX_4_EARLY_TERMINATE,
RADIX_4_EARLY_TERMINATE_FULL,
RADIX_8,
RADIX_8_EARLY_TERMINATE,
RADIX_16,
QUICK_NAIVE,
QUICK_CLZ,
QUICK_CLZ_MK2,
QUICK_RADIX_4
RADIX_2,//Smallest
QUICK_CLZ//Highest performance and best performance per LUT
} div_type;
parameter div_type DIV_ALGORITHM = QUICK_CLZ;
@ -158,15 +147,23 @@ package taiga_config;
//Branch Predictor Options
parameter USE_BRANCH_PREDICTOR = 1;
parameter BRANCH_PREDICTOR_WAYS = 2;
parameter BRANCH_TABLE_ENTRIES = 512;
parameter BRANCH_TABLE_ENTRIES = 512; //min 512
parameter RAS_DEPTH = 8;
////////////////////////////////////////////////////
//FIFO/Buffer Depths
//All parameters restricted to powers of two
parameter MAX_INFLIGHT_COUNT = 4;
parameter FETCH_BUFFER_DEPTH = 4;
//ID limit
//MAX_IDS restricted to a power of 2
parameter MAX_IDS = 8; //8 sufficient for rv32im configs
////////////////////////////////////////////////////
//Number of commit ports
parameter COMMIT_PORTS = 2; //min 2
parameter REGFILE_READ_PORTS = 2; //min 2, for RS1 and RS2
typedef enum logic {
RS1 = 0,
RS2 = 1
} rs1_index_t;
////////////////////////////////////////////////////
//Trace Options
@ -184,13 +181,13 @@ package taiga_config;
////////////////////////////////////////////////////
//Write-Back Unit IDs
parameter NUM_WB_UNITS = 2 + USE_MUL + USE_DIV;
parameter NUM_UNITS = NUM_WB_UNITS + 2;
parameter NUM_WB_UNITS = 2 + USE_MUL + USE_DIV;//ALU and LS
parameter NUM_UNITS = NUM_WB_UNITS + 2;//Branch and CSRs
parameter ALU_UNIT_WB_ID = 0;
parameter LS_UNIT_WB_ID = 1;
parameter DIV_UNIT_WB_ID = LS_UNIT_WB_ID + USE_DIV;
parameter MUL_UNIT_WB_ID = DIV_UNIT_WB_ID + USE_MUL;
parameter MUL_UNIT_WB_ID = DIV_UNIT_WB_ID + 1;
//Non-writeback units
parameter BRANCH_UNIT_ID = MUL_UNIT_WB_ID + 1;
parameter GC_UNIT_ID = BRANCH_UNIT_ID + 1;

View file

@ -41,16 +41,14 @@ module taiga_fifo #(parameter DATA_WIDTH = 70, parameter FIFO_DEPTH = 4)
logic [LOG2_FIFO_DEPTH-1:0] write_index;
logic [LOG2_FIFO_DEPTH-1:0] read_index;
logic [LOG2_FIFO_DEPTH:0] inflight_count;
logic supressed_push;
////////////////////////////////////////////////////
//Implementation
assign supressed_push = fifo.push & ~fifo.supress_push;
generate if (FIFO_DEPTH == 1) begin
always_ff @ (posedge clk) begin
if (rst)
fifo.valid <= 0;
else if (supressed_push)
else if (fifo.push)
fifo.valid <= 1;
else if (fifo.pop)
fifo.valid <= 0;
@ -58,7 +56,7 @@ module taiga_fifo #(parameter DATA_WIDTH = 70, parameter FIFO_DEPTH = 4)
assign fifo.full = fifo.valid;
always_ff @ (posedge clk) begin
if (fifo.push)
if (fifo.potential_push)
fifo.data_out <= fifo.data_in;
end
end
@ -69,7 +67,7 @@ module taiga_fifo #(parameter DATA_WIDTH = 70, parameter FIFO_DEPTH = 4)
if (rst)
inflight_count <= 0;
else
inflight_count <= inflight_count + (LOG2_FIFO_DEPTH+1)'(fifo.pop) - (LOG2_FIFO_DEPTH+1)'(supressed_push);
inflight_count <= inflight_count + (LOG2_FIFO_DEPTH+1)'(fifo.pop) - (LOG2_FIFO_DEPTH+1)'(fifo.push);
end
assign fifo.valid = inflight_count[LOG2_FIFO_DEPTH];
@ -82,12 +80,12 @@ module taiga_fifo #(parameter DATA_WIDTH = 70, parameter FIFO_DEPTH = 4)
end
else begin
read_index <= read_index + LOG2_FIFO_DEPTH'(fifo.pop);
write_index <= write_index + LOG2_FIFO_DEPTH'(supressed_push);
write_index <= write_index + LOG2_FIFO_DEPTH'(fifo.push);
end
end
always_ff @ (posedge clk) begin
if (fifo.push)
if (fifo.potential_push)
lut_ram[write_index] <= fifo.data_in;
end
assign fifo.data_out = lut_ram[read_index];
@ -96,10 +94,8 @@ module taiga_fifo #(parameter DATA_WIDTH = 70, parameter FIFO_DEPTH = 4)
////////////////////////////////////////////////////
//Assertions
always_ff @ (posedge clk) begin
assert (!(~rst & fifo.full & supressed_push & ~fifo.pop)) else $error("overflow");
//assert (!(~rst & ~fifo.valid & ~supressed_push & fifo.pop)) else $error("underflow");
end
fifo_overflow_assertion:
assert property (@(posedge clk) disable iff (rst) !(fifo.full & fifo.push & ~fifo.pop)) else $error("overflow");
fifo_underflow_assertion:
assert property (@(posedge clk) disable iff (rst) !(~fifo.valid & fifo.pop)) else $error("underflow");
endmodule

View file

@ -24,10 +24,12 @@ package taiga_types;
import taiga_config::*;
import riscv_types::*;
localparam ID_W = $clog2(MAX_INFLIGHT_COUNT);
localparam WB_UNITS_WIDTH = $clog2(NUM_WB_UNITS);
localparam MAX_COMPLETE_COUNT = 3 + COMMIT_PORTS; //Branch + Store + System + COMMIT_PORTS
typedef logic[ID_W-1:0] instruction_id_t;
localparam WB_UNITS_WIDTH = $clog2(NUM_WB_UNITS);
localparam LOG2_COMMIT_PORTS = $clog2(COMMIT_PORTS);
typedef logic[$clog2(MAX_IDS)-1:0] id_t;
typedef logic[WB_UNITS_WIDTH-1:0] unit_id_t;
typedef logic[1:0] branch_predictor_metadata_t;
@ -61,39 +63,38 @@ package taiga_types;
typedef struct packed{
logic valid;
exception_code_t code;
logic [31:0] pc;
logic [31:0] tval;
instruction_id_t id;
id_t id;
} exception_packet_t;
typedef struct packed{
logic [4:0] rd_addr;
logic is_store;
} inflight_instruction_packet;
branch_predictor_metadata_t branch_predictor_metadata;
logic branch_prediction_used;
logic [BRANCH_PREDICTOR_WAYS-1:0] branch_predictor_update_way;
} branch_metadata_t;
typedef struct packed{
logic [31:0] instruction;
id_t id;
logic [31:0] pc;
logic [31:0] instruction;
logic valid;
} decode_packet_t;
typedef struct packed{
logic [31:0] pc;
logic [31:0] instruction;
logic [2:0] fn3;
logic [6:0] opcode;
rs_addr_t [REGFILE_READ_PORTS-1:0] rs_addr;//packed style instead of unpacked due to tool limitations
logic [4:0] rd_addr;
logic uses_rs1;
logic uses_rs2;
logic uses_rd;
logic is_call;
logic is_return;
branch_predictor_metadata_t branch_metadata;
logic branch_prediction_used;
logic [BRANCH_PREDICTOR_WAYS-1:0] bp_update_way;
logic alu_sub;
logic [1:0] alu_logic_op;
logic alu_request;
alu_rs1_op_t alu_rs1_sel;
alu_rs2_op_t alu_rs2_sel;
} fetch_buffer_packet_t;
typedef struct packed{
instruction_id_t id;
logic done;
logic [XLEN-1:0] rd;
} unit_writeback_t;
id_t id;
logic stage_valid;
} issue_packet_t;
typedef struct packed{
logic [XLEN:0] in1;//contains sign padding bit for slt operation
@ -103,8 +104,7 @@ package taiga_types;
logic subtract;
logic arith;//contains sign padding bit for arithmetic shift right operation
logic lshift;
logic [1:0] logic_op;
logic [1:0] op;
alu_logic_op_t logic_op;
logic shifter_path;
logic slt_path;
} alu_inputs_t;
@ -113,29 +113,24 @@ package taiga_types;
logic [XLEN-1:0] rs1;
logic [XLEN-1:0] rs2;
logic [2:0] fn3;
logic [31:0] dec_pc;
logic dec_pc_valid;
logic [31:0] issue_pc;
logic issue_pc_valid;
logic use_signed;
logic jal;
logic jalr;
logic is_call;
logic is_return;
logic [31:0] instruction;
branch_predictor_metadata_t branch_metadata;
logic branch_prediction_used;
logic [BRANCH_PREDICTOR_WAYS-1:0] bp_update_way;
logic [20:0] pc_offset;
} branch_inputs_t;
typedef struct packed {
logic[31:0] pc_ex;
logic [31:0] jump_pc;
logic [31:0] njump_pc;
logic [31:0] new_pc;
logic branch_taken;
logic branch_ex;
logic is_branch_ex;
logic is_return_ex;
branch_predictor_metadata_t branch_ex_metadata;
logic branch_prediction_used;
logic [BRANCH_PREDICTOR_WAYS-1:0] bp_update_way;
logic is_call_ex;
} branch_results_t;
typedef struct packed{
@ -159,9 +154,7 @@ package taiga_types;
logic load;
logic store;
logic forwarded_store;
instruction_id_t store_forward_id;
//exception support
logic [31:0] pc;
id_t store_forward_id;
//amo support
amo_details_t amo;
} load_store_inputs_t;
@ -216,7 +209,7 @@ package taiga_types;
logic [3:0] be;
logic [2:0] fn3;
logic [31:0] data_in;
instruction_id_t id;
id_t id;
} data_access_shared_inputs_t;
typedef enum {
@ -260,8 +253,8 @@ package taiga_types;
//Writeback
unit_id_t num_instructions_completing;
instruction_id_t num_instructions_in_flight;
instruction_id_t num_of_instructions_pending_writeback;
id_t num_instructions_in_flight;
id_t num_of_instructions_pending_writeback;
} taiga_trace_events_t;
typedef struct packed {

View file

@ -56,7 +56,8 @@ module tlb_lut_ram #(
logic [WAYS-1:0] tag_hit;
logic [WAYS-1:0] replacement_way;
tlb_entry_t ram_data [WAYS-1:0];
logic [$bits(tlb_entry_t)-1:0] ram_data [WAYS-1:0][1];
tlb_entry_t ram_entry [WAYS-1:0];
tlb_entry_t new_entry;
logic flush_in_progress;
@ -79,9 +80,11 @@ module tlb_lut_ram #(
genvar i;
generate
for (i=0; i<WAYS; i=i+1) begin : lut_rams
lut_ram #(.WIDTH($bits(tlb_entry_t)), .DEPTH(DEPTH)) ram_block (.clk(clk),
lut_ram #(.WIDTH($bits(tlb_entry_t)), .DEPTH(DEPTH), .READ_PORTS(1))
ram_block (.clk(clk),
.waddr(tlb_write_addr), .ram_write(tlb_write[i]), .new_ram_data(new_entry),
.raddr(tlb_read_addr), .ram_data_out(ram_data[i]));
.raddr({tlb_read_addr}), .ram_data_out(ram_data[i]));
assign ram_entry[i] = ram_data[i][0];
end
endgenerate
@ -114,7 +117,7 @@ module tlb_lut_ram #(
always_comb begin
for (int i=0; i<WAYS; i=i+1) begin
tag_hit[i] = {ram_data[i].valid, ram_data[i].tag} == {1'b1, virtual_tag};
tag_hit[i] = {ram_entry[i].valid, ram_entry[i].tag} == {1'b1, virtual_tag};
end
end
@ -138,7 +141,7 @@ module tlb_lut_ram #(
tlb.physical_address[11:0] = tlb.virtual_address[11:0];
tlb.physical_address[31:12] = tlb.virtual_address[31:12];
for (int i=0; i<WAYS; i=i+1) begin
if(tag_hit[i] & tlb_on) tlb.physical_address[31:12] = ram_data[i].phys_addr;
if(tag_hit[i] & tlb_on) tlb.physical_address[31:12] = ram_entry[i].phys_addr;
end
end

56
core/toggle_memory.sv Normal file
View file

@ -0,0 +1,56 @@
/*
* Copyright © 2020 Eric Matthews, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
//One-bit-per-ID memory with a toggle (invert-in-place) write port and an
//independent asynchronous read port.
module toggle_memory
    import taiga_config::*;
    import taiga_types::*;
    (
        input logic clk,
        input logic rst,

        //Toggle (write) port: inverts the bit stored for toggle_id
        input logic toggle,
        input id_t toggle_id,

        //Read port
        input id_t read_id,
        output logic read_data
    );
    ////////////////////////////////////////////////////
    //Implementation
    logic id_toggle_memory [MAX_IDS];
    logic toggled_value;

    //All bits start at zero; contents are set by the initial block and rst is not used
    initial id_toggle_memory = '{default: 0};

    //Read port
    assign read_data = id_toggle_memory[read_id];

    //Inverse of the bit currently stored for the ID being toggled
    assign toggled_value = ~id_toggle_memory[toggle_id];

    always_ff @ (posedge clk) begin
        if (toggle)
            id_toggle_memory[toggle_id] <= toggled_value;
    end

    ////////////////////////////////////////////////////
    //End of Implementation
    ////////////////////////////////////////////////////

    ////////////////////////////////////////////////////
    //Assertions

endmodule

View file

@ -1,252 +0,0 @@
/*
* Copyright © 2017-2019 Eric Matthews, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import riscv_types::*;
import taiga_types::*;
module write_back(
input logic clk,
input logic rst,
input logic gc_fetch_flush,
input logic instruction_issued_with_rd,
input unit_writeback_t unit_wb[NUM_WB_UNITS-1:0],
register_file_writeback_interface.writeback rf_wb,
tracking_interface.wb ti,
output logic instruction_complete,
output logic instruction_queue_empty,
output instruction_id_t oldest_id,
input logic load_store_exception_clear,
input instruction_id_t load_store_exception_id,
output logic potential_exception,
//Writeback-Store Interface
writeback_store_interface.wb wb_store,
//Trace signals
output unit_id_t tr_num_instructions_completing,
output instruction_id_t tr_num_instructions_in_flight,
output instruction_id_t tr_num_of_instructions_pending_writeback
);
//////////////////////////////////////
//Inflight metadata for IDs
(* ramstyle = "MLAB, no_rw_check" *) logic[$bits(inflight_instruction_packet)-1:0] id_metadata [MAX_INFLIGHT_COUNT-1:0];
//aliases for write-back-interface signals
instruction_id_t unit_instruction_id [NUM_WB_UNITS-1:0];
logic [NUM_WB_UNITS-1:0] unit_done;
logic [XLEN-1:0] unit_rd [NUM_WB_UNITS-1:0];
//Per-ID muxes for commit buffer
logic [$clog2(NUM_WB_UNITS)-1:0] id_unit_select [MAX_INFLIGHT_COUNT-1:0];
logic [$clog2(NUM_WB_UNITS)-1:0] id_unit_select_r [MAX_INFLIGHT_COUNT-1:0];
//Commit buffer
logic [XLEN-1:0] results_by_id [MAX_INFLIGHT_COUNT-1:0];
logic [XLEN-1:0] results_by_id_new [MAX_INFLIGHT_COUNT-1:0];
instruction_id_t id_retiring;
inflight_instruction_packet retiring_instruction_packet;
logic [MAX_INFLIGHT_COUNT-1:0] id_inuse;
logic [MAX_INFLIGHT_COUNT-1:0] id_potential_exception;
logic [MAX_INFLIGHT_COUNT-1:0] exception_cleared_one_hot;
logic [MAX_INFLIGHT_COUNT-1:0] id_writeback_pending;
logic [MAX_INFLIGHT_COUNT-1:0] id_writeback_pending_r;
logic [MAX_INFLIGHT_COUNT-1:0] id_writing_to_buffer;
logic [MAX_INFLIGHT_COUNT-1:0] id_retiring_one_hot;
logic [MAX_INFLIGHT_COUNT-1:0] id_issued_one_hot;
logic retiring_next_cycle, retiring;
////////////////////////////////////////////////////
//Implementation
//Re-assigning interface inputs to array types so that they can be dynamically indexed
genvar i;
generate
for (i=0; i< NUM_WB_UNITS; i++) begin : interface_to_array_g
assign unit_instruction_id[i] = unit_wb[i].id;
assign unit_done[i] = unit_wb[i].done;
assign unit_rd[i] = unit_wb[i].rd;
end
endgenerate
////////////////////////////////////////////////////
//ID done determination
//For each ID, check if a unit is reporting that ID as done and OR the results together
//Additionally, OR the result of any store operation completing
always_comb begin
id_writing_to_buffer = '0;
for (int i=0; i< MAX_INFLIGHT_COUNT; i++) begin
for (int j=0; j< NUM_WB_UNITS; j++) begin
id_writing_to_buffer[i] |= (unit_instruction_id[j] == ID_W'(i)) && unit_done[j];
end
id_writing_to_buffer[i] |= (wb_store.commit_id == ID_W'(i)) && wb_store.commit;
end
end
////////////////////////////////////////////////////
//Unit select for writeback buffer
//Set unit_ID for each ID as they are issued
//If ID is not in use, use the current issue_unit_id value
//This is used to support single cycle units, such as the ALU
always_comb begin
id_issued_one_hot = 0;
id_issued_one_hot[ti.issue_id] = 1;
id_issued_one_hot &= {MAX_INFLIGHT_COUNT{ti.issued}};
end
generate for (i=0; i< MAX_INFLIGHT_COUNT; i++) begin
always_ff @ (posedge clk) begin
if (id_issued_one_hot[i])
id_unit_select_r[i] <= ti.issue_unit_id;
end
assign id_unit_select[i] = id_inuse[i] ? id_unit_select_r[i] : ti.issue_unit_id;
end endgenerate
////////////////////////////////////////////////////
//Writeback Buffer
//Mux outputs of units based on IDs
//If ID is done write result to buffer
generate for (i=0; i< MAX_INFLIGHT_COUNT; i++) begin
always_ff @ (posedge clk) begin
if (id_writing_to_buffer[i])
results_by_id[i] <= unit_rd[id_unit_select[i]];
end
end endgenerate
////////////////////////////////////////////////////
//Unit Forwarding Support
//Track whether an ID has written to the commit buffer
set_clr_reg_with_rst #(.SET_OVER_CLR(0), .WIDTH($bits(id_inuse)), .RST_VALUE('0)) id_inuse_m (
.clk, .rst,
.set(id_issued_one_hot),
.clr(id_writing_to_buffer),
.result(id_inuse)
);
assign wb_store.forwarding_data_ready = ~id_inuse[wb_store.id_needed_at_commit];
assign wb_store.forwarded_data = results_by_id[wb_store.id_needed_at_commit];
////////////////////////////////////////////////////
//ID Tracking
//Provides ordering of IDs, ID for issue and oldest ID for committing to register file
//Ports not listed explicitly are connected by name through the wildcard
id_tracking id_counters (
    .*,
    .issued(ti.issued),
    .retired(retiring_next_cycle),
    .id_available(ti.id_available),
    .oldest_id(oldest_id),
    .next_id(ti.issue_id),
    .empty(instruction_queue_empty)
);
////////////////////////////////////////////////////
//Metadata storage for IDs
//stores destination register for each ID and whether it is a store instruction
//Table contents start at zero via the initial block; the table is not cleared by rst
initial id_metadata = '{default: 0};
//Inflight Instruction ID table
//Stores rd_addr and whether instruction is a store
//The entry for ti.issue_id is rewritten on every cycle in which an ID is available
//(the write is gated by ti.id_available, not by ti.issued)
always_ff @ (posedge clk) begin
if (ti.id_available)
id_metadata[ti.issue_id] <= ti.inflight_packet;
end
//Metadata of the ID selected by id_retiring
assign retiring_instruction_packet = id_metadata[id_retiring];
////////////////////////////////////////////////////
//Potential Exception Tracking
// always_comb begin
// exception_cleared_one_hot = 0;
// exception_cleared_one_hot[load_store_exception_id] = load_store_exception_clear;
// end
// always_ff @ (posedge clk) begin
// if (rst)
// id_potential_exception <= 0;
// else
// id_potential_exception <= (id_potential_exception | {MAX_INFLIGHT_COUNT{ti.exception_possible}} & id_issued_one_hot) & ~exception_cleared_one_hot;
// end
// assign potential_exception = |id_potential_exception;
////////////////////////////////////////////////////
//Register File Interface
//Pending register file write tracking, one bit per ID
//A bit becomes pending in the cycle the ID writes to the buffer and stays
//pending until the ID is flagged in id_retiring_one_hot
assign id_writeback_pending = (id_writeback_pending_r & ~id_retiring_one_hot) | id_writing_to_buffer;

//Registered copy of the pending bits, cleared on reset
always_ff @ (posedge clk) begin
    if (rst) begin
        id_writeback_pending_r <= 0;
    end
    else begin
        id_writeback_pending_r <= id_writeback_pending;
    end
end
//The oldest ID retires next cycle when its result is pending writeback
//and it is not being held for a store
assign retiring_next_cycle = ~wb_store.hold_for_store_ids[oldest_id] & id_writeback_pending[oldest_id];

//Register the retire decision together with the ID it applies to
always_ff @(posedge clk) begin
    id_retiring <= oldest_id;
    retiring <= retiring_next_cycle;
end

//One-hot form of the retiring ID, all zeros when nothing is retiring
always_comb begin
    id_retiring_one_hot = '0;
    id_retiring_one_hot[id_retiring] = retiring;
end
//Completion signal used for the retired instruction count
//Retiring stores are excluded
assign instruction_complete = ~retiring_instruction_packet.is_store & retiring;

//Register file write-back signals, all derived from the retiring ID
assign rf_wb.id = id_retiring;
assign rf_wb.retiring = instruction_complete;
assign rf_wb.rd_addr = retiring_instruction_packet.rd_addr;
assign rf_wb.rd_nzero = |retiring_instruction_packet.rd_addr;
assign rf_wb.rd_data = results_by_id[id_retiring];

//Register bypass for issue operands
//Valid comes from the registered pending bits, which include the instruction
//currently writing to the register file; data is read from the result buffer
assign rf_wb.rs1_data = results_by_id[rf_wb.rs1_id];
assign rf_wb.rs1_valid = id_writeback_pending_r[rf_wb.rs1_id];
assign rf_wb.rs2_data = results_by_id[rf_wb.rs2_id];
assign rf_wb.rs2_valid = id_writeback_pending_r[rf_wb.rs2_id];
////////////////////////////////////////////////////
//End of Implementation
////////////////////////////////////////////////////
////////////////////////////////////////////////////
//Assertions
////////////////////////////////////////////////////
//Trace Interface
generate if (ENABLE_TRACE_INTERFACE) begin
    //Trace statistics, only elaborated when ENABLE_TRACE_INTERFACE is set
    //  tr_num_instructions_completing: number of writeback units asserting done this cycle
    //  tr_num_instructions_in_flight: number of IDs currently marked in use
    //  tr_num_of_instructions_pending_writeback: number of IDs with a pending register file write
    always_comb begin
        tr_num_instructions_completing = 0;
        for (int i=0; i<NUM_WB_UNITS; i++) begin
            tr_num_instructions_completing += unit_done[i];
        end

        tr_num_instructions_in_flight = 0;
        tr_num_of_instructions_pending_writeback = 0;
        //Every ID (0 to MAX_INFLIGHT_COUNT-1) must be counted, so the loop bound is
        //MAX_INFLIGHT_COUNT; a bound of MAX_INFLIGHT_COUNT-1 would skip the last ID
        //NOTE(review): assumes both counters are declared wide enough to hold the
        //value MAX_INFLIGHT_COUNT -- confirm against their declarations
        for (int i=0; i<MAX_INFLIGHT_COUNT; i++) begin
            tr_num_instructions_in_flight += ID_W'(id_inuse[i]);
            tr_num_of_instructions_pending_writeback += ID_W'(id_writeback_pending[i]);
        end
    end
end
endgenerate
endmodule

View file

@ -46,7 +46,7 @@ axi_ddr_sim<TB>::axi_ddr_sim(string filepath, uint32_t starting_memory_location,
write_distribution = uniform_int_distribution<int>(MIN_DELAY_WR,MAX_DELAY_WR);
this->tb = tb;
init_signals();
printf("Done AXI Initialization: %d Pages intialized\n", page_index);
//printf("Done AXI Initialization. %d Pages intialized\n", page_index);
fflush(stdout);
}
@ -80,7 +80,7 @@ axi_ddr_sim<TB>::axi_ddr_sim(ifstream & input_memory_file, TB * tb){
write_distribution = uniform_int_distribution<int>(MIN_DELAY_WR,MAX_DELAY_WR);
this->tb = tb;
init_signals();
printf("Done AXI Initialization: Started from %u\n", starting_location);
//printf("Done AXI Initialization. Started from: %u\n", starting_location);
fflush(stdout);
}

View file

@ -69,7 +69,7 @@ int main(int argc, char **argv) {
#endif
taigaTracer->reset();
cout << "--------------------------------------------------------------\n";
cout << " Starting Simulation, logging to: " << argv[1] << "\n";
cout << " Starting Simulation, logging to " << argv[1] << "\n";
cout << "--------------------------------------------------------------\n";
cout << flush;
@ -86,7 +86,7 @@ int main(int argc, char **argv) {
}
cout << "--------------------------------------------------------------\n";
cout << " Simulation Completed: " << taigaTracer->get_cycle_count() << " cycles.\n";
cout << " Simulation Completed " << taigaTracer->get_cycle_count() << " cycles.\n";
taigaTracer->print_stats();
logFile.close();

View file

@ -62,21 +62,8 @@
../core/div_algorithms/div_radix2.sv
../core/div_algorithms/div_radix2_ET.sv
../core/div_algorithms/div_radix2_ET_full.sv
../core/div_algorithms/div_radix4.sv
../core/div_algorithms/div_radix8.sv
../core/div_algorithms/div_radix8_ET.sv
../core/div_algorithms/div_radix4_ET.sv
../core/div_algorithms/div_radix4_ET_full.sv
../core/div_algorithms/div_radix16.sv
../core/msb.sv
../core/msb_naive.sv
../core/clz.sv
../core/div_algorithms/div_quick_clz_mk2.sv
../core/div_algorithms/div_quick_clz.sv
../core/div_algorithms/div_quick_radix4.sv
../core/div_algorithms/div_quick_naive.sv
../core/div_algorithms/div_algorithm.sv
../core/div_unit.sv
@ -93,17 +80,14 @@
../core/ras.sv
../core/branch_predictor_ram.sv
../core/branch_predictor.sv
../core/fetch.sv
../core/pre_decode.sv
../core/fetch.sv
../core/illegal_instruction_checker.sv
../core/decode_and_issue.sv
../core/id_inuse.sv
../core/reg_inuse.sv
../core/register_file.sv
../core/id_tracking.sv
../core/write_back.sv
../core/regfile_bank_sel.sv
../core/register_file.sv
../core/register_file_and_writeback.sv
../core/placer_randomizer.sv
@ -112,5 +96,8 @@
../l2_arbiter/l2_round_robin.sv
../l2_arbiter/l2_arbiter.sv
../core/toggle_memory.sv
../core/instruction_metadata_and_id_management.sv
../core/taiga.sv