commit a6d67016ac80663dc9e707cbf5eb3b4f993b475e
Author: Sven Stucki
Date:   Wed Apr 1 11:11:07 2015 +0200

    Initial RiscV core commit; still in an early stage, but ALU instructions work

diff --git a/alu.sv b/alu.sv
new file mode 100644
index 00000000..a0c5d292
--- /dev/null
+++ b/alu.sv
@@ -0,0 +1,639 @@
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                                                                            //
+// Engineer:       Matthias Baer - baermatt@student.ethz.ch                   //
+//                                                                            //
+// Additional contributions by:                                               //
+//                 Igor Loi - igor.loi@unibo.it                               //
+//                 Andreas Traber - atraber@student.ethz.ch                   //
+//                                                                            //
+//                                                                            //
+// Create Date:    19/09/2013                                                 //
+// Design Name:    Pipelined Processor                                        //
+// Module Name:    alu.sv                                                     //
+// Project Name:   Processor                                                  //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    Arithmetic logic unit of the pipelined processor           //
+//                 Combinational; supports 32, 16 and 8 bit vector modes      //
+//                                                                            //
+// Revision:                                                                  //
+// Revision v0.1 - File Created                                               //
+// Revision v0.2 - (August 6th 2014) Changed port and signal names, added     //
+//                 comments                                                   //
+// Revision v0.3 - (December 17 2014) Added vector support                    //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+
+`include "defines.sv"
+
+module alu
+(
+  // Inputs of the ALU
+  input  logic [`ALU_OP_WIDTH-1:0] operator_i,    // selects the operation (ALU_* macros)
+  input  logic [31:0]              operand_a_i,   // first operand
+  input  logic [31:0]              operand_b_i,   // second operand, also the shift amount
+  input  logic                     carry_i,       // carry in, only used by ALU_ADDC
+  input  logic                     flag_i,        // mux select for ALU_CMOV
+`ifdef TCDM_ADDR_PRECAL
+  input  logic [31:0]              adder_i,       // externally supplied value for adder_lsu_o
+`endif
+
+  input  logic [1:0]               vector_mode_i, // element width (VEC_MODE* macros)
+  input  logic [1:0]               cmp_mode_i,    // how flag_o is built (ALU_CMP_* macros)
+  input  logic [1:0]               vec_ext_i,     // element select for ALU_EXT and ALU_INS
+
+  output logic [31:0]              adder_lsu_o,   // operand_a_i + operand_b_i, or adder_i
+  output logic [31:0]              result_o,      // result of the selected operation
+  output logic                     overflow_o,    // signed overflow, add/addc/sub only
+  output logic                     carry_o,       // carry out of bit 31, add/addc/sub only
+  output logic                     flag_o         // comparison flag, compare operators only
+);
+
+
+`ifdef TCDM_ADDR_PRECAL
+  assign adder_lsu_o = adder_i;
+`else
+  assign adder_lsu_o = operand_a_i + operand_b_i;
+`endif
+
+  logic [31:0] operand_a_rev; // bit reversed signal of operand_a_i
+
+  // bit reverse operand_a for left shifts
+  genvar k;
+  generate
+    for(k = 0; k < 32; k++)
+    begin
+      assign operand_a_rev[k] = operand_a_i[31-k];
+    end
+  endgenerate
+
+  ////////////////////////////////////////////////////////////////////////////
+  // Partitioned Adder
+  //
+  // 32-bit adder built from four 8-bit slices; carry_in[i] feeds slice i and
+  // defaults to the carry out of the slice below. In the 16 and 8 bit vector
+  // modes the chain is cut at the element boundaries (except for addc). For
+  // sub and abs one operand is inverted and a carry of 1 enters each element.
+  ////////////////////////////////////////////////////////////////////////////
+
+  logic [3:0]  carry_in;
+  logic [3:0]  carry_out;
+  logic [31:0] adder_op_a;
+  logic [31:0] adder_op_b;
+  logic [31:0] adder_result;
+
+  // prepare operand a
+  assign adder_op_a = (operator_i == `ALU_ABS) ? ~operand_a_i : operand_a_i;
+
+  // prepare operand b
+  assign adder_op_b = (operator_i == `ALU_SUB) ? ~operand_b_i : operand_b_i;
+
+  // prepare vector carries
+  always_comb
+  begin
+    carry_in = {carry_out[2], carry_out[1], carry_out[0], 1'b0};
+
+    case (operator_i)
+      `ALU_ADDC: carry_in[0] = carry_i;
+
+      `ALU_SUB, `ALU_ABS:
+      begin
+        case (vector_mode_i)
+          default: // VEC_MODE32
+          begin
+            carry_in[0] = 1'b1;
+          end
+
+          `VEC_MODE16:
+          begin
+            carry_in[0] = 1'b1;
+            carry_in[2] = 1'b1;
+          end
+
+          `VEC_MODE8:
+          begin
+            carry_in = 4'b1111;
+          end
+        endcase
+      end
+
+      default:
+      begin
+        case (vector_mode_i)
+          default: // VEC_MODE32
+          begin
+            carry_in[0] = 1'b0;
+          end
+
+          `VEC_MODE16:
+          begin
+            carry_in[0] = 1'b0;
+            carry_in[2] = 1'b0;
+          end
+
+          `VEC_MODE8:
+          begin
+            carry_in = 4'b0000;
+          end
+        endcase
+      end
+    endcase
+  end
+
+  // adder consisting of four slices
+  assign {carry_out[0], adder_result[ 7: 0]} = adder_op_a[ 7: 0] + adder_op_b[ 7: 0] + carry_in[0];
+  assign {carry_out[1], adder_result[15: 8]} = adder_op_a[15: 8] + adder_op_b[15: 8] + carry_in[1];
+  assign {carry_out[2], adder_result[23:16]} = adder_op_a[23:16] + adder_op_b[23:16] + carry_in[2];
+  assign {carry_out[3], adder_result[31:24]} = adder_op_a[31:24] + adder_op_b[31:24] + carry_in[3];
+
+
+  // averaging by right shifting of one bit
+  logic [31:0] result_avg;
+
+  assign result_avg[ 6: 0] = adder_result[ 7: 1];
+  assign result_avg[14: 8] = adder_result[15: 9];
+  assign result_avg[22:16] = adder_result[23:17];
+  assign result_avg[30:24] = adder_result[31:25];
+
+  assign result_avg[ 7] = (vector_mode_i == `VEC_MODE8) ? ((operator_i == `ALU_AVGU) ? 1'b0 : adder_result[ 7]) : adder_result[ 8];
+  assign result_avg[15] = ((vector_mode_i == `VEC_MODE16) || (vector_mode_i == `VEC_MODE8)) ? ((operator_i == `ALU_AVGU) ? 1'b0 : adder_result[15]) : adder_result[16];
+  assign result_avg[23] = (vector_mode_i == `VEC_MODE8) ? ((operator_i == `ALU_AVGU) ? 1'b0 : adder_result[23]) : adder_result[24];
+  assign result_avg[31] = (operator_i == `ALU_AVGU) ? 1'b0 : adder_result[31];
+
+
+  ////////////////////////////////////////////////////////////////////////////
+  // Shifter
+  //
+  // Only right shifts are implemented. A left shift bit-reverses operand a,
+  // shifts it right and bit-reverses the result again; in the vector modes
+  // the per-element shift amounts are swapped to match the reversed operand.
+  //
+  ////////////////////////////////////////////////////////////////////////////
+  logic        shift_left;        // should we shift left?
+  logic [31:0] shift_amt;         // amount of shift
+  logic [31:0] shift_amt_left;    // amount of shift, adapted to vector mode for sll
+  logic [31:0] shift_amt_int;     // amount of shift actually applied by the shifter
+  logic [31:0] shift_op_a;        // input of the shifter
+  logic [31:0] shift_result;
+  logic [31:0] shift_left_result;
+
+
+  // by reversing the bits of the input, we also have to reverse the order of shift amounts
+  always_comb
+  begin
+    case(vector_mode_i)
+      default: // VEC_MODE32
+      begin
+        shift_amt_left[31: 0] = shift_amt[31: 0];
+      end
+
+      `VEC_MODE16:
+      begin
+        shift_amt_left[15: 0] = shift_amt[31:16];
+        shift_amt_left[31:16] = shift_amt[15: 0];
+      end
+
+      `VEC_MODE8:
+      begin
+        shift_amt_left[ 7: 0] = shift_amt[31:24];
+        shift_amt_left[15: 8] = shift_amt[23:16];
+        shift_amt_left[23:16] = shift_amt[15: 8];
+        shift_amt_left[31:24] = shift_amt[ 7: 0];
+      end
+    endcase
+  end
+
+  // choose the bit reversed or the normal input for shift operand a
+  assign shift_op_a    = (shift_left == 1'b1) ? operand_a_rev : operand_a_i;
+  assign shift_amt_int = (shift_left == 1'b1) ? shift_amt_left : shift_amt;
+
+  // right shifts, we let the synthesizer optimize this
+  always_comb
+  begin
+    case(vector_mode_i)
+      default: // VEC_MODE32
+      begin
+        if(operator_i == `ALU_SRA)
+          shift_result = $unsigned( $signed(shift_op_a) >>> shift_amt_int[4:0] );
+        else if(operator_i == `ALU_ROR)
+          shift_result = {shift_op_a, shift_op_a} >> shift_amt_int[4:0];
+        else
+          shift_result = shift_op_a >> shift_amt_int[4:0];
+      end
+
+      `VEC_MODE16:
+      begin
+        if(operator_i == `ALU_SRA)
+        begin
+          shift_result[31:16] = $unsigned( $signed(shift_op_a[31:16]) >>> shift_amt_int[19:16] );
+          shift_result[15: 0] = $unsigned( $signed(shift_op_a[15: 0]) >>> shift_amt_int[ 3: 0] );
+        end
+        else
+        begin
+          shift_result[31:16] = shift_op_a[31:16] >> shift_amt_int[19:16];
+          shift_result[15: 0] = shift_op_a[15: 0] >> shift_amt_int[ 3: 0];
+        end
+      end
+
+      `VEC_MODE8:
+      begin
+        if(operator_i == `ALU_SRA)
+        begin
+          shift_result[31:24] = $unsigned( $signed(shift_op_a[31:24]) >>> shift_amt_int[26:24] );
+          shift_result[23:16] = $unsigned( $signed(shift_op_a[23:16]) >>> shift_amt_int[18:16] );
+          shift_result[15: 8] = $unsigned( $signed(shift_op_a[15: 8]) >>> shift_amt_int[10: 8] );
+          shift_result[ 7: 0] = $unsigned( $signed(shift_op_a[ 7: 0]) >>> shift_amt_int[ 2: 0] );
+        end
+        else
+        begin
+          shift_result[31:24] = shift_op_a[31:24] >> shift_amt_int[26:24];
+          shift_result[23:16] = shift_op_a[23:16] >> shift_amt_int[18:16];
+          shift_result[15: 8] = shift_op_a[15: 8] >> shift_amt_int[10: 8];
+          shift_result[ 7: 0] = shift_op_a[ 7: 0] >> shift_amt_int[ 2: 0];
+        end
+      end
+    endcase // case (vector_mode_i)
+  end
+
+  // bit reverse the shift_result for left shifts
+  genvar j;
+  generate
+    for(j = 0; j < 32; j++)
+    begin
+      assign shift_left_result[j] = shift_result[31-j];
+    end
+  endgenerate
+
+
+
+  ////////////////////////////////////////////////////////////////////////////
+  // Comparison
+  //
+  // Four byte-wide comparators produce is_equal_vec / is_greater_vec, which
+  // are combined into 32, 16 or 8 bit wide results depending on
+  // vector_mode_i. cmp_sign_mode marks the bytes whose MSB is treated as a
+  // sign bit. is_greater also drives the min/max/abs/cmov/ins byte mux.
+  ////////////////////////////////////////////////////////////////////////////
+
+  // results
+  logic [3:0]  is_equal;
+  logic [3:0]  is_greater;     // handles both signed and unsigned forms
+
+  logic [3:0]  sel_minmax;     // mux control
+  logic [31:0] result_minmax;
+  logic [31:0] minmax_b;
+
+
+  logic        do_min;
+
+  // 8-bit vector comparisons, basic building blocks
+  logic [3:0]  cmp_sign_mode;
+  logic [3:0]  is_equal_vec;
+  logic [3:0]  is_greater_vec;
+
+
+  // generate cmp_sign_mode signal that is used for comparisons below
+  always_comb
+  begin
+    cmp_sign_mode[3:0] = 4'b0000; // unsigned mode
+
+    // signed mode
+    if ((operator_i == `ALU_GTS)  ||
+        (operator_i == `ALU_GES)  ||
+        (operator_i == `ALU_LTS)  ||
+        (operator_i == `ALU_SLTS) ||
+        (operator_i == `ALU_LES)  ||
+        (operator_i == `ALU_MAX)  ||
+        (operator_i == `ALU_MIN)  ||
+        (operator_i == `ALU_ABS))
+    begin
+      case (vector_mode_i)
+        default:     cmp_sign_mode[3:0] = 4'b1000;
+        `VEC_MODE16: cmp_sign_mode[3:0] = 4'b1010;
+        `VEC_MODE8:  cmp_sign_mode[3:0] = 4'b1111;
+      endcase
+    end
+  end
+
+  // generate vector equal and greater than signals, cmp_sign_mode decides if the comparison is done signed or unsigned
+  genvar i;
+  generate
+    for(i = 0; i < 4; i++)
+    begin
+      assign is_equal_vec[i]   = (operand_a_i[8*i+7:8*i] == operand_b_i[8*i+7:i*8]);
+      assign is_greater_vec[i] = $signed({operand_a_i[8*i+7] & cmp_sign_mode[i], operand_a_i[8*i+7:8*i]})
+                                  >
+                                 $signed({operand_b_i[8*i+7] & cmp_sign_mode[i], operand_b_i[8*i+7:i*8]});
+    end
+  endgenerate
+
+
+  always_comb
+  begin
+    is_equal[3:0]   = {4{is_equal_vec[3] & is_equal_vec[2] & is_equal_vec[1] & is_equal_vec[0]}};
+    is_greater[3:0] = {4{is_greater_vec[3] | (is_equal_vec[3] & (is_greater_vec[2]
+                                            | (is_equal_vec[2] & (is_greater_vec[1]
+                                             | (is_equal_vec[1] & (is_greater_vec[0]))))))}};
+
+    case(vector_mode_i)
+      default:; // see default assignment
+
+      `VEC_MODE16:
+      begin
+        is_equal[1:0]   = {2{is_equal_vec[0] & is_equal_vec[1]}};
+        is_equal[3:2]   = {2{is_equal_vec[2] & is_equal_vec[3]}};
+        is_greater[1:0] = {2{is_greater_vec[1] | (is_equal_vec[1] & is_greater_vec[0])}};
+        is_greater[3:2] = {2{is_greater_vec[3] | (is_equal_vec[3] & is_greater_vec[2])}};
+      end
+
+      `VEC_MODE8:
+      begin
+        is_equal[3:0]   = is_equal_vec[3:0];
+        is_greater[3:0] = is_greater_vec[3:0];
+      end
+    endcase
+  end
+
+  // generate comparison results
+  logic [3:0] cmp_result;
+  logic       any_result;
+  logic       all_result;
+
+  always_comb
+  begin
+    cmp_result = is_equal;
+
+    case (operator_i)
+      `ALU_EQ:             cmp_result = is_equal;
+      `ALU_NE:             cmp_result = ~is_equal;
+      `ALU_GTS, `ALU_GTU:  cmp_result = is_greater;
+      `ALU_GES, `ALU_GEU:  cmp_result = is_greater | is_equal;
+      `ALU_LTS, `ALU_SLTS,
+      `ALU_LTU, `ALU_SLTU: cmp_result = ~(is_greater | is_equal);
+      `ALU_LES, `ALU_LEU:  cmp_result = ~is_greater;
+      default:; // nothing to do
+    endcase //~case(operator_i)
+  end
+
+  assign any_result = |cmp_result;
+  assign all_result = &cmp_result;
+
+
+  // choose result value for min/max/abs
+  assign minmax_b = (operator_i == `ALU_ABS) ? adder_result : operand_b_i;
+
+  assign do_min   = ((operator_i == `ALU_MIN) || (operator_i == `ALU_MINU));
+
+  // reuse the minmax mux also for the cmov instruction
+  // the mux now handles min, max, abs, cmov, ins
+  always_comb
+  begin
+    sel_minmax[3:0] = is_greater ^ {4{do_min}};
+
+    if(operator_i == `ALU_CMOV)
+      sel_minmax[3:0] = {4{flag_i}};
+
+    if(operator_i == `ALU_INS)
+    begin
+      if(vector_mode_i == `VEC_MODE16)
+      begin
+        sel_minmax[1:0] = {2{vec_ext_i[0]}};
+        sel_minmax[3:2] = ~{2{vec_ext_i[0]}};
+      end
+      else // `VEC_MODE8
+      begin
+        sel_minmax[0] = (vec_ext_i != 2'b00);
+        sel_minmax[1] = (vec_ext_i != 2'b01);
+        sel_minmax[2] = (vec_ext_i != 2'b10);
+        sel_minmax[3] = (vec_ext_i != 2'b11);
+      end
+    end
+  end
+
+  assign result_minmax[31:24] = (sel_minmax[3] == 1'b1) ? operand_a_i[31:24] : minmax_b[31:24];
+  assign result_minmax[23:16] = (sel_minmax[2] == 1'b1) ? operand_a_i[23:16] : minmax_b[23:16];
+  assign result_minmax[15: 8] = (sel_minmax[1] == 1'b1) ? operand_a_i[15: 8] : minmax_b[15: 8];
+  assign result_minmax[ 7: 0] = (sel_minmax[0] == 1'b1) ? operand_a_i[ 7: 0] : minmax_b[ 7: 0];
+
+
+  ////////////////////////////////////////////////////////////////////////////
+  // Extension
+  //
+  // Zero or sign extension of one byte or one halfword of operand a to
+  // 32 bit. For ALU_EXT, vec_ext_i selects which byte / halfword is taken;
+  // all other operators use the lowest byte / halfword.
+  //
+  ////////////////////////////////////////////////////////////////////////////
+
+  logic [7:0]  ext_byte;
+  logic [15:0] ext_word;
+  logic [31:0] result_ext;
+
+  always_comb
+  begin
+    ext_byte = operand_a_i[7:0];
+
+    if(operator_i == `ALU_EXT)
+    begin
+      case(vec_ext_i)
+        2'b00: ext_byte = operand_a_i[ 7: 0];
+        2'b01: ext_byte = operand_a_i[15: 8];
+        2'b10: ext_byte = operand_a_i[23:16];
+        2'b11: ext_byte = operand_a_i[31:24];
+      endcase
+    end
+  end
+
+  assign ext_word = ((vec_ext_i[0] == 1'b1) && (operator_i == `ALU_EXT)) ? operand_a_i[31:16] : operand_a_i[15:0];
+
+  always_comb
+  begin
+    // zero extend byte
+    result_ext = {24'b0, ext_byte[7:0]};
+
+    // sign extend byte
+    if((operator_i == `ALU_EXTBS) || ((operator_i == `ALU_EXT) && (vector_mode_i == `VEC_MODE8)))
+      result_ext = {{24{ext_byte[7]}}, ext_byte[7:0]};
+
+    // zero extend word
+    if(operator_i == `ALU_EXTHZ)
+      result_ext = {16'b0, ext_word[15:0]};
+
+    // sign extend word
+    if((operator_i == `ALU_EXTHS) || ((operator_i == `ALU_EXT) && (vector_mode_i == `VEC_MODE16)))
+      result_ext = {{16{ext_word[15]}}, ext_word[15:0]};
+  end
+
+  ////////////////////////////////////////////////////////////////////////////
+  // Bit Count Operations
+  //
+  // ff1: 1-based index of the lowest set bit of operand a (0 if none)
+  // fl1: 1-based index of the highest set bit of operand a (0 if none)
+  // clb: like fl1, but searches the highest bit that differs from bit 31
+  // cnt: number of set bits in operand a, summed up by an adder tree
+  ////////////////////////////////////////////////////////////////////////////
+
+  logic [31:0] ff_input;   // either op_a_i or its bit reversed version
+  logic [5:0]  ff1_result; // holds the index of the first '1'
+  logic [5:0]  fl1_result; // holds the index of the last '1'
+  logic        ff_cmp;     // compare value for ff1 and fl1
+  integer      q;
+
+  assign ff_input = (operator_i == `ALU_FF1) ? operand_a_i : operand_a_rev;
+  assign ff_cmp   = (operator_i == `ALU_CLB) ? ~operand_a_i[31] : 1'b1;
+
+  // search for first bit of ff_input that equals ff_cmp
+  always_comb
+  begin
+    ff1_result = 6'd0;
+
+    for(q = 0; q < 32; q++)
+    begin
+      if(ff_input[q] == ff_cmp)
+      begin
+        ff1_result = q + 6'd1;
+        break;
+      end
+    end
+  end
+
+  // special case if ff1_result is 0 (no match found), then we keep the 0
+  assign fl1_result = (ff1_result == 6'd0) ? 6'd0 : (6'd33 - ff1_result);
+
+  // count the number of '1's in a word
+  logic [5:0] cnt_result; // holds the number of '1's in a word
+  logic [1:0] cnt_l1[16];
+  logic [2:0] cnt_l2[8];
+  logic [3:0] cnt_l3[4];
+  logic [4:0] cnt_l4[2];
+
+  genvar l, m, n, p;
+  generate for(l = 0; l < 16; l++)
+    begin
+      assign cnt_l1[l] = operand_a_i[2*l] + operand_a_i[2*l + 1];
+    end
+  endgenerate
+
+  generate for(m = 0; m < 8; m++)
+    begin
+      assign cnt_l2[m] = cnt_l1[2*m] + cnt_l1[2*m + 1];
+    end
+  endgenerate
+
+  generate for(n = 0; n < 4; n++)
+    begin
+      assign cnt_l3[n] = cnt_l2[2*n] + cnt_l2[2*n + 1];
+    end
+  endgenerate
+
+  generate for(p = 0; p < 2; p++)
+    begin
+      assign cnt_l4[p] = cnt_l3[2*p] + cnt_l3[2*p + 1];
+    end
+  endgenerate
+
+  assign cnt_result = cnt_l4[0] + cnt_l4[1];
+
+  ////////////////////////////////////////////////////////////////////////////
+  // Result Mux
+  //
+  // Selects result_o and the carry / overflow / flag outputs according to
+  // operator_i and tells the shifter whether a left shift is requested.
+  // result_o stays 'x for ALU_NOP and for operators that are not listed.
+  //
+  ////////////////////////////////////////////////////////////////////////////
+
+  always_comb
+  begin
+    shift_left = 1'b0;
+    shift_amt  = operand_b_i;
+    result_o   = 'x;
+    carry_o    = 1'b0;
+    overflow_o = 1'b0;
+    flag_o     = 1'b0;
+
+    unique case (operator_i)
+      // Standard Operations
+      `ALU_ADD, `ALU_ADDC, `ALU_SUB:
+      begin // Addition defined above
+        result_o   = adder_result[31:0];
+        carry_o    = carry_out[3];
+        overflow_o = (adder_op_a[31] ^ adder_result[31]) & (adder_op_b[31] ^ adder_result[31]); // ++ => - and -- => +
+      end
+      `ALU_AVG, `ALU_AVGU: result_o = result_avg;
+      `ALU_AND:            result_o = operand_a_i & operand_b_i;
+      `ALU_OR:             result_o = operand_a_i | operand_b_i;
+      `ALU_XOR:            result_o = operand_a_i ^ operand_b_i;
+
+      // Shift Operations
+      `ALU_MOVHI:
+      begin
+        shift_left = 1'b1;
+        shift_amt  = 32'd16; // fixed shift: operand a moves into the upper halfword
+        result_o   = shift_left_result;
+      end
+      `ALU_SLL:
+      begin
+        shift_left = 1'b1;
+        result_o   = shift_left_result;
+      end
+
+      `ALU_SRL, `ALU_SRA, `ALU_ROR: result_o = shift_result;
+
+      // Extension Operations
+      `ALU_EXTWZ, `ALU_EXTWS: result_o = operand_a_i;
+      `ALU_EXTBZ, `ALU_EXTBS, `ALU_EXTHZ, `ALU_EXTHS, `ALU_EXT: result_o = result_ext;
+
+      // Min/Max/Abs, CMOV, INS
+      `ALU_MIN, `ALU_MINU, `ALU_MAX, `ALU_MAXU, `ALU_ABS, `ALU_CMOV, `ALU_INS: result_o = result_minmax;
+
+      // Comparison Operations
+      `ALU_EQ, `ALU_NE, `ALU_GTU, `ALU_GEU, `ALU_LTU, `ALU_LEU, `ALU_GTS, `ALU_GES, `ALU_LTS, `ALU_LES:
+      begin
+        result_o[31:24] = {8{cmp_result[3]}};
+        result_o[23:16] = {8{cmp_result[2]}};
+        result_o[15: 8] = {8{cmp_result[1]}};
+        result_o[ 7: 0] = {8{cmp_result[0]}};
+
+        case (cmp_mode_i)
+          `ALU_CMP_ANY:
+          begin
+            flag_o = any_result;
+          end
+          `ALU_CMP_ALL:
+          begin
+            flag_o = all_result;
+          end
+          `ALU_CMP_FULL:
+          begin
+            flag_o = cmp_result[0];
+          end
+          default:;
+        endcase //~case(cmp_mode_i)
+      end
+
+      // Set Lower Than Operations (result = 1, if a < b)
+      `ALU_SLTS, `ALU_SLTU: result_o = {31'b0, cmp_result[0]}; // 31 + 1 bits: matches the 32-bit result_o
+
+      `ALU_FF1: result_o = {26'h0, ff1_result};
+      `ALU_FL1: result_o = {26'h0, fl1_result};
+      `ALU_CLB: result_o = {26'h0, fl1_result};
+      `ALU_CNT: result_o = {26'h0, cnt_result};
+
+      `ALU_NOP: ; // Do nothing
+
+      default:
+      begin
+        //synopsys translate_off
+
+        //synopsys translate_on
+      end
+    endcase //~case(operator_i)
+  end
+
+endmodule //~module alu
+
diff --git a/controller.sv b/controller.sv new file mode 100644 index 00000000..49f7b193 --- /dev/null +++ b/controller.sv @@ -0,0 +1,1404 @@ +//////////////////////////////////////////////////////////////////////////////// +// Company: IIS @ ETHZ - Federal Institute of Technology // +// // +// Engineer: Matthias Baer - baermatt@student.ethz.ch // +// // +// Additional contributions by: // +// Igor Loi - igor.loi@unibo.it // +// Andreas Traber - atraber@student.ethz.ch // +// // +// // +// // +// Create Date: 19/09/2013 // +// Design Name: Pipelined OpenRISC Processor // +// Module Name: controller.sv // +// Project Name: OR10N // +// Language:
SystemVerilog // +// // +// Description: CPU Controller of the pipelined processor // +// // +// // +// Revision: // +// Revision v0.1 - File Created // +// Revision v0.2 - (August 8th 2014) Changed port and signal names, added // +// comments // +// Revision v0.3 - (December 1th 2014) Merged debug unit // +// Revision v0.4 - (January 6th 2015) Added vectorial instructions // +// // +// // +// // +//////////////////////////////////////////////////////////////////////////////// + +`include "defines.sv" + +module controller +( + input logic clk, + input logic rst_n, + + input logic fetch_enable_i, // Start the decoding + output logic eoc_o, // End of computation: triggered by a special instruction + output logic core_busy_o, // Core is busy processing instructions + + input logic [31:0] instr_rdata_i, // Instruction read from instr memory/cache: (sampled in the if stage) + output logic instr_req_o, // Fetch instruction Request: + input logic instr_gnt_i, // grant from icache + input logic instr_ack_i, // Acknow from instr memory or cache (means that data is available) + + output logic [2:0] pc_mux_sel_o, // Selector in the Fetch stage to select the rigth PC (normal, jump ...) 
+ output logic pc_mux_boot_o, // load boot address as PC, goes to the IF stage + + // ALU signals + output logic [`ALU_OP_WIDTH-1:0] alu_operator_o, // Operator in the Ex stage for the ALU block + output logic extend_immediate_o, // Extend a 16 bit immediate to 32 bit + output logic [1:0] alu_op_a_mux_sel_o, // Operator a is selected between reg value, PC or immediate + output logic [1:0] alu_op_b_mux_sel_o, // Operator b is selected between reg value or immediate + output logic alu_pc_mux_sel_o, // selects IF or ID PC for ALU computations + output logic [3:0] immediate_mux_sel_o, + + output logic [1:0] vector_mode_o, // selects between 32 bit, 16 bit and 8 bit vectorial modes + output logic scalar_replication_o, // activates scalar_replication for vectorial mode + output logic [1:0] alu_cmp_mode_o, // selects comparison mode for ALU (i.e. full, any, all) + + // Mupliplicator related control signals + output logic mult_is_running_o, // Multiplication operation is running + input logic mult_is_running_ex_i, // Multiplication operation is running in EX + output logic [1:0] mult_sel_subword_o, // Select subwords for 16x16 bit of multiplier + output logic [1:0] mult_signed_mode_o, // Multiplication in signed mode + output logic mult_use_carry_o, // Use carry for MAC + output logic mult_mac_en_o, // Use the accumulator after multiplication + + output logic regfile_wdata_mux_sel_o, // Mul selctor used in WB stage to select regfile wdata from ex result (ALU-MUL), from data memory, or special registers + input logic regfile_wdata_mux_sel_ex_i, // FW signal: Mul selctor used in WB stage to select regfile wdata from ex result (ALU-MUL), from data memory, or special registers + output logic regfile_we_o, // Write Enable to regfile + output logic [1:0] regfile_alu_waddr_mux_sel_o, // Select register write address for ALU/MUL operations + + output logic regfile_alu_we_o, // Write Enable to regfile 2nd port + + output logic prepost_useincr_o, // When not active bypass the alu 
result=op_a + input logic data_misaligned_i, + + output logic sp_we_o, // Write enable to special purpose register + input logic sp_we_ex_i, // Manipulated sp_we from ex stage : FW + + // LD/ST unit signals + output logic data_we_o, // Write enable to data memory + output logic [1:0] data_type_o, // Data type on data memory: byte, half word or word + output logic data_sign_extension_o, // Sign extension on read data from data memory + output logic [1:0] data_reg_offset_o, // Offset in bytes inside register for stores + output logic data_req_o, // Request for a transaction to data memory + input logic data_ack_i, // Data memory request-acknowledge + input logic data_req_ex_i, // Delayed copy of the data_req_o + input logic data_rvalid_i, // rvalid from data memory + + // hwloop signals + output logic [2:0] hwloop_we_o, // write enables for hwloop regs + output logic [1:0] hwloop_regid_o, // identifies the hwloop reg set + output logic hwloop_wb_mux_sel_o, // select data to write to hwloop regs + output logic [1:0] hwloop_cnt_mux_sel_o, // selects hwloop counter input + input logic hwloop_jump_i, // modify pc_mux_sel to select the hwloop addr + + // Interrupt signals + input logic irq_present_i, // there is an IRQ, so if we are sleeping we should wake up now + + // Exception Controller Signals + output logic jump_in_id_o, // jump instruction in ID stage + output logic illegal_insn_o, // illegal instruction encountered + output logic trap_insn_o, // trap instruction encountered + output logic pipe_flush_o, // pipe flush requested by controller + input logic pc_valid_i, // is the next_pc currently valid? 
+ output logic clear_isr_running_o, // an l.rfe instruction was encountered, exit ISR + input logic pipe_flushed_i, // Pipe is flushed + + // Debug Unit Signals + input logic dbg_stall_i, // Pipeline stall is requested + input logic dbg_set_npc_i, // Change PC to value from debug unit + + // SPR Signals + input logic sr_flag_fw_i, // forwared branch signal + input logic sr_flag_i, // branch signal + input logic set_flag_ex_i, // alu is currently updating the flag if 1 + output logic set_flag_o, // to special purpose registers --> flag + output logic set_carry_o, // to special purpose registers --> carry + output logic set_overflow_o, // to special purpose registers --> overflow + output logic restore_sr_o, // restores status register after interrupt + + // Forwarding signals from regfile + input logic [4:0] regfile_waddr_ex_i, // FW: write address from EX stage + input logic regfile_we_ex_i, // FW: write enable from EX stage + input logic [4:0] regfile_waddr_wb_i, // FW: write address from WB stage + input logic regfile_we_wb_i, // FW: write enable from WB stage + input logic [4:0] regfile_alu_waddr_fw_i, // FW: ALU/MUL write address from EX stage + input logic regfile_alu_we_fw_i, // FW: ALU/MUL write enable from EX stage + output logic [1:0] operand_a_fw_mux_sel_o, // regfile ra data selector form ID stage + output logic [1:0] operand_b_fw_mux_sel_o, // regfile rb data selector form ID stage + output logic [1:0] operand_c_fw_mux_sel_o, // regfile rc data selector form ID stage + + output logic drop_instruction_o, // prevent instruction to enter ID stage +`ifdef BRANCH_PREDICTION + output logic wrong_branch_taken_o, // 1 if the wrong branch was selected + output logic take_branch_o, // 1 if branch should be taken +`endif + output logic stall_if_o, // Stall IF stage (deassert requests) + output logic stall_id_o, // Stall ID stage (and instr and data memory interface) ( ID_STAGE ) + output logic stall_ex_o, // Stall ex stage ( EX_STAGE ) + output logic stall_wb_o // 
Stall write to register file due contentions ( WB_STAGE ) +); + + // FSM state encoding + enum logic [1:0] { RESET, IDLE, FIRST_FETCH, DECODE} ctrl_fsm_cs, ctrl_fsm_ns; + + logic reg_d_ex_is_reg_a_id; + logic reg_d_ex_is_reg_b_id; + logic reg_d_ex_is_reg_c_id; + logic reg_d_wb_is_reg_a_id; + logic reg_d_wb_is_reg_b_id; + logic reg_d_wb_is_reg_c_id; + logic reg_d_alu_is_reg_a_id; + logic reg_d_alu_is_reg_b_id; + logic reg_d_alu_is_reg_c_id; + + logic [`ALU_OP_WIDTH-1:0] alu_operator; + logic mult_is_running; + logic regfile_we; + logic regfile_alu_we; + logic data_we; + logic data_req; + logic set_flag; + logic set_overflow; + logic set_carry; + logic deassert_we; + + logic lsu_stall; + logic misalign_stall; + logic mtspr_stall; + logic mfspr_stall; + logic instr_ack_stall; + logic load_stall; + logic jr_stall; + + logic set_npc; +`ifdef BRANCH_PREDICTION + logic wrong_branch_taken; +`endif + logic rega_used; + logic regb_used; + logic regc_used; + + //////////////////////////////////////////////////////////////////////////////////////////// + // ____ ___ ____ _____ ____ ___ _ _ _____ ____ ___ _ _ _____ ____ // + // / ___/ _ \| _ \| ____| / ___/ _ \| \ | |_ _| _ \ / _ \| | | | | ____| _ \ // + // | | | | | | |_) | _| | | | | | | \| | | | | |_) | | | | | | | | _| | |_) | // + // | |__| |_| | _ <| |___ | |__| |_| | |\ | | | | _ <| |_| | |___| |___| |___| _ < // + // \____\___/|_| \_\_____| \____\___/|_| \_| |_| |_| \_\\___/|_____|_____|_____|_| \_\ // + // // + //////////////////////////////////////////////////////////////////////////////////////////// + always_comb + begin + // Default values + eoc_o = 1'b0; + + instr_req_o = 1'b1; + + pc_mux_sel_o = `INCR_PC; + pc_mux_boot_o = 1'b0; + + alu_operator = `ALU_NOP; + extend_immediate_o = 1'b0; + alu_op_a_mux_sel_o = `OP_A_REGA_OR_FWD; + alu_op_b_mux_sel_o = `OP_B_REGB_OR_FWD; + alu_pc_mux_sel_o = 1'b0; + + vector_mode_o = `VEC_MODE32; + scalar_replication_o = 1'b0; + alu_cmp_mode_o = `ALU_CMP_FULL; + + mult_is_running = 
1'b0; + mult_signed_mode_o = 2'b00; + mult_sel_subword_o = 2'b00; + mult_use_carry_o = 1'b0; + mult_mac_en_o = 1'b0; + + regfile_wdata_mux_sel_o = 1'b1; + regfile_we = 1'b0; + regfile_alu_we = 1'b0; + regfile_alu_waddr_mux_sel_o = 2'b01; + + prepost_useincr_o = 1'b1; + + hwloop_we_o = 3'b0; + hwloop_regid_o = 2'b0; + hwloop_wb_mux_sel_o = 1'b0; + hwloop_cnt_mux_sel_o = 2'b00; + immediate_mux_sel_o = `IMM_I; // TODO: Check if sensible default + + sp_we_o = 1'b0; + + data_we = 1'b0; + data_type_o = 2'b00; + data_sign_extension_o = 1'b0; + data_reg_offset_o = 2'b00; + data_req = 1'b0; + + set_flag = 1'b0; + set_overflow = 1'b0; + set_carry = 1'b0; + + restore_sr_o = 1'b0; + clear_isr_running_o = 1'b0; + + illegal_insn_o = 1'b0; + trap_insn_o = 1'b0; + pipe_flush_o = 1'b0; + + ctrl_fsm_ns = ctrl_fsm_cs; + + rega_used = 1'b0; + regb_used = 1'b0; + regc_used = 1'b0; +`ifdef BRANCH_PREDICTION + wrong_branch_taken_o = 1'b0; + take_branch_o = 1'b0; +`endif + + case (ctrl_fsm_cs) + RESET: + begin + // We were just reset and have to copy the boot address from + // outside to our PC + // We do not yet start fetching instructions as the next_pc is invalid! + instr_req_o = 1'b0; + pc_mux_boot_o = 1'b1; + + if (fetch_enable_i == 1'b1) + ctrl_fsm_ns = IDLE; + end + + IDLE: + begin + // we begin execution when either fetch_enable is high or an + // interrupt has arrived + instr_req_o = fetch_enable_i || irq_present_i; + pc_mux_sel_o = `NO_INCR; + + if (fetch_enable_i || irq_present_i) + begin + ctrl_fsm_ns = FIRST_FETCH; + end + end // case: IDLE + + FIRST_FETCH: + begin + // Stall because of IF miss + if (instr_ack_i == 1'b1) + begin + ctrl_fsm_ns = DECODE; + end + + // hwloop detected, jump to start address! 
+ // Attention: This has to be done in the DECODE and the FIRST_FETCH states + if (hwloop_jump_i == 1'b1) + pc_mux_sel_o = `HWLOOP_ADDR; + end + + DECODE: + begin + $display("%t: Decoding Instruction 0x%h.", $time, instr_rdata_i[31:0]); + // Instruction Decoding + unique case (instr_rdata_i[6:0]) + + + ////////////////////////////////////// + // _ _ _ __ __ ____ ____ // + // | | | | | \/ | _ \/ ___| // + // _ | | | | | |\/| | |_) \___ \ // + // | |_| | |_| | | | | __/ ___) | // + // \___/ \___/|_| |_|_| |____/ // + ////////////////////////////////////// + + /* + `INSTR_: + begin // Jump + pc_mux_sel_o = `PC_FROM_IMM; + end + + + `OPCODE_JAL: + begin // Jump and Link + pc_mux_sel_o = `PC_FROM_IMM; + alu_op_a_mux_sel_o = `OP_A_CURRPC; + alu_op_b_mux_sel_o = `OP_B_IMM; + immediate_mux_sel_o = `IMM_HEX4; + alu_operator = `ALU_ADD; + regfile_alu_waddr_mux_sel_o = 2'b10; // select r9 to write back return address + regfile_alu_we = 1'b1; + end + + `OPCODE_JR: + begin // Jump Register + pc_mux_sel_o = `PC_FROM_REGFILE; + regb_used = 1'b1; + end + + `OPCODE_JALR: begin // Jump and Link Register + pc_mux_sel_o = `PC_FROM_REGFILE; + alu_op_a_mux_sel_o = `OP_A_CURRPC; + alu_op_b_mux_sel_o = `OP_B_IMM; + immediate_mux_sel_o = `IMM_HEX4; + alu_operator = `ALU_ADD; + regfile_alu_waddr_mux_sel_o = 2'b10; // select r9 to write back return address + regfile_alu_we = 1'b1; + regb_used = 1'b1; + end + +`ifndef BRANCH_PREDICTION + `OPCODE_BNF: + begin // Branch if No Flag + if (sr_flag_fw_i == 1'b0) + pc_mux_sel_o = `PC_FROM_IMM; + end + + `OPCODE_BF: + begin // Branch if Flag + if (sr_flag_fw_i == 1'b1) + pc_mux_sel_o = `PC_FROM_IMM; + end +`else // BRANCH_PREDICTION + `OPCODE_BNF: + begin // Branch if No Flag + if (set_flag_ex_i == 1'b0) + if (sr_flag_i == 1'b0) + pc_mux_sel_o = `PC_FROM_IMM; + else + pc_mux_sel_o = `INCR_PC; + else // default branch + begin + pc_mux_sel_o = instr_rdata_i[25] ? 
`PC_FROM_IMM : `INCR_PC; + // decision was wrong + wrong_branch_taken_o = (sr_flag_fw_i ^ instr_rdata_i[25]) ? 1'b0 : 1'b1; + take_branch_o = instr_rdata_i[25] ? `INCR_PC : `PC_FROM_IMM; + end + end + + `OPCODE_BF: + begin // Branch if Flag + if (set_flag_ex_i == 1'b0) + if (sr_flag_i == 1'b1) + pc_mux_sel_o = `PC_FROM_IMM; + else + pc_mux_sel_o = `INCR_PC; + else // default branch + begin + pc_mux_sel_o = instr_rdata_i[25] ? `PC_FROM_IMM : `INCR_PC; + // decision was wrong + wrong_branch_taken_o = (sr_flag_fw_i ^ instr_rdata_i[25]) ? 1'b1 : 1'b0; + take_branch_o = instr_rdata_i[25] ? `INCR_PC : `PC_FROM_IMM; + end + end +`endif + + `OPCODE_NOP: + begin // No Operation + // if (instr_rdata_i[25:24] == 2'b01) + // $display("%t: Executing l.nop with 0x%h.", $time, instr_rdata_i[15:0]); + // else + // $display("%t: Illegal l.nop received.", $time); + end + + `OPCODE_EOC: begin // End of Computation (Custom Instruction 1) + eoc_o = 1'b1; + pc_mux_sel_o = `NO_INCR; + end + + `OPCODE_RFE: + begin + pc_mux_sel_o = `EXC_PC_REG; // restore PC from EPCR + restore_sr_o = 1'b1; + clear_isr_running_o = 1'b1; + end + + */ + + ////////////////////////////////// + // _ ____ ______ _____ // + // | | | _ \ / / ___|_ _| // + // | | | | | |/ /\___ \ | | // + // | |___| |_| / / ___) || | // + // |_____|____/_/ |____/ |_| // + // // + ////////////////////////////////// + + `OPCODE_STORE: begin + alu_op_b_mux_sel_o = `OP_B_IMM; + alu_operator = `ALU_ADD; + data_req = 1'b1; + data_we = 1'b1; + rega_used = 1'b1; + regb_used = 1'b1; + + unique case (instr_rdata_i) inside + `INSTR_SW: data_type_o = 2'b00; + `INSTR_SH: data_type_o = 2'b01; + `INSTR_SB: data_type_o = 2'b10; + default: begin + data_req = 1'b0; + data_we = 1'b0; + rega_used = 1'b0; + regb_used = 1'b0; + end + endcase // unique case (instr_rdata_i) + end + + /* + + // Pre/Post-Increment Stores and Register-Register Stores + `OPCODE_STPOST, `OPCODE_STPRE: begin + alu_operator = `ALU_ADD; // addr is generated in ID stage so no need 
for addr gen in alu TODO: always use ID stage addr + data_req = 1'b1; + regfile_alu_waddr_mux_sel_o = 2'b00; + rega_used = 1'b1; + regb_used = 1'b1; + data_we = 1'b1; // write to memory + + + if (instr_rdata_i[31:26] == `OPCODE_STPOST) + begin + prepost_useincr_o = 1'b0; // if post increment instruction, don't use the modified address + end + + case (instr_rdata_i[5:4]) + default: begin + alu_op_b_mux_sel_o = `OP_B_IMM; + immediate_mux_sel_o = `IMM_5N6S; // offset in 11bit immediate + regfile_alu_we = 1'b1; // write new addr value into regfile using portB + end + + 2'b11: begin // register-register store with post increment + regc_used = 1'b1; + alu_op_b_mux_sel_o = `OP_B_REGC_OR_FWD; + regfile_alu_we = 1'b1; // write new addr value into regfile using portB + end + + 2'b01: begin // register-register store without pre/post-increment + alu_op_b_mux_sel_o = `OP_B_REGC_OR_FWD; + regc_used = 1'b1; + end + endcase // case (instr_rdata_i[5:4]) + + // Word, Half Word or Byte store + case (instr_rdata_i[3:2]) + default: data_type_o = 2'b00; + 2'b00: data_type_o = 2'b00; // word + 2'b10: data_type_o = 2'b01; // half word + 2'b11: data_type_o = 2'b10; // byte + endcase // case(instr_rdata_i[4:3] + + // offset inside value to be stored, e.g. 
l.sh1, l.sb1 and so on + data_reg_offset_o = instr_rdata_i[1:0]; + end + + // Pre/Post-Increment Loads and Register-Register Loads + `OPCODE_LDPOST, `OPCODE_LDPRE: begin + alu_operator = `ALU_ADD; // addr is generated in ID stage so no need for addr gen in alu TODO: always use ID stage addr + data_req = 1'b1; + rega_used = 1'b1; + regfile_wdata_mux_sel_o = 1'b1; // get data from wb + regfile_alu_waddr_mux_sel_o = 2'b00; + regfile_we = 1'b1; // write regfile portA with data coming from mem + + if (instr_rdata_i[31:26] == `OPCODE_LDPOST) + prepost_useincr_o = 1'b0; // if post increment instruction, don't use the modified address + + // Since we also support register-register loads without + // pre/post-increment, we have to distinguish the two cases + // here. If no pre/post is used, we don't write back to + // the second write port of the RF + if (instr_rdata_i[5:4] == 2'b01) // normal case + regfile_alu_we = 1'b0; + else // pre/post case + regfile_alu_we = 1'b1; // write new addr value into regfile using portB + + if (instr_rdata_i[4] == 1'b0) + begin + alu_op_b_mux_sel_o = `OP_B_IMM; + immediate_mux_sel_o = `IMM_11S; // offset in 11bit immediate + end + else + begin + alu_op_b_mux_sel_o = `OP_B_REGB_OR_FWD; // offset in rB register + regb_used = 1'b1; + end + + // Word, Half Word or Byte load + case (instr_rdata_i[3:2]) + default: data_type_o = 2'b00; + 2'b00: data_type_o = 2'b00; // word + 2'b10: data_type_o = 2'b01; // half word + 2'b11: data_type_o = 2'b10; // byte + endcase // case(instr_rdata_i[4:3] + + // sign extension + data_sign_extension_o = instr_rdata_i[1]; + end + + `OPCODE_LWZ, `OPCODE_LWS: begin // Load Single Word and Extend with Zero/Sign (is equal in 32-bit) + alu_op_b_mux_sel_o = `OP_B_IMM; + immediate_mux_sel_o = `IMM_16; + alu_operator = `ALU_ADD; + data_req = 1'b1; + regfile_wdata_mux_sel_o = 1'b1; + regfile_we = 1'b1; + rega_used = 1'b1; + end + + `OPCODE_LBZ: begin // Load Byte and Extend with Zero + alu_op_b_mux_sel_o = `OP_B_IMM; + 
immediate_mux_sel_o = `IMM_16; + alu_operator = `ALU_ADD; + data_req = 1'b1; + regfile_wdata_mux_sel_o = 1'b1; + regfile_we = 1'b1; + data_type_o = 2'b10; + rega_used = 1'b1; + end + + `OPCODE_LBS: begin // Load Byte and Extend with Sign + alu_op_b_mux_sel_o = `OP_B_IMM; + immediate_mux_sel_o = `IMM_16; + alu_operator = `ALU_ADD; + data_req = 1'b1; + regfile_wdata_mux_sel_o = 1'b1; + regfile_we = 1'b1; + data_type_o = 2'b10; + data_sign_extension_o = 1'b1; + rega_used = 1'b1; + end + + `OPCODE_LHZ: begin // Load Half Word and Extend with Zero + alu_op_b_mux_sel_o = `OP_B_IMM; + immediate_mux_sel_o = `IMM_16; + alu_operator = `ALU_ADD; + data_req = 1'b1; + regfile_wdata_mux_sel_o = 1'b1; + regfile_we = 1'b1; + data_type_o = 2'b01; + rega_used = 1'b1; + end + + `OPCODE_LHS: begin // Load Half Word and Extend with Sign + alu_op_b_mux_sel_o = `OP_B_IMM; + immediate_mux_sel_o = `IMM_16; + alu_operator = `ALU_ADD; + data_req = 1'b1; + regfile_wdata_mux_sel_o = 1'b1; + regfile_we = 1'b1; + data_type_o = 2'b01; + data_sign_extension_o = 1'b1; + rega_used = 1'b1; + end + + */ + + ////////////////////////// + // _ _ _ _ // + // / \ | | | | | | // + // / _ \ | | | | | | // + // / ___ \| |__| |_| | // + // /_/ \_\_____\___/ // + // // + ////////////////////////// + + //`INSTR_LUI: begin // Load Upper Immediate + `OPCODE_LUI: begin // Load Upper Immediate + alu_op_a_mux_sel_o = `OP_A_ZERO; + alu_op_b_mux_sel_o = `OP_B_IMM; + immediate_mux_sel_o = `IMM_U; + alu_operator = `ALU_ADD; + regfile_alu_we = 1'b1; + end + + //`INSTR_AUIPC: begin // Add Upper Immediate to PC + `OPCODE_AUIPC: begin // Add Upper Immediate to PC + alu_pc_mux_sel_o = 1'b1; + alu_op_a_mux_sel_o = `OP_A_CURRPC; + alu_op_b_mux_sel_o = `OP_B_IMM; + immediate_mux_sel_o = `IMM_U; + alu_operator = `ALU_ADD; + regfile_alu_we = 1'b1; + end + + `OPCODE_OPIMM: begin // Reigster-Immediate ALU Operations + alu_op_b_mux_sel_o = `OP_B_IMM; + immediate_mux_sel_o = `IMM_I; + regfile_alu_we = 1'b1; + rega_used = 1'b1; + + 
unique case (instr_rdata_i) inside + `INSTR_ADDI: alu_operator = `ALU_ADD; // Add Immediate + `INSTR_SLTI: alu_operator = `ALU_SLTS; // Set to one if Lower Than Immediate + `INSTR_SLTIU: alu_operator = `ALU_SLTU; // Set to one if Lower Than Immediate Unsigned + `INSTR_XORI: alu_operator = `ALU_XOR; // Exclusive Or with Immediate + `INSTR_ORI: alu_operator = `ALU_OR; // Or with Immediate + `INSTR_ANDI: alu_operator = `ALU_AND; // And with Immediate + `INSTR_SLLI: alu_operator = `ALU_SLL; // Shift Left Logical by Immediate + `INSTR_SRLI: alu_operator = `ALU_SRL; // Shift Right Logical by Immediate + `INSTR_SRAI: alu_operator = `ALU_SRA; // Shift Right Arithmetically by Immediate + endcase // unique case (instr_rdata_i) + end // case: `OPCODE_OPIMM + + /* + `OPCODE_ADDIC: begin // Add Immediate and Carry + alu_op_b_mux_sel_o = `OP_B_IMM; + immediate_mux_sel_o = `IMM_16; + alu_operator = `ALU_ADDC; + regfile_alu_we = 1'b1; + set_overflow = 1'b1; + set_carry = 1'b1; + rega_used = 1'b1; + end + + `OPCODE_SHIFT: begin // Shift-Instructions + alu_op_b_mux_sel_o = `OP_B_IMM; + immediate_mux_sel_o = `IMM_16; + alu_operator = {4'b0010, instr_rdata_i[7:6]}; // 00 = SLL, 01 = SRL, 10 = SRA, 11 = ROR + regfile_alu_we = 1'b1; + rega_used = 1'b1; + end + */ + + `OPCODE_OP: begin // ALU register-register operation + regfile_alu_we = 1'b1; + rega_used = 1'b1; + regb_used = 1'b1; + + unique case (instr_rdata_i) inside + `INSTR_ADD: alu_operator = `ALU_ADD; // Add + `INSTR_SUB: alu_operator = `ALU_SUB; // Sub + `INSTR_SLL: alu_operator = `ALU_SLL; // Shift Left Logical + `INSTR_SLT: alu_operator = `ALU_SLTS; // Set Lower Than + `INSTR_SLTU: alu_operator = `ALU_SLTU; // Set Lower Than Unsigned + `INSTR_XOR: alu_operator = `ALU_XOR; // Xor + `INSTR_SRL: alu_operator = `ALU_SRL; // Shift Right Logical + `INSTR_SRA: alu_operator = `ALU_SRA; // Shift Right Arithmetic + `INSTR_OR: alu_operator = `ALU_OR; // Or + `INSTR_AND: alu_operator = `ALU_AND; // And + endcase // unique case 
(instr_rdata_i) + end + + + /* + + `OPCODE_MOVHI: + begin + if (instr_rdata_i[16] == 1'b0) + begin // Move Immediate High + extend_immediate_o = 1'b1; + alu_op_a_mux_sel_o = `OP_A_IMM16; + alu_operator = `ALU_MOVHI; + regfile_alu_we = 1'b1; + end + else + begin + // synopsys translate_off + $display("%t: Illegal l.movhi received.", $time); + // synopsys translate_on + illegal_insn_o = 1'b1; + end + end + + `OPCODE_MULI: begin // Multiply Immediate Signed + alu_op_b_mux_sel_o = `OP_B_IMM; + immediate_mux_sel_o = `IMM_16; + mult_is_running = 1'b1; + + regfile_alu_we = 1'b1; + regfile_alu_waddr_mux_sel_o = 2'b01; + rega_used = 1'b1; + end + + `OPCODE_ALU: begin // Arithmetic Operation + rega_used = 1'b1; + regb_used = 1'b1; + + case (instr_rdata_i[9:8]) + 2'b00: begin // ALU Operation + regfile_alu_we = 1'b1; + + casex (instr_rdata_i[3:0]) + 4'b0XXX: begin // Standard Operation + alu_operator = {3'b000, instr_rdata_i[2:0]}; + + if ((instr_rdata_i[2:0] ==? 3'b00X) || (instr_rdata_i[2:0] == 3'b010)) begin // l.add, l.addc & l.sub + set_overflow = 1'b1; + set_carry = 1'b1; + end + end + 4'b1000: begin // Shift Operation + alu_operator = {4'b0010, instr_rdata_i[7:6]}; + end + 4'b110X: begin // l.ext{b,h,w}{s,z} + alu_operator = {3'b010, instr_rdata_i[7:6], instr_rdata_i[0]}; + regb_used = 1'b0; // register b is not used + end + 4'b1110: begin // l.cmov + alu_operator = `ALU_CMOV; + end + 4'b1111: begin // l.ff1 + alu_operator = `ALU_FF1; + end + default: begin + // synopsys translate_off + $display("%t: Illegal ALU instruction received.", $time); + // synopsys translate_on + regfile_alu_we = 1'b0; // disable Write Enable for illegal instruction + illegal_insn_o = 1'b1; + end + endcase // casex (instr_rdata_i[3:2]) + end + + 2'b01: begin // l.fl1, l.clb, l.cnt + regfile_alu_we = 1'b1; + regb_used = 1'b0; + + case (instr_rdata_i[3:0]) + 4'b1101: alu_operator = `ALU_CNT; + 4'b1110: alu_operator = `ALU_CLB; + 4'b1111: alu_operator = `ALU_FL1; + + default: begin + // synopsys 
translate_off + $display("%t: Illegal ALU instruction received.", $time); + // synopsys translate_on + regfile_alu_we = 1'b0; // disable Write Enable for illegal instruction + illegal_insn_o = 1'b1; + end + endcase //~case(instr_rdata_i[3:0]) + end + + 2'b10: begin // Min, Max, Abs, Avg + regfile_alu_we = 1'b1; + + case (instr_rdata_i[3:0]) + 4'b0000: alu_operator = `ALU_MIN; + 4'b0001: alu_operator = `ALU_MINU; + 4'b0010: alu_operator = `ALU_MAX; + 4'b0011: alu_operator = `ALU_MAXU; + 4'b0100: alu_operator = `ALU_AVG; + 4'b0101: alu_operator = `ALU_AVGU; + + 4'b1000: begin + regb_used = 1'b0; + alu_operator = `ALU_ABS; + end + + default: begin + // synopsys translate_off + $display("%t: Illegal ALU instruction received.", $time); + // synopsys translate_on + regfile_alu_we = 1'b0; // disable Write Enable for illegal instruction + illegal_insn_o = 1'b1; + end + endcase //~case(instr_rdata_i[3:0]) + end + + 2'b11: begin // Multiplication + if ((instr_rdata_i[3:0] == 4'b0110) || (instr_rdata_i[3:0] == 4'b1011)) + begin // Is multiplication and no division + mult_is_running = 1'b1; + + if ((instr_rdata_i[3:0] == 4'b0110) || (instr_rdata_i[3:0] == 4'b1011)) // l.mul & l.mulu + begin + regfile_alu_we = 1'b1; + regfile_alu_waddr_mux_sel_o = 2'b01; + end + end + else + begin + // synopsys translate_off + $display("%t: Division instruction received, this is not supported.", $time); + // synopsys translate_on + illegal_insn_o = 1'b1; + end + end + endcase; // case (instr_rdata_i[9:8]) + end + + `OPCODE_MAC: begin // MAC instruction + mult_is_running = 1'b1; + + rega_used = 1'b1; + regb_used = 1'b1; + + regfile_alu_waddr_mux_sel_o = 2'b01; + regfile_alu_we = 1'b1; + + case (instr_rdata_i[6:5]) + 2'b00: begin // MAC + case (instr_rdata_i[3:0]) + 4'b1000: begin // l.mac + mult_mac_en_o = 1'b1; + regc_used = 1'b1; + set_carry = 1'b1; + set_overflow = 1'b1; + end + + 4'b1001: begin // l.mac.c + mult_use_carry_o = 1'b1; + mult_mac_en_o = 1'b1; + regc_used = 1'b1; + set_carry = 
1'b1; + set_overflow = 1'b1; + end + + default: begin + // synopsys translate_off + $display("%t: Illegal MAC instruction received.", $time); + // synopsys translate_on + regfile_alu_we = 1'b0; + illegal_insn_o = 1'b1; + end + endcase // case (instr_rdata_i[3:0]) + end + + 2'b01: begin // MAC with subword selection + vector_mode_o = `VEC_MODE216; + mult_mac_en_o = 1'b1; + regc_used = 1'b1; + mult_sel_subword_o = instr_rdata_i[2:1]; + mult_signed_mode_o = instr_rdata_i[4:3]; + mult_use_carry_o = instr_rdata_i[0]; + set_carry = 1'b1; + set_overflow = 1'b1; + end + + 2'b11: begin // mult with subword selection + vector_mode_o = `VEC_MODE216; + mult_sel_subword_o = instr_rdata_i[2:1]; + mult_signed_mode_o = instr_rdata_i[4:3]; + end + + default: begin + // synopsys translate_off + $display("%t: Illegal MAC instruction received.", $time); + // synopsys translate_on + regfile_alu_we = 1'b0; + illegal_insn_o = 1'b1; + end + endcase + end + + `OPCODE_SFI: begin // Set Flag Immediate-Instructions + if (instr_rdata_i[25] == 1'b0) begin + alu_op_b_mux_sel_o = `OP_B_IMM; + immediate_mux_sel_o = `IMM_16; + alu_operator = {2'b10, instr_rdata_i[24:21]}; + set_flag = 1'b1; + rega_used = 1'b1; + end + else begin + // synopsys translate_off + $display("%t: Illegal Set Flag Immediate instruction received.", $time); + // synopsys translate_on + illegal_insn_o = 1'b1; + end + end + + `OPCODE_SF: begin // Set Flag Instruction + if (instr_rdata_i[25] == 1'b0) begin + alu_operator = {2'b10, instr_rdata_i[24:21]}; + set_flag = 1'b1; + rega_used = 1'b1; + regb_used = 1'b1; + end + else begin + // synopsys translate_off + $display("%t: Illegal Set Flag instruction received.", $time); + // synopsys translate_on + illegal_insn_o = 1'b1; + end + end + + `OPCODE_VEC: begin // vectorial alu operations + rega_used = 1'b1; + regfile_alu_we = 1'b1; + + if (instr_rdata_i[0] == 1'b0) // choose vector size + vector_mode_o = `VEC_MODE16; + else + vector_mode_o = `VEC_MODE8; + + if ((instr_rdata_i[7:6] 
== 2'b01) || (instr_rdata_i[7:6] == 2'b10)) // replicate scalar 2 or 4 times + scalar_replication_o = 1'b1; + + if (instr_rdata_i[7:6] == 2'b10) // use immediate as operand b + begin + alu_op_b_mux_sel_o = `OP_B_IMM; + immediate_mux_sel_o = `IMM_VEC; + end + else + regb_used = 1'b1; + + // now decode the sub opcodes + case (instr_rdata_i[5:1]) + 5'b00000: alu_operator = `ALU_ADD; + 5'b00001: alu_operator = `ALU_SUB; + 5'b00010: alu_operator = `ALU_AVG; + 5'b00011: alu_operator = `ALU_MIN; + 5'b00100: alu_operator = `ALU_MAX; + 5'b00101: alu_operator = `ALU_SRL; + 5'b00110: alu_operator = `ALU_SRA; + 5'b00111: alu_operator = `ALU_SLL; + + 5'b01000: begin // lv32.mul + regfile_alu_waddr_mux_sel_o = 2'b01; + mult_is_running = 1'b1; + end + + 5'b01001: alu_operator = `ALU_OR; + 5'b01010: alu_operator = `ALU_XOR; + 5'b01011: alu_operator = `ALU_AND; + + 5'b01100: begin // lv32.ins + alu_operator = `ALU_INS; + scalar_replication_o = 1'b1; + end + + 5'b10000: begin // lv32.abs + regb_used = 1'b0; // abs does not use operand b + alu_operator = `ALU_ABS; + end + + 5'b10001: begin // lv32.ext + regb_used = 1'b0; + alu_operator = `ALU_EXT; + end + + default: begin // unknown instruction encountered + regfile_alu_we = 1'b0; + illegal_insn_o = 1'b1; + // synopsys translate_off + $display("%t: Unknown vector opcode 0x%h.", $time, instr_rdata_i[5:1]); + // synopsys translate_on + end + endcase // instr_rdata[5:1] + end + + `OPCODE_VCMP: begin // Vectorial comparisons, i.e. 
lv32.cmp_*, lv32.all_*, lv32.any_* + rega_used = 1'b1; + regfile_alu_we = 1'b1; + + if (instr_rdata_i[0] == 1'b0) // choose vector size + vector_mode_o = `VEC_MODE16; + else + vector_mode_o = `VEC_MODE8; + + if ((instr_rdata_i[7:6] == 2'b01) || (instr_rdata_i[7:6] == 2'b10)) // replicate scalar 2 or 4 times + scalar_replication_o = 1'b1; + + if (instr_rdata_i[7:6] == 2'b10) // use immediate as operand b + begin + alu_op_b_mux_sel_o = `OP_B_IMM; + immediate_mux_sel_o = `IMM_VEC; + end + else + regb_used = 1'b1; + + // now decode the sub opcodes for the ALU + case (instr_rdata_i[3:1]) + 3'b000: alu_operator = `ALU_EQ; + 3'b001: alu_operator = `ALU_NE; + 3'b010: alu_operator = `ALU_GTS; + 3'b011: alu_operator = `ALU_GES; + 3'b100: alu_operator = `ALU_LTS; + 3'b101: alu_operator = `ALU_LES; + + default: begin // unknown instruction encountered + illegal_insn_o = 1'b1; + // synopsys translate_off + $display("%t: Unknown vector opcode 0x%h.", $time, instr_rdata_i[5:1]); + // synopsys translate_on + end + endcase //~case(instr_rdata_i[3:1]) + + alu_cmp_mode_o = instr_rdata_i[5:4]; // which kind of comparison do we have here, i.e. 
full, any, all + + if((instr_rdata_i[5:4] == `ALU_CMP_ANY) || (instr_rdata_i[5:4] == `ALU_CMP_ALL)) + set_flag = 1'b1; // set the flag for lv32.all_* and lv32.any_* + end + + //////////////////////////////////////////////// + // ____ ____ _____ ____ ___ _ _ // + // / ___|| _ \| ____/ ___|_ _| / \ | | // + // \___ \| |_) | _|| | | | / _ \ | | // + // ___) | __/| |__| |___ | | / ___ \| |___ // + // |____/|_| |_____\____|___/_/ \_\_____| // + // // + //////////////////////////////////////////////// + + `OPCODE_MTSPR: begin // Move To Special-Purpose Register + alu_operator = `ALU_OR; + alu_op_b_mux_sel_o = `OP_B_IMM; + sp_we_o = 1'b1; + rega_used = 1'b1; + regb_used = 1'b1; + end + + `OPCODE_MFSPR: begin // Move From Special-Purpose Register + alu_operator = `ALU_OR; + alu_op_b_mux_sel_o = `OP_B_IMM; + immediate_mux_sel_o = `IMM_16; + regfile_wdata_mux_sel_o = 1'b0; + regfile_we = 1'b1; + rega_used = 1'b1; + end + + `OPCODE_SYNC: + begin + if (instr_rdata_i[25] == 1'b1) // sync operation + begin // l.psync, flush pipeline. 
Actually this also does l.msync + pipe_flush_o = 1'b1; + end + else + begin + if (instr_rdata_i[24:16] == 9'h100) + begin + // l.trap + // Currently we ignore the immediate + trap_insn_o = 1'b1; + end + else + illegal_insn_o = 1'b1; + end + end + + /////////////////////////////////////////////// + // _ ___ ___ ___ ___ ____ // + // | | | \ \ / / | / _ \ / _ \| _ \ // + // | |_| |\ \ /\ / /| | | | | | | | | |_) | // + // | _ | \ V V / | |__| |_| | |_| | __/ // + // |_| |_| \_/\_/ |_____\___/ \___/|_| // + /////////////////////////////////////////////// + + `OPCODE_HWLOOP: begin // hwloop instructions + + hwloop_regid_o = instr_rdata_i[22:21]; // set hwloop register id + + case (instr_rdata_i[25:23]) + 3'b000,3'b110,3'b111: begin // lp.start set start address + hwloop_wb_mux_sel_o = 1'b1; + hwloop_we_o[0] = 1'b1; // set we for start addr reg + alu_op_a_mux_sel_o = `OP_A_CURRPC; + alu_op_b_mux_sel_o = `OP_B_IMM; + alu_operator = `ALU_ADD; + alu_pc_mux_sel_o = 1'b1; + immediate_mux_sel_o = `IMM_21S; + // $display("%t: hwloop start address: %h", $time, instr_rdata_i); + end + 3'b001: begin // lp.end set end address + hwloop_wb_mux_sel_o = 1'b1; + hwloop_we_o[1] = 1'b1; // set we for end addr reg + alu_op_a_mux_sel_o = `OP_A_CURRPC; + alu_op_b_mux_sel_o = `OP_B_IMM; + alu_operator = `ALU_ADD; + alu_pc_mux_sel_o = 1'b1; + immediate_mux_sel_o = `IMM_21S; + // $display("%t: hwloop end address: %h", $time, instr_rdata_i); + end + 3'b010: begin // lp.counti initialize counter from immediate + hwloop_cnt_mux_sel_o = 2'b01; + hwloop_we_o[2] = 1'b1; // set we for counter reg + // $display("%t: hwloop counter imm: %h", $time, instr_rdata_i); + end + 3'b011: begin // lp.count initialize counter from register + hwloop_cnt_mux_sel_o = 2'b11; + hwloop_we_o[2] = 1'b1; // set we for counter reg + rega_used = 1'b1; + // $display("%t: hwloop counter: %h", $time, instr_rdata_i); + end + 3'b100: begin // lp.setupi + hwloop_wb_mux_sel_o = 1'b0; + hwloop_cnt_mux_sel_o = 2'b10; + hwloop_we_o = 
3'b111; // set we for counter/start/end reg + alu_op_a_mux_sel_o = `OP_A_CURRPC; + alu_op_b_mux_sel_o = `OP_B_IMM; + alu_operator = `ALU_ADD; + alu_pc_mux_sel_o = 1'b1; + immediate_mux_sel_o = `IMM_8Z; + // $display("%t: hwloop setup imm: %h", $time, instr_rdata_i); + end + 3'b101: begin // lp.setup + hwloop_wb_mux_sel_o = 1'b0; + hwloop_cnt_mux_sel_o = 2'b11; + hwloop_we_o = 3'b111; // set we for counter/start/end reg + alu_op_a_mux_sel_o = `OP_A_CURRPC; + alu_op_b_mux_sel_o = `OP_B_IMM; + alu_operator = `ALU_ADD; + alu_pc_mux_sel_o = 1'b1; + immediate_mux_sel_o = `IMM_16Z; + rega_used = 1'b1; + // $display("%t: hwloop setup: %h", $time, instr_rdata_i); + end + endcase + end + + + */ + + + default: begin + // synopsys translate_off + $display("%t: Unknown Instruction 0x%h.", $time, instr_rdata_i[31:0]); + // synopsys translate_on + illegal_insn_o = 1'b1; + // TODO: Check me / replace with exception + pc_mux_sel_o = `NO_INCR; + end + endcase; // case (instr_rdata_i[31:26]) + + // misaligned access was detected by the LSU + if (data_misaligned_i == 1'b1) + begin + // only part of the pipeline is unstalled, make sure that the + // correct operands are sent to the AGU + alu_op_a_mux_sel_o = `OP_A_REGA_OR_FWD; + alu_op_b_mux_sel_o = `OP_B_IMM; + immediate_mux_sel_o = `IMM_I; // TODO: FIXME + + // if prepost increments are used, we do not write back the + // second address since the first calculated address was + // the correct one + regfile_alu_we = 1'b0; + + // if post increments are used, we must make sure that for + // the second memory access we do use the adder + prepost_useincr_o = 1'b1; + end + + if ( set_npc == 1'b1 ) + pc_mux_sel_o = `NO_INCR; + + // hwloop detected, jump to start address! 
+        if (hwloop_jump_i == 1'b1)
+          pc_mux_sel_o = `HWLOOP_ADDR;
+
+`ifdef BRANCH_PREDICTION
+        if (wrong_branch_taken)
+          pc_mux_sel_o = `PC_BRANCH_PRED;
+`endif
+        // the pipeline is flushed and we are requested to go to sleep
+        if ((pipe_flushed_i == 1'b1) && (fetch_enable_i == 1'b0))
+          ctrl_fsm_ns = IDLE;
+      end
+    endcase
+  end
+
+  // the core reports busy in every FSM state except IDLE
+  assign core_busy_o = (ctrl_fsm_cs != IDLE);
+
+  ////////////////////////////////////////////////////////////////////////////////////////////
+  // Generate Stall Signals!                                                                //
+  ////////////////////////////////////////////////////////////////////////////////////////////
+  // NOTE: the hazard detection inherited from OR10N is commented out below, so in
+  // this revision all four hazard stalls and deassert_we are constantly 0.
+  always_comb
+  begin
+    mfspr_stall = 1'b0;
+    mtspr_stall = 1'b0;
+    load_stall  = 1'b0;
+    jr_stall    = 1'b0;
+    deassert_we = 1'b0;
+
+
+    /*
+    // Stall because of l.mfspr with dependency
+    if ((regfile_wdata_mux_sel_ex_i == 1'b0) && (regfile_we_ex_i == 1'b1) &&
+        ((reg_d_ex_is_reg_a_id == 1'b1) || (reg_d_ex_is_reg_b_id == 1'b1) || (reg_d_ex_is_reg_c_id == 1'b1)) )
+    begin
+      deassert_we = 1'b1;
+      mfspr_stall = 1'b1;
+    end
+
+    // Stall because of l.mtspr (always...)
+    // mtspr in ex stage, normal instruction in id stage which can change an spr reg
+    if ((sp_we_ex_i == 1'b1) && (instr_rdata_i[31:26] != `OPCODE_MTSPR))
+    begin
+      deassert_we = 1'b1;
+      mtspr_stall = 1'b1;
+    end
+
+    // Stall because of load operation
+    if ((data_req_ex_i == 1'b1) && (regfile_we_ex_i == 1'b1) &&
+        ((reg_d_ex_is_reg_a_id == 1'b1) || (reg_d_ex_is_reg_b_id == 1'b1) || (reg_d_ex_is_reg_c_id == 1'b1)) )
+    begin
+      deassert_we = 1'b1;
+      load_stall  = 1'b1;
+    end
+
+    // Stall because of jr path
+    // - Load results cannot directly be forwarded to PC
+    // - Multiplication results cannot be forwarded to PC
+    if (((instr_rdata_i[31:26] == `OPCODE_JALR) || (instr_rdata_i[31:26] == `OPCODE_JR)) &&
+        (((regfile_we_wb_i == 1'b1) && (reg_d_wb_is_reg_b_id == 1'b1) && (data_rvalid_i == 1'b1)) ||
+         ((regfile_we_ex_i == 1'b1) && (reg_d_ex_is_reg_b_id == 1'b1)) ||
+         ((regfile_alu_we_fw_i == 1'b1) && (reg_d_alu_is_reg_b_id == 1'b1) && (mult_is_running_ex_i == 1'b1))) )
+    begin
+      jr_stall    = 1'b1;
+      deassert_we = 1'b1;
+    end
+
+`ifdef BRANCH_PREDICTION
+    // Stall because of set_flag path
+    if (wrong_branch_taken)
+    begin
+      deassert_we = 1'b1;
+    end
+`endif
+
+    */
+
+  end
+
+`ifdef BRANCH_PREDICTION
+  assign drop_instruction_o = wrong_branch_taken;
+`else
+  assign drop_instruction_o = 1'b0;
+`endif
+
+  // Stall because of IF miss
+  assign instr_ack_stall = ~instr_ack_i;
+
+  // Stall if TCDM contention has been detected
+  assign lsu_stall = ~data_ack_i;
+
+  assign misalign_stall = data_misaligned_i;
+
+  // deassert we signals (in case of stalls): while deassert_we is set the
+  // decoded control signals are replaced by their inactive values
+  assign alu_operator_o    = (deassert_we) ? `ALU_NOP : alu_operator;
+  assign mult_is_running_o = (deassert_we) ? 1'b0     : mult_is_running;
+  assign regfile_we_o      = (deassert_we) ? 1'b0     : regfile_we;
+  assign regfile_alu_we_o  = (deassert_we) ? 1'b0     : regfile_alu_we;
+  assign data_we_o         = (deassert_we) ? 1'b0     : data_we;
+  assign data_req_o        = (deassert_we) ? 1'b0     : data_req;
+  assign set_flag_o        = (deassert_we) ? 1'b0     : set_flag;
+  assign set_overflow_o    = (deassert_we) ? 1'b0     : set_overflow;
+  assign set_carry_o       = (deassert_we) ? 1'b0     : set_carry;
+
+
+  ////////////////////////////////////////////////////////////////////////////////////////////
+  // Freeze Unit. This unit controls the pipeline stages                                    //
+  ////////////////////////////////////////////////////////////////////////////////////////////
+  always_comb
+  begin
+    // we unstall the if_stage if the debug unit wants to set a new
+    // pc, so that the new value gets written into current_pc_if and is
+    // used by the instr_core_interface
+    stall_if_o = (instr_ack_stall | mfspr_stall | mtspr_stall | load_stall | jr_stall | lsu_stall | misalign_stall | dbg_stall_i | (~pc_valid_i));
+    stall_id_o = instr_ack_stall | mfspr_stall | mtspr_stall | load_stall | jr_stall | lsu_stall | misalign_stall | dbg_stall_i;
+    stall_ex_o = instr_ack_stall | lsu_stall | dbg_stall_i;
+    stall_wb_o = lsu_stall | dbg_stall_i;
+  end
+
+
+  ////////////////////////////////////////////////////////////////////////////////////////////
+  // Forwarding control unit. (Forwarding from wb and ex stage to id stage)                 //
+  //  RiscV register encoding: rs1 is [19:15], rs2 is [24:20], rd is [11:7]                 //
+  //  Or10n register encoding: ra  is [20:16], rb  is [15:11], rd is [25:21]                //
+  ////////////////////////////////////////////////////////////////////////////////////////////
+  // Each comparator is qualified by the matching reg*_used flag from the decoder,
+  // so an instruction that does not read a register never reports a match.
+  assign reg_d_ex_is_reg_a_id  = (regfile_waddr_ex_i     == instr_rdata_i[19:15]) && (rega_used == 1'b1);
+  assign reg_d_ex_is_reg_b_id  = (regfile_waddr_ex_i     == instr_rdata_i[24:20]) && (regb_used == 1'b1);
+  assign reg_d_ex_is_reg_c_id  = (regfile_waddr_ex_i     == instr_rdata_i[11:7])  && (regc_used == 1'b1);
+  assign reg_d_wb_is_reg_a_id  = (regfile_waddr_wb_i     == instr_rdata_i[19:15]) && (rega_used == 1'b1);
+  assign reg_d_wb_is_reg_b_id  = (regfile_waddr_wb_i     == instr_rdata_i[24:20]) && (regb_used == 1'b1);
+  assign reg_d_wb_is_reg_c_id  = (regfile_waddr_wb_i     == instr_rdata_i[11:7])  && (regc_used == 1'b1);
+  assign reg_d_alu_is_reg_a_id = (regfile_alu_waddr_fw_i == instr_rdata_i[19:15]) && (rega_used == 1'b1);
+  assign reg_d_alu_is_reg_b_id = (regfile_alu_waddr_fw_i == instr_rdata_i[24:20]) && (regb_used == 1'b1);
+  // This driver was commented out although the forwarding mux below still reads
+  // the signal; it is restored so the EX -> ID select for operand c is never
+  // computed from an undriven value. Same field as the EX/WB comparators above.
+  assign reg_d_alu_is_reg_c_id = (regfile_alu_waddr_fw_i == instr_rdata_i[11:7])  && (regc_used == 1'b1);
+
+  always_comb
+  begin
+    // default assignments
+    operand_a_fw_mux_sel_o = `SEL_REGFILE;
+    operand_b_fw_mux_sel_o = `SEL_REGFILE;
+    operand_c_fw_mux_sel_o = `SEL_REGFILE;
+
+    // Forwarding WB -> ID
+    if (regfile_we_wb_i == 1'b1)
+    begin
+      if (reg_d_wb_is_reg_a_id == 1'b1)
+        operand_a_fw_mux_sel_o = `SEL_FW_WB;
+      if (reg_d_wb_is_reg_b_id == 1'b1)
+        operand_b_fw_mux_sel_o = `SEL_FW_WB;
+      if (reg_d_wb_is_reg_c_id == 1'b1)
+        operand_c_fw_mux_sel_o = `SEL_FW_WB;
+    end
+
+    // Forwarding EX -> ID
+    // (assigned after the WB case, so an EX match overrides a WB match)
+    if (regfile_alu_we_fw_i == 1'b1)
+    begin
+      if (reg_d_alu_is_reg_a_id == 1'b1)
+        operand_a_fw_mux_sel_o = `SEL_FW_EX;
+      if (reg_d_alu_is_reg_b_id == 1'b1)
+        operand_b_fw_mux_sel_o = `SEL_FW_EX;
+      if (reg_d_alu_is_reg_c_id == 1'b1)
+        operand_c_fw_mux_sel_o = `SEL_FW_EX;
+    end
+
+    // a misaligned access overrides both cases above: operand a comes from
+    // the EX forwarding path, operand b from the register file
+    if (data_misaligned_i == 1'b1)
+    begin
+      operand_a_fw_mux_sel_o = `SEL_FW_EX;
+      operand_b_fw_mux_sel_o = `SEL_REGFILE;
+    end
+  end
+
+  // check if jump or branch in pipeline
+  /*
+  assign jump_in_id_o = ((instr_rdata_i[31:26] == `OPCODE_BF)  || (instr_rdata_i[31:26] == `OPCODE_BNF)  ||
+                         (instr_rdata_i[31:26] == `OPCODE_J)   || (instr_rdata_i[31:26] == `OPCODE_JR)   ||
+                         (instr_rdata_i[31:26] == `OPCODE_JAL) || (instr_rdata_i[31:26] == `OPCODE_JALR) ||
+                         (instr_rdata_i[31:26] == `OPCODE_RFE) );
+  */
+  assign jump_in_id_o = 1'b0; // TODO: FIXME
+
+  // update registers
+  always_ff @(posedge clk , negedge rst_n)
+  begin : UPDATE_REGS
+    if ( rst_n == 1'b0 )
+    begin
+      ctrl_fsm_cs <= RESET;
+    end
+    else
+    begin
+      ctrl_fsm_cs <= ctrl_fsm_ns;
+    end
+  end
+
+  // hold NPC until IF stage has taken over this value
+  always_ff @(posedge clk , negedge rst_n)
+  begin : HOLD_NPC
+    if ( rst_n == 1'b0 )
+    begin
+      set_npc <= 1'b0;
+    end
+    else
+    begin
+      // a new debug request sets the flag; it is cleared once IF is not stalled
+      if (dbg_set_npc_i == 1'b1)
+        set_npc <= 1'b1;
+      else if (stall_if_o == 1'b0)
+        set_npc <= 1'b0;
+    end
+  end
+
+`ifdef BRANCH_PREDICTION
+  // Wrong branch was taken!
+  always_ff @(posedge clk , negedge rst_n)
+  begin : WRONG_BRANCH
+    if ( rst_n == 1'b0 )
+    begin
+      wrong_branch_taken <= 1'b0;
+    end
+    else
+    begin
+      if (stall_if_o == 1'b0)
+        wrong_branch_taken <= wrong_branch_taken_o;
+    end
+  end
+`endif
+
+endmodule // controller
diff --git a/debug_unit.sv b/debug_unit.sv
new file mode 100644
index 00000000..12423e3b
--- /dev/null
+++ b/debug_unit.sv
@@ -0,0 +1,221 @@
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                                                                            //
+// Engineer:       Florian Glaser - glaserf@ethz.ch                           //
+//                                                                            //
+// Additional contributions by:                                               //
+//                 Andreas Traber - atraber@student.ethz.ch                   //
+//                                                                            //
+//                                                                            //
+// Create Date:    11/07/2014                                                 //
+// Design Name:    Pipelined OpenRISC Processor                               //
+// Module Name:    debug_unit.sv                                              //
+// Project Name:   OR10N                                                      //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    Debug Controller for the pipelined processor               //
+//                                                                            //
+//                                                                            //
+// Revision:                                                                  //
+// Revision v0.1 - File Created                                               //
+// Revision v0.2 - (December 1, 2014) Merge with current OR10N core,          //
+//                 changed port and signal names                              //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+
+
+`include "defines.sv"
+
+// Debug controller: decodes accesses arriving on the dbginf_* port into
+// debug-register, GPR or SPR accesses and runs a small FSM that flushes and
+// stalls the core on a trap or on an external stall request.
+module debug_unit
+(
+  input  logic        clk,
+  input  logic        rst_n,
+
+  // signals to Debug Interface
+  input  logic        dbginf_stall_i,
+  output logic        dbginf_bp_o,
+  input  logic        dbginf_strobe_i,
+  output logic        dbginf_ack_o,
+  input  logic        dbginf_we_i,
+  input  logic [15:0] dbginf_addr_i,
+  input  logic [31:0] dbginf_data_i,
+  output logic [31:0] dbginf_data_o,
+
+  // signals to core
+  output logic        dbg_st_en_o,   // Single-step trace mode enabled
+  output logic [1:0]  dbg_dsr_o,     // debug stop register
+
+  output logic        stall_core_o,
+  output logic        flush_pipe_o,
+  input  logic        pipe_flushed_i,
+  input  logic        trap_i,
+
+  output logic        sp_mux_o,
+  output logic        regfile_mux_o,
+  output logic        regfile_we_o,
+  output logic [15:0] regfile_addr_o,
+  output logic [31:0] regfile_wdata_o,
+  input  logic [31:0] regfile_rdata_i
+
+  );
+
+  // registers for debug control
+  logic [1:0]         DSR_DP, DSR_DN;   // Debug Stop Register: IIE, INTE
+  logic [1:0]         DMR1_DP, DMR1_DN; // only single step trace and branch trace bits
+  logic [2*`N_WP-1:0] DMR2_DP, DMR2_DN; // only BP enable control and BP cause status
+
+  // watchpoint status
+  logic [`N_WP-1:0]   WP_Status_D;
+
+  // BP control FSM
+  enum logic [2:0] {Idle, Trap, DebugStall, StallCore} BP_State_SN, BP_State_SP;
+
+  // ack to debug interface: accesses are only acknowledged while the core is stalled
+  assign dbginf_ack_o = (dbginf_strobe_i && (BP_State_SP == StallCore));
+
+  // next-state and output logic of the BP control FSM
+  always_comb
+  begin
+    BP_State_SN  = BP_State_SP;
+    stall_core_o = 1'b0;
+    dbginf_bp_o  = 1'b0;
+    flush_pipe_o = 1'b0;
+
+    case (BP_State_SP)
+      Idle:
+      begin
+        if(trap_i == 1'b1)
+          BP_State_SN = Trap;
+
+        // evaluated last, so an external stall request wins over a trap
+        if(dbginf_stall_i)
+        begin
+          flush_pipe_o = 1'b1;
+          BP_State_SN  = DebugStall;
+        end
+      end
+
+      // A trap was encountered, wait for the pipeline to be
+      // flushed
+      Trap:
+      begin
+        if(pipe_flushed_i == 1'b1)
+        begin
+          dbginf_bp_o = 1'b1;
+          BP_State_SN = StallCore;
+        end
+      end
+
+      // A stall from adv dbg was seen, flush the pipeline and wait for unstalling
+      DebugStall:
+      begin
+        flush_pipe_o = 1'b1;
+
+        if(pipe_flushed_i == 1'b1)
+        begin
+          BP_State_SN = StallCore;
+        end
+      end
+
+      StallCore:
+      begin
+        stall_core_o = 1'b1;
+
+        if(~dbginf_stall_i)
+          BP_State_SN = Idle;
+      end
+
+      default: BP_State_SN = Idle;
+    endcase // case (BP_State_SP)
+  end
+
+
+  // data to GPRs and SPRs
+  assign regfile_wdata_o = dbginf_data_i;
+
+  assign dbg_st_en_o = DMR1_DP[0];
+  assign dbg_dsr_o   = DSR_DP;
+
+  // address decoding, write and read controller
+  always_comb
+  begin
+    DMR1_DN        = DMR1_DP;
+    // hold the whole register by default: previously only the lower half had a
+    // default and the upper (status) half of DMR2_DN was never driven
+    DMR2_DN        = DMR2_DP;
+    DSR_DN         = DSR_DP;
+    dbginf_data_o  = 32'b0;
+    regfile_we_o   = 1'b0;
+    regfile_addr_o = 16'b0;
+    regfile_mux_o  = 1'b0;
+    sp_mux_o       = 1'b0;
+
+    if(dbginf_strobe_i == 1'b1) begin
+      // address decoding, first stage: evaluate higher 5 Bits to detect if debug regs are accessed
+      if(dbginf_addr_i[15:11] == 5'b00110) begin
+        // second stage: evaluate Bits 10:0 to detect which part of debug registers is accessed
+        // (plain case: the items are numeric, so no wildcard matching is wanted)
+        case (dbginf_addr_i[10:0])
+          11'd16: begin // SP_DMR1
+            if(dbginf_we_i == 1'b1)
+              DMR1_DN = dbginf_data_i[`DMR1_ST+1:`DMR1_ST];
+            else
+              dbginf_data_o[`DMR1_ST+1:`DMR1_ST] = DMR1_DP;
+          end
+          11'd17: begin // SP_DMR2
+            if(dbginf_we_i == 1'b1)
+              DMR2_DN[`N_WP-1:0] = dbginf_data_i[`DMR2_WGB0 + (`N_WP-1):`DMR2_WGB0];
+            else
+            begin
+              // both read-back fields belong to the read path only
+              dbginf_data_o[`DMR2_WGB0 + (`N_WP-1):`DMR2_WGB0] = DMR2_DP[`N_WP-1:0];
+              dbginf_data_o[`DMR2_WBS0 + (`N_WP-1):`DMR2_WBS0] = DMR2_DP[2*`N_WP-1:`N_WP];
+            end
+          end
+          11'd20: begin // SP_DSR
+            // currently we only handle IIE and INTE
+            if(dbginf_we_i == 1'b1)
+              DSR_DN = dbginf_data_i[7:6];
+            else
+              dbginf_data_o[7:6] = DSR_DP[1:0];
+          end
+          default: ;
+        endcase // case [10:0]
+      end
+      // check if GPRs are accessed
+      else if(dbginf_addr_i[15:10] == 6'b000001)
+      begin
+        regfile_mux_o       = 1'b1;
+        regfile_addr_o[4:0] = dbginf_addr_i[4:0];
+
+        if(dbginf_we_i == 1'b1)
+          regfile_we_o = 1'b1;
+        else
+          dbginf_data_o = regfile_rdata_i;
+      end
+      // some other SPR is accessed
+      else
+      begin
+        sp_mux_o       = 1'b1;
+        regfile_addr_o = dbginf_addr_i;
+
+        if(dbginf_we_i == 1'b1)
+          regfile_we_o = 1'b1;
+        else
+          dbginf_data_o = regfile_rdata_i;
+      end
+    end
+  end
+
+  // normal FF setup
+  always_ff@(posedge clk or negedge rst_n) begin
+    if (~rst_n) begin
+      DMR1_DP     <= 2'b0;
+      DMR2_DP     <= 'b0;
+      DSR_DP      <= 'b0;
+      BP_State_SP <= Idle;
+    end
+    else begin
+      DMR1_DP     <= DMR1_DN;
+      DMR2_DP     <= DMR2_DN;
+      DSR_DP      <= DSR_DN;
+      BP_State_SP <= BP_State_SN;
+    end
+  end // always_ff@ (posedge clk or negedge rst_n)
+
+endmodule // debug_unit
diff --git a/ex_stage.sv b/ex_stage.sv
new file mode 100644
index 00000000..0acb0361
--- /dev/null
+++ b/ex_stage.sv
@@ -0,0 +1,270 @@
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                 DEI @ UNIBO - 
University of Bologna                                      //
//                                                                            //
// Engineer:       Renzo Andri - andrire@student.ethz.ch                      //
//                                                                            //
// Additional contributions by:                                               //
//                 Igor Loi - igor.loi@unibo.it                               //
//                                                                            //
//                                                                            //
// Create Date:    01/07/2014                                                 //
// Design Name:    Execute stage                                              //
// Module Name:    ex_stage.sv                                                //
// Project Name:   OR10N                                                      //
// Language:       SystemVerilog                                              //
//                                                                            //
// Description:    Execution stage: Host Alu and Multiplier                   //
//                 ALU: computes additions/subtractions/comparisons           //
//                 (in a pure combinational way)                              //
//                 Multiplier:                                                //
//                 32bit multiplication: takes two cycles to complete. The    //
//                 Result goes to the register file (only the 32 lsb)         //
//                 64bit multiplication(l.muld): takes two cycles to complete //
//                 Result goes to sp register maclo(32lsb) and machi(32msb)   //
//                                                                            //
//                                                                            //
// Revision:                                                                  //
//                                                                            //
// Revision v0.1 - File Created                                               //
//                                                                            //
//                                                                            //
//                                                                            //
////////////////////////////////////////////////////////////////////////////////
// int = internal signals
// wb  = writeback
// sp  = special registers



`include "defines.sv"

// Execute stage.
//
// Instantiates the combinational ALU, selects between ALU and multiplier
// results for forwarding, computes the load/store address, and holds the
// EX/WB pipeline register. The multiplier instance is currently disabled
// (commented out below); its result signals are tied to zero so that the
// stage outputs are always driven.
module ex_stage
(
    input  logic                      clk,
    input  logic                      rst_n,

    // ALU signals from ID stage
    input  logic [`ALU_OP_WIDTH-1:0]  alu_operator_i,
    input  logic [31:0]               alu_operand_a_i,
    input  logic [31:0]               alu_operand_b_i,
    input  logic [31:0]               alu_operand_c_i,
    input  logic                      alu_carry_i,
    input  logic                      alu_flag_i,

    input  logic [1:0]                vector_mode_i,
    input  logic [1:0]                alu_cmp_mode_i,
    input  logic [1:0]                alu_vec_ext_i,

    // Multiplier signals
    input  logic                      mult_is_running_i,
    input  logic [1:0]                mult_sel_subword_i,
    input  logic [1:0]                mult_signed_mode_i,
    input  logic                      mult_use_carry_i,
    input  logic                      mult_mac_en_i,

    output logic [31:0]               data_addr_ex_o,

    // input from ID stage
    input  logic                      stall_ex_i,
    input  logic                      stall_wb_i,

    input  logic [4:0]                regfile_alu_waddr_i,
    input  logic                      regfile_alu_we_i,

    input  logic                      prepost_useincr_i,

    // directly passed through to WB stage, not used in EX
    input  logic                      regfile_we_i,
    input  logic [4:0]                regfile_waddr_i,
    input  logic                      regfile_wdata_mux_sel_i,

    input  logic [31:0]               regfile_rb_data_i,

    input  logic                      hwloop_wb_mux_sel_i,
    input  logic [31:0]               hwloop_pc_plus4_i,
    input  logic [31:0]               hwloop_cnt_i,

    input  logic                      set_overflow_i,
    input  logic                      set_carry_i,

    input  logic                      eoc_i,
    input  logic                      sp_we_i,

    // Output of EX stage pipeline

    // interface with Special registers
    output logic                      alu_flag_o,

    output logic                      carry_o,
    output logic                      overflow_o,

    output logic                      set_overflow_o,
    output logic                      set_carry_o,

    output logic [15:0]               regfile_wdata_wb_o,
    output logic [4:0]                regfile_waddr_wb_o,
    output logic                      regfile_wdata_mux_sel_wb_o,
    output logic                      regfile_we_wb_o,
    output logic [31:0]               regfile_rb_data_wb_o,

    output logic [31:0]               hwloop_start_data_o,
    output logic [31:0]               hwloop_end_data_o,
    output logic [31:0]               hwloop_cnt_data_o,

    output logic                      sp_we_wb_o,
    output logic                      eoc_o,

    // Forwarding ports : to ID stage
    output logic [4:0]                regfile_alu_waddr_fw_o,
    output logic                      regfile_alu_we_fw_o,
    output logic [31:0]               regfile_alu_wdata_fw_o,    // forward to RF and ID/EX pipe, ALU & MUL
    output logic [31:0]               regfile_alu_wdata_fw_pc_o  // forward to PC, no multiplication

`ifdef TCDM_ADDR_PRECAL
    ,
    input  logic [31:0]               alu_adder_i
`endif
);


  // ALU outputs - overflow and carry
  logic        alu_overflow_int;
  logic        alu_carry_int;

  // Internal output of the ALU
  logic [31:0] alu_result;

  logic [31:0] alu_adder_lsu_int; // to LS unit

  // Multiplier outputs
  logic [31:0] mult_result;
  logic        mult_carry_int;
  logic        mult_overflow_int;

  // The multiplier instance below is commented out, which left these three
  // signals without a driver although they are selected whenever
  // mult_is_running_i is set. Tie them to zero so carry_o, overflow_o and
  // regfile_alu_wdata_fw_o are always driven.
  // Remove these assignments when mult_i is re-enabled.
  assign mult_result       = 32'b0;
  assign mult_carry_int    = 1'b0;
  assign mult_overflow_int = 1'b0;


  // Result selection: select between ALU output signals and MUL
  assign carry_o    = (mult_is_running_i == 1'b1) ? mult_carry_int    : alu_carry_int;
  assign overflow_o = (mult_is_running_i == 1'b1) ? mult_overflow_int : alu_overflow_int;

  assign regfile_alu_we_fw_o       = regfile_alu_we_i;
  assign regfile_alu_waddr_fw_o    = regfile_alu_waddr_i;
  assign regfile_alu_wdata_fw_o    = (mult_is_running_i == 1'b0) ? alu_result : mult_result;
  assign regfile_alu_wdata_fw_pc_o = alu_result; // forwarding to PC, multiplication not allowed

  // generate flags: goes to special purpose register
  // (suppressed while the EX stage is stalled)
  assign set_overflow_o = (stall_ex_i == 1'b0) ? set_overflow_i : 1'b0;
  assign set_carry_o    = (stall_ex_i == 1'b0) ? set_carry_i    : 1'b0;

  //NOTE Igor fix: replaced alu_adder_int with alu_adder_lsu_int --> Now data_addr is calculated with
  //NOTE a dedicated adder, no carry is considered , just op_a + op_b from id stage
  assign data_addr_ex_o = (prepost_useincr_i == 1'b1) ? alu_adder_lsu_int : alu_operand_a_i;

  // hwloop start mux: selects the data written to the hwloop start-address
  // register (PC+4 when the select is 0, ALU result when it is 1)
  assign hwloop_start_data_o = (hwloop_wb_mux_sel_i == 1'b1) ? alu_result : hwloop_pc_plus4_i;

  // the ALU result is always used as hwloop end data
  assign hwloop_end_data_o = alu_result;

  // the hwloop counter value is passed through unchanged
  assign hwloop_cnt_data_o = hwloop_cnt_i;


  ////////////////////////////
  //          ALU           //
  ////////////////////////////
  alu alu_i
  (
    .operator_i    ( alu_operator_i    ),
    .operand_a_i   ( alu_operand_a_i   ),
    .operand_b_i   ( alu_operand_b_i   ),
    .carry_i       ( alu_carry_i       ),
    .flag_i        ( alu_flag_i        ),
`ifdef TCDM_ADDR_PRECAL
    .adder_i       ( alu_adder_i       ),
`endif
    .vector_mode_i ( vector_mode_i     ),
    .cmp_mode_i    ( alu_cmp_mode_i    ),
    .vec_ext_i     ( alu_vec_ext_i     ),

    .adder_lsu_o   ( alu_adder_lsu_int ),
    .result_o      ( alu_result        ),
    .overflow_o    ( alu_overflow_int  ), // Internal signal
    .carry_o       ( alu_carry_int     ), // Internal signal
    .flag_o        ( alu_flag_o        )
  );


  ////////////////////////////
  //       MULTIPLIER       //
  ////////////////////////////
  /*
  mult mult_i
  (
    .vector_mode_i ( vector_mode_i      ),
    .sel_subword_i ( mult_sel_subword_i ),
    .signed_mode_i ( mult_signed_mode_i ),
    .use_carry_i   ( mult_use_carry_i   ),
    .mac_en_i      ( mult_mac_en_i      ),

    .op_a_i        ( alu_operand_a_i    ),
    .op_b_i        ( alu_operand_b_i    ),
    .mac_i         ( alu_operand_c_i    ),
    .carry_i       ( alu_carry_i        ),

    .result_o      ( mult_result        ),

    .carry_o       ( mult_carry_int     ),
    .overflow_o    ( mult_overflow_int  )
  );
  */


  ///////////////////////////////////////
  // EX/WB Pipeline Register           //
  ///////////////////////////////////////
  // Asynchronous active-low reset; the register only advances when the WB
  // stage is not stalled.
  always_ff @(posedge clk, negedge rst_n)
  begin : EX_WB_Pipeline_Register
    if (rst_n == 1'b0)
    begin
      regfile_wdata_wb_o         <= 16'h0000;
      regfile_waddr_wb_o         <= 5'b0_0000;
      regfile_wdata_mux_sel_wb_o <= 1'b0;
      regfile_we_wb_o            <= 1'b0;
      regfile_rb_data_wb_o       <= 32'h0000_0000;
      sp_we_wb_o                 <= 1'b0;
      eoc_o                      <= 1'b0;
    end
    else
    begin
      if (stall_wb_i == 1'b0)
      begin
        regfile_we_wb_o            <= regfile_we_i;
        regfile_waddr_wb_o         <= regfile_waddr_i;
        regfile_wdata_wb_o         <= alu_result[15:0]; // this is only used for SPR address
        regfile_wdata_mux_sel_wb_o <= regfile_wdata_mux_sel_i;
        regfile_rb_data_wb_o       <= regfile_rb_data_i;
        sp_we_wb_o                 <= sp_we_i;
        eoc_o                      <= eoc_i;
      end
    end
  end

endmodule
diff --git a/id_stage.sv b/id_stage.sv new file mode 100644 index 00000000..0f6ad9a1 --- /dev/null +++ b/id_stage.sv @@ -0,0 +1,887 @@ +//////////////////////////////////////////////////////////////////////////////// +// Company: IIS @ ETHZ - Federal Institute of Technology // +// DEI @ UNIBO - University of Bologna // +// // +// Engineer: Renzo Andri - andrire@student.ethz.ch // +// // +// Additional contributions by: // +// Igor Loi - igor.loi@unibo.it // +// Andreas Traber - atraber@student.ethz.ch // +// // +// // +// Create Date: 19/09/2013 // +// Design Name: Decode stage // +// Module Name: id_stage.sv // +// Project Name: OR10N // +// Language: SystemVerilog // +// // +// Description: Decode stage of the OR10N core. 
It decodes the instructions// +// and hosts the register file and the pipe controller // +// // +// Revision: // +// Revision v0.1 - File Created // +// Revision v0.2 - (July 1st 2014) Pipe splitted in several files // +// Revision v0.3 - (August 7th 2014) Changed port and signal names, added // +// comments // +// Revision v0.4 - (December 1th 2014) Merged debug unit // +// // +// // +// // +// // +//////////////////////////////////////////////////////////////////////////////// + + +`include "defines.sv" + +module id_stage +( + input logic clk, + input logic rst_n, + + input logic fetch_enable_i, + output logic core_busy_o, + + // Interface to instruction memory + input logic [31:0] instr_rdata_i, // comes from pipeline of IF stage + output logic instr_req_o, + input logic instr_gnt_i, + input logic instr_ack_i, + + // IF and ID stage signals + output logic [31:0] pc_from_immediate_o, + + output logic [2:0] pc_mux_sel_o, + output logic pc_mux_boot_o, + output logic [1:0] exc_pc_mux_o, + output logic force_nop_o, + output logic [31:0] pc_from_regfile_fw_o, + + input logic [31:0] current_pc_if_i, + input logic [31:0] current_pc_id_i, + + // branch prediction + output logic drop_instruction_o, +`ifdef BRANCH_PREDICTION + output logic wrong_branch_taken_o, + output logic take_branch_o, +`endif + // STALLS + output logic stall_if_o, + output logic stall_id_o, + output logic stall_ex_o, + output logic stall_wb_o, + + input logic sr_flag_fw_i, + input logic sr_flag_i, + + // To the Pipeline ID/EX + output logic [31:0] regfile_rb_data_ex_o, + output logic [31:0] alu_operand_a_ex_o, + output logic [31:0] alu_operand_b_ex_o, + output logic [31:0] alu_operand_c_ex_o, + output logic [`ALU_OP_WIDTH-1:0] alu_operator_ex_o, + + output logic [1:0] vector_mode_ex_o, + output logic [1:0] alu_cmp_mode_ex_o, + output logic [1:0] alu_vec_ext_ex_o, + + output logic eoc_ex_o, + + output logic mult_is_running_ex_o, // TODO: Rename (-> mult enable ?) 
+ output logic [1:0] mult_sel_subword_ex_o, + output logic [1:0] mult_signed_mode_ex_o, + output logic mult_use_carry_ex_o, + output logic mult_mac_en_ex_o, + + output logic [4:0] regfile_waddr_ex_o, + output logic regfile_wdata_mux_sel_ex_o, + output logic regfile_we_ex_o, + + output logic [4:0] regfile_alu_waddr_ex_o, + output logic regfile_alu_we_ex_o, + + output logic prepost_useincr_ex_o, + input logic data_misaligned_i, + + output logic [2:0] hwloop_we_ex_o, + output logic [1:0] hwloop_regid_ex_o, + output logic hwloop_wb_mux_sel_ex_o, + output logic [31:0] hwloop_cnt_o, + output logic [`HWLOOP_REGS-1:0] hwloop_dec_cnt_o, + output logic [31:0] hwloop_targ_addr_o, + + output logic sp_we_ex_o, + + // Interface to load store unit + output logic data_we_ex_o, + output logic [1:0] data_type_ex_o, + output logic data_sign_ext_ex_o, + output logic [1:0] data_reg_offset_ex_o, + output logic data_misaligned_ex_o, + output logic data_req_ex_o, + input logic data_ack_i, // Grant from data memory + input logic data_rvalid_i, + + // SPR signals + output logic set_flag_ex_o, + output logic set_carry_ex_o, + output logic set_overflow_ex_o, + output logic set_dsx_o, + + // Interrupt signals + input logic irq_i, + input logic irq_nm_i, + input logic irq_enable_i, + output logic save_pc_if_o, + output logic save_pc_id_o, + output logic save_sr_o, + output logic restore_sr_o, + + // from hwloop regs + input logic [`HWLOOP_REGS-1:0] [31:0] hwloop_start_addr_i, + input logic [`HWLOOP_REGS-1:0] [31:0] hwloop_end_addr_i, + input logic [`HWLOOP_REGS-1:0] [31:0] hwloop_counter_i, + + // Debug Unit Signals + input logic dbg_flush_pipe_i, + output logic pipe_flushed_o, + input logic dbg_st_en_i, + input logic [1:0] dbg_dsr_i, + input logic dbg_stall_i, + output logic dbg_trap_o, + input logic dbg_reg_mux_i, + input logic dbg_reg_we_i, + input logic [4:0] dbg_reg_addr_i, + input logic [31:0] dbg_reg_wdata_i, + output logic [31:0] dbg_reg_rdata_o, + input logic dbg_set_npc_i, + + // 
Forward Signals + input logic [4:0] regfile_waddr_wb_i, + input logic regfile_we_wb_i, + input logic [31:0] regfile_wdata_wb_i, // From wb_stage: selects data from data memory, ex_stage result and sp rdata + + input logic [4:0] regfile_alu_waddr_fw_i, + input logic regfile_alu_we_fw_i, + input logic [31:0] regfile_alu_wdata_fw_i, + input logic [31:0] regfile_alu_wdata_fw_pc_i, + + input logic [31:0] wdata_reg_i + +`ifdef TCDM_ADDR_PRECAL + , + output logic [31:0] alu_adder_o +`endif + +); + + + // Immediate decoding and sign extension + logic [31:0] imm_i_type; + logic [31:0] imm_s_type; + logic [31:0] imm_sb_type; + logic [31:0] imm_u_type; + logic [31:0] imm_uj_type; + + logic [31:0] immediate_b; // contains the immediate for operand b + + logic [31:0] current_pc; // PC to be used in ALU (either IF or ID) + + logic exc_pc_sel; + logic [2:0] pc_mux_sel_int; // selects next PC in if stage + + logic irq_present; + + // Signals running between controller and exception controller + logic jump_in_id; + logic jump_in_ex; // registered copy of jump_in_id + logic illegal_insn; + logic trap_insn; + logic pipe_flush; + logic pc_valid; + logic clear_isr_running; + + + logic [4:0] regfile_addr_ra_id; + logic [4:0] regfile_addr_rb_id; + logic [4:0] regfile_addr_rc_id; + + logic [4:0] regfile_waddr_id; + logic [4:0] regfile_alu_waddr_id; + logic regfile_alu_we_id; + + logic [31:0] regfile_data_ra_id; + logic [31:0] regfile_data_rb_id; + logic [31:0] regfile_data_rc_id; + + logic imm_sign_ext_sel; + + // ALU Control + logic [`ALU_OP_WIDTH-1:0] alu_operator; + logic [1:0] alu_op_a_mux_sel; + logic [1:0] alu_op_b_mux_sel; + logic scalar_replication; + + logic [1:0] vector_mode; + logic [1:0] alu_cmp_mode; + logic [1:0] alu_vec_ext; + + logic alu_pc_mux_sel; + logic [3:0] immediate_mux_sel; + + // Multiplier Control + logic mult_is_running; // output of the controller (1 if the opcode is a multiplication) + logic [1:0] mult_sel_subword; // Select a subword when doing 
multiplications + logic [1:0] mult_signed_mode; // Signed mode multiplication at the output of the controller, and before the pipe registers + logic mult_use_carry; // Enables carry in for the MAC + logic mult_mac_en; // Enables the use of the accumulator + + logic eoc; // End of computation generated from the controller + + // Register Write Control + logic regfile_wdata_mux_sel; + logic regfile_we_id; + logic [1:0] regfile_alu_waddr_mux_sel; // TODO: FixMe -> 1bit + + // Special-Purpose Register Write Control + logic sp_we_id; + + // Data Memory Control + logic data_we_id; + logic [1:0] data_type_id; + logic data_sign_ext_id; + logic [1:0] data_reg_offset_id; + logic data_req_id; + + // hwloop signals + logic [1:0] hwloop_regid; + logic [2:0] hwloop_we; + logic hwloop_wb_mux_sel; + logic [1:0] hwloop_cnt_mux_sel; + logic [31:0] hwloop_cnt; + logic hwloop_jump; + logic hwloop_enable; + + // Supervision Register + logic set_flag; + logic set_carry; + logic set_overflow; + + logic prepost_useincr; + + // Forwarding + logic [1:0] operand_a_fw_mux_sel; + logic [1:0] operand_b_fw_mux_sel; + logic [1:0] operand_c_fw_mux_sel; + logic [31:0] operand_a_fw_id; + logic [31:0] operand_b_fw_id; + + logic [31:0] alu_operand_a; + logic [31:0] alu_operand_b; + logic [31:0] alu_operand_c; + logic [31:0] operand_b; // before going through the scalar replication mux + logic [31:0] operand_b_vec; // scalar replication of operand_b for 8 and 16 bit + + + // TODO: FIXME temporary assignments while not everything is implemented (e.g. exceptions) + assign exc_pc_sel = 1'b0; + assign pc_valid = 1'b1; + + + assign pc_mux_sel_o = (exc_pc_sel == 1'b1) ? 
`PC_EXCEPTION : pc_mux_sel_int; + + + // Instruction Parts + + logic [31:0] instr; + assign instr = instr_rdata_i; // TODO: Remove + + assign imm_i_type = { {20 {instr[31]}}, instr[31:20] }; + assign imm_s_type = { {20 {instr[31]}}, instr[31:25], instr[11:7] }; + assign imm_sb_type = { {20 {instr[31]}}, instr[31], instr[7], instr[30:25], instr[11:8] }; + assign imm_u_type = { instr[31:12], {12 {1'b0}} }; + assign imm_uj_type = { {20 {instr[31]}}, instr[19:12], instr[20], instr[30:21], 1'b0 }; + + // source registers + assign regfile_addr_ra_id = instr[19:15]; + assign regfile_addr_rb_id = instr[24:20]; + //assign regfile_addr_rc_id = instr_rdata_i[25:21]; + + //assign alu_vec_ext = instr_rdata_i[9:8]; + + // destination registers + assign regfile_waddr_id = instr[11:7]; + + // Second Register Write Adress Selection + // Used for prepost load/store and multiplier + always_comb + begin : alu_waddr_mux + case (regfile_alu_waddr_mux_sel) + default: regfile_alu_waddr_id = regfile_addr_ra_id; + 2'b00: regfile_alu_waddr_id = regfile_addr_ra_id; + 2'b01: regfile_alu_waddr_id = regfile_waddr_id; + endcase + end + + /////////////////////////////////////////////////////////////////////////////////////// + // ____ ____ _ // + // | _ \ _ __ ___ __ _ _ __ __ _ _ __ ___ / ___|___ _ _ _ __ | |_ ___ _ __ // + // | |_) | '__/ _ \ / _` | '__/ _` | '_ ` _ \ | | / _ \| | | | '_ \| __/ _ \ '__| // + // | __/| | | (_) | (_| | | | (_| | | | | | | | |__| (_) | |_| | | | | || __/ | // + // |_| |_| \___/ \__, |_| \__,_|_| |_| |_| \____\___/ \__,_|_| |_|\__\___|_| // + // |___/ // + /////////////////////////////////////////////////////////////////////////////////////// + + // to instruction fetch pc mux + //assign pc_from_immediate_o = immediate26_id; + + // PC Mux + always_comb + begin : alu_pc_mux + case (alu_pc_mux_sel) + 1'b0: current_pc = current_pc_if_i; + 1'b1: current_pc = current_pc_id_i; + endcase; // case (alu_pc_mux_sel) + end + + // pc_from_regfile fw Mux, similar to 
operand_b_fw_mux, + // but not allowed to forward data from load/store unit and from the + // pre/post increment and multiplier + always_comb + begin : pc_from_regfile_fw_mux + case (operand_b_fw_mux_sel) + `SEL_FW_EX: pc_from_regfile_fw_o = regfile_alu_wdata_fw_pc_i; + `SEL_FW_WB: pc_from_regfile_fw_o = wdata_reg_i; + //`SEL_REGFILE: pc_from_regfile_fw_o = regfile_data_rb_id; + //default: pc_from_regfile_fw_o = regfile_data_rb_id; + default: pc_from_regfile_fw_o = regfile_alu_wdata_fw_pc_i; + endcase; // case (operand_b_fw_mux_sel) + end + + /* + // hwloop_cnt_mux + always_comb + begin : hwloop_cnt_mux + case (hwloop_cnt_mux_sel) + 2'b00: hwloop_cnt = 32'b0; + 2'b01: hwloop_cnt = immediate21z_id; + 2'b10: hwloop_cnt = immediate13z_id; + 2'b11: hwloop_cnt = operand_a_fw_id; + endcase; // case (hwloop_cnt_mux_sel) + end + */ + + + //////////////////////////////////////////////////////// + // ___ _ _ // + // / _ \ _ __ ___ _ __ __ _ _ __ __| | / \ // + // | | | | '_ \ / _ \ '__/ _` | '_ \ / _` | / _ \ // + // | |_| | |_) | __/ | | (_| | | | | (_| | / ___ \ // + // \___/| .__/ \___|_| \__,_|_| |_|\__,_| /_/ \_\ // + // |_| // + //////////////////////////////////////////////////////// + + // ALU_Op_a Mux + always_comb + begin : alu_operand_a_mux + case (alu_op_a_mux_sel) + default: alu_operand_a = operand_a_fw_id; + `OP_A_REGA_OR_FWD: alu_operand_a = operand_a_fw_id; + `OP_A_CURRPC: alu_operand_a = current_pc; + `OP_A_ZERO: alu_operand_a = 32'b0; + //`OP_A_IMM16: alu_operand_a = immediate16_id; + endcase; // case (alu_op_a_mux_sel) + end + + // Operand a forwarding mux + always_comb + begin : operand_a_fw_mux + case (operand_a_fw_mux_sel) + `SEL_FW_EX: operand_a_fw_id = regfile_alu_wdata_fw_i; + `SEL_FW_WB: operand_a_fw_id = regfile_wdata_wb_i; + `SEL_REGFILE: operand_a_fw_id = regfile_data_ra_id; + default: operand_a_fw_id = regfile_data_ra_id; + endcase; // case (operand_a_fw_mux_sel) + end + + ////////////////////////////////////////////////////// + // ___ _ ____ // 
+ // / _ \ _ __ ___ _ __ __ _ _ __ __| | | __ ) // + // | | | | '_ \ / _ \ '__/ _` | '_ \ / _` | | _ \ // + // | |_| | |_) | __/ | | (_| | | | | (_| | | |_) | // + // \___/| .__/ \___|_| \__,_|_| |_|\__,_| |____/ // + // |_| // + ////////////////////////////////////////////////////// + + // Immediate Mux for operand B + always_comb + begin : immediate_mux + case (immediate_mux_sel) + default: immediate_b = 32'h4; + //`IMM_VEC: immediate_b = immediate_vec_id; + `IMM_I: immediate_b = imm_i_type; + `IMM_S: immediate_b = imm_s_type; + //`IMM_SB: immediate_b = imm_sb_type; + `IMM_U: immediate_b = imm_u_type; + `IMM_UJ: immediate_b = imm_uj_type; + endcase; // case (immediate_mux_sel) + end + + // ALU_Op_b Mux + always_comb + begin : alu_operand_b_mux + case (alu_op_b_mux_sel) + default: operand_b = operand_b_fw_id; + `OP_B_REGB_OR_FWD: operand_b = operand_b_fw_id; + //`OP_B_REGC_OR_FWD: operand_b = alu_operand_c; + `OP_B_IMM: operand_b = immediate_b; + endcase // case (alu_op_b_mux_sel) + end + + // scalar replication for operand B + //assign operand_b_vec = (vector_mode == `VEC_MODE8) ? {4{operand_b[7:0]}} : {2{operand_b[15:0]}}; + + // choose normal or scalar replicated version of operand b + assign alu_operand_b = (scalar_replication == 1'b1) ? 
operand_b_vec : operand_b; + + + // Operand b forwarding mux + always_comb + begin : operand_b_fw_mux + case (operand_b_fw_mux_sel) + `SEL_FW_EX: operand_b_fw_id = regfile_alu_wdata_fw_i; + `SEL_FW_WB: operand_b_fw_id = regfile_wdata_wb_i; + `SEL_REGFILE: operand_b_fw_id = regfile_data_rb_id; + default: operand_b_fw_id = regfile_data_rb_id; + endcase; // case (operand_b_fw_mux_sel) + end + + + ////////////////////////////////////////////////////// + // ___ _ ____ // + // / _ \ _ __ ___ _ __ __ _ _ __ __| | / ___| // + // | | | | '_ \ / _ \ '__/ _` | '_ \ / _` | | | // + // | |_| | |_) | __/ | | (_| | | | | (_| | | |___ // + // \___/| .__/ \___|_| \__,_|_| |_|\__,_| \____| // + // |_| // + ////////////////////////////////////////////////////// + + // Operand c forwarding mux + always_comb + begin : operand_c_fw_mux + case (operand_c_fw_mux_sel) + `SEL_FW_EX: alu_operand_c = regfile_alu_wdata_fw_i; + `SEL_FW_WB: alu_operand_c = regfile_wdata_wb_i; + `SEL_REGFILE: alu_operand_c = regfile_data_rc_id; + default: alu_operand_c = regfile_data_rc_id; + endcase; // case (operand_b_fw_mux_sel) + end + + + ///////////////////////////////////////////////////////// + // ____ _____ ____ ___ ____ _____ _____ ____ ____ // + // | _ \| ____/ ___|_ _/ ___|_ _| ____| _ \/ ___| // + // | |_) | _|| | _ | |\___ \ | | | _| | |_) \___ \ // + // | _ <| |__| |_| || | ___) || | | |___| _ < ___) | // + // |_| \_\_____\____|___|____/ |_| |_____|_| \_\____/ // + // // + ///////////////////////////////////////////////////////// + riscv_register_file registers_i + ( + .clk ( clk ), + .rst_n ( rst_n ), + + // Read port a + .raddr_a_i ( (dbg_reg_mux_i == 1'b0) ? 
regfile_addr_ra_id : dbg_reg_addr_i ), + .rdata_a_o ( regfile_data_ra_id ), + + // Read port b + .raddr_b_i ( regfile_addr_rb_id ), + .rdata_b_o ( regfile_data_rb_id ), + + // Read port c + .raddr_c_i ( regfile_addr_rc_id ), + .rdata_c_o ( regfile_data_rc_id ), + + // Write port a + .waddr_a_i ( regfile_waddr_wb_i ), + .wdata_a_i ( regfile_wdata_wb_i ), + .we_a_i ( regfile_we_wb_i ), + + // Write port b + .waddr_b_i ( (dbg_reg_mux_i == 1'b0) ? regfile_alu_waddr_fw_i : dbg_reg_addr_i ), + .wdata_b_i ( (dbg_reg_mux_i == 1'b0) ? regfile_alu_wdata_fw_i : dbg_reg_wdata_i ), + .we_b_i ( (dbg_reg_mux_i == 1'b0) ? regfile_alu_we_fw_i : dbg_reg_we_i ) + ); + + assign dbg_reg_rdata_o = regfile_data_ra_id; + + //////////////////////////////////////////////////////////////////// + // ____ ___ _ _ _____ ____ ___ _ _ _____ ____ // + // / ___/ _ \| \ | |_ _| _ \ / _ \| | | | | ____| _ \ // + // | | | | | | \| | | | | |_) | | | | | | | | _| | |_) | // + // | |__| |_| | |\ | | | | _ <| |_| | |___| |___| |___| _ < // + // \____\___/|_| \_| |_| |_| \_\\___/|_____|_____|_____|_| \_\ // + // // + //////////////////////////////////////////////////////////////////// + controller controller_i + ( + .clk ( clk ), + .rst_n ( rst_n ), + .fetch_enable_i ( fetch_enable_i ), + .eoc_o ( eoc ), + .core_busy_o ( core_busy_o ), + + // Signal from-to PC pipe (instr rdata) and instr mem system (req and ack) + .instr_rdata_i ( instr_rdata_i ), + .instr_req_o ( instr_req_o ), + .instr_gnt_i ( instr_gnt_i ), + .instr_ack_i ( instr_ack_i ), + .pc_mux_sel_o ( pc_mux_sel_int ), + .pc_mux_boot_o ( pc_mux_boot_o ), + + // Alu signals + .alu_operator_o ( alu_operator ), + .extend_immediate_o ( imm_sign_ext_sel ), + .alu_op_a_mux_sel_o ( alu_op_a_mux_sel ), + .alu_op_b_mux_sel_o ( alu_op_b_mux_sel ), + .alu_pc_mux_sel_o ( alu_pc_mux_sel ), + .immediate_mux_sel_o ( immediate_mux_sel ), + + .scalar_replication_o ( scalar_replication ), + .vector_mode_o ( vector_mode ), + .alu_cmp_mode_o ( alu_cmp_mode ), + + // 
mult signals + .mult_is_running_o ( mult_is_running ), + .mult_is_running_ex_i ( mult_is_running_ex_o ), + .mult_sel_subword_o ( mult_sel_subword ), + .mult_signed_mode_o ( mult_signed_mode ), + .mult_use_carry_o ( mult_use_carry ), + .mult_mac_en_o ( mult_mac_en ), + + // Register file control signals + .regfile_wdata_mux_sel_o ( regfile_wdata_mux_sel ), + .regfile_wdata_mux_sel_ex_i ( regfile_wdata_mux_sel_ex_o ), + .regfile_we_o ( regfile_we_id ), + + .regfile_alu_we_o ( regfile_alu_we_id ), + .regfile_alu_waddr_mux_sel_o ( regfile_alu_waddr_mux_sel ), + + .prepost_useincr_o ( prepost_useincr ), + .data_misaligned_i ( data_misaligned_i ), + + // SP register signals + .sp_we_o ( sp_we_id ), + .sp_we_ex_i ( sp_we_ex_o ), + + // Data bus interface + .data_we_o ( data_we_id ), + .data_type_o ( data_type_id ), + .data_sign_extension_o ( data_sign_ext_id ), + .data_reg_offset_o ( data_reg_offset_id ), + .data_req_o ( data_req_id ), + .data_ack_i ( data_ack_i ), + .data_req_ex_i ( data_req_ex_o ), + .data_rvalid_i ( data_rvalid_i ), + + // hwloop signals + .hwloop_we_o ( hwloop_we ), + .hwloop_regid_o ( hwloop_regid ), + .hwloop_wb_mux_sel_o ( hwloop_wb_mux_sel ), + .hwloop_cnt_mux_sel_o ( hwloop_cnt_mux_sel ), + .hwloop_jump_i ( hwloop_jump ), + + // Interrupt signals + .irq_present_i ( irq_present ), + + // Exception Controller Signals + .jump_in_id_o ( jump_in_id ), + .illegal_insn_o ( illegal_insn ), + .trap_insn_o ( trap_insn ), + .pipe_flush_o ( pipe_flush ), + .pc_valid_i ( pc_valid ), + .clear_isr_running_o ( clear_isr_running ), + .pipe_flushed_i ( pipe_flushed_o ), + + // Debug Unit Signals + .dbg_stall_i ( dbg_stall_i ), + .dbg_set_npc_i ( dbg_set_npc_i ), + + // SPR Signals + .sr_flag_fw_i ( sr_flag_fw_i ), // Forwarded Branch Signal + .sr_flag_i ( sr_flag_i ), + .set_flag_ex_i ( set_flag_ex_o ), + .set_flag_o ( set_flag ), + .set_overflow_o ( set_overflow ), + .set_carry_o ( set_carry ), + .restore_sr_o ( restore_sr_o ), + + // regfile port 1 + 
.regfile_waddr_ex_i ( regfile_waddr_ex_o ), // Write address for register file from ex-wb- pipeline registers + .regfile_we_ex_i ( regfile_we_ex_o ), + .regfile_waddr_wb_i ( regfile_waddr_wb_i ), // Write address for register file from ex-wb- pipeline registers + .regfile_we_wb_i ( regfile_we_wb_i ), + + // regfile port 2 + .regfile_alu_waddr_fw_i ( regfile_alu_waddr_fw_i ), + .regfile_alu_we_fw_i ( regfile_alu_we_fw_i ), + + // Forwarding signals + .operand_a_fw_mux_sel_o ( operand_a_fw_mux_sel ), + .operand_b_fw_mux_sel_o ( operand_b_fw_mux_sel ), + .operand_c_fw_mux_sel_o ( operand_c_fw_mux_sel ), + + // branch prediction + .drop_instruction_o ( drop_instruction_o ), +`ifdef BRANCH_PREDICTION + .wrong_branch_taken_o ( wrong_branch_taken_o ), + .take_branch_o ( take_branch_o ), +`endif + // Stall signals + .stall_if_o ( stall_if_o ), + .stall_id_o ( stall_id_o ), + .stall_ex_o ( stall_ex_o ), + .stall_wb_o ( stall_wb_o ) + ); + + /////////////////////////////////////////////////////////////////////// + // _____ ____ _ _ _ // + // | ____|_ _____ / ___|___ _ __ | |_ _ __ ___ | | | ___ _ __ // + // | _| \ \/ / __| | | / _ \| '_ \| __| '__/ _ \| | |/ _ \ '__| // + // | |___ > < (__ _ | |__| (_) | | | | |_| | | (_) | | | __/ | // + // |_____/_/\_\___(_) \____\___/|_| |_|\__|_| \___/|_|_|\___|_| // + // // + /////////////////////////////////////////////////////////////////////// + + assign force_nop_o = 1'b0; + /* + exc_controller exc_controller_i + ( + .clk ( clk ), + .rst_n ( rst_n ), + + .fetch_enable_i ( fetch_enable_i ), + + // to IF stage + .exc_pc_sel_o ( exc_pc_sel ), + .exc_pc_mux_o ( exc_pc_mux_o ), + .force_nop_o ( force_nop_o ), + + // hwloop signals + .hwloop_enable_o ( hwloop_enable ), + + // Interrupt signals + .irq_i ( irq_i ), + .irq_nm_i ( irq_nm_i ), + .irq_enable_i ( irq_enable_i ), + .irq_present_o ( irq_present ), + + // SPR + .save_pc_if_o ( save_pc_if_o ), + .save_pc_id_o ( save_pc_id_o ), + .save_sr_o ( save_sr_o ), + .set_dsx_o ( set_dsx_o ), 
+ + // Controller + .core_busy_i ( core_busy_o ), + .jump_in_id_i ( jump_in_id ), + .jump_in_ex_i ( jump_in_ex ), + .stall_id_i ( stall_id_o ), + .illegal_insn_i ( illegal_insn ), + .trap_insn_i ( trap_insn ), + .pipe_flush_i ( pipe_flush ), + .pc_valid_o ( pc_valid ), + .clear_isr_running_i ( clear_isr_running ), + + // Debug Unit Signals + .dbg_flush_pipe_i ( dbg_flush_pipe_i ), + .pipe_flushed_o ( pipe_flushed_o ), + .dbg_st_en_i ( dbg_st_en_i ), + .dbg_dsr_i ( dbg_dsr_i ), + .dbg_stall_i ( dbg_stall_i ), + .dbg_set_npc_i ( dbg_set_npc_i ), + .dbg_trap_o ( dbg_trap_o ) + ); + */ + + + ////////////////////////////////////////////////////////////////////////// + // ____ ___ _ _ _____ ____ ___ _ _ _____ ____ // + // / ___/ _ \| \ | |_ _| _ \ / _ \| | | | | ____| _ \ // + // HWLOOP-| | | | | | \| | | | | |_) | | | | | | | | _| | |_) | // + // | |__| |_| | |\ | | | | _ <| |_| | |___| |___| |___| _ < // + // \____\___/|_| \_| |_| |_| \_\\___/|_____|_____|_____|_| \_\ // + // // + ////////////////////////////////////////////////////////////////////////// + + /* + hwloop_controller hwloop_controller_i + ( + // from ID stage + .enable_i ( hwloop_enable ), + .current_pc_i ( current_pc_if_i ), + + // to ID controller + .hwloop_jump_o ( hwloop_jump ), + + // to if stage + .hwloop_targ_addr_o ( hwloop_targ_addr_o ), + + // from hwloop_regs + .hwloop_start_addr_i ( hwloop_start_addr_i ), + .hwloop_end_addr_i ( hwloop_end_addr_i ), + .hwloop_counter_i ( hwloop_counter_i ), + + // to hwloop_regs + .hwloop_dec_cnt_o ( hwloop_dec_cnt_o ) + ); + */ + + + + ///////////////////////////////////////////////////////////////////////////////// + // ___ ____ _______ __ ____ ___ ____ _____ _ ___ _ _ _____ // + // |_ _| _ \ | ____\ \/ / | _ \_ _| _ \| ____| | |_ _| \ | | ____| // + // | || | | |_____| _| \ / | |_) | || |_) | _| | | | || \| | _| // + // | || |_| |_____| |___ / \ | __/| || __/| |___| |___ | || |\ | |___ // + // |___|____/ |_____/_/\_\ |_| |___|_| |_____|_____|___|_| \_|_____| 
//
+  //                                                                                 //
+  /////////////////////////////////////////////////////////////////////////////////
+  // ID/EX pipeline registers.
+  //   * asynchronous active-low reset : every register returns to its idle value
+  //   * stall_ex_o low, data_misaligned_i high : only the ALU operands and the
+  //     few control bits listed below are refreshed
+  //   * stall_ex_o low, data_misaligned_i low  : the complete register set is
+  //     loaded with the values decoded in this stage
+  //   * stall_ex_o high : nothing is written, all registers hold
+  always_ff @(posedge clk, negedge rst_n)
+  begin : ID_EX_PIPE_REGISTERS
+    if (!rst_n)
+    begin
+      // ---- ALU ----
+      alu_operator_ex_o           <= `ALU_NOP;
+      alu_operand_a_ex_o          <= '0;
+      alu_operand_b_ex_o          <= '0;
+      alu_operand_c_ex_o          <= '0;
+      vector_mode_ex_o            <= `VEC_MODE32;
+      alu_cmp_mode_ex_o           <= `ALU_CMP_FULL;
+      alu_vec_ext_ex_o            <= '0;
+
+      // ---- multiplier ----
+      mult_is_running_ex_o        <= '0;
+      mult_sel_subword_ex_o       <= '0;
+      mult_signed_mode_ex_o       <= '0;
+      mult_use_carry_ex_o         <= '0;
+      mult_mac_en_ex_o            <= '0;
+
+      // ---- register file write-back ----
+      regfile_rb_data_ex_o        <= '0;
+      regfile_waddr_ex_o          <= '0;
+      regfile_wdata_mux_sel_ex_o  <= '0;
+      regfile_we_ex_o             <= '0;
+      regfile_alu_waddr_ex_o      <= '0;
+      regfile_alu_we_ex_o         <= '0;
+      prepost_useincr_ex_o        <= '0;
+
+      // ---- special-purpose registers and status flags ----
+      sp_we_ex_o                  <= '0;
+      set_flag_ex_o               <= '0;
+      set_overflow_ex_o           <= '0;
+      set_carry_ex_o              <= '0;
+
+      // ---- data memory interface ----
+      data_we_ex_o                <= '0;
+      data_type_ex_o              <= '0;
+      data_sign_ext_ex_o          <= '0;
+      data_reg_offset_ex_o        <= '0;
+      data_req_ex_o               <= '0;
+      data_misaligned_ex_o        <= '0;
+
+      // ---- hardware loops ----
+      hwloop_we_ex_o              <= '0;
+      hwloop_regid_ex_o           <= '0;
+      hwloop_wb_mux_sel_ex_o      <= '0;
+      hwloop_cnt_o                <= '0;
+
+      // ---- control ----
+      jump_in_ex                  <= '0;
+      eoc_ex_o                    <= '0;
+
+      `ifdef TCDM_ADDR_PRECAL
+      alu_adder_o                 <= '0;
+      `endif
+    end
+    else if (stall_ex_o == 1'b0)
+    begin
+      if (data_misaligned_i == 1'b1)
+      begin
+        // Misaligned access: only the ALU operands are released.
+        data_misaligned_ex_o      <= 1'b1;
+
+        alu_operand_b_ex_o        <= alu_operand_b;
+        regfile_alu_we_ex_o       <= regfile_alu_we_id;
+        prepost_useincr_ex_o      <= prepost_useincr;
+
+        // Operand A is refreshed only while prepost_useincr_ex_o is set.
+        // Otherwise it is held: per the original author's note, a
+        // post-increment access must reuse the original register value for
+        // the second memory access.
+        if (prepost_useincr_ex_o == 1'b1)
+          alu_operand_a_ex_o      <= alu_operand_a;
+      end
+      else if (data_misaligned_i == 1'b0)
+      begin
+        // Regular case: the whole ID/EX register set advances.
+
+        // ---- ALU ----
+        alu_operator_ex_o           <= alu_operator;
+        alu_operand_a_ex_o          <= alu_operand_a;
+        alu_operand_b_ex_o          <= alu_operand_b;
+        alu_operand_c_ex_o          <= alu_operand_c;
+        vector_mode_ex_o            <= vector_mode;
+        alu_cmp_mode_ex_o           <= alu_cmp_mode;
+        alu_vec_ext_ex_o            <= alu_vec_ext;
+
+        // ---- multiplier ----
+        mult_is_running_ex_o        <= mult_is_running;
+        mult_sel_subword_ex_o       <= mult_sel_subword;
+        mult_signed_mode_ex_o       <= mult_signed_mode;
+        mult_use_carry_ex_o         <= mult_use_carry;
+        mult_mac_en_ex_o            <= mult_mac_en;
+
+        // ---- register file write-back ----
+        regfile_rb_data_ex_o        <= operand_b_fw_id;
+        regfile_waddr_ex_o          <= regfile_waddr_id;
+        regfile_wdata_mux_sel_ex_o  <= regfile_wdata_mux_sel;
+        regfile_we_ex_o             <= regfile_we_id;
+        regfile_alu_waddr_ex_o      <= regfile_alu_waddr_id;
+        regfile_alu_we_ex_o         <= regfile_alu_we_id;
+        prepost_useincr_ex_o        <= prepost_useincr;
+
+        // ---- special-purpose registers and status flags ----
+        sp_we_ex_o                  <= sp_we_id;
+        set_flag_ex_o               <= set_flag;
+        set_overflow_ex_o           <= set_overflow;
+        set_carry_ex_o              <= set_carry;
+
+        // ---- data memory interface ----
+        data_we_ex_o                <= data_we_id;
+        data_type_ex_o              <= data_type_id;
+        data_sign_ext_ex_o          <= data_sign_ext_id;
+        data_reg_offset_ex_o        <= data_reg_offset_id;
+        data_req_ex_o               <= data_req_id;
+        data_misaligned_ex_o        <= 1'b0;
+
+        // ---- hardware loops ----
+        hwloop_we_ex_o              <= hwloop_we;
+        hwloop_regid_ex_o           <= hwloop_regid;
+        hwloop_wb_mux_sel_ex_o      <= hwloop_wb_mux_sel;
+        hwloop_cnt_o                <= hwloop_cnt;
+
+        // ---- control ----
+        jump_in_ex                  <= jump_in_id;
+        eoc_ex_o                    <= eoc;
+
+`ifdef TCDM_ADDR_PRECAL
+        // LSU address sum is computed here, one stage early
+        alu_adder_o                 <= alu_operand_a + alu_operand_b;
+`endif
+      end
+    end
+  end
+
+
+endmodule
diff --git a/if_stage.sv b/if_stage.sv
new file mode 100644
index 00000000..93a2f077
--- /dev/null
+++ b/if_stage.sv
@@ -0,0 +1,198 @@
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                 DEI @ UNIBO - University of Bologna                        //
+//                                                                            //
+// Engineer:       Renzo Andri - andrire@student.ethz.ch                      //
+//                                                                            //
+// Additional contributions by:                                               //
+//                 Igor Loi - igor.loi@unibo.it                               //
+//                 Andreas Traber - atraber@student.ethz.ch                   //
+//                                                                            //
+//                                                                            //
+// Create Date:    
01/07/2014                                                 //
+// Design Name:    Instruction fetch stage                                    //
+// Module Name:    if_stage.sv                                                //
+// Project Name:   RiscV                                                      //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    Instruction fetch unit: Selection of the next PC, and      //
+//                 buffering (Sampling) of the read instruction               //
+// Revision:                                                                  //
+// Revision v0.1 - File Created                                               //
+// Revision v0.2 - (August 6th 2014) Changed port and signal names, added     //
+//                 comments                                                   //
+// Revision v0.3 - (December 1st 2014) Merged debug unit and added more       //
+//                 exceptions                                                 //
+//                                                                            //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+
+
+
+`include "defines.sv"
+
+// Instruction fetch stage.
+//   - selects the next fetch address (instr_addr_o) from pc_mux_sel_i
+//   - holds the IF program counter (current_pc_if_o)
+//   - registers the fetched instruction word and its PC for the ID stage
+//     (instr_rdata_id_o / current_pc_id_o)
+module if_stage
+(
+    input  logic        clk,
+    input  logic        rst_n,                // asynchronous reset, active low
+
+    // upper 24 bits form the base of the exception vectors
+    input  logic [31:0] boot_addr_i,
+
+    // registered outputs towards the ID stage
+    output logic [31:0] instr_rdata_id_o,     // instruction word sampled for decoding
+    output logic [31:0] current_pc_if_o,      // program counter held in the IF stage
+    output logic [31:0] current_pc_id_o,      // program counter registered together with instr_rdata_id_o
+
+    // instruction memory / cache
+    input  logic [31:0] instr_rdata_i,        // instruction word read back
+    output logic [31:0] instr_addr_o,         // fetch address
+
+    // control and alternative PC sources
+    input  logic        force_nop_i,          // replace the fetched word with a NOP
+    input  logic [31:0] exception_pc_reg_i,   // PC to restore after an interrupt/exception was served
+    input  logic [31:0] pc_from_regfile_i,    // PC taken from the register file
+    input  logic [31:0] pc_from_immediate_i,  // displacement added to current_pc_id_o
+    input  logic [31:0] pc_from_hwloop_i,     // hardware loop start address
+    input  logic [2:0]  pc_mux_sel_i,         // next-PC selector
+    input  logic        pc_mux_boot_i,        // load boot_addr_i into the PC
+    input  logic [1:0]  exc_pc_mux_i,         // exception vector selector
+
+    // debug unit
+    input  logic [31:0] dbg_pc_from_npc,
+    input  logic        dbg_set_npc,
+
+    // branch prediction
+    input  logic        drop_instruction_i,
+`ifdef BRANCH_PREDICTION
+    input  logic        wrong_branch_taken_i,
+    input  logic        take_branch_i,
+`endif
+    // pipeline stalls
+    input  logic        stall_if_i,
+    input  logic        stall_id_i            // ID stalled: IF/ID registers are frozen
+);
+
+
+  // ---------------------------------------------------------------------------
+  // Internal signals
+  // ---------------------------------------------------------------------------
+  logic [31:0] next_pc;           // selected next program counter
+  logic [31:0] exc_pc;            // selected exception target
+  logic [31:0] instr_rdata_int;   // fetched word, or a NOP when force_nop_i is set
+
+`ifdef BRANCH_PREDICTION
+  logic [31:0] correct_branch;    // target stored for use after a misprediction
+`endif
+
+  logic [31:0] branch_taken;      // ID-stage PC + immediate displacement
+  logic [31:0] branch_not_taken;  // IF-stage PC + 4
+
+
+  assign branch_taken     = current_pc_id_o + pc_from_immediate_i;
+  assign branch_not_taken = current_pc_if_o + 32'd4;
+
+  // the selected next PC is also the fetch address
+  assign instr_addr_o     = next_pc;
+
+  // ---------------------------------------------------------------------------
+  // Next-PC multiplexer (pc_mux_sel_i is driven by the ID stage controller)
+  // ---------------------------------------------------------------------------
+  always_comb
+  begin : PC_MUX
+    // sequential fetch; covers `INCR_PC and every encoding not listed below
+    next_pc = branch_not_taken;
+
+    case (pc_mux_sel_i)
+      `NO_INCR:         next_pc = current_pc_if_o;      // hold the PC
+      `PC_FROM_REGFILE: next_pc = pc_from_regfile_i;    // target from the register file
+      `PC_FROM_IMM:     next_pc = branch_taken;         // ID-stage PC + immediate
+      `PC_EXCEPTION:    next_pc = exc_pc;               // exception vector
+      `EXC_PC_REG:      next_pc = exception_pc_reg_i;   // return from interrupt/exception
+      `HWLOOP_ADDR:     next_pc = pc_from_hwloop_i;     // hardware loop start
+`ifdef BRANCH_PREDICTION
+      `PC_BRANCH_PRED:  next_pc = correct_branch;       // stored branch target
+`endif
+      default: ;                                        // keep PC + 4
+    endcase
+  end
+
+
+  // ---------------------------------------------------------------------------
+  // Exception vector multiplexer: vectors sit in the 256-byte page selected by
+  // boot_addr_i[31:8]
+  // ---------------------------------------------------------------------------
+  always_comb
+  begin : EXC_PC_MUX
+    case (exc_pc_mux_i)
+      `EXC_PC_NO_INCR: exc_pc = current_pc_if_o;
+      `EXC_PC_ILLINSN: exc_pc = {boot_addr_i[31:8], `EXC_ILLINSN};
+      `EXC_PC_IRQ:     exc_pc = {boot_addr_i[31:8], `EXC_IRQ    };
+      `EXC_PC_IRQ_NM:  exc_pc = {boot_addr_i[31:8], `EXC_IRQ_NM };
+    endcase
+  end
+
+  // NOP is encoded as addi x0, x0, 0: all fields zero except the OP-IMM opcode
+  assign instr_rdata_int = force_nop_i ? {25'b0, `OPCODE_OPIMM} : instr_rdata_i;
+
+  // ---------------------------------------------------------------------------
+  // IF program counter. Priority: boot address, debug-unit NPC, regular update
+  // (only while the IF stage is not stalled).
+  // ---------------------------------------------------------------------------
+  always_ff @(posedge clk, negedge rst_n)
+  begin : IF_PIPELINE
+    if (!rst_n)
+      current_pc_if_o <= 32'h0;
+    else if (pc_mux_boot_i)
+      current_pc_if_o <= boot_addr_i;       // we were just reset: start at the boot address
+    else if (dbg_set_npc)
+      current_pc_if_o <= dbg_pc_from_npc;   // debug unit overrides the PC
+    else if (!stall_if_i)
+      current_pc_if_o <= next_pc;
+  end
+
+`ifdef BRANCH_PREDICTION
+  // ---------------------------------------------------------------------------
+  // Branch target saved for the misprediction case
+  // ---------------------------------------------------------------------------
+  always_ff @(posedge clk, negedge rst_n)
+  begin : SAVE_BRANCH_TARGET
+    if (!rst_n)
+      correct_branch <= 32'b0;
+    else if (wrong_branch_taken_i)
+      correct_branch <= take_branch_i ? branch_taken : branch_not_taken;
+  end
+`endif
+
+  // ---------------------------------------------------------------------------
+  // IF/ID pipeline registers: frozen while the ID stage is stalled or while the
+  // current instruction is being dropped
+  // ---------------------------------------------------------------------------
+  always_ff @(posedge clk, negedge rst_n)
+  begin : IF_ID_PIPE_REGISTERS
+    if (!rst_n)
+    begin
+      instr_rdata_id_o <= '0;
+      current_pc_id_o  <= '0;
+    end
+    else if (!stall_id_i && !drop_instruction_i)
+    begin
+      instr_rdata_id_o <= instr_rdata_int;
+      current_pc_id_o  <= current_pc_if_o;
+    end
+  end
+
+endmodule
diff --git a/include/defines.sv b/include/defines.sv
new file mode 100644
index 00000000..63bd48ad
--- /dev/null
+++ b/include/defines.sv
@@ -0,0 +1,376 @@
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                                                                            //
+// Engineer:       Matthias Baer - baermatt@student.ethz.ch                   //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    19/09/2013                                                 //
+// Design Name:    Pipelined Processor                                        //
+// Module Name:    defines.sv                                                 //
+// Project Name:   Processor                                                  //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    Defines for the pipelined processor                        //
+//                                                                            //
+//                                                                            //
+// Revision:                                                                  //
+// Revision v0.1 - File Created                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+// BTW: If you want to create more of those fancy ASCII art comments:
+// http://patorjk.com/software/taag/#p=display&v=0&f=Standard&t=Fancy%20ASCII%20Art
+////////////////////////////////////////////////////////////////////////////////
+
+
+
+////////////////////////////////////////////////
+//    ___         ____          _             //
+//   / _ \ _ __  / ___|___   __| | ___  ___   //
+//  | | | | '_ \| |   / _ \ / _` |/ _ \/ __|  //
+//  | |_| | |_) | |__| (_) | (_| |  __/\__ \  //
+//   \___/| .__/ \____\___/ \__,_|\___||___/  //
+//        |_|                                 //
+////////////////////////////////////////////////
+
+// Legacy 6-bit opcode table, kept commented out for reference only.
+/*
+`define OPCODE_J          6'h00
+`define OPCODE_JAL        6'h01
+`define OPCODE_HWLOOP     6'h02
+`define OPCODE_BNF        6'h03
+`define OPCODE_BF         6'h04
+`define OPCODE_NOP        6'h05
+`define OPCODE_MOVHI      6'h06 // also used for l.macrc
+`define OPCODE_SYNC       6'h08 // also used for l.trap
+`define OPCODE_RFE        6'h09
+`define OPCODE_VEC        6'h0a // vectorial instructions
+`define OPCODE_VCMP       6'h0b // vectorial compare instructions
+`define OPCODE_JR         6'h11
+`define OPCODE_JALR       6'h12
+`define OPCODE_MACI       6'h13
+`define OPCODE_EOC        6'h1c // l.eoc = l.cust1
+`define OPCODE_STPOST     6'h14 // ST post-increment
+`define OPCODE_STPRE      6'h15 // ST pre-increment
+`define OPCODE_LDPOST     6'h16 // LD post-increment
+`define OPCODE_LDPRE      6'h17 // LD pre-increment
+`define OPCODE_LWZ        6'h21
+`define OPCODE_LWS        6'h22
+`define OPCODE_LBZ        6'h23
+`define OPCODE_LBS        6'h24
+`define OPCODE_LHZ        6'h25
+`define OPCODE_LHS        6'h26
+`define OPCODE_ADDI       6'h27
+`define OPCODE_ADDIC      6'h28
+`define OPCODE_ANDI       6'h29
+`define OPCODE_ORI        6'h2a
+`define OPCODE_XORI       6'h2b
+`define OPCODE_MULI       6'h2c
+`define OPCODE_MFSPR      6'h2d
+`define OPCODE_SHIFT      6'h2e
+`define OPCODE_SFI        6'h2f
+`define OPCODE_MTSPR      6'h30
+`define OPCODE_MAC        6'h31
+`define OPCODE_SW         6'h35
+`define OPCODE_SB         6'h36
+`define OPCODE_SH         6'h37
+`define OPCODE_ALU        6'h38
+`define OPCODE_SF         6'h39
+ */
+
+
+
+
+// 7-bit major opcodes (instruction bits [6:0])
+`define OPCODE_SYSTEM     7'h73
+`define OPCODE_FENCE      7'h0f
+`define OPCODE_OP         7'h33
+`define OPCODE_OPIMM      7'h13
+`define OPCODE_STORE      7'h23
+`define OPCODE_LOAD       7'h03
+`define OPCODE_BRANCH     7'h63
+`define OPCODE_JALR       7'h67
+`define OPCODE_JAL        7'h6f
+`define OPCODE_AUIPC      7'h17
+`define OPCODE_LUI        7'h37
+
+// 32-bit instruction match patterns, bit 31 on the left. A `?` marks a
+// don't-care bit. Every pattern must be exactly 32 bits wide:
+//   {17 ?} funct3 {5 ?} opcode            -> 17 + 3 + 5 + 7
+//   funct7 {10 ?} funct3 {5 ?} opcode     -> 7 + 10 + 3 + 5 + 7
+// NOTE(review): presumably consumed by wildcard compares in the decoder -
+// the user is not visible in this file.
+`define INSTR_LUI         { {25 {1'b?}}, `OPCODE_LUI }
+`define INSTR_AUIPC       { {25 {1'b?}}, `OPCODE_AUIPC }
+`define INSTR_JAL         { {25 {1'b?}}, `OPCODE_JAL }
+`define INSTR_JALR        { {17 {1'b?}}, 3'b000, {5 {1'b?}}, `OPCODE_JALR }
+// BRANCH
+`define INSTR_BEQ         { {17 {1'b?}}, 3'b000, {5 {1'b?}}, `OPCODE_BRANCH }
+`define INSTR_BNE         { {17 {1'b?}}, 3'b001, {5 {1'b?}}, `OPCODE_BRANCH }
+`define INSTR_BLT         { {17 {1'b?}}, 3'b100, {5 {1'b?}}, `OPCODE_BRANCH }
+`define INSTR_BGE         { {17 {1'b?}}, 3'b101, {5 {1'b?}}, `OPCODE_BRANCH }
+`define INSTR_BLTU        { {17 {1'b?}}, 3'b110, {5 {1'b?}}, `OPCODE_BRANCH }
+`define INSTR_BGEU        { {17 {1'b?}}, 3'b111, {5 {1'b?}}, `OPCODE_BRANCH }
+// LOAD
+`define INSTR_LB          { {17 {1'b?}}, 3'b000, {5 {1'b?}}, `OPCODE_LOAD }
+`define INSTR_LH          { {17 {1'b?}}, 3'b001, {5 {1'b?}}, `OPCODE_LOAD }
+`define INSTR_LW          { {17 {1'b?}}, 3'b010, {5 {1'b?}}, `OPCODE_LOAD }
+`define INSTR_LBU         { {17 {1'b?}}, 3'b100, {5 {1'b?}}, `OPCODE_LOAD }
+`define INSTR_LHU         { {17 {1'b?}}, 3'b101, {5 {1'b?}}, `OPCODE_LOAD }
+// STORE
+`define INSTR_SB          { {17 {1'b?}}, 3'b000, {5 {1'b?}}, `OPCODE_STORE }
+`define INSTR_SH          { {17 {1'b?}}, 3'b001, {5 {1'b?}}, `OPCODE_STORE }
+`define INSTR_SW          { {17 {1'b?}}, 3'b010, {5 {1'b?}}, `OPCODE_STORE }
+// OPIMM
+`define INSTR_ADDI        { {17 {1'b?}}, 3'b000, {5 {1'b?}}, `OPCODE_OPIMM }
+`define INSTR_SLTI        { {17 {1'b?}}, 3'b010, {5 {1'b?}}, `OPCODE_OPIMM }
+`define INSTR_SLTIU       { {17 {1'b?}}, 3'b011, {5 {1'b?}}, `OPCODE_OPIMM }
+`define INSTR_XORI        { {17 {1'b?}}, 3'b100, {5 {1'b?}}, `OPCODE_OPIMM }
+`define INSTR_ORI         { {17 {1'b?}}, 3'b110, {5 {1'b?}}, `OPCODE_OPIMM }
+`define INSTR_ANDI        { {17 {1'b?}}, 3'b111, {5 {1'b?}}, `OPCODE_OPIMM }
+`define INSTR_SLLI        { 7'b0000000, {10 {1'b?}}, 3'b001, {5 {1'b?}}, `OPCODE_OPIMM }
+`define INSTR_SRLI        { 7'b0000000, {10 {1'b?}}, 3'b101, {5 {1'b?}}, `OPCODE_OPIMM }
+`define INSTR_SRAI        { 7'b0100000, {10 {1'b?}}, 3'b101, {5 {1'b?}}, `OPCODE_OPIMM }
+// OP
+`define INSTR_ADD         { 7'b0000000, {10 {1'b?}}, 3'b000, {5 {1'b?}}, `OPCODE_OP }
+`define INSTR_SUB         { 7'b0100000, {10 {1'b?}}, 3'b000, {5 {1'b?}}, `OPCODE_OP }
+`define INSTR_SLL         { 7'b0000000, {10 {1'b?}}, 3'b001, {5 {1'b?}}, `OPCODE_OP }
+`define INSTR_SLT         { 7'b0000000, {10 {1'b?}}, 3'b010, {5 {1'b?}}, `OPCODE_OP }
+`define INSTR_SLTU        { 7'b0000000, {10 {1'b?}}, 3'b011, {5 {1'b?}}, `OPCODE_OP }
+`define INSTR_XOR         { 7'b0000000, {10 {1'b?}}, 3'b100, {5 {1'b?}}, `OPCODE_OP }
+`define INSTR_SRL         { 7'b0000000, {10 {1'b?}}, 3'b101, {5 {1'b?}}, `OPCODE_OP }
+`define INSTR_SRA         { 7'b0100000, {10 {1'b?}}, 3'b101, {5 {1'b?}}, `OPCODE_OP }
+`define INSTR_OR          { 7'b0000000, {10 {1'b?}}, 3'b110, {5 {1'b?}}, `OPCODE_OP }
+`define INSTR_AND         { 7'b0000000, {10 {1'b?}}, 3'b111, {5 {1'b?}}, `OPCODE_OP }
+// FENCE
+`define INSTR_FENCE       { 4'b0000, {8 {1'b?}}, {13 {1'b0}}, `OPCODE_FENCE }
+`define INSTR_FENCEI      { {17 {1'b0}}, 3'b001, {5 {1'b0}}, `OPCODE_FENCE }
+// SYSTEM
+`define INSTR_SCALL       { {11 {1'b0}}, 1'b0, {13 {1'b0}}, `OPCODE_SYSTEM }
+`define INSTR_SBREAK      { {11 {1'b0}}, 1'b1, {13 {1'b0}}, `OPCODE_SYSTEM }
+// Counter reads. Field layout, 32 bits in total:
+//   CSR address [31:20] = 5 bits + 5 zero bits + 2 bits (0xC00..0xC02 and
+//   0xC80..0xC82), then 5 zero bits [19:15], funct3 = 010, 5 don't-care bits
+//   [11:7], SYSTEM opcode.
+// The middle of the CSR address must be {5 {1'b0}}: replicating a 5-bit zero
+// ({5 {5'b0}}) produces 25 bits and makes the whole pattern 52 bits wide.
+`define INSTR_RDCYCLE     { 5'b11000, {5 {1'b0}}, 2'b00, {5 {1'b0}}, 3'b010, {5 {1'b?}}, `OPCODE_SYSTEM }
+`define INSTR_RDCYCLEH    { 5'b11001, {5 {1'b0}}, 2'b00, {5 {1'b0}}, 3'b010, {5 {1'b?}}, `OPCODE_SYSTEM }
+`define INSTR_RDTIME      { 5'b11000, {5 {1'b0}}, 2'b01, {5 {1'b0}}, 3'b010, {5 {1'b?}}, `OPCODE_SYSTEM }
+`define INSTR_RDTIMEH     { 5'b11001, {5 {1'b0}}, 2'b01, {5 {1'b0}}, 3'b010, {5 {1'b?}}, `OPCODE_SYSTEM }
+`define INSTR_RDINSTRET   { 5'b11000, {5 {1'b0}}, 2'b10, {5 {1'b0}}, 3'b010, {5 {1'b?}}, `OPCODE_SYSTEM }
+`define INSTR_RDINSTRETH  { 5'b11001, {5 {1'b0}}, 2'b10, {5 {1'b0}}, 3'b010, {5 {1'b?}}, `OPCODE_SYSTEM }
+
+// RV32M
+`define INSTR_MUL         { 7'b0000001, {10 {1'b?}}, 3'b000, {5 {1'b?}}, `OPCODE_OP }
+`define INSTR_MULH        { 7'b0000001, {10 {1'b?}}, 3'b001, {5 {1'b?}}, `OPCODE_OP }
+`define INSTR_MULHSU      { 7'b0000001, {10 {1'b?}}, 3'b010, {5 {1'b?}}, `OPCODE_OP }
+`define INSTR_MULHU       { 7'b0000001, {10 {1'b?}}, 3'b011, {5 {1'b?}}, `OPCODE_OP }
+/*
+`define INSTR_DIV         { 7'b0000001, {10 {1'b?}}, 3'b100, {5 {1'b?}}, `OPCODE_OP }
+`define INSTR_DIVU        { 7'b0000001, {10 {1'b?}}, 3'b101, {5 {1'b?}}, `OPCODE_OP }
+`define INSTR_REM         { 7'b0000001, {10 {1'b?}}, 3'b110, {5 {1'b?}}, `OPCODE_OP }
+`define INSTR_REMU { 7'b0000001, {10 {1'b?}}, 3'b111, {5 {1'b?}}, `OPCODE_OP }
+*/
+
+
+
+//////////////////////////////////////////////////////////////////////////////
+//      _    _    _   _    ___                       _   _                  //
+//     / \  | |  | | | |  / _ \ _ __   ___ _ __ __ _| |_(_) ___  _ __  ___  //
+//    / _ \ | |  | | | | | | | | '_ \ / _ \ '__/ _` | __| |/ _ \| '_ \/ __| //
+//   / ___ \| |__| |_| | | |_| | |_) |  __/ | | (_| | |_| | (_) | | | \__ \ //
+//  /_/   \_\_____\___/   \___/| .__/ \___|_|  \__,_|\__|_|\___/|_| |_|___/ //
+//                             |_|                                          //
+//////////////////////////////////////////////////////////////////////////////
+
+// Width in bits of every ALU_* operator code below
+`define ALU_OP_WIDTH 6
+
+// Movhi Operation
+`define ALU_MOVHI 6'b001111
+// Standard Operations
+`define ALU_ADD 6'b000_000
+`define ALU_ADDC 6'b000_001
+`define ALU_SUB 6'b000_010
+`define ALU_AND 6'b000_011
+`define ALU_OR 6'b000_100
+`define ALU_XOR 6'b000_101
+
+`define ALU_AVG 6'b000_110
+`define ALU_AVGU 6'b000_111
+// Shift Operations
+`define ALU_SLL 6'b0010_00
+`define ALU_SRL 6'b0010_01
+`define ALU_SRA 6'b0010_10
+`define ALU_ROR 6'b0010_11
+// Set Lower Than Operations
+`define ALU_SLTS 6'b0011_00
+`define ALU_SLTU 6'b0011_01
+// CMOV operation
+`define ALU_CMOV 6'b0011_10
+// Extension Operations
+`define ALU_EXTHS 6'b010_000
+`define ALU_EXTWS 6'b010_001
+`define ALU_EXTBS 6'b010_010
+`define ALU_EXTWZ 6'b010_011
+`define ALU_EXTHZ 6'b010_100
+`define ALU_EXTBZ 6'b010_110
+// No Operation
+`define ALU_NOP 6'b011111
+// Comparison Operations
+`define ALU_EQ 6'b10_0000
+`define ALU_NE 6'b10_0001
+`define ALU_GTU 6'b10_0010
+`define ALU_GEU 6'b10_0011
+`define ALU_LTU 6'b10_0100
+`define ALU_LEU 6'b10_0101
+`define ALU_GTS 6'b10_1010
+`define ALU_GES 6'b10_1011
+`define ALU_LTS 6'b10_1100
+`define ALU_LES 6'b10_1101
+
+`define ALU_MIN 6'b10_1110
+`define ALU_MINU 6'b11_1110
+`define ALU_MAX 6'b10_1111
+`define ALU_MAXU 6'b11_1111
+
+`define ALU_ABS 6'b11_1010
+
+`define ALU_INS 6'b11_1101
+`define ALU_EXT 6'b11_1100
+
+`define ALU_CNT 6'b11_0000
+`define ALU_FF1 6'b11_0010
+`define ALU_FL1 6'b11_0011
+`define ALU_CLB 6'b11_0001
+
+
+// Vector Mode
+`define VEC_MODE32 2'b00
+`define VEC_MODE16 2'b10
+`define VEC_MODE8 2'b11
+`define VEC_MODE216 2'b01
+
+// ALU comparison mode
+`define ALU_CMP_FULL 2'b00
+`define ALU_CMP_ANY 2'b01
+`define ALU_CMP_ALL 2'b10
+
+
+////////////////////////////////////////////////////////
+//  ____  ____    ____            _     _             //
+// / ___||  _ \  |  _ \ ___  __ _(_)___| |_ ___ _ __  //
+// \___ \| |_) | | |_) / _ \/ _` | / __| __/ _ \ '__| //
+//  ___) |  __/  |  _ <  __/ (_| | \__ \ ||  __/ |    //
+// |____/|_|     |_| \_\___|\__, |_|___/\__\___|_|    //
+//                          |___/                     //
+////////////////////////////////////////////////////////
+
+// Special-Purpose Register Addresses
+// see OpenRISC manual p. 22ff
+// NOTE: SP_EPCR and SP_ESR are written as decimal literals (16'd...), unlike
+// the hexadecimal addresses around them.
+`define SP_NPC 16'h0010
+`define SP_SR 16'h0011
+`define SP_MACLO 16'h2801 // TODO: remove
+`define SP_MACHI 16'h2802 // TODO: remove
+`define SP_EPCR 16'd0032
+`define SP_ESR 16'd0064
+`define SP_DVR0 16'h3000
+`define SP_DCR0 16'h3008
+`define SP_DMR1 16'h3010
+`define SP_DMR2 16'h3011
+
+// Core and Cluster ID are put into the system control and status
+// registers group
+`define SP_COREID 16'h0680
+`define SP_CLUSTERID 16'h0681
+
+`define SP_DVR_MSB 8'h00
+`define SP_DCR_MSB 8'h01
+`define SP_DMR_MSB 8'h02
+`define SP_DSR_MSB 8'h04
+
+// Supervision Register
+`define SR_IEE 5'd2
+`define SR_F 5'd9
+`define SR_CY 5'd10
+`define SR_OV 5'd11
+`define SR_DSX 5'd13
+
+//igor addon
+// bit ranges, meant to be used as a part-select: x[`REG_A]
+`define REG_A 20:16
+`define REG_B 15:11
+
+// forwarding operand mux
+`define SEL_REGFILE 2'b00
+`define SEL_FW_EX 2'b01
+`define SEL_FW_WB 2'b10
+
+// operand a selection
+// NOTE(review): OP_A_IMM16 and OP_A_ZERO share the encoding 2'b11 while 2'b01
+// is unused. If one case statement lists both, only the first arm can ever
+// match. Confirm against the operand A mux in id_stage before changing either.
+`define OP_A_REGA_OR_FWD 2'b00
+`define OP_A_CURRPC 2'b10
+`define OP_A_IMM16 2'b11
+`define OP_A_ZERO 2'b11
+
+// operand b selection
+`define OP_B_REGB_OR_FWD 2'b00
+`define OP_B_REGC_OR_FWD 2'b01
+`define OP_B_IMM 2'b10
+
+// immediate selection
+// (the 4-bit list below is commented out; the 3-bit IMM_* set that follows is
+// the active one)
+// - `define IMM_5N11 4'b0000
+// - `define IMM_21S 4'b0001
+// - `define IMM_8Z 4'b0010
+// - `define IMM_16Z 4'b0011
+// - `define IMM_16 4'b0100
+// - `define IMM_11S 4'b0101
+// - `define IMM_5N6S 4'b0110
+// - `define IMM_VEC 4'b0111
+// - `define IMM_HEX4 4'b1000
+`define IMM_I 3'b000
+`define IMM_S 3'b010
+`define IMM_SB 3'b011
+`define IMM_U 3'b100
+`define IMM_UJ 3'b101
+
+
+// PC mux selector defines
+`define INCR_PC 3'b000
+`define NO_INCR 3'b001
+`define PC_FROM_REGFILE 3'b010
+`define PC_FROM_IMM 3'b011
+`define PC_EXCEPTION 3'b100
+`define EXC_PC_REG 3'b101
+`define HWLOOP_ADDR 3'b110
+`define PC_BRANCH_PRED 3'b111
+
+// Exception PC mux selector defines
+`define EXC_PC_NO_INCR 2'b00
+`define EXC_PC_ILLINSN 2'b01
+`define EXC_PC_IRQ 2'b10
+`define EXC_PC_IRQ_NM 2'b11
+
+// Exceptions offsets
+// It is assumed that the lower 8 bits are enough for all exception
+// offsets, so the upper 24 bits of the boot address are used and the
+// lower 8 bits of the exception offset
+`define EXC_ILLINSN 8'h30
+`define EXC_IRQ 8'h38
+`define EXC_IRQ_NM 8'h70
+
+
+// Hardware loops addon
+`define HWLOOP_REGS 2
+
+// Debug module
+// DCR_*, DMR*_* and DSR_* are bit positions or bit ranges
+`define N_WP 2 // #Watchpoints
+`define DCR_DP 0
+`define DCR_CC 3:1
+`define DCR_SC 4
+`define DCR_CT 7:5
+
+`define DMR1_ST 22
+`define DMR2_WGB0 12
+`define DMR2_WBS0 22
+
+`define DSR_IIE 0
+`define DSR_INTE 1
+
+
+
+// TCDM_ADDRESS PRE-CALCULATION --> Bring part of the alu_adder_o calculation in the ID stage
+// (optional build switch, disabled by default)
+//`define TCDM_ADDR_PRECAL
+
+ // optional build switch, disabled by default
+ //`define BRANCH_PREDICTION
\ No newline at end of file
diff --git a/instr_core_interface.sv b/instr_core_interface.sv
new file mode 100644
index 00000000..2306d22c
--- /dev/null
+++ b/instr_core_interface.sv
@@ -0,0 +1,280 @@
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                 DEI @ UNIBO - University of Bologna                        //
+//                                                                            //
+// Engineer:       Igor Loi - igor.loi@unibo.it                               //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+// Create Date:    06/08/2014                                                 //
+// Design Name:    Instruction Fetch interface                                //
+// Module Name:    instr_core_interface.sv                                    //
+// Project Name:   OR10N 
//
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    Instruction Fetch interface used to properly handle        //
+//                 cache stalls                                               //
+//                                                                            //
+// Revision:                                                                  //
+// Revision v0.1 - File Created                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+
+
+
+// Request/grant/valid adapter between the core fetch port and the instruction
+// memory.
+//   core side   : req_i / addr_i in, ack_o / rdata_o out
+//   memory side : instr_req_o / instr_addr_o out,
+//                 instr_gnt_i / instr_r_valid_i / instr_r_rdata_i in
+// A read word that arrives while stall_if_i is high is kept in rdata_Q and
+// presented again until the stall is released. An address that was requested
+// but not granted is kept in addr_Q and replayed until the grant arrives.
+module instr_core_interface
+  (
+   input  logic        clk,
+   input  logic        rst_n,
+
+   // core side
+   input  logic        req_i,
+   input  logic [31:0] addr_i,
+   output logic        ack_o,
+   output logic [31:0] rdata_o,
+
+   // instruction memory side
+   output logic        instr_req_o,
+   output logic [31:0] instr_addr_o,
+   input  logic        instr_gnt_i,
+   input  logic        instr_r_valid_i,
+   input  logic [31:0] instr_r_rdata_i,
+
+   input  logic        stall_if_i,
+
+   input  logic        drop_request_i   // not read by the logic below
+   );
+
+
+  // ABORT is decoded below but no transition ever selects it as NS
+  enum logic [2:0] {IDLE, PENDING, WAIT_RVALID, WAIT_IF_STALL, WAIT_GNT, ABORT} CS, NS;
+
+  logic        save_rdata;   // capture instr_r_rdata_i into rdata_Q
+  logic [31:0] rdata_Q;
+
+  logic        wait_gnt;     // capture addr_i into addr_Q (request not yet granted)
+  logic [31:0] addr_Q;
+
+  logic        issue_next;   // this cycle the core request may go out to memory
+
+  // state, saved read data and saved address
+  always_ff @(posedge clk, negedge rst_n)
+  begin
+    if (!rst_n)
+    begin
+      CS      <= IDLE;
+      rdata_Q <= '0;
+      addr_Q  <= '0;
+    end
+    else
+    begin
+      CS <= NS;
+
+      if (save_rdata)
+        rdata_Q <= instr_r_rdata_i;
+
+      if (wait_gnt)
+        addr_Q <= addr_i;
+    end
+  end
+
+
+  // next-state and output logic
+  always_comb
+  begin
+    // defaults: nothing requested, nothing acknowledged, pass-through data/address
+    NS           = CS;      // always overwritten below
+    instr_req_o  = 1'b0;
+    instr_addr_o = addr_i;
+    ack_o        = 1'b0;
+    rdata_o      = instr_r_rdata_i;
+    save_rdata   = 1'b0;
+    wait_gnt     = 1'b0;
+    issue_next   = 1'b0;
+
+    case (CS)
+
+      // no transaction outstanding: present the saved word, accept a request
+      IDLE :
+      begin
+        rdata_o    = rdata_Q;
+        issue_next = 1'b1;
+      end
+
+      // request issued but not granted: replay the saved address until granted
+      // (an early exit on drop_request_i was left commented out by the authors)
+      WAIT_GNT :
+      begin
+        instr_addr_o = addr_Q;
+        instr_req_o  = 1'b1;
+
+        if (instr_gnt_i)
+          NS = PENDING;
+        else
+          NS = WAIT_GNT;
+      end
+
+      // request granted, read data awaited; both states behave identically
+      PENDING, WAIT_RVALID :
+      begin
+        if (instr_r_valid_i)
+        begin
+          ack_o      = 1'b1;
+          save_rdata = 1'b1;
+
+          if (stall_if_i)
+            NS = WAIT_IF_STALL;   // keep the word until the IF stage moves on
+          else
+            issue_next = 1'b1;
+        end
+        else
+          NS = WAIT_RVALID;
+      end
+
+      // data arrived during a stall: keep acknowledging with the saved word
+      WAIT_IF_STALL :
+      begin
+        ack_o   = 1'b1;
+        rdata_o = rdata_Q;
+
+        if (stall_if_i)
+          NS = WAIT_IF_STALL;
+        else
+          issue_next = 1'b1;
+      end
+
+      // request line is forced high here, independent of req_i
+      ABORT :
+      begin
+        ack_o       = 1'b1;
+        instr_req_o = 1'b1;
+
+        if (req_i)
+        begin
+          if (instr_gnt_i)
+            NS = PENDING;
+          else
+          begin
+            NS       = WAIT_GNT;
+            wait_gnt = 1'b1;
+          end
+        end
+        else
+          NS = IDLE;
+      end
+
+      default :
+        NS = IDLE;
+
+    endcase
+
+    // forward the core request: granted -> PENDING, not granted -> WAIT_GNT
+    // (saving the address), no request -> IDLE
+    if (issue_next)
+    begin
+      instr_req_o = req_i;
+
+      if (req_i)
+      begin
+        if (instr_gnt_i)
+          NS = PENDING;
+        else
+        begin
+          NS       = WAIT_GNT;
+          wait_gnt = 1'b1;
+        end
+      end
+      else
+        NS = IDLE;
+    end
+
+  end
+
+
+endmodule
\ No newline at end of file
diff --git a/load_store_unit.sv b/load_store_unit.sv
new file mode 100644
index 00000000..0133d7d4
--- /dev/null
+++ b/load_store_unit.sv
@@ -0,0 +1,507 @@
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                 DEI @ UNIBO - University of Bologna                        //
+//                                                                            //
+// Engineer:       Igor Loi - igor.loi@unibo.it                               //
+//                                                                            //
+// Additional contributions by: // +// // +// // +// Create Date: 01/07/2014 // +// Design Name: Load Store Unit // +// Module Name: load_store_unit.sv // +// Project Name: OR10N // +// Language: SystemVerilog // +// // +// Description: Load Store Unit, used to eliminate multiple access during // +// processor stalls, and to align bytes and halfwords // +// // +// Revision: // +// Revision v0.1 - File Created // +// Revision v0.2 - (August 6th 2014) Added stall stupport when ID stage is // +// stalled // +// // +// // +// // +//////////////////////////////////////////////////////////////////////////////// + + +`include "defines.sv" + +module load_store_unit +( + input logic clk, + input logic rst_n, + + // signals from ex stage + input logic data_we_ex_i, // write enable -> from ex stage + input logic [1:0] data_type_ex_i, // Data type word, halfword, byte -> from ex stage + input logic [31:0] data_wdata_ex_i, // data to write to memory -> from ex stage + input logic data_sign_ext_ex_i, // sign extension -> from ex stage + input logic [1:0] data_reg_offset_ex_i, // offset inside register for stores -> from ex stage + output logic [31:0] data_rdata_ex_o, // requested data -> to ex stage + output logic [31:0] lsu_data_reg_o, // requested data registered -> to id stage + input logic data_req_ex_i, // data request -> from ex stage + input logic [31:0] data_addr_ex_i, // data address -> from ex stage + output logic data_ack_int_o, // data ack -> to controller + + input logic data_misaligned_ex_i, // misaligned access in last ld/st -> from ID/EX pipeline + output logic data_misaligned_o, // misaligned access was detected -> to controller + + // output to data memory + output logic data_req_o, + output logic [31:0] data_addr_o, + output logic data_we_o, + + output logic [3:0] data_be_o, + output logic [31:0] data_wdata_o, + input logic [31:0] data_rdata_i, + input logic data_rvalid_i, + input logic data_gnt_i, + + // stall signal + input logic ex_stall_i +); + + // 
registers for data_rdata alignment and sign extension + logic [1:0] data_type_q; + logic [1:0] rdata_offset_q; + logic data_sign_ext_q; + + logic [1:0] wdata_offset; // mux control for data to be written to memory + + // signals for tcdm contention + logic [3:0] data_be, data_be_q; + logic [31:0] data_wdata, data_wdata_q; + logic data_we_q; + logic [31:0] data_addr_q; + + logic misaligned_st; // high if we are currently performing the second part of a misaligned store + logic misaligned_st_q; // register for misaligned_st + + logic request_entered; + + enum logic [2:0] { IDLE, WAIT_GNT, PENDING_W_EX_STALL_2, PENDING_W_EX_STALL_1, PENDING_WO_EX_STALL} CS, NS; + + logic latch_rdata; + logic [31:0] rdata_q; + + ///////////////////////////////// BE generation //////////////////////////////// + always_comb + begin + casex (data_type_ex_i) // Data type 00 Word, 01 Half word, 11,10 byte + 2'b00: + begin // Writing a word + if (misaligned_st == 1'b0) + begin // non-misaligned case + case (data_addr_ex_i[1:0]) + 2'b00: data_be = 4'b1111; + 2'b01: data_be = 4'b1110; + 2'b10: data_be = 4'b1100; + 2'b11: data_be = 4'b1000; + endcase; // case (data_addr_ex_i[1:0]) + end + else + begin // misaligned case + case (data_addr_ex_i[1:0]) + 2'b00: data_be = 4'b0000; // this is not used, but included for completeness + 2'b01: data_be = 4'b0001; + 2'b10: data_be = 4'b0011; + 2'b11: data_be = 4'b0111; + endcase; // case (data_addr_ex_i[1:0]) + end + end + + 2'b01: + begin // Writing a half word + if (misaligned_st == 1'b0) + begin // non-misaligned case + case (data_addr_ex_i[1:0]) + 2'b00: data_be = 4'b0011; + 2'b01: data_be = 4'b0110; + 2'b10: data_be = 4'b1100; + 2'b11: data_be = 4'b1000; + endcase; // case (data_addr_ex_i[1:0]) + end + else + begin // misaligned case + data_be = 4'b0001; + end + end + + 2'b1X: begin // Writing a byte + case (data_addr_ex_i[1:0]) + 2'b00: data_be = 4'b0001; + 2'b01: data_be = 4'b0010; + 2'b10: data_be = 4'b0100; + 2'b11: data_be = 4'b1000; + endcase; 
// case (data_addr_ex_i[1:0]) + end + endcase; // casex (data_type_ex_i) + end + + // prepare data to be written to the memory + // we handle misaligned accesses, half word and byte accesses and + // register offsets here + assign wdata_offset = data_addr_ex_i[1:0] - data_reg_offset_ex_i[1:0]; + always_comb + begin + case (wdata_offset) + 2'b00: data_wdata = data_wdata_ex_i[31:0]; + 2'b01: data_wdata = {data_wdata_ex_i[23:0], data_wdata_ex_i[31:24]}; + 2'b10: data_wdata = {data_wdata_ex_i[15:0], data_wdata_ex_i[31:16]}; + 2'b11: data_wdata = {data_wdata_ex_i[ 7:0], data_wdata_ex_i[31: 8]}; + endcase; // case (wdata_offset) + end + + + // FF for rdata + always_ff @(posedge clk, negedge rst_n) + begin + if(rst_n == 1'b0) + begin + data_type_q <= '0; + rdata_offset_q <= '0; + data_sign_ext_q <= '0; + end + else if (request_entered == 1'b1) // request entered FSM + begin + data_type_q <= data_type_ex_i; + rdata_offset_q <= data_addr_ex_i[1:0]; + data_sign_ext_q <= data_sign_ext_ex_i; + end + end + + // pipeline gnt signal + always_ff @(posedge clk, negedge rst_n) + begin + if(rst_n == 1'b0) + begin + data_ack_int_o <= 1'b0; + end + else + begin + data_ack_int_o <= ~((data_req_o == 1'b1) & (data_gnt_i == 1'b0)); + end + end + + // FF for not accepted requests + always_ff @(posedge clk, negedge rst_n) + begin + if(rst_n == 1'b0) + begin + data_be_q <= '0; + data_addr_q <= '0; + data_we_q <= '0; + data_wdata_q <= '0; + misaligned_st_q <= 1'b0; + end + else if ((data_req_o == 1'b1) & (data_gnt_i == 1'b0)) // request was not granted + begin + data_be_q <= data_be_o; + data_addr_q <= data_addr_o; + data_we_q <= data_we_o; + data_wdata_q <= data_wdata_o; + misaligned_st_q <= misaligned_st; + end + end + + //////////////////////////////////////////////////////////////////////// + // ____ _ _____ _ _ // + // / ___|(_) __ _ _ __ | ____|_ _| |_ ___ _ __ ___(_) ___ _ __ // + // \___ \| |/ _` | '_ \ | _| \ \/ / __/ _ \ '_ \/ __| |/ _ \| '_ \ // + // ___) | | (_| | | | | | |___ > <| 
|| __/ | | \__ \ | (_) | | | | // + // |____/|_|\__, |_| |_| |_____/_/\_\\__\___|_| |_|___/_|\___/|_| |_| // + // |___/ // + //////////////////////////////////////////////////////////////////////// + + logic [31:0] data_rdata_ext; + + logic [31:0] rdata_w_ext; // sign extension for words, actually only misaligned assembly + logic [31:0] rdata_h_ext; // sign extension for half words + logic [31:0] rdata_b_ext; // sign extension for bytes + + // take care of misaligned words + always_comb + begin + case (rdata_offset_q) + 2'b00: rdata_w_ext = data_rdata_i[31:0]; + 2'b01: rdata_w_ext = {data_rdata_i[ 7:0], rdata_q[31:8]}; + 2'b10: rdata_w_ext = {data_rdata_i[15:0], rdata_q[31:16]}; + 2'b11: rdata_w_ext = {data_rdata_i[23:0], rdata_q[31:24]}; + endcase + end + + // sign extension for half words + always_comb + begin + case (rdata_offset_q) + 2'b00: + begin + if (data_sign_ext_q == 1'b0) + rdata_h_ext = {16'h0000, data_rdata_i[15:0]}; + else + rdata_h_ext = {{16{data_rdata_i[15]}}, data_rdata_i[15:0]}; + end + + 2'b01: + begin + if (data_sign_ext_q == 1'b0) + rdata_h_ext = {16'h0000, data_rdata_i[23:8]}; + else + rdata_h_ext = {{16{data_rdata_i[23]}}, data_rdata_i[23:8]}; + end + + 2'b10: + begin + if (data_sign_ext_q == 1'b0) + rdata_h_ext = {16'h0000, data_rdata_i[31:16]}; + else + rdata_h_ext = {{16{data_rdata_i[31]}}, data_rdata_i[31:16]}; + end + + 2'b11: + begin + if (data_sign_ext_q == 1'b0) + rdata_h_ext = {16'h0000, data_rdata_i[7:0], rdata_q[31:24]}; + else + rdata_h_ext = {{16{data_rdata_i[7]}}, data_rdata_i[7:0], rdata_q[31:24]}; + end + endcase // case (rdata_offset_q) + end + + // sign extension for bytes + always_comb + begin + case (rdata_offset_q) + 2'b00: + begin + if (data_sign_ext_q == 1'b0) + rdata_b_ext = {24'h00_0000, data_rdata_i[7:0]}; + else + rdata_b_ext = {{24{data_rdata_i[7]}}, data_rdata_i[7:0]}; + end + + 2'b01: begin + if (data_sign_ext_q == 1'b0) + rdata_b_ext = {24'h00_0000, data_rdata_i[15:8]}; + else + rdata_b_ext = 
{{24{data_rdata_i[15]}}, data_rdata_i[15:8]}; + end + + 2'b10: + begin + if (data_sign_ext_q == 1'b0) + rdata_b_ext = {24'h00_0000, data_rdata_i[23:16]}; + else + rdata_b_ext = {{24{data_rdata_i[23]}}, data_rdata_i[23:16]}; + end + + 2'b11: + begin + if (data_sign_ext_q == 1'b0) + rdata_b_ext = {24'h00_0000, data_rdata_i[31:24]}; + else + rdata_b_ext = {{24{data_rdata_i[31]}}, data_rdata_i[31:24]}; + end + endcase // case (rdata_offset_q) + end + + // select word, half word or byte sign extended version + always_comb + begin + case (data_type_q) + 2'b00: data_rdata_ext = rdata_w_ext; + 2'b01: data_rdata_ext = rdata_h_ext; + 2'b10,2'b11: data_rdata_ext = rdata_b_ext; + endcase //~case(rdata_type_q) + end + + + + always_ff @(posedge clk, negedge rst_n) + begin + if(rst_n == 1'b0) + begin + CS <= IDLE; + rdata_q <= '0; + end + else + begin + CS <= NS; + + if(latch_rdata) + begin + // if we have detected a misaligned access, and we are + // currently doing the first part of this access, then + // store the data coming from memory in rdata_q. + // In all other cases, rdata_q gets the value that we are + // writing to the register file + if ((data_misaligned_ex_i == 1'b1) || (data_misaligned_o == 1'b1)) + rdata_q <= data_rdata_i; + else + rdata_q <= data_rdata_ext; + end + end + end + + // output to register file + assign data_rdata_ex_o = (latch_rdata == 1'b1) ? 
data_rdata_ext : rdata_q; + + // registered result of data request + assign lsu_data_reg_o = rdata_q; + + + // FSM + always_comb + begin + data_req_o = 1'b0; + data_we_o = 1'b0; + data_addr_o = data_addr_ex_i; + data_wdata_o = data_wdata; + data_be_o = data_be; + misaligned_st = data_misaligned_ex_i; + latch_rdata = 1'b0; + request_entered = 1'b0; + + case(CS) + IDLE: + begin + data_req_o = data_req_ex_i; + data_we_o = data_we_ex_i; + + if(data_req_ex_i) + begin + request_entered = 1'b1; + + if(data_gnt_i) + begin + if(ex_stall_i) + NS = PENDING_W_EX_STALL_1; + else + NS = PENDING_WO_EX_STALL; + end + else + begin + if(ex_stall_i) + NS = IDLE; + else + begin + NS = WAIT_GNT; + end + end + end + else + NS = IDLE; + end //~ IDLE + + WAIT_GNT: + begin + data_req_o = 1'b1; + data_we_o = data_we_q; + + data_addr_o = data_addr_q; + data_be_o = data_be_q; + data_wdata_o = data_wdata_q; + misaligned_st = misaligned_st_q; + + if(data_gnt_i) + begin + NS = PENDING_WO_EX_STALL; + end + else + begin + NS = WAIT_GNT; + end + end // case: WAIT_GNT + + PENDING_WO_EX_STALL: + begin + latch_rdata = ~data_we_o; + + data_req_o = data_req_ex_i; + data_we_o = data_we_ex_i; + + if(data_req_ex_i) + begin + request_entered = 1'b1; + + if(data_gnt_i) + begin + if(ex_stall_i) + NS = PENDING_W_EX_STALL_1; + else + NS = PENDING_WO_EX_STALL; + end + else + begin + if(ex_stall_i) + NS = IDLE; + else + NS = WAIT_GNT; + end + end + else + NS = IDLE; + end //~PENDING_WO_EX_STALL + + PENDING_W_EX_STALL_1 : + begin + data_req_o = 1'b0; + + latch_rdata = ~data_we_o; + + if(ex_stall_i) + begin + NS = PENDING_W_EX_STALL_2; + end + else + begin + NS = IDLE; + end + end //~ PENDING_W_EX_STALL_1 + + PENDING_W_EX_STALL_2 : + begin + if(ex_stall_i) + begin + NS = PENDING_W_EX_STALL_2; + end + else + begin + NS = IDLE; + end + end //~ PENDING_W_EX_STALL_2 + + default : + begin + NS = IDLE; + end + endcase + end + + // check for misaligned accesses that need a second memory access + // If one is detected, 
this is signaled with data_misaligned_o to + // the controller which selectively stalls the pipeline + always_comb + begin + data_misaligned_o = 1'b0; + + if((data_req_ex_i == 1'b1) && (data_misaligned_ex_i == 1'b0)) + begin + case (data_type_ex_i) + 2'b00: // word + begin + if(data_addr_ex_i[1:0] != 2'b00) + data_misaligned_o = 1'b1; + end + 2'b01: // half word + begin + if(data_addr_ex_i[1:0] == 2'b11) + data_misaligned_o = 1'b1; + end + endcase // case (data_type_ex_i) + end + end + +endmodule diff --git a/mult.sv b/mult.sv new file mode 100644 index 00000000..d5bb5080 --- /dev/null +++ b/mult.sv @@ -0,0 +1,125 @@ +//////////////////////////////////////////////////////////////////////////////// +// Company: IIS @ ETHZ - Federal Institute of Technology // +// // +// Engineer: Matthias Baer - baermatt@student.ethz.ch // +// // +// Additional contributions by: // +// Andreas Traber - atraber@student.ethz.ch // +// // +// // +// Create Date: 19/09/2013 // +// Design Name: Pipelined Processor // +// Module Name: mult.sv // +// Project Name: Processor // +// Language: SystemVerilog // +// // +// Description: Multiplier of the pipelined processor // +// Design ware multiplier requires two cycles to complete. // +// Generic multiplier requires only one cycle. result will be // +// stored in a FF. Best synthesis results are achieved with // +// moving the result register in the multiplier with automatic// +// retiming! // +// // +// Revision: // +// Revision v0.1 - File Created // +// Revision v0.2 - (Oct 30th 2014) Added MAC to the multiplier // +// Revision v0.3 - (Jan 21th 2015) Changed to a 32 bit result for // +// multiplications, added vectorial support and subword // +// selection. There are no flags for multiplications anymore! 
//
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+
+
+`include "defines.sv"
+
+module mult
+(
+  input  logic [1:0]  vector_mode_i,
+  input  logic [1:0]  sel_subword_i,
+  input  logic [1:0]  signed_mode_i,
+  input  logic        use_carry_i,
+  input  logic        mac_en_i,
+
+  input  logic [31:0] op_a_i,
+  input  logic [31:0] op_b_i,
+  input  logic [31:0] mac_i,
+  input  logic        carry_i,
+
+  output logic [31:0] result_o,
+  output logic        carry_o,
+  output logic        overflow_o
+);
+
+  // 33 bit wide: bit 32 is exported as carry_o, bits 31..0 as result_o
+  logic [32:0] result;
+
+  // operands after subword selection / extension
+  logic [31:0] op_a_sel;
+  logic [31:0] op_b_sel;
+
+  // accumulator input, zero when the MAC is disabled
+  logic [32:0] mac_int;
+
+
+  assign mac_int = mac_en_i ? {1'b0, mac_i} : 33'b0;
+
+  // Operand preparation.
+  // By default both operands are passed through untouched. Only in
+  // VEC_MODE216 a 16 bit half of each operand is picked (sel_subword_i[1]
+  // for operand a, sel_subword_i[0] for operand b; 1 selects the upper
+  // half) and extended to 32 bit. The extension replicates the sign bit
+  // of the picked half when the matching signed_mode_i bit is set,
+  // otherwise it fills with zeros.
+  always_comb
+  begin
+    op_a_sel = op_a_i;
+    op_b_sel = op_b_i;
+
+    if (vector_mode_i == `VEC_MODE216)
+    begin
+      // lower halves are already in place, only swap in the upper ones
+      if (sel_subword_i[1])
+        op_a_sel[15:0] = op_a_i[31:16];
+
+      if (sel_subword_i[0])
+        op_b_sel[15:0] = op_b_i[31:16];
+
+      op_a_sel[31:16] = {16{signed_mode_i[1] & op_a_sel[15]}};
+      op_b_sel[31:16] = {16{signed_mode_i[0] & op_b_sel[15]}};
+    end
+  end
+
+
+  // Multiplication.
+  // The two vector modes multiply lane by lane and clear bit 32; every
+  // other mode performs one multiplication plus accumulator and carry.
+  always_comb
+  begin
+    if (vector_mode_i == `VEC_MODE16)
+    begin
+      // two independent 16 bit lanes
+      result[15: 0] = op_a_sel[15: 0] * op_b_sel[15: 0];
+      result[31:16] = op_a_sel[31:16] * op_b_sel[31:16];
+      result[32]    = 1'b0;
+    end
+    else if (vector_mode_i == `VEC_MODE8)
+    begin
+      // four independent 8 bit lanes
+      result[ 7: 0] = op_a_sel[ 7: 0] * op_b_sel[ 7: 0];
+      result[15: 8] = op_a_sel[15: 8] * op_b_sel[15: 8];
+      result[23:16] = op_a_sel[23:16] * op_b_sel[23:16];
+      result[31:24] = op_a_sel[31:24] * op_b_sel[31:24];
+      result[32]    = 1'b0;
+    end
+    else // VEC_MODE32, VEC_MODE216
+    begin
+      result = mac_int + op_a_sel * op_b_sel + (use_carry_i & carry_i);
+    end
+  end
+
+  assign result_o = result[31:0];
+
+  assign carry_o = result[32];
+
+  // overflow is only used for MAC
+  // If the MSB of the input MAC and the result is not
the same => overflow occurred + assign overflow_o = mac_i[31] ^ result[31]; + +endmodule // mult + diff --git a/register_file.sv b/register_file.sv new file mode 100644 index 00000000..b9629b1f --- /dev/null +++ b/register_file.sv @@ -0,0 +1,163 @@ +module riscv_register_file +#( + parameter ADDR_WIDTH = 5, + parameter DATA_WIDTH = 32 +) +( + // Clock and Reset + input logic clk, + input logic rst_n, + + //Read port R1 + input logic [ADDR_WIDTH-1:0] raddr_a_i, + output logic [DATA_WIDTH-1:0] rdata_a_o, + + //Read port R2 + input logic [ADDR_WIDTH-1:0] raddr_b_i, + output logic [DATA_WIDTH-1:0] rdata_b_o, + + //Read port R3 + input logic [ADDR_WIDTH-1:0] raddr_c_i, + output logic [DATA_WIDTH-1:0] rdata_c_o, + + // Write port W1 + input logic [ADDR_WIDTH-1:0] waddr_a_i, + input logic [DATA_WIDTH-1:0] wdata_a_i, + input logic we_a_i, + + // Write port W2 + input logic [ADDR_WIDTH-1:0] waddr_b_i, + input logic [DATA_WIDTH-1:0] wdata_b_i, + input logic we_b_i +); + + localparam NUM_WORDS = 2**ADDR_WIDTH; + + // Read address register, located at the input of the address decoder + logic [ADDR_WIDTH-1:0] RAddrRegxDPa; + logic [ADDR_WIDTH-1:0] RAddrRegxDPb; + logic [ADDR_WIDTH-1:0] RAddrRegxDPc; + logic [NUM_WORDS-1:0] RAddrOneHotxD; + logic [ADDR_WIDTH-1:0] s_raddr_c; + + logic [DATA_WIDTH-1:0] MemContentxDP[NUM_WORDS]; + + logic [NUM_WORDS-1:0] WAddrOneHotxDa; + logic [NUM_WORDS-1:0] WAddrOneHotxDb; + logic [NUM_WORDS-1:0] WAddrOneHotxDb_reg; + + logic [NUM_WORDS-1:0] ClocksxC; + logic [DATA_WIDTH-1:0] WDataIntxDa; + logic [DATA_WIDTH-1:0] WDataIntxDb; + + logic clk_int; + + logic we_int; + + int unsigned i; + int unsigned j; + int unsigned k; + int unsigned l; + int unsigned m; + + genvar x; + genvar y; + + assign we_int = we_a_i | we_b_i; + + cluster_clock_gating CG_WE_GLOBAL + ( + .clk_o(clk_int), + .en_i(we_int), + .test_en_i(1'b0), + .clk_i(clk) + ); + + //----------------------------------------------------------------------------- + //-- READ : Read address decoder 
RAD + //----------------------------------------------------------------------------- + assign rdata_a_o = MemContentxDP[raddr_a_i]; + assign rdata_b_o = MemContentxDP[raddr_b_i]; + assign rdata_c_o = MemContentxDP[raddr_c_i]; + + //----------------------------------------------------------------------------- + //-- WRITE : Write Address Decoder (WAD), combinatorial process + //----------------------------------------------------------------------------- + always_comb + begin : p_WADa + for(i=1; i load store unit + logic data_we_ex; + logic [1:0] data_type_ex; + logic data_sign_ext_ex; + logic [1:0] data_reg_offset_ex; + logic data_req_ex; + logic [31:0] data_addr_ex; + logic data_misaligned_ex; + logic [31:0] data_rdata_int; + logic [31:0] lsu_data_reg; + logic data_ack_int; + + // Supervision Register + logic set_flag_ex; + logic set_carry_ex; + logic set_overflow_ex; + logic set_carry_fw_ex; + logic set_overflow_fw_ex; + logic set_dsx; + + // Direct Supervision-Register access + logic sr_flag; + logic sr_flag_fw; + logic carry_sp; + + // Calculation Result + logic [15:0] result_wb; + + // Signals between instruction core interface and pipe (if and id stages) + logic [31:0] instr_rdata_int; // read instruction from the instruction core interface to if_stage + logic instr_req_int; // Id stage asserts a req to instruction core interface + logic instr_ack_int; // instr core interface acks the request now (read data is available) + logic [31:0] instr_addr_int; // adress sent to the inst core interface from if_Stage + + // Interrupts + logic irq_enable; + logic [31:0] epcr; + logic save_pc_if; + logic save_pc_id; + logic save_sr; + logic restore_sr; + + // hwloops + logic [31:0] hwloop_cnt_ex; // from id to ex stage (hwloop_regs) + logic [2:0] hwloop_we_ex; // from id to ex stage (hwloop_regs) + logic [1:0] hwloop_regid_ex; // from id to ex stage (hwloop_regs) + logic hwloop_wb_mux_sel_ex; // from id to ex stage (hwloop_regs) + logic [31:0] hwloop_start_data; // 
hwloop data to write to hwloop_regs + logic [31:0] hwloop_end_data; // hwloop data to write to hwloop_regs + logic [31:0] hwloop_cnt_data; // hwloop data to write to hwloop_regs + + + logic [`HWLOOP_REGS-1:0] [31:0] hwloop_start_addr; // to hwloop controller + logic [`HWLOOP_REGS-1:0] [31:0] hwloop_end_addr; // to hwloop controller + logic [`HWLOOP_REGS-1:0] [31:0] hwloop_counter; // to hwloop controller + logic [`HWLOOP_REGS-1:0] hwloop_dec_cnt; // from hwloop controller to hwloop regs + logic [31:0] hwloop_targ_addr; // from hwloop controller to if stage + + // Debug Unit + logic dbg_stall; + logic dbg_flush_pipe; + logic pipe_flushed; + logic dbg_trap; + logic dbg_st_en; // single-step trace mode enabled + logic [1:0] dbg_dsr; // Debug Stop Register + + logic dbg_reg_mux; + logic dbg_sp_mux; + logic dbg_reg_we; + logic [15:0] dbg_reg_addr; + logic [31:0] dbg_reg_wdata; + logic [31:0] dbg_reg_rdata; + logic [31:0] dbg_rdata; + + logic [31:0] dbg_npc; + logic dbg_set_npc; + +`ifdef BRANCH_PREDICTION + logic wrong_branch_taken; + logic take_branch; +`endif + logic drop_instruction; + +`ifdef TCDM_ADDR_PRECAL + logic [31:0] alu_adder_ex; +`endif + + + + ////////////////////////////////////////////////// + // ___ _____ ____ _____ _ ____ _____ // + // |_ _| ___| / ___|_ _|/ \ / ___| ____| // + // | || |_ \___ \ | | / _ \| | _| _| // + // | || _| ___) || |/ ___ \ |_| | |___ // + // |___|_| |____/ |_/_/ \_\____|_____| // + // // + ////////////////////////////////////////////////// + if_stage if_stage_i + ( + // Global signals reset and clock + .clk ( clk ), // Clock + .rst_n ( rst_n ), // active low reset + + // Boot address for exception vector offsets + .boot_addr_i ( boot_addr_i ), + + // outputs to ID stage + .instr_rdata_id_o ( instr_rdata_id ), // Output of IF Pipeline stage + .current_pc_if_o ( current_pc_if ), // current pc + .current_pc_id_o ( current_pc_id ), // current pc + + //Input - OUtput from-to instruction memory + .instr_rdata_i ( instr_rdata_int ), // 
From Instr memory + .instr_addr_o ( instr_addr_int ), // address for instruction fetch to instr memory/cache + + // Forwrding ports - control signals + .force_nop_i ( force_nop_id ), // select incoming instr or NOP + .exception_pc_reg_i ( epcr ), // Exception PC register + .pc_from_regfile_i ( pc_from_regfile_id ), // pc from reg file + .pc_from_immediate_i ( pc_from_immediate_id ), // pc from immediate + .pc_from_hwloop_i ( hwloop_targ_addr ), // pc from hwloop start address + .pc_mux_sel_i ( pc_mux_sel_id ), // sel for pc multiplexer + .pc_mux_boot_i ( pc_mux_boot ), // load boot address as PC + .exc_pc_mux_i ( exc_pc_mux_id ), // selector for exception multiplexer + + // from debug unit + .dbg_pc_from_npc ( dbg_npc ), + .dbg_set_npc ( dbg_set_npc ), + + // branch prediction + .drop_instruction_i ( drop_instruction ), +`ifdef BRANCH_PREDICTION + .wrong_branch_taken_i( wrong_branch_taken ), + .take_branch_i ( take_branch ), +`endif + // pipeline stalls + .stall_if_i ( stall_if ), + .stall_id_i ( stall_id ) + ); + + + /////////////////////////////////////////////////////////////////////////////////// + // ___ _ _ ____ _____ ____ ____ ___ ____ _____ ___ _ _ _____ _____ // + // |_ _| \ | / ___|_ _| _ \ / ___/ _ \| _ \| ____| |_ _| \ | |_ _| ___| // + // | || \| \___ \ | | | |_) | | | | | | | |_) | _| | || \| | | | | |_ // + // | || |\ |___) || | | _ < | |__| |_| | _ <| |___ | || |\ | | | | _| // + // |___|_| \_|____/ |_| |_| \_\ \____\___/|_| \_\_____| |___|_| \_| |_| |_| // + // // + /////////////////////////////////////////////////////////////////////////////////// + instr_core_interface instr_core_interface_i + ( + .clk ( clk ), + .rst_n ( rst_n ), + + .stall_if_i ( stall_if ), + + .req_i ( instr_req_int ), + .addr_i ( instr_addr_int ), + .ack_o ( instr_ack_int ), + .rdata_o ( instr_rdata_int ), + + .instr_req_o ( instr_req_o ), + .instr_addr_o ( instr_addr_o ), + .instr_gnt_i ( instr_grant_i ), + .instr_r_valid_i ( instr_rvalid_i ), + .instr_r_rdata_i ( 
instr_rdata_i ), + + .drop_request_i ( wrong_branch_taken ) + ); + + + ///////////////////////////////////////////////// + // ___ ____ ____ _____ _ ____ _____ // + // |_ _| _ \ / ___|_ _|/ \ / ___| ____| // + // | || | | | \___ \ | | / _ \| | _| _| // + // | || |_| | ___) || |/ ___ \ |_| | |___ // + // |___|____/ |____/ |_/_/ \_\____|_____| // + // // + ///////////////////////////////////////////////// + id_stage id_stage_i + ( + .clk ( clk ), + .rst_n ( rst_n ), + + // Processor Enable + .fetch_enable_i ( fetch_enable_i ), + + .core_busy_o ( core_busy_o ), + + // Interface to instruction memory + .instr_rdata_i ( instr_rdata_id ), + .instr_req_o ( instr_req_int ), + .instr_gnt_i ( instr_grant_i ), + .instr_ack_i ( instr_ack_int ), + + .pc_mux_sel_o ( pc_mux_sel_id ), + .pc_mux_boot_o ( pc_mux_boot ), + .exc_pc_mux_o ( exc_pc_mux_id ), + .force_nop_o ( force_nop_id ), + + .pc_from_regfile_fw_o ( pc_from_regfile_id ), + .current_pc_if_i ( current_pc_if ), + .current_pc_id_i ( current_pc_id ), + .pc_from_immediate_o ( pc_from_immediate_id ), + + .sr_flag_fw_i ( sr_flag_fw ), + .sr_flag_i ( sr_flag ), + + .drop_instruction_o ( drop_instruction ), +`ifdef BRANCH_PREDICTION + .wrong_branch_taken_o ( wrong_branch_taken ), + .take_branch_o ( take_branch ), +`endif + // STALLS + .stall_if_o ( stall_if ), + .stall_id_o ( stall_id ), + .stall_ex_o ( stall_ex ), + .stall_wb_o ( stall_wb ), + + // From the Pipeline ID/EX + .regfile_rb_data_ex_o ( regfile_rb_data_ex ), + + .alu_operand_a_ex_o ( alu_operand_a_ex ), + .alu_operand_b_ex_o ( alu_operand_b_ex ), + .alu_operand_c_ex_o ( alu_operand_c_ex ), + .alu_operator_ex_o ( alu_operator_ex ), + + .vector_mode_ex_o ( vector_mode_ex ), // from ID to EX stage + .alu_cmp_mode_ex_o ( alu_cmp_mode_ex ), // from ID to EX stage + .alu_vec_ext_ex_o ( alu_vec_ext_ex ), // from ID to EX stage + + .eoc_ex_o ( eoc_ex ), + + .mult_is_running_ex_o ( mult_is_running_ex ), // from ID to EX stage + .mult_sel_subword_ex_o ( mult_sel_subword_ex ), 
// from ID to EX stage + .mult_signed_mode_ex_o ( mult_signed_mode_ex ), // from ID to EX stage + .mult_use_carry_ex_o ( mult_use_carry_ex ), // from ID to EX stage + .mult_mac_en_ex_o ( mult_mac_en_ex ), // from ID to EX stage + + .regfile_waddr_ex_o ( regfile_waddr_ex ), + .regfile_wdata_mux_sel_ex_o ( regfile_wdata_mux_sel_ex ), + .regfile_we_ex_o ( regfile_we_ex ), + + .regfile_alu_we_ex_o ( regfile_alu_we_ex ), + .regfile_alu_waddr_ex_o ( regfile_alu_waddr_ex ), + + // hwloop signals + .hwloop_we_ex_o ( hwloop_we_ex ), + .hwloop_regid_ex_o ( hwloop_regid_ex ), + .hwloop_wb_mux_sel_ex_o ( hwloop_wb_mux_sel_ex ), + .hwloop_cnt_o ( hwloop_cnt_ex ), + .hwloop_dec_cnt_o ( hwloop_dec_cnt ), + .hwloop_targ_addr_o ( hwloop_targ_addr ), + + .sp_we_ex_o ( sp_we_ex ), + + .prepost_useincr_ex_o ( useincr_addr_ex ), + .data_misaligned_i ( data_misaligned ), + + .data_we_ex_o ( data_we_ex ), // to load store unit + .data_type_ex_o ( data_type_ex ), // to load store unit + .data_sign_ext_ex_o ( data_sign_ext_ex ), // to load store unit + .data_reg_offset_ex_o ( data_reg_offset_ex ), // to load store unit + .data_req_ex_o ( data_req_ex ), // to load store unit + .data_misaligned_ex_o ( data_misaligned_ex ), // to load store unit + .data_ack_i ( data_ack_int ), // from load store unit + .data_rvalid_i ( data_r_valid_i ), + + .set_flag_ex_o ( set_flag_ex ), // to ex_stage + .set_carry_ex_o ( set_carry_ex ), // to ex_stage + .set_overflow_ex_o ( set_overflow_ex ), // to ex_stage + .set_dsx_o ( set_dsx ), // to SPR + + // Interrupt Signals + .irq_i ( irq_i ), // incoming interrupts + .irq_nm_i ( irq_nm_i ), // incoming interrupts + .irq_enable_i ( irq_enable ), // global interrupt enable + .save_pc_if_o ( save_pc_if ), // control signal to save pc + .save_pc_id_o ( save_pc_id ), // control signal to save pc + .save_sr_o ( save_sr ), // control signal to save status register + .restore_sr_o ( restore_sr ), // restore status register + + // from hwloop regs + .hwloop_start_addr_i ( 
hwloop_start_addr ), + .hwloop_end_addr_i ( hwloop_end_addr ), + .hwloop_counter_i ( hwloop_counter ), + + // Debug Unit Signals + .dbg_flush_pipe_i ( dbg_flush_pipe ), + .pipe_flushed_o ( pipe_flushed ), + .dbg_st_en_i ( dbg_st_en ), + .dbg_dsr_i ( dbg_dsr ), + .dbg_stall_i ( dbg_stall ), + .dbg_trap_o ( dbg_trap ), + .dbg_reg_mux_i ( dbg_reg_mux ), + .dbg_reg_we_i ( dbg_reg_we ), + .dbg_reg_addr_i ( dbg_reg_addr[4:0] ), + .dbg_reg_wdata_i ( dbg_reg_wdata ), + .dbg_reg_rdata_o ( dbg_reg_rdata ), + .dbg_set_npc_i ( dbg_set_npc ), + + // Forward Signals + .regfile_alu_waddr_fw_i ( regfile_alu_waddr_fw ), + .regfile_alu_we_fw_i ( regfile_alu_we_fw ), + .regfile_alu_wdata_fw_i ( regfile_alu_wdata_fw ), + .regfile_alu_wdata_fw_pc_i ( regfile_alu_wdata_fw_pc ), + + .regfile_waddr_wb_i ( regfile_waddr_fw_wb_o ), // Write address ex-wb pipeline + .regfile_we_wb_i ( regfile_we_wb ), // write enable for the register file + .regfile_wdata_wb_i ( regfile_wdata ), // write data to commit in the register file + .wdata_reg_i ( wdata_reg_fw_id ) // write data to regfile, origin is always a register(not data memory) +`ifdef TCDM_ADDR_PRECAL + , + .alu_adder_o ( alu_adder_ex ) +`endif + ); + + + + ///////////////////////////////////////////////////// + // _______ __ ____ _____ _ ____ _____ // + // | ____\ \/ / / ___|_ _|/ \ / ___| ____| // + // | _| \ / \___ \ | | / _ \| | _| _| // + // | |___ / \ ___) || |/ ___ \ |_| | |___ // + // |_____/_/\_\ |____/ |_/_/ \_\____|_____| // + // // + ///////////////////////////////////////////////////// + ex_stage ex_stage_i + ( + // Global signals: Clock and active low asynchronous reset + .clk ( clk ), + .rst_n ( rst_n ), + + // Alu signals from ID stage + .alu_operator_i ( alu_operator_ex ), // from ID/EX pipe registers + .alu_operand_a_i ( alu_operand_a_ex ), // from ID/EX pipe registers + .alu_operand_b_i ( alu_operand_b_ex ), // from ID/EX pipe registers + .alu_operand_c_i ( alu_operand_c_ex ), // from ID/EX pipe registers + .alu_carry_i ( 
carry_sp ), // from spr carry + .alu_flag_i ( sr_flag ), // from spr flag + .alu_flag_o ( alu_flag_ex ), // to spr flag + + .vector_mode_i ( vector_mode_ex ), // from ID/EX pipe registers + .alu_cmp_mode_i ( alu_cmp_mode_ex ), // from ID/EX pipe registers + .alu_vec_ext_i ( alu_vec_ext_ex ), // from ID/EX pipe registers + + // Multipler + .mult_is_running_i ( mult_is_running_ex ), + .mult_sel_subword_i ( mult_sel_subword_ex ), + .mult_signed_mode_i ( mult_signed_mode_ex ), + .mult_use_carry_i ( mult_use_carry_ex ), + .mult_mac_en_i ( mult_mac_en_ex ), + + // interface with Special registers + .carry_o ( carry_ex ), + .overflow_o ( overflow_ex ), + .set_overflow_o ( set_overflow_fw_ex ), // to special registers + .set_carry_o ( set_carry_fw_ex ), // to special registers + + // input from ID stage + .stall_ex_i ( stall_ex ), + .stall_wb_i ( stall_wb ), + + .prepost_useincr_i ( useincr_addr_ex ), + + // From ID Stage: Regfile control signals + .regfile_waddr_i ( regfile_waddr_ex ), + .regfile_wdata_mux_sel_i ( regfile_wdata_mux_sel_ex ), + .regfile_we_i ( regfile_we_ex ), + + .regfile_alu_we_i ( regfile_alu_we_ex ), + .regfile_alu_waddr_i ( regfile_alu_waddr_ex ), + + // From ID stage: hwloop wb reg signals + .hwloop_wb_mux_sel_i ( hwloop_wb_mux_sel_ex ), + .hwloop_pc_plus4_i ( current_pc_id ), + .hwloop_cnt_i ( hwloop_cnt_ex ), + + //From ID stage.Controller + .set_overflow_i ( set_overflow_ex ), + .set_carry_i ( set_carry_ex ), + + .eoc_i ( eoc_ex ), + .regfile_rb_data_i ( regfile_rb_data_ex ), + .sp_we_i ( sp_we_ex ), + + + // Output of ex stage pipeline + .regfile_wdata_wb_o ( result_wb ), + .regfile_waddr_wb_o ( regfile_waddr_fw_wb_o ), + .regfile_wdata_mux_sel_wb_o ( regfile_wdata_mux_sel_wb ), + .regfile_we_wb_o ( regfile_we_wb ), + .regfile_rb_data_wb_o ( regfile_rb_data_wb ), + + .data_addr_ex_o ( data_addr_ex ), + + // To hwloop regs + .hwloop_start_data_o ( hwloop_start_data ), + .hwloop_end_data_o ( hwloop_end_data ), + .hwloop_cnt_data_o ( hwloop_cnt_data 
), + + .sp_we_wb_o ( sp_we_wb ), + .eoc_o ( eoc_wb ), + + // To ID stage: Forwarding signals + .regfile_alu_waddr_fw_o ( regfile_alu_waddr_fw ), + .regfile_alu_we_fw_o ( regfile_alu_we_fw ), + .regfile_alu_wdata_fw_o ( regfile_alu_wdata_fw ), + .regfile_alu_wdata_fw_pc_o ( regfile_alu_wdata_fw_pc ) + +`ifdef TCDM_ADDR_PRECAL + , + .alu_adder_i ( alu_adder_ex ) +`endif + ); + + + + ///////////////////////////////////////////////////////// + // __ ______ ____ _____ _ ____ _____ // + // \ \ / / __ ) / ___|_ _|/ \ / ___| ____| // + // \ \ /\ / /| _ \ \___ \ | | / _ \| | _| _| // + // \ V V / | |_) | ___) || |/ ___ \ |_| | |___ // + // \_/\_/ |____/ |____/ |_/_/ \_\____|_____| // + // // + ///////////////////////////////////////////////////////// + wb_stage wb_stage_i + ( + //Mux selector of regfile wdata + .regfile_wdata_mux_sel_i ( regfile_wdata_mux_sel_wb ), + //Mux inputs + .sp_rdata_i ( sp_rdata ), + .data_rdata_i ( data_rdata_int ), + .lsu_data_reg_i ( lsu_data_reg ), + //Mux output + .regfile_wdata_o ( regfile_wdata ), + .wdata_reg_o ( wdata_reg_fw_id ), + + .eoc_i ( eoc_wb ), + .eoc_o ( eoc_o ) + ); + + + //////////////////////////////////////////////////////////////////////////////////////// + // _ ___ _ ____ ____ _____ ___ ____ _____ _ _ _ _ ___ _____ // + // | | / _ \ / \ | _ \ / ___|_ _/ _ \| _ \| ____| | | | | \ | |_ _|_ _| // + // | | | | | |/ _ \ | | | | \___ \ | || | | | |_) | _| | | | | \| || | | | // + // | |__| |_| / ___ \| |_| | ___) || || |_| | _ <| |___ | |_| | |\ || | | | // + // |_____\___/_/ \_\____/ |____/ |_| \___/|_| \_\_____| \___/|_| \_|___| |_| // + // // + //////////////////////////////////////////////////////////////////////////////////////// + load_store_unit load_store_unit_i + ( + .clk ( clk ), + .rst_n ( rst_n ), + + // signal from ex stage + .data_we_ex_i ( data_we_ex ), + .data_type_ex_i ( data_type_ex ), + .data_wdata_ex_i ( regfile_rb_data_ex ), + .data_reg_offset_ex_i ( data_reg_offset_ex ), + + .data_sign_ext_ex_i ( 
data_sign_ext_ex ), // sign extension + .data_rdata_ex_o ( data_rdata_int ), + .lsu_data_reg_o ( lsu_data_reg ), + .data_req_ex_i ( data_req_ex ), + .data_addr_ex_i ( data_addr_ex ), + .data_ack_int_o ( data_ack_int ), // ack used in controller to stall + + .data_misaligned_ex_i ( data_misaligned_ex ), // from ID/EX pipeline + .data_misaligned_o ( data_misaligned ), + + //output to data memory + .data_be_o ( data_be_o ), + .data_wdata_o ( data_wdata_o ), + .data_rdata_i ( data_rdata_i ), + .data_rvalid_i ( data_r_valid_i ), + .data_addr_o ( data_addr_o ), + .data_we_o ( data_we_o ), + .data_req_o ( data_req_o ), + .data_gnt_i ( data_gnt_i ), + + .ex_stall_i ( stall_ex ) + ); + + /* + + + ////////////////////////////////////////////// + // ____ ____ ____ // + // / ___|| _ \ | _ \ ___ __ _ ___ // + // \___ \| |_) | | |_) / _ \/ _` / __| // + // ___) | __/ | _ < __/ (_| \__ \ // + // |____/|_| |_| \_\___|\__, |___/ // + // |___/ // + // Special Purpose REGISTERS // + ////////////////////////////////////////////// + sp_registers sp_registers_i + ( + .clk ( clk ), + .rst_n ( rst_n ), + + // Core and Cluster ID from outside + .core_id_i ( core_id_i ), + .cluster_id_i ( cluster_id_i ), + + // Interface to Special register (SRAM LIKE) + .sp_addr_i ( sp_addr ), + .sp_wdata_i ( sp_wdata ), + .sp_we_i ( sp_we ), // from ex-wb pipe regs + .sp_rdata_o ( sp_rdata ), // to write back stage + + + // Direct interface with MUL-ALU and Controller + // Flag + .flag_i ( alu_flag_ex ), // comparison flag + + // Overflow and Carry - From ALU + .carry_i ( carry_ex ), + .overflow_i ( overflow_ex ), + + // From the controller + .set_flag_i ( set_flag_ex ), // From EX stage + .set_carry_i ( set_carry_fw_ex ), // From EX stage + .set_overflow_i ( set_overflow_fw_ex ), // From EX stage + .set_dsx_i ( set_dsx ), // from exc controller + + // Stall direct write + .enable_direct_write_i ( stall_wb ), + + .curr_pc_if_i ( current_pc_if ), // from IF stage + .curr_pc_id_i ( current_pc_id ), // from 
IF stage + .save_pc_if_i ( save_pc_if ), + .save_pc_id_i ( save_pc_id ), + .save_sr_i ( save_sr ), + .restore_sr_i ( restore_sr ), + .epcr_o ( epcr ), + .irq_enable_o ( irq_enable ), + + .npc_o ( dbg_npc ), // PC from debug unit + .set_npc_o ( dbg_set_npc ), // set PC to new value + + .flag_fw_o ( sr_flag_fw ), + .flag_o ( sr_flag ), + .carry_o ( carry_sp ) + ); + + // Mux for SPR access through Debug Unit + assign sp_addr = (dbg_sp_mux == 1'b0) ? result_wb : dbg_reg_addr; + assign sp_wdata = (dbg_sp_mux == 1'b0) ? regfile_rb_data_wb : dbg_reg_wdata; + assign sp_we = (dbg_sp_mux == 1'b0) ? sp_we_wb : dbg_reg_we; + assign dbg_rdata = (dbg_sp_mux == 1'b0) ? dbg_reg_rdata : sp_rdata; + + + ////////////////////////////////////////////// + // Hardware Loop Registers // + ////////////////////////////////////////////// + hwloop_regs hwloop_regs_i + ( + .clk ( clk ), + .rst_n ( rst_n ), + + // from ex stage + .hwloop_start_data_i ( hwloop_start_data ), + .hwloop_end_data_i ( hwloop_end_data ), + .hwloop_cnt_data_i ( hwloop_cnt_data ), + .hwloop_we_i ( hwloop_we_ex ), + .hwloop_regid_i ( hwloop_regid_ex ), + + // from controller + .stall_id_i ( stall_id ), + + // to hwloop controller + .hwloop_start_addr_o ( hwloop_start_addr ), + .hwloop_end_addr_o ( hwloop_end_addr ), + .hwloop_counter_o ( hwloop_counter ), + + // from hwloop controller + .hwloop_dec_cnt_i ( hwloop_dec_cnt ) + ); + + + */ + + + ///////////////////////////////////////////////////////////// + // ____ _____ ____ _ _ ____ _ _ _ _ ___ _____ // + // | _ \| ____| __ )| | | |/ ___| | | | | \ | |_ _|_ _| // + // | | | | _| | _ \| | | | | _ | | | | \| || | | | // + // | |_| | |___| |_) | |_| | |_| | | |_| | |\ || | | | // + // |____/|_____|____/ \___/ \____| \___/|_| \_|___| |_| // + // // + ///////////////////////////////////////////////////////////// + debug_unit debug_unit_i + ( + .clk ( clk ), + .rst_n ( rst_n ), + + // Debug Interface + .dbginf_stall_i ( dbginf_stall_i ), + .dbginf_bp_o ( dbginf_bp_o ), + 
.dbginf_strobe_i ( dbginf_strobe_i ),
    .dbginf_ack_o    ( dbginf_ack_o    ),
    .dbginf_we_i     ( dbginf_we_i     ),
    .dbginf_addr_i   ( dbginf_addr_i   ),
    .dbginf_data_i   ( dbginf_data_i   ),
    .dbginf_data_o   ( dbginf_data_o   ),

    // To/From Core
    .dbg_st_en_o     ( dbg_st_en       ),
    .dbg_dsr_o       ( dbg_dsr         ),
    .stall_core_o    ( dbg_stall       ),
    .flush_pipe_o    ( dbg_flush_pipe  ),
    .pipe_flushed_i  ( pipe_flushed    ),
    .trap_i          ( dbg_trap        ),

    // register file access
    .regfile_mux_o   ( dbg_reg_mux     ),
    .sp_mux_o        ( dbg_sp_mux      ),
    .regfile_we_o    ( dbg_reg_we      ),
    .regfile_addr_o  ( dbg_reg_addr    ),
    .regfile_wdata_o ( dbg_reg_wdata   ),
    .regfile_rdata_i ( dbg_rdata       )
  );



///////////////////
endmodule // cpu //
///////////////////
diff --git a/wb_stage.sv b/wb_stage.sv
new file mode 100644
index 00000000..6faaadac
--- /dev/null
+++ b/wb_stage.sv
@@ -0,0 +1,82 @@
////////////////////////////////////////////////////////////////////////////////
// Company:        IIS @ ETHZ - Federal Institute of Technology               //
//                 DEI @ UNIBO - University of Bologna                        //
//                                                                           //
// Engineer:       Renzo Andri - andrire@student.ethz.ch                      //
//                                                                           //
// Additional contributions by:                                               //
//                 Igor Loi - igor.loi@unibo.it                               //
//                                                                           //
//                                                                           //
// Create Date:    01/07/2014                                                 //
// Design Name:    Write Back stage                                           //
// Module Name:    wb_stage.sv                                                //
// Project Name:   OR10N                                                      //
// Language:       SystemVerilog                                              //
//                                                                           //
// Description:    Write back stage: hosts a multiplexer that selects the     //
//                 data written to the register file (special-purpose         //
//                 register read data or data memory read data)               //
//                                                                           //
// Revision:                                                                  //
// Revision v0.1 - File Created                                               //
// Revision v0.2 - (August 6th 2014) Changed port and signal names, added     //
//                 comments                                                   //
//                                                                           //
//                                                                           //
//                                                                           //
////////////////////////////////////////////////////////////////////////////////
// sp   = special register
// id   = instruction decode
// if   = instruction fetch
// ex   = execute stage
// wb   = write back
// data = from data memory


`include "defines.sv"

// wb_stage: purely combinational write-back stage.
//
//   regfile_wdata_mux_sel_i | regfile_wdata_o | wdata_reg_o
//   ------------------------+-----------------+----------------
//            1'b0           | sp_rdata_i      | sp_rdata_i
//            1'b1           | data_rdata_i    | lsu_data_reg_i
//
// eoc_i is forwarded unchanged to eoc_o.
module wb_stage
(
    // MUX SELECTOR --> selects what is written to the register file
    input  logic        regfile_wdata_mux_sel_i,  // Comes from the controller (thru id-ex and ex-wb pipe)

    // MUX INPUTS
    input  logic [31:0] sp_rdata_i,               // From the read port of the special register
    input  logic [31:0] data_rdata_i,             // Read data from data memory system
    input  logic [31:0] lsu_data_reg_i,           // Read data registered in LSU

    // MUX OUTPUTS
    output logic [31:0] regfile_wdata_o,          // Write data for register file
    output logic [31:0] wdata_reg_o,              // Goes to pc_mux, origin is always a register!

    // Pass-through (the meaning of "eoc" is not visible in this file)
    input  logic        eoc_i,
    output logic        eoc_o
);

  assign eoc_o = eoc_i;

  // Register write data selection --> data to write into the regfile
  //   0: from the special-purpose register read port
  //   1: from data memory
  //
  // Blocking assignment is used because this is combinational logic. The
  // conditional operator (instead of casex) lets an unknown selector show up
  // as X on differing data bits in simulation instead of silently picking
  // the first arm.
  always_comb
  begin : REGFILE_WDATA_MUX
    regfile_wdata_o = regfile_wdata_mux_sel_i ? data_rdata_i : sp_rdata_i;
  end

  // wdata_reg_o is very similar to regfile_wdata_o, except that the
  // output of the LSU is registered. This signal is then used by the PC
  // mux instead of regfile_wdata_o in case forwarding is necessary
  //   0: from the special-purpose register read port
  //   1: from the LSU data register
  always_comb
  begin : WDATA_FW_MUX
    wdata_reg_o = regfile_wdata_mux_sel_i ? lsu_data_reg_i : sp_rdata_i;
  end

endmodule