diff --git a/alu.sv b/alu.sv index ea223f3d..9b3a8c0e 100644 --- a/alu.sv +++ b/alu.sv @@ -27,6 +27,9 @@ module riscv_alu ( + input logic clk, + input logic rst_n, /* clk and rst_n are new ALU ports; within this patch they are only wired to the riscv_alu_div instance div_i */ + input logic [`ALU_OP_WIDTH-1:0] operator_i, input logic [31:0] operand_a_i, input logic [31:0] operand_b_i, @@ -38,7 +41,10 @@ module riscv_alu input logic [ 1:0] imm_vec_ext_i, output logic [31:0] result_o, - output logic comparison_result_o + output logic comparison_result_o, + + output logic ready_o, + input logic ex_ready_i /* ready_o is driven from the divider's div_ready_o; ex_ready_i is passed straight through to the divider's ex_ready_i */ ); @@ -562,6 +568,47 @@ module riscv_alu assign bclr_result = operand_a_i & bmask_inv; assign bset_result = operand_a_i | bmask; + //////////////////////////////////////////////////// + // ____ _____ __ __ ____ _____ __ __ // + // | _ \_ _\ \ / / / / | _ \| ____| \/ | // + // | | | | | \ \ / / / / | |_) | _| | |\/| | // + // | |_| | | \ V / / / | _ <| |___| | | | // + // |____/___| \_/ /_/ |_| \_\_____|_| |_| // + // // + //////////////////////////////////////////////////// + + logic [31:0] result_div; + + logic div_valid; + logic div_ready; + logic div_signed; + logic div_rem_quot; + + + assign div_valid = (operator_i == `ALU_DIV) || (operator_i == `ALU_DIVU) || + (operator_i == `ALU_REM) || (operator_i == `ALU_REMU); /* div_valid is purely combinational: it is high whenever operator_i equals one of the four division opcodes */ + + assign div_rem_quot = operator_i[1]; + + assign div_signed = operator_i[0]; /* operator_i bit 1 selects remainder (1) vs. quotient (0) and bit 0 selects signed mode, matching the ALU_DIVU, ALU_DIV, ALU_REMU and ALU_REM encodings this patch adds to riscv_defines.sv */ + + riscv_alu_div div_i + ( + .clk ( clk ), + .rst_n ( rst_n ), + + .a_i ( operand_a_i ), + .b_i ( operand_b_i ), + .signed_i ( div_signed ), + .rem_quot_i ( div_rem_quot ), + + .result_o ( result_div ), + + .div_valid_i ( div_valid ), + .div_ready_o ( div_ready ), + .ex_ready_i ( ex_ready_i ) + ); + //////////////////////////////////////////////////////// // ____ _ _ __ __ // // | _ \ ___ ___ _ _| | |_ | \/ |_ ___ __ // @@ -632,9 +679,14 @@ module riscv_alu `ALU_CLB: result_o = {26'h0, clb_result}; `ALU_CNT: result_o = {26'h0, cnt_result}; + // Division Unit Commands + `ALU_DIV, `ALU_DIVU, + `ALU_REM, `ALU_REMU: result_o = result_div; + default: ; // default case to suppress unique 
warning endcase end -endmodule + assign ready_o = div_ready; /* ready_o simply mirrors the divider's div_ready_o */ +endmodule diff --git a/alu_div.sv b/alu_div.sv new file mode 100644 index 00000000..ff494c5d --- /dev/null +++ b/alu_div.sv @@ -0,0 +1,205 @@ +// Copyright 2016 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// this module produces both the absolute (positive) value and the negative +// value for any input. Both signed and unsigned numbers are supported +module riscv_alu_abs_neg +( + input logic [31:0] in_i, // can be either signed or unsigned + input logic signed_i, + + output logic [32:0] abs_o, // needs to be 33 bits wide to allow for -(-2**31) + output logic [32:0] neg_o // needs to be 33 bits wide to allow for -(2**32-1) +); + + logic [32:0] in_neg; + + assign in_neg = -{(signed_i & in_i[31]), in_i}; /* 33-bit negation of in_i; the extra top bit is 1 only when signed_i is set and in_i[31] is 1, otherwise 0 */ + + assign abs_o = (signed_i & in_i[31]) ? in_neg : {1'b0, in_i}; + assign neg_o = (signed_i & in_i[31]) ? 
{1'b1, in_i} : in_neg; + +endmodule + + +module riscv_alu_div +( + input logic clk, + input logic rst_n, + + input logic [31:0] a_i, // can be either signed or unsigned + input logic [31:0] b_i, // can be either signed or unsigned + input logic signed_i, + input logic rem_quot_i, // 1 for rem, 0 for div + + output logic [31:0] result_o, + + // handshake + input logic div_valid_i, // valid data available for division + output logic div_ready_o, // set when done or idle + + input logic ex_ready_i // if we have to wait for next stage +); + + enum logic [1:0] { IDLE, DIV, DIV_DONE } CS, NS; /* FSM: IDLE loads the dividend and moves to DIV when div_valid_i is set; DIV iterates while counter_q runs from 31 down to 0 (32 cycles); DIV_DONE holds the finished result until ex_ready_i is set */ + + logic [31:0] quotient_q, quotient_n; + logic [63:0] remainder_q, remainder_n, remainder_int; + + logic [31:0] remainder_out; + logic [31:0] quotient_out; + logic [31:0] result_int; + + logic [32:0] a_abs; + logic [32:0] a_neg; /* NOTE(review): a_neg is connected to a_abs_neg_i below but is not read anywhere else in this module */ + + logic [32:0] b_abs; + logic [32:0] b_neg; + + logic [32:0] sub_val; + + logic a_is_neg; + logic b_is_neg; + logic result_negate; + logic quot_negate; + logic rem_negate; + logic geq_b; + logic load_r; + logic is_active; + logic [4:0] counter_q, counter_n; + + riscv_alu_abs_neg b_abs_neg_i + ( + .in_i ( b_i ), + .signed_i ( signed_i ), + + .abs_o ( b_abs ), + .neg_o ( b_neg ) + ); + + riscv_alu_abs_neg a_abs_neg_i + ( + .in_i ( a_i ), + .signed_i ( signed_i ), + + .abs_o ( a_abs ), + .neg_o ( a_neg ) + ); + + always_comb + begin + NS = CS; + div_ready_o = 1'b1; + load_r = 1'b0; + is_active = 1'b0; + counter_n = counter_q - 1; + + case (CS) + IDLE: begin + div_ready_o = 1'b1; + + if (div_valid_i) begin + div_ready_o = 1'b0; + NS = DIV; + load_r = 1'b1; + is_active = 1'b1; + counter_n = 31; + end + end + + DIV: begin + div_ready_o = 1'b0; + is_active = 1'b1; + + if (counter_q == 0) begin + div_ready_o = 1'b1; + + if (ex_ready_i) + NS = IDLE; + else + NS = DIV_DONE; + end + end + + // if the next stage was stalled when we finished + DIV_DONE: begin + div_ready_o = 1'b1; + + if (ex_ready_i) + NS = IDLE; + end + endcase /* NOTE(review): there is no arm for the unused encoding 2'b11; with the NS = CS default the FSM would stay in that encoding if it were ever reached */ + end + + assign a_is_neg = a_i[31]; + 
assign b_is_neg = b_i[31]; + + assign quot_negate = ((a_is_neg ^ b_is_neg) && signed_i && (b_i != 0)); + assign rem_negate = a_is_neg && signed_i; /* signed mode only: the quotient is negated when the operand sign bits differ and b_i is non-zero; the remainder is negated when a_i has its sign bit set */ + + assign geq_b = (remainder_q[63:31] >= b_abs); /* per-step compare of the upper 33 remainder bits against b_abs; when true, b_neg is added below and a 1 is shifted into the quotient, otherwise a 0 is shifted in */ + + always_comb + begin + quotient_n = {quotient_q[30:0], 1'b0}; + sub_val = '0; + + if (geq_b) begin + sub_val = b_neg; + quotient_n = {quotient_q[30:0], 1'b1}; + end + end + + always_comb + begin + // add (or actually subtract) and shift left by one + remainder_int[63:32] = remainder_q[63:31] + sub_val; + remainder_int[31: 0] = {remainder_q[30:0], 1'b0}; + end + + assign remainder_n = load_r ? {31'b0, a_abs} : remainder_int; /* on load the 33-bit a_abs is placed in the low bits of the 64-bit remainder register; otherwise the shifted step result is taken */ + + + //---------------------------------------------------------------------------- + // registers + //---------------------------------------------------------------------------- + + always_ff @(posedge clk, negedge rst_n) + begin + if (~rst_n) begin + quotient_q <= '0; + remainder_q <= '0; + counter_q <= 0; + CS <= IDLE; + end else begin + // only toggle when there is an active request + if (is_active) begin + quotient_q <= quotient_n; + remainder_q <= remainder_n; + counter_q <= counter_n; + end + CS <= NS; + end + end + + + //---------------------------------------------------------------------------- + // output assignments + //---------------------------------------------------------------------------- + + assign quotient_out = (CS == DIV) ? quotient_n : quotient_q; + assign remainder_out = (CS == DIV) ? remainder_n[63:32] : remainder_q[63:32]; /* while in DIV the combinational next values are forwarded to the output; in the other states the registered values are used */ + assign result_int = rem_quot_i ? remainder_out : quotient_out; + assign result_negate = rem_quot_i ? rem_negate : quot_negate; + + + assign result_o = result_negate ? 
-result_int : result_int; /* result_o is the negation of result_int when result_negate is set, otherwise result_int unchanged */ + +endmodule diff --git a/decoder.sv b/decoder.sv index 4ef76e71..b1e81f8d 100644 --- a/decoder.sv +++ b/decoder.sv @@ -461,13 +461,25 @@ module riscv_decoder {6'b10_0000, 3'b101}: alu_operator_o = `ALU_SRA; // Shift Right Arithmetic // supported RV32M instructions - {6'b00_0001, 3'b000}: mult_en = 1'b1; // Multiplication + {6'b00_0001, 3'b000}: mult_en = 1'b1; // Multiplication {6'b00_0001, 3'b001}: begin // MAC regc_used_o = 1'b1; regc_mux_o = `REGC_RD; mult_en = 1'b1; mult_mac_en = 1'b1; end + {6'b00_0001, 3'b100}: begin // div + alu_operator_o = `ALU_DIV; + end + {6'b00_0001, 3'b101}: begin // divu + alu_operator_o = `ALU_DIVU; + end + {6'b00_0001, 3'b110}: begin // rem + alu_operator_o = `ALU_REM; + end + {6'b00_0001, 3'b111}: begin // remu + alu_operator_o = `ALU_REMU; + end /* the four new arms only select the ALU operator; they assign no other decoder output */ // PULP specific instructions {6'b00_0010, 3'b000}: alu_operator_o = `ALU_AVG; // Average diff --git a/ex_stage.sv b/ex_stage.sv index 4ef51269..cac2389c 100644 --- a/ex_stage.sv +++ b/ex_stage.sv @@ -94,6 +94,8 @@ module riscv_ex_stage logic [31:0] mult_result; logic alu_cmp_result; + logic alu_ready; /* connected to the ready_o port of the alu_i instance */ + // EX stage result mux (ALU, MAC unit, CSR) assign alu_csr_result = csr_access_i ? 
csr_rdata_i : alu_result; @@ -121,18 +123,24 @@ module riscv_ex_stage riscv_alu alu_i ( - .operator_i ( alu_operator_i ), - .operand_a_i ( alu_operand_a_i ), - .operand_b_i ( alu_operand_b_i ), - .operand_c_i ( alu_operand_c_i ), + .clk ( clk ), + .rst_n ( rst_n ), - .vector_mode_i ( alu_vec_mode_i ), - .imm_bmask_a_i ( imm_bmask_a_i ), - .imm_bmask_b_i ( imm_bmask_b_i ), - .imm_vec_ext_i ( imm_vec_ext_i ), + .operator_i ( alu_operator_i ), + .operand_a_i ( alu_operand_a_i ), + .operand_b_i ( alu_operand_b_i ), + .operand_c_i ( alu_operand_c_i ), - .result_o ( alu_result ), - .comparison_result_o ( alu_cmp_result ) + .vector_mode_i ( alu_vec_mode_i ), + .imm_bmask_a_i ( imm_bmask_a_i ), + .imm_bmask_b_i ( imm_bmask_b_i ), + .imm_vec_ext_i ( imm_vec_ext_i ), + + .result_o ( alu_result ), + .comparison_result_o ( alu_cmp_result ), + + .ready_o ( alu_ready ), + .ex_ready_i ( ex_ready_o ) /* the stage's own ex_ready_o is fed back into the ALU as ex_ready_i */ ); @@ -165,7 +173,7 @@ module riscv_ex_stage /////////////////////////////////////// always_ff @(posedge clk, negedge rst_n) begin : EX_WB_Pipeline_Register - if (rst_n == 1'b0) + if (~rst_n) begin regfile_waddr_wb_o <= '0; regfile_we_wb_o <= 1'b0; @@ -189,7 +197,7 @@ module riscv_ex_stage // As valid always goes to the right and ready to the left, and we are able // to finish branches without going to the WB stage, ex_valid does not // depend on ex_ready. 
- assign ex_ready_o = (lsu_ready_ex_i & wb_ready_i) | branch_in_ex_i; - assign ex_valid_o = (lsu_ready_ex_i & wb_ready_i); + assign ex_ready_o = (alu_ready & lsu_ready_ex_i & wb_ready_i) | branch_in_ex_i; + assign ex_valid_o = (alu_ready & lsu_ready_ex_i & wb_ready_i); /* alu_ready now gates both terms, so ex_valid_o is never raised, and ex_ready_o is raised only via branch_in_ex_i, while the ALU reports not ready */ endmodule diff --git a/include/riscv_defines.sv b/include/riscv_defines.sv index 0c16cc43..e4bb0909 100644 --- a/include/riscv_defines.sv +++ b/include/riscv_defines.sv @@ -155,6 +155,12 @@ `define ALU_MAXU 6'b010011 `define ALU_MAXU 6'b010011 +// div/rem +`define ALU_DIVU 6'b110000 // bit 0 is used for signed mode, bit 1 is used for remdiv +`define ALU_DIV 6'b110001 // bit 0 is used for signed mode, bit 1 is used for remdiv +`define ALU_REMU 6'b110010 // bit 0 is used for signed mode, bit 1 is used for remdiv +`define ALU_REM 6'b110011 // bit 0 is used for signed mode, bit 1 is used for remdiv + // vector modes `define VEC_MODE32 2'b00 @@ -276,15 +282,7 @@ // Debug module -`define N_WP 2 // #Watchpoints -`define DCR_DP 0 -`define DCR_CC 3:1 -`define DCR_SC 4 -`define DCR_CT 7:5 - `define DMR1_ST 22 -`define DMR2_WGB0 12 -`define DMR2_WBS0 22 `define DSR_IIE 0 `define DSR_INTE 1 diff --git a/include/riscv_tracer_defines.sv b/include/riscv_tracer_defines.sv index b5087496..fca1aec0 100644 --- a/include/riscv_tracer_defines.sv +++ b/include/riscv_tracer_defines.sv @@ -83,6 +83,10 @@ // RV32M `define INSTR_PMUL { 7'b0000001, 10'b?, 3'b000, 5'b?, `OPCODE_OP } +`define INSTR_DIV { 7'b0000001, 10'b?, 3'b100, 5'b?, `OPCODE_OP } +`define INSTR_DIVU { 7'b0000001, 10'b?, 3'b101, 5'b?, `OPCODE_OP } +`define INSTR_REM { 7'b0000001, 10'b?, 3'b110, 5'b?, `OPCODE_OP } +`define INSTR_REMU { 7'b0000001, 10'b?, 3'b111, 5'b?, `OPCODE_OP } `define INSTR_PMAC { 7'b0000001, 10'b?, 3'b001, 5'b?, `OPCODE_OP } // PULP custom instructions diff --git a/riscv_tracer.sv b/riscv_tracer.sv index 83c2808d..4c6acf6e 100644 --- a/riscv_tracer.sv +++ b/riscv_tracer.sv @@ -655,6 +655,10 @@ module riscv_tracer // PULP MULTIPLIER 
`INSTR_PMUL: trace.printRInstr("p.mul"); `INSTR_PMAC: trace.printR3Instr("p.mac"); + `INSTR_DIV: trace.printRInstr("div"); + `INSTR_DIVU: trace.printRInstr("divu"); + `INSTR_REM: trace.printRInstr("rem"); + `INSTR_REMU: trace.printRInstr("remu"); /* the four division mnemonics are printed through the same printRInstr call that p.mul uses */ // opcodes with custom decoding {25'b?, `OPCODE_LOAD}: trace.printLoadInstr(); {25'b?, `OPCODE_LOAD_POST}: trace.printLoadInstr();