// File: cva6/core/alu.sv
// Snapshot metadata: 2024-07-10 11:02:18 +02:00 — 366 lines, 14 KiB, SystemVerilog

// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Matthias Baer <baermatt@student.ethz.ch>
// Author: Igor Loi <igor.loi@unibo.it>
// Author: Andreas Traber <atraber@student.ethz.ch>
// Author: Lukas Mueller <lukasmue@student.ethz.ch>
// Author: Florian Zaruba <zaruabf@iis.ee.ethz.ch>
//
// Date: 19.03.2017
// Description: Ariane ALU based on RI5CY's ALU
// -----------------------------------------------------------------------------
// CVA6 Arithmetic Logic Unit
//
// Purely combinational: produces the integer result (result_o) and the branch
// comparison outcome (alu_branch_res_o) for the operation encoded in
// fu_data_i.operation. Implements the base RV32/RV64 ALU operations and, when
// enabled through CVA6Cfg, the bit-manipulation extensions (Zba/Zbb/Zbs) and
// the Zicond conditional-move operations.
//
// NOTE: clk_i and rst_ni are not referenced by any logic below (everything is
// combinational); they are kept in the port list for interface uniformity.
// -----------------------------------------------------------------------------
module alu
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter type fu_data_t = logic
) (
// Subsystem Clock - SUBSYSTEM
input logic clk_i,
// Asynchronous reset active low - SUBSYSTEM
input logic rst_ni,
// FU data needed to execute instruction - ISSUE_STAGE
input fu_data_t fu_data_i,
// ALU result - ISSUE_STAGE
output logic [CVA6Cfg.XLEN-1:0] result_o,
// ALU branch compare result - branch_unit
output logic alu_branch_res_o
);
// Bit-reversed copies of operand_a. Left shifts and trailing-zero counts are
// implemented by reversing the operand, running it through the right-shift /
// leading-zero hardware, and (for shifts) reversing the result back.
logic [CVA6Cfg.XLEN-1:0] operand_a_rev;
logic [ 31:0] operand_a_rev32;
// operand_b, one bit wider, optionally bitwise-inverted (see adder below).
logic [ CVA6Cfg.XLEN:0] operand_b_neg;
// Adder result with the carry-injection LSB still attached (see adder below).
logic [CVA6Cfg.XLEN+1:0] adder_result_ext_o;
logic less; // handles both signed and unsigned forms
logic [ 31:0] rolw; // Rotate Left Word
logic [ 31:0] rorw; // Rotate Right Word
logic [31:0] orcbw, rev8w;
logic [ $clog2(CVA6Cfg.XLEN) : 0] cpop; // Count Population
logic [$clog2(CVA6Cfg.XLEN)-1 : 0] lz_tz_count; // Count Leading Zeros
logic [ 4:0] lz_tz_wcount; // Count Leading Zeros Word
logic lz_tz_empty, lz_tz_wempty; // asserted when the lzc input is all zeros
logic [CVA6Cfg.XLEN-1:0] orcbw_result, rev8w_result;
// bit reverse operand_a for left shifts and bit counting
generate
genvar k;
for (k = 0; k < CVA6Cfg.XLEN; k++)
assign operand_a_rev[k] = fu_data_i.operand_a[CVA6Cfg.XLEN-1-k];
for (k = 0; k < 32; k++) assign operand_a_rev32[k] = fu_data_i.operand_a[31-k];
endgenerate
// ------
// Adder
// ------
logic adder_op_b_negate;
logic adder_z_flag;
logic [CVA6Cfg.XLEN:0] adder_in_a, adder_in_b;
logic [CVA6Cfg.XLEN-1:0] adder_result;
logic [CVA6Cfg.XLEN-1:0] operand_a_bitmanip, bit_indx;
// Operations that need operand_b inverted: SUB/SUBW subtract; EQ/NE are
// resolved as a subtraction followed by a zero-flag test; ANDN/ORN/XNOR
// consume the inverted operand_b directly from operand_b_neg in the result
// mux (no addition involved).
always_comb begin
adder_op_b_negate = 1'b0;
unique case (fu_data_i.operation)
// ADDER OPS
EQ, NE, SUB, SUBW, ANDN, ORN, XNOR: adder_op_b_negate = 1'b1;
default: ;
endcase
end
// Pre-process operand_a for the Zba shift-add instructions (shift left by
// 1/2/3, with the *.UW variants first zero-extending the low 32 bits) and
// for CTZ/CTZW (bit-reversed so the leading-zero counter counts trailing
// zeros). Defaults to the unmodified operand_a.
always_comb begin
operand_a_bitmanip = fu_data_i.operand_a;
if (CVA6Cfg.RVB) begin
if (CVA6Cfg.IS_XLEN64) begin
unique case (fu_data_i.operation)
SH1ADDUW: operand_a_bitmanip = fu_data_i.operand_a[31:0] << 1;
SH2ADDUW: operand_a_bitmanip = fu_data_i.operand_a[31:0] << 2;
SH3ADDUW: operand_a_bitmanip = fu_data_i.operand_a[31:0] << 3;
CTZW: operand_a_bitmanip = operand_a_rev32;
ADDUW, CPOPW, CLZW: operand_a_bitmanip = fu_data_i.operand_a[31:0];
default: ;
endcase
end
unique case (fu_data_i.operation)
SH1ADD: operand_a_bitmanip = fu_data_i.operand_a << 1;
SH2ADD: operand_a_bitmanip = fu_data_i.operand_a << 2;
SH3ADD: operand_a_bitmanip = fu_data_i.operand_a << 3;
CTZ: operand_a_bitmanip = operand_a_rev;
default: ;
endcase
end
end
// Two's-complement subtraction via carry injection: operand_a gets a constant
// 1 appended as LSB, operand_b gets a 0 appended and is then XORed with the
// negate mask. When negating, both LSBs are 1, so their sum carries a +1 into
// bit 1, computing a + ~b + 1 = a - b in bits [XLEN:1] of the extended sum.
// prepare operand a
assign adder_in_a = {operand_a_bitmanip, 1'b1};
// prepare operand b
assign operand_b_neg = {fu_data_i.operand_b, 1'b0} ^ {CVA6Cfg.XLEN + 1{adder_op_b_negate}};
assign adder_in_b = operand_b_neg;
// actual adder
assign adder_result_ext_o = $unsigned(adder_in_a) + $unsigned(adder_in_b);
// Drop the injection LSB; bits [XLEN:1] hold the actual XLEN-bit sum.
assign adder_result = adder_result_ext_o[CVA6Cfg.XLEN:1];
assign adder_z_flag = ~|adder_result;
// get the right branch comparison result
// EQ/NE come from the subtraction's zero flag; LTS/LTU/GES/GEU from the
// shared comparator result 'less' (signedness already folded in below).
always_comb begin : branch_resolve
// set comparison by default
alu_branch_res_o = 1'b1;
case (fu_data_i.operation)
EQ: alu_branch_res_o = adder_z_flag;
NE: alu_branch_res_o = ~adder_z_flag;
LTS, LTU: alu_branch_res_o = less;
GES, GEU: alu_branch_res_o = ~less;
default: alu_branch_res_o = 1'b1;
endcase
end
// ---------
// Shifts
// ---------
// A single right shifter serves all shift operations: left shifts feed the
// bit-reversed operand in and reverse the result back out. Arithmetic shifts
// extend the operand by one copy of its sign bit so '>>>' replicates it.
// TODO: this can probably optimized significantly
logic shift_left; // should we shift left
logic shift_arithmetic;
logic [CVA6Cfg.XLEN-1:0] shift_amt; // amount of shift, to the right
logic [CVA6Cfg.XLEN-1:0] shift_op_a; // input of the shifter
logic [ 31:0] shift_op_a32; // input to the 32 bit shift operation
logic [CVA6Cfg.XLEN-1:0] shift_result;
logic [ 31:0] shift_result32;
logic [ CVA6Cfg.XLEN:0] shift_right_result;
logic [ 32:0] shift_right_result32;
logic [CVA6Cfg.XLEN-1:0] shift_left_result;
logic [ 31:0] shift_left_result32;
assign shift_amt = fu_data_i.operand_b;
assign shift_left = (fu_data_i.operation == SLL) | (CVA6Cfg.IS_XLEN64 && fu_data_i.operation == SLLW);
assign shift_arithmetic = (fu_data_i.operation == SRA) | (CVA6Cfg.IS_XLEN64 && fu_data_i.operation == SRAW);
// right shifts, we let the synthesizer optimize this
logic [CVA6Cfg.XLEN:0] shift_op_a_64;
logic [32:0] shift_op_a_32;
// choose the bit reversed or the normal input for shift operand a
assign shift_op_a = shift_left ? operand_a_rev : fu_data_i.operand_a;
assign shift_op_a32 = shift_left ? operand_a_rev32 : fu_data_i.operand_a[31:0];
// MSB extension: sign bit for arithmetic shifts, zero otherwise.
assign shift_op_a_64 = {shift_arithmetic & shift_op_a[CVA6Cfg.XLEN-1], shift_op_a};
assign shift_op_a_32 = {shift_arithmetic & shift_op_a[31], shift_op_a32};
// NOTE: the [5:0] slice is only consumed on RV64 configurations — the result
// mux selects shift_result32 (which uses [4:0]) when IS_XLEN64 is false.
assign shift_right_result = $unsigned($signed(shift_op_a_64) >>> shift_amt[5:0]);
assign shift_right_result32 = $unsigned($signed(shift_op_a_32) >>> shift_amt[4:0]);
// bit reverse the shift_right_result for left shifts
genvar j;
generate
for (j = 0; j < CVA6Cfg.XLEN; j++)
assign shift_left_result[j] = shift_right_result[CVA6Cfg.XLEN-1-j];
for (j = 0; j < 32; j++) assign shift_left_result32[j] = shift_right_result32[31-j];
endgenerate
assign shift_result = shift_left ? shift_left_result : shift_right_result[CVA6Cfg.XLEN-1:0];
assign shift_result32 = shift_left ? shift_left_result32 : shift_right_result32[31:0];
// ------------
// Comparisons
// ------------
// One comparator serves signed and unsigned forms: both operands are widened
// by one bit — a copy of the sign bit for signed operations (sgn = 1), a zero
// for unsigned ones — and then compared as signed values.
always_comb begin
logic sgn;
sgn = 1'b0;
if ((fu_data_i.operation == SLTS) ||
(fu_data_i.operation == LTS) ||
(fu_data_i.operation == GES) ||
(fu_data_i.operation == MAX) ||
(fu_data_i.operation == MIN))
sgn = 1'b1;
less = ($signed({sgn & fu_data_i.operand_a[CVA6Cfg.XLEN-1], fu_data_i.operand_a}) <
$signed({sgn & fu_data_i.operand_b[CVA6Cfg.XLEN-1], fu_data_i.operand_b}));
end
// Zbb counting hardware. The lzc instances count from the MSB side (MODE=1,
// leading zeros); trailing-zero counts (CTZ/CTZW) are obtained by feeding
// the bit-reversed operand via operand_a_bitmanip (see pre-processing above).
if (CVA6Cfg.RVB) begin : gen_bitmanip
// Count Population + Count population Word
popcount #(
.INPUT_WIDTH(CVA6Cfg.XLEN)
) i_cpop_count (
.data_i (operand_a_bitmanip),
.popcount_o(cpop)
);
// Count Leading/Trailing Zeros
// 64b
lzc #(
.WIDTH(CVA6Cfg.XLEN),
.MODE (1)
) i_clz_64b (
.in_i(operand_a_bitmanip),
.cnt_o(lz_tz_count),
.empty_o(lz_tz_empty)
);
if (CVA6Cfg.IS_XLEN64) begin
//32b
lzc #(
.WIDTH(32),
.MODE (1)
) i_clz_32b (
.in_i(operand_a_bitmanip[31:0]),
.cnt_o(lz_tz_wcount),
.empty_o(lz_tz_wempty)
);
end
end
// Zbb byte-granular operations on the low word:
// orcbw: each byte becomes 0xFF if any bit in it is set, 0x00 otherwise.
// rev8w: byte order reversed (endianness swap).
// On RV64 the gen_64b branch extends both to the upper four bytes.
if (CVA6Cfg.RVB) begin : gen_orcbw_rev8w_results
assign orcbw = {
{8{|fu_data_i.operand_a[31:24]}},
{8{|fu_data_i.operand_a[23:16]}},
{8{|fu_data_i.operand_a[15:8]}},
{8{|fu_data_i.operand_a[7:0]}}
};
assign rev8w = {
{fu_data_i.operand_a[7:0]},
{fu_data_i.operand_a[15:8]},
{fu_data_i.operand_a[23:16]},
{fu_data_i.operand_a[31:24]}
};
if (CVA6Cfg.IS_XLEN64) begin : gen_64b
assign orcbw_result = {
{8{|fu_data_i.operand_a[63:56]}},
{8{|fu_data_i.operand_a[55:48]}},
{8{|fu_data_i.operand_a[47:40]}},
{8{|fu_data_i.operand_a[39:32]}},
orcbw
};
assign rev8w_result = {
rev8w,
{fu_data_i.operand_a[39:32]},
{fu_data_i.operand_a[47:40]},
{fu_data_i.operand_a[55:48]},
{fu_data_i.operand_a[63:56]}
};
end else begin : gen_32b
assign orcbw_result = orcbw;
assign rev8w_result = rev8w;
end
end
// -----------
// Result MUX
// -----------
// Cascaded case statements on purpose: each operation matches in exactly one
// of them, and a later assignment would override an earlier one, so ordering
// is load-bearing. result_o defaults to '0 for unhandled operations.
always_comb begin
result_o = '0;
if (CVA6Cfg.IS_XLEN64) begin
unique case (fu_data_i.operation)
// Add word: Ignore the upper bits and sign extend to 64 bit
ADDW, SUBW: result_o = {{CVA6Cfg.XLEN - 32{adder_result[31]}}, adder_result[31:0]};
SH1ADDUW, SH2ADDUW, SH3ADDUW: result_o = adder_result;
// Shifts 32 bit
SLLW, SRLW, SRAW:
result_o = {{CVA6Cfg.XLEN - 32{shift_result32[31]}}, shift_result32[31:0]};
default: ;
endcase
end
unique case (fu_data_i.operation)
// Standard Operations
// operand_b_neg[XLEN:1] is ~operand_b here (negate mask is set for
// ANDN/ORN/XNOR), and plain operand_b for ANDL/ORL/XORL.
ANDL, ANDN: result_o = fu_data_i.operand_a & operand_b_neg[CVA6Cfg.XLEN:1];
ORL, ORN: result_o = fu_data_i.operand_a | operand_b_neg[CVA6Cfg.XLEN:1];
XORL, XNOR: result_o = fu_data_i.operand_a ^ operand_b_neg[CVA6Cfg.XLEN:1];
// Adder Operations
ADD, SUB, ADDUW, SH1ADD, SH2ADD, SH3ADD: result_o = adder_result;
// Shift Operations
SLL, SRL, SRA: result_o = (CVA6Cfg.IS_XLEN64) ? shift_result : shift_result32;
// Comparison Operations
SLTS, SLTU: result_o = {{CVA6Cfg.XLEN - 1{1'b0}}, less};
default: ; // default case to suppress unique warning
endcase
if (CVA6Cfg.RVB) begin
// Index for Bitwise Rotation
// One-hot mask selecting the bit addressed by operand_b mod XLEN
// (shared by the Zbs single-bit instructions below).
bit_indx = 1 << (fu_data_i.operand_b & (CVA6Cfg.XLEN - 1));
// rolw, roriw, rorw
// 32-bit rotate: (x << s) | (x >> (32 - s)); XLEN-32 padding keeps the
// RHS width consistent, the assignment truncates back to 32 bits.
rolw = ({{CVA6Cfg.XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} << fu_data_i.operand_b[4:0]) | ({{CVA6Cfg.XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} >> (CVA6Cfg.XLEN-32-fu_data_i.operand_b[4:0]));
rorw = ({{CVA6Cfg.XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} >> fu_data_i.operand_b[4:0]) | ({{CVA6Cfg.XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} << (CVA6Cfg.XLEN-32-fu_data_i.operand_b[4:0]));
if (CVA6Cfg.IS_XLEN64) begin
unique case (fu_data_i.operation)
CLZW, CTZW:
result_o = (lz_tz_wempty) ? 32 : {{CVA6Cfg.XLEN - 5{1'b0}}, lz_tz_wcount}; // all-zero low word: count is the full word width
ROLW: result_o = {{CVA6Cfg.XLEN - 32{rolw[31]}}, rolw};
RORW, RORIW: result_o = {{CVA6Cfg.XLEN - 32{rorw[31]}}, rorw};
default: ;
endcase
end
unique case (fu_data_i.operation)
// Integer minimum/maximum
MAX: result_o = less ? fu_data_i.operand_b : fu_data_i.operand_a;
MAXU: result_o = less ? fu_data_i.operand_b : fu_data_i.operand_a;
MIN: result_o = ~less ? fu_data_i.operand_b : fu_data_i.operand_a;
MINU: result_o = ~less ? fu_data_i.operand_b : fu_data_i.operand_a;
// Single bit instructions operations
BCLR, BCLRI: result_o = fu_data_i.operand_a & ~bit_indx;
BEXT, BEXTI: result_o = {{CVA6Cfg.XLEN - 1{1'b0}}, |(fu_data_i.operand_a & bit_indx)};
BINV, BINVI: result_o = fu_data_i.operand_a ^ bit_indx;
BSET, BSETI: result_o = fu_data_i.operand_a | bit_indx;
// Count Leading/Trailing Zeros
// All-zero input: lzc reports XLEN-1 with empty asserted, so +1 yields
// the architecturally required result XLEN.
CLZ, CTZ:
result_o = (lz_tz_empty) ? ({{CVA6Cfg.XLEN - $clog2(CVA6Cfg.XLEN) {1'b0}}, lz_tz_count} + 1)
: {{CVA6Cfg.XLEN - $clog2(CVA6Cfg.XLEN) {1'b0}}, lz_tz_count};
// Count population
CPOP, CPOPW: result_o = {{(CVA6Cfg.XLEN - ($clog2(CVA6Cfg.XLEN) + 1)) {1'b0}}, cpop};
// Sign and Zero Extend
SEXTB: result_o = {{CVA6Cfg.XLEN - 8{fu_data_i.operand_a[7]}}, fu_data_i.operand_a[7:0]};
SEXTH: result_o = {{CVA6Cfg.XLEN - 16{fu_data_i.operand_a[15]}}, fu_data_i.operand_a[15:0]};
ZEXTH: result_o = {{CVA6Cfg.XLEN - 16{1'b0}}, fu_data_i.operand_a[15:0]};
// Bitwise Rotation
// Full-width rotates: 6-bit shift amount on RV64, 5-bit on RV32.
ROL:
result_o = (CVA6Cfg.IS_XLEN64) ? ((fu_data_i.operand_a << fu_data_i.operand_b[5:0]) | (fu_data_i.operand_a >> (CVA6Cfg.XLEN-fu_data_i.operand_b[5:0]))) : ((fu_data_i.operand_a << fu_data_i.operand_b[4:0]) | (fu_data_i.operand_a >> (CVA6Cfg.XLEN-fu_data_i.operand_b[4:0])));
ROR, RORI:
result_o = (CVA6Cfg.IS_XLEN64) ? ((fu_data_i.operand_a >> fu_data_i.operand_b[5:0]) | (fu_data_i.operand_a << (CVA6Cfg.XLEN-fu_data_i.operand_b[5:0]))) : ((fu_data_i.operand_a >> fu_data_i.operand_b[4:0]) | (fu_data_i.operand_a << (CVA6Cfg.XLEN-fu_data_i.operand_b[4:0])));
ORCB: result_o = orcbw_result;
REV8: result_o = rev8w_result;
default:
// SLLI.UW (Zba, RV64 only): zero-extend the low 32 bits of rs1, then
// shift left by the 6-bit immediate.
if (fu_data_i.operation == SLLIUW && CVA6Cfg.IS_XLEN64)
result_o = {{CVA6Cfg.XLEN-32{1'b0}}, fu_data_i.operand_a[31:0]} << fu_data_i.operand_b[5:0]; // Left Shift 32 bit unsigned
endcase
end
// Zicond: conditional moves keyed on whether rs2 (operand_b) is zero.
if (CVA6Cfg.RVZiCond) begin
unique case (fu_data_i.operation)
CZERO_EQZ:
result_o = (|fu_data_i.operand_b) ? fu_data_i.operand_a : '0; // move zero to rd if rs2 is equal to zero else rs1
CZERO_NEZ:
result_o = (|fu_data_i.operand_b) ? '0 : fu_data_i.operand_a; // move zero to rd if rs2 is nonzero else rs1
default: ; // default case to suppress unique warning
endcase
end
end
endmodule