mirror of
https://github.com/openhwgroup/cve2.git
synced 2025-04-25 06:27:22 -04:00
Different optimizations in the ALU to make it smaller
- New generation for ff1, fl1, clb - Muxing for ff1, fl1, clb is different
This commit is contained in:
parent
28b5a8e8dc
commit
d1db668404
1 changed files with 177 additions and 53 deletions
230
alu.sv
230
alu.sv
|
@ -49,16 +49,30 @@ module riscv_alu
|
||||||
|
|
||||||
|
|
||||||
logic [31:0] operand_a_rev;
|
logic [31:0] operand_a_rev;
|
||||||
|
logic [31:0] operand_a_neg;
|
||||||
|
logic [31:0] operand_a_neg_rev;
|
||||||
|
|
||||||
|
assign operand_a_neg = ~operand_a_i;
|
||||||
|
|
||||||
// bit reverse operand_a for left shifts and bit counting
|
// bit reverse operand_a for left shifts and bit counting
|
||||||
genvar k;
|
|
||||||
generate
|
generate
|
||||||
|
genvar k;
|
||||||
for(k = 0; k < 32; k++)
|
for(k = 0; k < 32; k++)
|
||||||
begin
|
begin
|
||||||
assign operand_a_rev[k] = operand_a_i[31-k];
|
assign operand_a_rev[k] = operand_a_i[31-k];
|
||||||
end
|
end
|
||||||
endgenerate
|
endgenerate
|
||||||
|
|
||||||
|
// bit reverse operand_a_neg for left shifts and bit counting
|
||||||
|
generate
|
||||||
|
genvar m;
|
||||||
|
for(m = 0; m < 32; m++)
|
||||||
|
begin
|
||||||
|
assign operand_a_neg_rev[m] = operand_a_neg[31-m];
|
||||||
|
end
|
||||||
|
endgenerate
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// ____ _ _ _ _ _ _ _ _ //
|
// ____ _ _ _ _ _ _ _ _ //
|
||||||
|
@ -75,7 +89,7 @@ module riscv_alu
|
||||||
logic [35:0] adder_result_expanded;
|
logic [35:0] adder_result_expanded;
|
||||||
|
|
||||||
// prepare operand a
|
// prepare operand a
|
||||||
assign adder_op_a = (operator_i == `ALU_ABS) ? ~operand_a_i : operand_a_i;
|
assign adder_op_a = (operator_i == `ALU_ABS) ? operand_a_neg : operand_a_i;
|
||||||
|
|
||||||
// prepare operand b
|
// prepare operand b
|
||||||
assign adder_op_b = (operator_i == `ALU_SUB) ? ~operand_b_i : operand_b_i;
|
assign adder_op_b = (operator_i == `ALU_SUB) ? ~operand_b_i : operand_b_i;
|
||||||
|
@ -192,7 +206,9 @@ module riscv_alu
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
|
|
||||||
assign shift_left = (operator_i == `ALU_SLL) || (operator_i == `ALU_BINS);
|
// `ALU_FL1 and `ALU_CLB are used for the bit counting ops later
|
||||||
|
assign shift_left = (operator_i == `ALU_SLL) || (operator_i == `ALU_BINS) ||
|
||||||
|
(operator_i == `ALU_FL1) || (operator_i == `ALU_CLB);
|
||||||
|
|
||||||
// choose the bit reversed or the normal input for shift operand a
|
// choose the bit reversed or the normal input for shift operand a
|
||||||
assign shift_op_a = (shift_left == 1'b1) ? operand_a_rev : operand_a_i;
|
assign shift_op_a = (shift_left == 1'b1) ? operand_a_rev : operand_a_i;
|
||||||
|
@ -474,66 +490,61 @@ module riscv_alu
|
||||||
/////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
logic [31:0] ff_input; // either op_a_i or its bit reversed version
|
logic [31:0] ff_input; // either op_a_i or its bit reversed version
|
||||||
|
logic [5:0] cnt_result; // population count
|
||||||
logic [5:0] clb_result; // count leading bits
|
logic [5:0] clb_result; // count leading bits
|
||||||
logic [5:0] ff1_result; // holds the index of the first '1'
|
logic [5:0] ff1_result; // holds the index of the first '1'
|
||||||
|
logic ff_no_one; // if no ones are found
|
||||||
logic [5:0] fl1_result; // holds the index of the last '1'
|
logic [5:0] fl1_result; // holds the index of the last '1'
|
||||||
logic ff_cmp; // compare value for ff1 and fl1
|
logic [5:0] bitop_result; // result of all bitop operations muxed together
|
||||||
integer q;
|
|
||||||
|
|
||||||
assign ff_input = (operator_i == `ALU_FF1) ? operand_a_i : operand_a_rev;
|
alu_popcnt alu_popcnt_i
|
||||||
assign ff_cmp = (operator_i == `ALU_CLB) ? ~operand_a_i[31] : 1'b1;
|
(
|
||||||
|
.in_i ( operand_a_i ),
|
||||||
|
.result_o ( cnt_result )
|
||||||
|
);
|
||||||
|
|
||||||
always_comb
|
always_comb
|
||||||
begin
|
begin
|
||||||
ff1_result = 6'd0;
|
ff_input = 'x;
|
||||||
|
|
||||||
for(q = 1; q < 33; q++)
|
case (operator_i)
|
||||||
begin
|
`ALU_FF1: ff_input = operand_a_i;
|
||||||
if(ff_input[q - 1] == ff_cmp)
|
`ALU_FL1: ff_input = operand_a_rev;
|
||||||
begin
|
`ALU_CLB: begin
|
||||||
ff1_result = q;
|
if (operand_a_i[31])
|
||||||
break;
|
ff_input = operand_a_neg_rev;
|
||||||
|
else
|
||||||
|
ff_input = operand_a_rev;
|
||||||
end
|
end
|
||||||
end
|
endcase
|
||||||
end
|
end
|
||||||
|
|
||||||
|
alu_ff alu_ff_i
|
||||||
|
(
|
||||||
|
.in_i ( ff_input ),
|
||||||
|
.first_one_o ( ff1_result ),
|
||||||
|
.no_ones_o ( ff_no_one )
|
||||||
|
);
|
||||||
|
|
||||||
// special case if ff1_res is 0 (no 1 found), then we keep the 0
|
// special case if ff1_res is 0 (no 1 found), then we keep the 0
|
||||||
assign fl1_result = (ff1_result == 6'd0) ? 6'd0 : (6'd33 - ff1_result);
|
// this is done in the result mux
|
||||||
assign clb_result = (ff1_result == 6'd0) ? 6'd0 : (ff1_result - 6'd2);
|
assign fl1_result = 6'd33 - ff1_result;
|
||||||
|
assign clb_result = ff1_result - 6'd2;
|
||||||
|
|
||||||
// count the number of '1's in a word
|
always_comb
|
||||||
logic [5:0] cnt_result;
|
begin
|
||||||
logic [1:0] cnt_l1[16];
|
bitop_result = 'x;
|
||||||
logic [2:0] cnt_l2[8];
|
case (operator_i)
|
||||||
logic [3:0] cnt_l3[4];
|
`ALU_FF1: bitop_result = ff1_result;
|
||||||
logic [4:0] cnt_l4[2];
|
`ALU_FL1: bitop_result = fl1_result;
|
||||||
|
`ALU_CLB: bitop_result = clb_result;
|
||||||
|
`ALU_CNT: bitop_result = cnt_result;
|
||||||
|
default:;
|
||||||
|
endcase
|
||||||
|
|
||||||
genvar l, m, n, p;
|
if (ff_no_one)
|
||||||
generate for(l = 0; l < 16; l++)
|
bitop_result = '0;
|
||||||
begin
|
end
|
||||||
assign cnt_l1[l] = operand_a_i[2*l] + operand_a_i[2*l + 1];
|
|
||||||
end
|
|
||||||
endgenerate
|
|
||||||
|
|
||||||
generate for(m = 0; m < 8; m++)
|
|
||||||
begin
|
|
||||||
assign cnt_l2[m] = cnt_l1[2*m] + cnt_l1[2*m + 1];
|
|
||||||
end
|
|
||||||
endgenerate
|
|
||||||
|
|
||||||
generate for(n = 0; n < 4; n++)
|
|
||||||
begin
|
|
||||||
assign cnt_l3[n] = cnt_l2[2*n] + cnt_l2[2*n + 1];
|
|
||||||
end
|
|
||||||
endgenerate
|
|
||||||
|
|
||||||
generate for(p = 0; p < 2; p++)
|
|
||||||
begin
|
|
||||||
assign cnt_l4[p] = cnt_l3[2*p] + cnt_l3[2*p + 1];
|
|
||||||
end
|
|
||||||
endgenerate
|
|
||||||
|
|
||||||
assign cnt_result = cnt_l4[0] + cnt_l4[1];
|
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////
|
////////////////////////////////////////////////
|
||||||
|
@ -674,10 +685,7 @@ module riscv_alu
|
||||||
// Set Lower Equal Than Operations (result = 1, if a <= b)
|
// Set Lower Equal Than Operations (result = 1, if a <= b)
|
||||||
`ALU_SLETS, `ALU_SLETU: result_o = {31'b0, comparison_result_o};
|
`ALU_SLETS, `ALU_SLETU: result_o = {31'b0, comparison_result_o};
|
||||||
|
|
||||||
`ALU_FF1: result_o = {26'h0, ff1_result};
|
`ALU_FF1, `ALU_FL1, `ALU_CLB, `ALU_CNT: result_o = {26'h0, bitop_result};
|
||||||
`ALU_FL1: result_o = {26'h0, fl1_result};
|
|
||||||
`ALU_CLB: result_o = {26'h0, clb_result};
|
|
||||||
`ALU_CNT: result_o = {26'h0, cnt_result};
|
|
||||||
|
|
||||||
// Division Unit Commands
|
// Division Unit Commands
|
||||||
`ALU_DIV, `ALU_DIVU,
|
`ALU_DIV, `ALU_DIVU,
|
||||||
|
@ -690,3 +698,119 @@ module riscv_alu
|
||||||
assign ready_o = div_ready;
|
assign ready_o = div_ready;
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
|
||||||
|
// Find-first-one unit.
//
// Locates the least-significant '1' of in_i with a binary tree: every leaf
// looks at two neighbouring input bits, every inner node forwards the result
// of its lower child if that child saw a '1', otherwise that of its upper
// child.
//
// Parameters:
//   LEN          width of the input vector in bits (default 32). Widths that
//                are not a power of two are padded with constant-zero leaves.
//
// Ports:
//   in_i         vector to search.
//   first_one_o  1-based position of the lowest set bit (bit 0 -> 1,
//                bit LEN-1 -> LEN). Must be ignored while no_ones_o is high.
//   no_ones_o    high when in_i contains no '1'.
module alu_ff
#(
  parameter LEN = 32
)
(
  input  logic [LEN-1:0]         in_i,

  output logic [$clog2(LEN):0]   first_one_o,
  output logic                   no_ones_o
);

  localparam NUM_LEVELS = $clog2(LEN);

  // index_lut[j] holds the 1-based position reported for input bit j
  logic [LEN-1:0] [NUM_LEVELS:0]           index_lut;
  // tree nodes stored heap-style: node n has children 2n+1 and 2n+2,
  // node 0 is the root
  logic [2**NUM_LEVELS-1:0]                sel_nodes;
  logic [2**NUM_LEVELS-1:0] [NUM_LEVELS:0] index_nodes;

  genvar j;
  genvar k;
  genvar l;
  genvar level;

  //////////////////////////////////////////////////////////////////////////////
  // generate tree structure
  //////////////////////////////////////////////////////////////////////////////

  generate
    for (j = 0; j < LEN; j++) begin : gen_index_lut
      assign index_lut[j] = $unsigned(j + 1);
    end
  endgenerate

  generate
    // A one bit wide input has no tree levels at all; drive the root directly
    // so that the outputs are not left floating.
    if (NUM_LEVELS == 0) begin : gen_single_bit
      assign sel_nodes[0]   = in_i[0];
      assign index_nodes[0] = index_lut[0];
    end

    for (level = 0; level < NUM_LEVELS; level++) begin : gen_level
      //------------------------------------------------------------
      if (level < NUM_LEVELS-1) begin : gen_inner
        for (l = 0; l < 2**level; l++) begin : gen_node
          // a node sees a '1' if either child does
          assign sel_nodes[2**level-1+l]   = sel_nodes[2**(level+1)-1+l*2] | sel_nodes[2**(level+1)-1+l*2+1];
          // the lower child wins so that the lowest set bit is reported
          assign index_nodes[2**level-1+l] = (sel_nodes[2**(level+1)-1+l*2] == 1'b1) ?
                                             index_nodes[2**(level+1)-1+l*2] : index_nodes[2**(level+1)-1+l*2+1];
        end
      end
      //------------------------------------------------------------
      else begin : gen_leaves
        for (k = 0; k < 2**level; k++) begin : gen_leaf
          // Leaf k covers in_i[2k] and in_i[2k+1]; the highest valid bit is
          // LEN-1, so all three cases are decided against LEN-1.
          if (k * 2 < LEN-1) begin : gen_pair
            // both indices are still in the vector
            assign sel_nodes[2**level-1+k]   = in_i[k*2] | in_i[k*2+1];
            assign index_nodes[2**level-1+k] = (in_i[k*2] == 1'b1) ? index_lut[k*2] : index_lut[k*2+1];
          end
          else if (k * 2 == LEN-1) begin : gen_last_bit
            // only the first index is still in the vector
            assign sel_nodes[2**level-1+k]   = in_i[k*2];
            assign index_nodes[2**level-1+k] = index_lut[k*2];
          end
          else begin : gen_padding
            // leaf lies completely outside the vector
            assign sel_nodes[2**level-1+k]   = 1'b0;
            assign index_nodes[2**level-1+k] = '0;
          end
        end
      end
      //------------------------------------------------------------
    end
  endgenerate

  //////////////////////////////////////////////////////////////////////////////
  // connect output
  //////////////////////////////////////////////////////////////////////////////

  assign first_one_o = index_nodes[0];
  assign no_ones_o   = ~sel_nodes[0];

endmodule
|
||||||
|
|
||||||
|
// count the number of '1's in a word
|
||||||
|
// Population count: result_o is the number of '1' bits in the 32 bit word
// in_i (0 to 32), computed by a four-level tree of adders.
module alu_popcnt
(
  input  logic [31:0]  in_i,
  output logic [5: 0]  result_o
);

  // partial sums; each level halves the number of entries and grows the
  // entries by one bit so that no carry is lost
  logic [1:0] sum_of_2  [16];
  logic [2:0] sum_of_4  [8];
  logic [3:0] sum_of_8  [4];
  logic [4:0] sum_of_16 [2];

  genvar i;

  generate
    // level 1: add neighbouring input bits
    for (i = 0; i < 16; i++) begin : gen_sum_of_2
      assign sum_of_2[i] = in_i[2*i] + in_i[2*i + 1];
    end

    // level 2: add neighbouring pair sums
    for (i = 0; i < 8; i++) begin : gen_sum_of_4
      assign sum_of_4[i] = sum_of_2[2*i] + sum_of_2[2*i + 1];
    end

    // level 3
    for (i = 0; i < 4; i++) begin : gen_sum_of_8
      assign sum_of_8[i] = sum_of_4[2*i] + sum_of_4[2*i + 1];
    end

    // level 4: one sum per half word
    for (i = 0; i < 2; i++) begin : gen_sum_of_16
      assign sum_of_16[i] = sum_of_8[2*i] + sum_of_8[2*i + 1];
    end
  endgenerate

  // final addition of the two half word counts
  assign result_o = sum_of_16[0] + sum_of_16[1];

endmodule
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue