Added mult files

This commit is contained in:
Pasquale Davide Schiavone 2017-03-06 12:22:59 +01:00
parent 3cffca4756
commit 00325be31e
17 changed files with 4688 additions and 0 deletions

369
mult.sv Normal file
View file

@ -0,0 +1,369 @@
// Copyright 2015 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////
// Engineer: Matthias Baer - baermatt@student.ethz.ch //
// //
// Additional contributions by: //
// Andreas Traber - atraber@student.ethz.ch //
// //
// Design Name: Subword multiplier and MAC //
// Project Name: RI5CY //
// Language: SystemVerilog //
// //
// Description: Advanced MAC unit for PULP. //
// //
////////////////////////////////////////////////////////////////////////////////
import zeroriscy_defines::*;
// zeroriscy_mult
//
// Iterative 32x32 multiplier that produces the low 32 bits of op_a_i * op_b_i
// using one 9x9-bit signed multiplier. The operands are split into bytes and
// the ten byte products that can reach bits [31:0] are generated one after the
// other (one per FSM step), shifted into place and accumulated.
//
// Parameters
//   ADD_TYPE  1: the partial product is added to the running sum inside this
//                module (mult_result_o = shifted product + mac_op) and the sum
//                is registered in accum_q.
//             0: mult_result_o carries only the shifted partial product,
//                pp_acc_o carries the running sum, and accum_q is loaded from
//                op_acc_i (presumably the output of a shared external adder --
//                TODO confirm against the instantiating module).
//   ADD_CYCL  1: every step takes two enabled cycles. While do_mul_q is set the
//                shifted product is captured in mult_res_q; in the other phase
//                accum_q captures op_acc_int. ADD_TYPE must be 0 in this mode.
//
// Ports
//   clk, rst_n      clock and asynchronous active-low reset
//   mult_en_i       all registers only advance while this is high
//   op_a_i, op_b_i  multiplicand / multiplier, must be held during the operation
//   op_acc_i        accumulator value fed back from outside (ADD_TYPE == 0)
//   pp_acc_o        running sum presented to the adder
//   mult_result_o   partial product (or partial product + sum when ADD_TYPE == 1)
//   ready_o         high in the last step (STEP9)
module zeroriscy_mult
#(
  parameter ADD_TYPE = 0, //0 shared
  parameter ADD_CYCL = 0  //if ADD_CYCL is 1, ADD_TYPE must be 0
)
(
  input  logic        clk,
  input  logic        rst_n,
  input  logic        mult_en_i,
  // input logic [ 2:0] operator_i,
  input  logic [31:0] op_a_i,
  input  logic [31:0] op_b_i,
  input  logic [31:0] op_acc_i,
  output logic [31:0] pp_acc_o,
  output logic [31:0] mult_result_o,
  output logic        ready_o
);

  enum logic [3:0] { IDLE, STEP0, STEP1, STEP2, STEP3, STEP4, STEP5, STEP6, STEP7, STEP8, STEP9 } mult_state_q, mult_state_n;
  enum logic [1:0] { MULT_00_SHIFT, MULT_08_SHIFT, MULT_16_SHIFT, MULT_24_SHIFT } shift_mul;

  logic [31:0] accum_q;        // running sum of the partial products
  logic [31:0] mult_res_q;     // registered partial product (ADD_CYCL == 1 only)
  logic [ 7:0] mult_op_a;      // byte of op_a_i selected for this step
  logic [ 7:0] mult_op_b;      // byte of op_b_i selected for this step
  logic [31:0] mac_op, op_acc_int;
  logic [31:0] mult_extended;  // 9x9 signed product, sign-extended to 32 bits
  logic [31:0] mult_shifted;   // product moved to its byte position
  logic        sign_a, sign_b;
  logic        do_mul_n, do_mul_q;

  // The sign bits are only non-zero for the top byte of each operand.
  assign mult_extended = $signed({sign_a,mult_op_a})*$signed({sign_b,mult_op_b});

  if (ADD_CYCL)
    assign mult_result_o = mult_res_q;
  else
    assign mult_result_o = ADD_TYPE ? mult_shifted + mac_op : mult_shifted;

  assign pp_acc_o = ADD_CYCL ? accum_q : mac_op;

  always_ff @(posedge clk or negedge rst_n) begin : proc_mult_state_q
    if (~rst_n) begin
      mult_state_q <= IDLE;
      accum_q      <= '0;
      mult_res_q   <= '0;
      do_mul_q     <= 1'b1;
    end else begin
      if (mult_en_i) begin
        mult_state_q <= mult_state_n;
        if (ADD_CYCL) begin
          // accumulate only in the "add" phase (or clear when idle)
          if (~do_mul_q || mult_state_q == IDLE)
            accum_q <= op_acc_int;
        end else
          accum_q <= ADD_TYPE ? mult_result_o : op_acc_i;
        if (do_mul_q && mult_state_q != IDLE)
          mult_res_q <= ADD_CYCL ? mult_shifted : '0;
        do_mul_q <= do_mul_n;
      end
    end
  end

  // Operand / shift selection. This table is identical for both FSM flavours,
  // so it is kept in one place. IDLE, STEP0 and unused encodings all select
  // all*bll with a cleared accumulator input.
  always_comb begin : mult_operand_sel
    mult_op_a = op_a_i[ 7:0 ];
    mult_op_b = op_b_i[ 7:0 ];
    mac_op    = accum_q;
    sign_a    = 1'b0;
    sign_b    = 1'b0;
    shift_mul = MULT_00_SHIFT;
    unique case (mult_state_q)
      STEP1: begin //all*blh<<8
        mult_op_b = op_b_i[15:8 ];
        shift_mul = MULT_08_SHIFT;
      end
      STEP2: begin //all*bhl<<16
        mult_op_b = op_b_i[23:16];
        shift_mul = MULT_16_SHIFT;
      end
      STEP3: begin //all*bhh<<24
        mult_op_b = op_b_i[31:24];
        sign_b    = op_b_i[31];
        shift_mul = MULT_24_SHIFT;
      end
      STEP4: begin //alh*bll<<8
        mult_op_a = op_a_i[15:8 ];
        shift_mul = MULT_08_SHIFT;
      end
      STEP5: begin //alh*blh<<16
        mult_op_a = op_a_i[15:8 ];
        mult_op_b = op_b_i[15:8 ];
        shift_mul = MULT_16_SHIFT;
      end
      STEP6: begin //alh*bhl<<24
        mult_op_a = op_a_i[15:8 ];
        mult_op_b = op_b_i[23:16];
        shift_mul = MULT_24_SHIFT;
      end
      STEP7: begin //ahl*bll<<16
        mult_op_a = op_a_i[23:16];
        shift_mul = MULT_16_SHIFT;
      end
      STEP8: begin //ahl*blh<<24
        mult_op_a = op_a_i[23:16];
        mult_op_b = op_b_i[15:8 ];
        shift_mul = MULT_24_SHIFT;
      end
      STEP9: begin //ahh*bll<<24
        mult_op_a = op_a_i[31:24];
        sign_a    = op_a_i[31];
        shift_mul = MULT_24_SHIFT;
      end
      default: begin //IDLE, STEP0 (all*bll) and unused encodings
        mac_op = 32'h0;
      end
    endcase // mult_state_q
  end

  // Step sequencing.
  if (ADD_CYCL) begin : gen_fsm_add_cycl
    // Two enabled cycles per step: a step is left only when do_mul_q is low.
    always_comb begin : mult_fsm
      ready_o    = 1'b0;
      do_mul_n   = ~do_mul_q;
      op_acc_int = op_acc_i;
      unique case (mult_state_q)
        STEP0: mult_state_n = ~do_mul_q ? STEP1 : STEP0;
        STEP1: mult_state_n = ~do_mul_q ? STEP2 : STEP1;
        STEP2: mult_state_n = ~do_mul_q ? STEP3 : STEP2;
        STEP3: mult_state_n = ~do_mul_q ? STEP4 : STEP3;
        STEP4: mult_state_n = ~do_mul_q ? STEP5 : STEP4;
        STEP5: mult_state_n = ~do_mul_q ? STEP6 : STEP5;
        STEP6: mult_state_n = ~do_mul_q ? STEP7 : STEP6;
        STEP7: mult_state_n = ~do_mul_q ? STEP8 : STEP7;
        STEP8: mult_state_n = ~do_mul_q ? STEP9 : STEP8;
        STEP9: begin
          mult_state_n = ~do_mul_q ? IDLE : STEP9;
          ready_o      = ~do_mul_q;
        end
        default: begin //IDLE and unused encodings: clear and start over
          do_mul_n     = 1'b1;
          op_acc_int   = '0;
          mult_state_n = STEP0;
        end
      endcase // mult_state_q
    end
  end else begin : gen_fsm_single_cycle
    // The multiply/add phase logic is unused in this configuration, but
    // do_mul_n still feeds the do_mul_q register. Tie both signals off so the
    // register does not become X after the first enabled clock edge.
    assign do_mul_n   = 1'b1;
    assign op_acc_int = op_acc_i;

    // One enabled cycle per step. The reset state (IDLE) behaves like STEP0.
    always_comb begin : mult_fsm
      ready_o = 1'b0;
      unique case (mult_state_q)
        STEP0: mult_state_n = STEP1;
        STEP1: mult_state_n = STEP2;
        STEP2: mult_state_n = STEP3;
        STEP3: mult_state_n = STEP4;
        STEP4: mult_state_n = STEP5;
        STEP5: mult_state_n = STEP6;
        STEP6: mult_state_n = STEP7;
        STEP7: mult_state_n = STEP8;
        STEP8: mult_state_n = STEP9;
        STEP9: begin
          mult_state_n = STEP0;
          ready_o      = 1'b1;
        end
        default: mult_state_n = STEP1; //IDLE and unused encodings act as STEP0
      endcase // mult_state_q
    end
  end

  // Move the 9x9 product to its byte position; bits shifted past bit 31 are dropped.
  always_comb begin
    unique case (shift_mul)
      MULT_00_SHIFT:
        mult_shifted = mult_extended;
      MULT_08_SHIFT:
        mult_shifted = {mult_extended[23:0],8'h0};
      MULT_16_SHIFT:
        mult_shifted = {mult_extended[15:0],16'h0};
      MULT_24_SHIFT:
        mult_shifted = {mult_extended[ 7:0],24'h0};
      default:
        mult_shifted = mult_extended;
    endcase
  end

  // assign result_o = mult_shifted + mac_op;
endmodule // zeroriscy_mult

136
mult_16.sv Normal file
View file

@ -0,0 +1,136 @@
// Copyright 2015 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////
// Engineer: Matthias Baer - baermatt@student.ethz.ch //
// //
// Additional contributions by: //
// Andreas Traber - atraber@student.ethz.ch //
// //
// Design Name: Subword multiplier and MAC //
// Project Name: RI5CY //
// Language: SystemVerilog //
// //
// Description: Advanced MAC unit for PULP. //
// //
////////////////////////////////////////////////////////////////////////////////
import zeroriscy_defines::*;
// zeroriscy_mult16
//
// Three-step multiplier producing the low 32 bits of op_a_i * op_b_i from
// 16-bit halves, using one 17x17-bit signed multiplier:
//   STEP0: al*bl, STEP1: (al*bh)<<16, STEP2: (ah*bl)<<16 (ready_o high).
// With ADD_TYPE != 0 the running sum is added inside the module; otherwise
// mult_result_o is the bare partial product, pp_acc_o the running sum and
// accum_q is loaded from op_acc_i.
module zeroriscy_mult16
#(
  parameter ADD_TYPE = 0 //0 shared
)
(
  input  logic        clk,
  input  logic        rst_n,
  input  logic        mult_en_i,
  // input logic [ 2:0] operator_i,
  input  logic [31:0] op_a_i,
  input  logic [31:0] op_b_i,
  input  logic [31:0] op_acc_i,
  output logic [31:0] pp_acc_o,
  output logic [31:0] mult_result_o,
  output logic        ready_o
);

  enum logic [1:0] { STEP0, STEP1, STEP2 } mult_state_q, mult_state_n;
  enum logic       { MULT_00_SHIFT, MULT_16_SHIFT } shift_mul;

  logic [31:0] accum_q;
  logic [15:0] mult_op_a;
  logic [15:0] mult_op_b;
  logic [31:0] mac_op;
  logic [31:0] mult_extended;
  logic [31:0] mult_shifted;
  logic        sign_a, sign_b;

  // State and accumulator registers; both advance only while mult_en_i is high.
  always_ff @(posedge clk or negedge rst_n) begin : proc_mult_state_q
    if (~rst_n) begin
      mult_state_q <= STEP0;
      accum_q      <= '0;
    end else if (mult_en_i) begin
      mult_state_q <= mult_state_n;
      accum_q      <= ADD_TYPE ? mult_result_o : op_acc_i;
    end
  end

  // Step decoder: start from the STEP0 selection (al*bl, nothing accumulated)
  // and override only what differs in the later steps.
  always_comb begin : mult_fsm
    ready_o      = 1'b0;
    mult_op_a    = op_a_i[15:0 ];
    mult_op_b    = op_b_i[15:0 ];
    mac_op       = 32'h0;
    sign_a       = 1'b0;
    sign_b       = 1'b0;
    shift_mul    = MULT_00_SHIFT;
    mult_state_n = STEP1;
    unique case (mult_state_q)
      STEP1: begin //al*bh<<16
        mult_op_b    = op_b_i[31:16];
        mac_op       = accum_q;
        sign_b       = op_b_i[31];
        shift_mul    = MULT_16_SHIFT;
        mult_state_n = STEP2;
      end
      STEP2: begin //ah*bl<<16
        mult_op_a    = op_a_i[31:16];
        mac_op       = accum_q;
        sign_a       = op_a_i[31];
        shift_mul    = MULT_16_SHIFT;
        mult_state_n = STEP0;
        ready_o      = 1'b1;
      end
      default: ; //STEP0 and the unused encoding keep the al*bl selection
    endcase // mult_state_q
  end

  assign mult_extended = $signed({sign_a,mult_op_a})*$signed({sign_b,mult_op_b});
  assign mult_result_o = ADD_TYPE ? mult_shifted + mac_op : mult_shifted;
  assign pp_acc_o      = mac_op;

  // Place the product in the upper half when requested.
  always_comb begin
    if (shift_mul == MULT_16_SHIFT)
      mult_shifted = {mult_extended[15:0],16'h0};
    else
      mult_shifted = mult_extended;
  end

  // assign result_o = mult_shifted + mac_op;
endmodule // zeroriscy_mult16

161
mult_16_hq.sv Normal file
View file

@ -0,0 +1,161 @@
// Copyright 2015 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////
// Engineer: Matthias Baer - baermatt@student.ethz.ch //
// //
// Additional contributions by: //
// Andreas Traber - atraber@student.ethz.ch //
// //
// Design Name: Subword multiplier and MAC //
// Project Name: RI5CY //
// Language: SystemVerilog //
// //
// Description: Advanced MAC unit for PULP. //
// //
////////////////////////////////////////////////////////////////////////////////
import zeroriscy_defines::*;
`define OP_L 15:0
`define OP_H 31:16
// zeroriscy_mult16_hq
//
// Multi-cycle multiplier built from one 17x17-bit signed multiply-accumulate.
// The 16-bit halves of the operands are multiplied in the order
//   ALBL -> ALBH -> AHBL -> (AHBH, MUL_H only) -> FINISH
// and accumulated in the 34-bit register mac_res_q. mult_result_o is
// mac_res_q[31:0]; ready_o is high in FINISH.
//
// Ports
//   operator_i      selects MUL_L or MUL_H (constants from zeroriscy_defines;
//                   presumably low word / high word of the product)
//   signed_mode_i   bit 0 enables the sign of op_a_i, bit 1 the sign of op_b_i
//   mult_en_i       registers only advance while this is high
//
// NOTE(review): the inner "unique case (operator_i)" statements have no default;
// this assumes MUL_L and MUL_H cover both values of the 1-bit operator_i --
// confirm against zeroriscy_defines.
module zeroriscy_mult16_hq
(
  input  logic        clk,
  input  logic        rst_n,
  input  logic        mult_en_i,
  input  logic        operator_i,
  input  logic [1:0]  signed_mode_i,
  input  logic [31:0] op_a_i,
  input  logic [31:0] op_b_i,
  output logic [31:0] mult_result_o,
  output logic        ready_o
);

  enum logic [2:0] { ALBL, ALBH, AHBL, AHBH, FINISH } mult_state_q, mult_state_n;

  logic [33:0] mul_res_ext;
  logic [34:0] mac_res_ext;
  logic [33:0] mac_res_q, mac_res_n, mac_res;
  logic [15:0] mult_op_a;
  logic [15:0] mult_op_b;
  logic [33:0] accum;
  logic        sign_a, sign_b, accum_sign, signed_mult;

  always_ff @(posedge clk or negedge rst_n) begin : proc_mult_state_q
    if (~rst_n) begin
      mult_state_q <= ALBL;
      mac_res_q    <= '0;
    end else begin
      if (mult_en_i) begin
        mult_state_q <= mult_state_n;
        mac_res_q    <= mac_res_n;
      end
    end
  end

  assign signed_mult   = (signed_mode_i != 2'b00);
  assign mult_result_o = mac_res_q[31:0];

  // 17x17 signed multiply plus the (signed) accumulator input
  assign mac_res_ext = $signed({sign_a, mult_op_a})*$signed({sign_b, mult_op_b}) + $signed(accum);
  assign mac_res     = mac_res_ext[33:0];

  always_comb begin : mult_fsm
    ready_o   = 1'b0;
    mult_op_a = op_a_i[`OP_L];
    mult_op_b = op_b_i[`OP_L];
    sign_a    = 1'b0;
    sign_b    = 1'b0;
    accum     = mac_res_q;
    mac_res_n = mac_res;
    // Default next state: without it mult_state_n is unassigned for the unused
    // encodings (5..7), which infers a latch. Those encodings now return to
    // ALBL; every valid state overrides this below.
    mult_state_n = ALBL;

    unique case (mult_state_q)
      ALBL: begin
        //al*bl
        mult_op_a    = op_a_i[`OP_L];
        mult_op_b    = op_b_i[`OP_L];
        sign_a       = 1'b0;
        sign_b       = 1'b0;
        accum        = '0;
        mac_res_n    = mac_res;
        mult_state_n = ALBH;
      end
      ALBH: begin
        //al*bh<<16
        mult_op_a = op_a_i[`OP_L];
        mult_op_b = op_b_i[`OP_H];
        sign_a    = 1'b0;
        sign_b    = signed_mode_i[1] & op_b_i[31];
        //result of AL*BL (in mac_res_q) always unsigned with no carry, so carries_q always 00
        accum     = {18'b0,mac_res_q[31:16]};
        unique case(operator_i)
          MUL_L: begin
            // keep the finished low half, accumulate into the upper half
            mac_res_n = {2'b0,mac_res[`OP_L],mac_res_q[`OP_L]};
          end
          MUL_H: begin
            mac_res_n = mac_res;
          end
        endcase
        mult_state_n = AHBL;
      end
      AHBL: begin
        //ah*bl<<16
        mult_op_a = op_a_i[`OP_H];
        mult_op_b = op_b_i[`OP_L];
        sign_a    = signed_mode_i[0] & op_a_i[31];
        sign_b    = 1'b0;
        unique case(operator_i)
          MUL_L: begin
            accum        = {18'b0,mac_res_q[31:16]};
            mac_res_n    = {2'b0,mac_res[15:0],mac_res_q[15:0]};
            mult_state_n = FINISH;
          end
          MUL_H: begin
            accum        = mac_res_q;
            mac_res_n    = mac_res;
            mult_state_n = AHBH;
          end
        endcase
      end
      AHBH: begin
        //only MUL_H here
        //ah*bh
        mult_op_a = op_a_i[`OP_H];
        mult_op_b = op_b_i[`OP_H];
        sign_a    = signed_mode_i[0] & op_a_i[31];
        sign_b    = signed_mode_i[1] & op_b_i[31];
        accum[17:0 ] = mac_res_q[33:16];
        // accum[33:18] is 16 bits wide; the replication count now matches the
        // slice (it was 18, silently truncated).
        accum[33:18] = {16{signed_mult & mac_res_q[33]}};
        //result of AH*BL is not signed only if signed_mode_i == 2'b00
        mac_res_n    = mac_res;
        mult_state_n = FINISH;
      end
      FINISH: begin
        mult_state_n = ALBL;
        //ready_o must not be a timing critical signal
        ready_o = 1'b1;
      end
      default:;
    endcase // mult_state_q
  end
endmodule // zeroriscy_mult16_hq

77
mult_32.sv Normal file
View file

@ -0,0 +1,77 @@
// Copyright 2015 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////
// Engineer: Matthias Baer - baermatt@student.ethz.ch //
// //
// Additional contributions by: //
// Andreas Traber - atraber@student.ethz.ch //
// //
// Design Name: Subword multiplier and MAC //
// Project Name: RI5CY //
// Language: SystemVerilog //
// //
// Description: Advanced MAC unit for PULP. //
// //
////////////////////////////////////////////////////////////////////////////////
import zeroriscy_defines::*;
// zeroriscy_mult32
//
// Bit-serial shift-and-add multiplier: one bit of op_b_i is consumed per
// enabled cycle (32 cycles), producing the low 32 bits of op_a_i * op_b_i.
// mult_state_q is the index of the bit being processed; ready_o is high while
// it equals 31.
//
// Parameters
//   ADD_TYPE  != 0: the partial product is added to the running sum here.
//             == 0: mult_result_o is the bare partial product, pp_acc_o the
//                   running sum, and accum_q is loaded from op_acc_i.
//   B_SHIFT   != 0: the current bit of B is taken from bit 0 of a shift
//                   register; == 0: op_b_i is indexed with mult_state_q.
module zeroriscy_mult32
#(
  parameter ADD_TYPE = 0, //0 shared
  parameter B_SHIFT  = 0
)
(
  input  logic        clk,
  input  logic        rst_n,
  input  logic        mult_en_i,
  // input logic [ 2:0] operator_i,
  input  logic [31:0] op_a_i,
  input  logic [31:0] op_b_i,
  input  logic [31:0] op_acc_i,
  output logic [31:0] pp_acc_o,
  output logic [31:0] mult_result_o,
  output logic        ready_o
);

  logic [ 4:0] mult_state_q;
  logic [31:0] accum_q;
  logic [31:0] op_b_shift_q, op_b_shift;
  logic [31:0] mult_op_a, mult_extended;
  logic        bit_b;
  logic        first_step;   // high while bit 0 is being processed

  assign first_step = (mult_state_q == 5'd0);
  assign ready_o    = (mult_state_q == 5'd31); //(&mult_state_q)

  always_ff @(posedge clk or negedge rst_n) begin : proc_mult_state_q
    if (~rst_n) begin
      mult_state_q <= '0;
      accum_q      <= '0;
      op_b_shift_q <= '0;
    end else if (mult_en_i) begin
      mult_state_q <= mult_state_q + 5'h1; //wraps from 31 back to 0

      if (ADD_TYPE)
        accum_q <= mult_result_o;
      else
        accum_q <= op_acc_i;

      if (B_SHIFT)
        op_b_shift_q <= op_b_shift >> 1;
      else
        op_b_shift_q <= '0;
    end
  end

  // Datapath: select the current bit of B, gate A with it, shift the gated A
  // to the bit position and (optionally) add the running sum.
  always_comb begin
    op_b_shift    = first_step ? op_b_i : op_b_shift_q;
    pp_acc_o      = first_step ? 32'h0  : accum_q;
    bit_b         = B_SHIFT ? op_b_shift[0] : op_b_i[mult_state_q];
    mult_op_a     = op_a_i & {32{bit_b}};
    mult_extended = mult_op_a << mult_state_q;
    mult_result_o = ADD_TYPE ? mult_extended + pp_acc_o : mult_extended;
  end
endmodule // zeroriscy_mult32

88
mult_32_2.sv Normal file
View file

@ -0,0 +1,88 @@
// Copyright 2015 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////
// Engineer: Matthias Baer - baermatt@student.ethz.ch //
// //
// Additional contributions by: //
// Andreas Traber - atraber@student.ethz.ch //
// //
// Design Name: Subword multiplier and MAC //
// Project Name: RI5CY //
// Language: SystemVerilog //
// //
// Description: Advanced MAC unit for PULP. //
// //
////////////////////////////////////////////////////////////////////////////////
import zeroriscy_defines::*;
// zeroriscy_mult33 (mult_32_2.sv variant)
//
// Bit-serial shift-and-add multiplier with a registered partial product. It
// produces the low 32 bits of op_a_i * op_b_i, one bit of op_b_i per enabled
// cycle.
//
//   MULT_IDLE  loads partial product 0 (op_a_i gated with op_b_i[0]) and
//              op_b_i >> 1, clears the accumulator and starts counting.
//   MULT_COMP  in step k op_a_shift_q holds partial product k and accum_q the
//              sum of partial products 0..k-1; the next partial product is
//              prepared from the next bit of B. ready_o is high in step 31,
//              when mult_result_o (ADD_TYPE != 0) is the complete product.
//
// Fixes relative to the previous revision of this block:
//   * curr_state_q was never advanced, so the FSM never left MULT_IDLE and
//     ready_o never asserted;
//   * MULT_IDLE / MULT_COMP were not declared in this module; they are now a
//     local enum;
//   * op_b_shift_q was reloaded from op_b_i >> 1 every cycle and the partial
//     product was always shifted by 1, so only bits 0 and 1 of B were used;
//   * unused signals were removed.
//
// NOTE(review): op_a_i must be held stable for the whole operation, and this
// module shares its name with the one in mult_33.sv -- only one of the two can
// be compiled into a design.
module zeroriscy_mult33
#(
  parameter ADD_TYPE = 0 //0 shared
)
(
  input  logic        clk,
  input  logic        rst_n,
  input  logic        mult_en_i,
  // input logic [ 2:0] operator_i,
  input  logic [31:0] op_a_i,
  input  logic [31:0] op_b_i,
  input  logic [31:0] op_acc_i,
  output logic [31:0] pp_acc_o,
  output logic [31:0] mult_result_o,
  output logic        ready_o
);

  logic [ 4:0] mult_state_q;                        // index of the partial product in op_a_shift_q
  enum logic   { MULT_IDLE, MULT_COMP } curr_state_q;
  logic [31:0] accum_q;                             // running sum
  logic [31:0] op_b_shift_q;                        // remaining bits of B, next bit at [0]
  logic [31:0] op_a_shift_q;                        // current partial product

  always_ff @(posedge clk or negedge rst_n) begin : proc_mult_state_q
    if (~rst_n) begin
      mult_state_q <= '0;
      accum_q      <= '0;
      op_b_shift_q <= '0;
      op_a_shift_q <= '0;
      curr_state_q <= MULT_IDLE;
    end else begin
      if (mult_en_i) begin
        unique case (curr_state_q)
          MULT_IDLE: begin
            op_a_shift_q <= op_a_i & {32{op_b_i[0]}};
            op_b_shift_q <= op_b_i >> 1;
            mult_state_q <= 5'd0;
            accum_q      <= '0;
            curr_state_q <= MULT_COMP;
          end
          MULT_COMP: begin
            // next partial product: A gated with the next bit of B, moved to
            // bit position k+1 (6-bit amount so that step 31 shifts everything out)
            op_a_shift_q <= (op_a_i & {32{op_b_shift_q[0]}}) << ({1'b0, mult_state_q} + 6'd1);
            op_b_shift_q <= op_b_shift_q >> 1;
            mult_state_q <= mult_state_q + 5'd1;
            accum_q      <= ADD_TYPE ? mult_result_o : op_acc_i;
            if (mult_state_q == 5'd31)
              curr_state_q <= MULT_IDLE;
          end
          default:;
        endcase // curr_state_q
      end
    end
  end

  assign ready_o       = mult_state_q == 5'd31; //(&mult_state_q)
  assign pp_acc_o      = accum_q;
  assign mult_result_o = ADD_TYPE ? accum_q + op_a_shift_q : op_a_shift_q;
endmodule // zeroriscy_mult33

77
mult_32_hq.sv Normal file
View file

@ -0,0 +1,77 @@
// Copyright 2015 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////
// Engineer: Matthias Baer - baermatt@student.ethz.ch //
// //
// Additional contributions by: //
// Andreas Traber - atraber@student.ethz.ch //
// //
// Design Name: Subword multiplier and MAC //
// Project Name: RI5CY //
// Language: SystemVerilog //
// //
// Description: Advanced MAC unit for PULP. //
// //
////////////////////////////////////////////////////////////////////////////////
import zeroriscy_defines::*;
// zeroriscy_mult32 (mult_32_hq.sv)
//
// Bit-serial shift-and-add multiplier producing the low 32 bits of
// op_a_i * op_b_i in 32 enabled cycles; mult_state_q counts the bit of B being
// processed and ready_o is high while it equals 31.
//
//   ADD_TYPE != 0  partial product is added to the running sum in this module
//   ADD_TYPE == 0  mult_result_o is the bare partial product, pp_acc_o the
//                  running sum, accum_q is loaded from op_acc_i
//   B_SHIFT  != 0  the current bit of B comes from bit 0 of a shift register
//   B_SHIFT  == 0  the current bit of B is op_b_i[mult_state_q]
//
// NOTE(review): this module has the same name and logic as the one in
// mult_32.sv; compiling both files into one design would clash.
module zeroriscy_mult32
#(
  parameter ADD_TYPE = 0, //0 shared
  parameter B_SHIFT  = 0
)
(
  input  logic        clk,
  input  logic        rst_n,
  input  logic        mult_en_i,
  // input logic [ 2:0] operator_i,
  input  logic [31:0] op_a_i,
  input  logic [31:0] op_b_i,
  input  logic [31:0] op_acc_i,
  output logic [31:0] pp_acc_o,
  output logic [31:0] mult_result_o,
  output logic        ready_o
);

  logic [ 4:0] mult_state_q;
  logic [31:0] accum_q;
  logic [31:0] op_b_shift_q, op_b_shift;
  logic [31:0] mult_op_a, mult_extended;
  logic        bit_b;
  logic        at_bit0;   // high while the first bit of B is being processed

  assign at_bit0 = (mult_state_q == 5'd0);
  assign ready_o = (mult_state_q == 5'd31); //(&mult_state_q)

  // Bit counter, accumulator and B shift register.
  always_ff @(posedge clk or negedge rst_n) begin : proc_mult_state_q
    if (~rst_n) begin
      mult_state_q <= '0;
      accum_q      <= '0;
      op_b_shift_q <= '0;
    end else if (mult_en_i) begin
      mult_state_q <= mult_state_q + 5'h1; //wraps from 31 back to 0

      if (ADD_TYPE)
        accum_q <= mult_result_o;
      else
        accum_q <= op_acc_i;

      if (B_SHIFT)
        op_b_shift_q <= op_b_shift >> 1;
      else
        op_b_shift_q <= '0;
    end
  end

  // Combinational datapath, in dependency order.
  always_comb begin
    // the first step uses the inputs directly, later steps the registers
    op_b_shift    = at_bit0 ? op_b_i : op_b_shift_q;
    pp_acc_o      = at_bit0 ? 32'h0  : accum_q;
    bit_b         = B_SHIFT ? op_b_shift[0] : op_b_i[mult_state_q];
    // partial product: A gated with the current bit of B, at that bit position
    mult_op_a     = op_a_i & {32{bit_b}};
    mult_extended = mult_op_a << mult_state_q;
    mult_result_o = ADD_TYPE ? mult_extended + pp_acc_o : mult_extended;
  end
endmodule // zeroriscy_mult32

95
mult_33.sv Normal file
View file

@ -0,0 +1,95 @@
// Copyright 2015 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////
// Engineer: Matthias Baer - baermatt@student.ethz.ch //
// //
// Additional contributions by: //
// Andreas Traber - atraber@student.ethz.ch //
// //
// Design Name: Subword multiplier and MAC //
// Project Name: RI5CY //
// Language: SystemVerilog //
// //
// Description: Advanced MAC unit for PULP. //
// //
////////////////////////////////////////////////////////////////////////////////
import zeroriscy_defines::*;
// zeroriscy_mult33 (mult_33.sv)
//
// Bit-serial shift-and-add multiplier producing the low 32 bits of
// op_a_i * op_b_i.
//
//   MULT_IDLE    captures both operands and clears the accumulator
//   MULT_COMP    32 enabled cycles; each cycle A is shifted left and B right by
//                one, and the partial product (A gated with B[0]) is accumulated
//   MULT_FINISH  ready_o is high for this state, then the FSM returns to idle
//
// With ADD_TYPE != 0 the addition is done here (mult_result_o = accum_q +
// partial product); otherwise mult_result_o is the bare partial product,
// pp_acc_o the running sum and accum_q is loaded from op_acc_i.
module zeroriscy_mult33
#(
  parameter ADD_TYPE = 0 //0 shared
)
(
  input  logic        clk,
  input  logic        rst_n,
  input  logic        mult_en_i,
  // input logic [ 2:0] operator_i,
  input  logic [31:0] op_a_i,
  input  logic [31:0] op_b_i,
  input  logic [31:0] op_acc_i,
  output logic [31:0] pp_acc_o,
  output logic [31:0] mult_result_o,
  output logic        ready_o
);

  logic [ 4:0] mult_state_q;
  enum logic [1:0] { MULT_IDLE, MULT_COMP, MULT_FINISH } curr_state_q;
  logic [31:0] accum_q;
  logic [31:0] op_b_shift_q;
  logic [31:0] op_a_shift_q;
  logic [31:0] mult_op_a;
  logic        bit_b;

  always_ff @(posedge clk or negedge rst_n) begin : proc_mult_state_q
    if (~rst_n) begin
      mult_state_q <= '0;
      accum_q      <= '0;
      op_b_shift_q <= '0;
      op_a_shift_q <= '0;
      curr_state_q <= MULT_IDLE;
    end else if (mult_en_i) begin
      unique case (curr_state_q)
        MULT_IDLE: begin
          // latch the operands and start from an empty accumulator
          curr_state_q <= MULT_COMP;
          mult_state_q <= 5'd0;
          accum_q      <= '0;
          op_a_shift_q <= op_a_i;
          op_b_shift_q <= op_b_i;
        end
        MULT_COMP: begin
          // leave after the 32nd bit (counter value 31) has been processed
          if (mult_state_q == 5'd31)
            curr_state_q <= MULT_FINISH;
          else
            curr_state_q <= MULT_COMP;
          mult_state_q <= mult_state_q + 5'd1;
          if (ADD_TYPE)
            accum_q <= mult_result_o;
          else
            accum_q <= op_acc_i;
          op_a_shift_q <= {op_a_shift_q[30:0], 1'b0};
          op_b_shift_q <= {1'b0, op_b_shift_q[31:1]};
        end
        MULT_FINISH: begin
          curr_state_q <= MULT_IDLE;
        end
        default:;
      endcase // curr_state_q
    end
  end

  // partial product: shifted A gated with the current LSB of shifted B
  assign mult_op_a     = op_a_shift_q & {32{op_b_shift_q[0]}};
  assign ready_o       = (curr_state_q == MULT_FINISH);
  assign pp_acc_o      = accum_q;
  assign mult_result_o = ADD_TYPE ? accum_q + mult_op_a : mult_op_a;
endmodule // zeroriscy_mult33

114
mult_33_hq.sv Normal file
View file

@ -0,0 +1,114 @@
// Copyright 2015 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////
// Engineer: Matthias Baer - baermatt@student.ethz.ch //
// //
// Additional contributions by: //
// Andreas Traber - atraber@student.ethz.ch //
// //
// Design Name: Subword multiplier and MAC //
// Project Name: RI5CY //
// Language: SystemVerilog //
// //
// Description: Advanced MAC unit for PULP. //
// //
////////////////////////////////////////////////////////////////////////////////
import zeroriscy_defines::*;
// zeroriscy_mult33_hq
//
// Bit-serial multiplier with a 64-bit accumulator split into a low and a high
// word. The low word is added inside this module; the high-word operands are
// exported on alu_operand_a_o / alu_operand_b_o and the sum is read back on
// alu_adder_i (presumably the core's shared ALU adder, with carry_out_mul_o as
// its carry-in -- TODO confirm against the ALU).
//
//   MULT_IDLE    captures the operands and clears both accumulator words
//   MULT_COMP    32 enabled cycles; A (64 bit) is shifted left, B right, and
//                the partial product (A gated with B[0]) is accumulated
//   MULT_FINISH  ready_o is high, then the FSM returns to idle
//
// mult_result_o is accum_high_q for MUL_H and accum_low_q otherwise.
//
// Ports
//   operator_i      MUL_L / MUL_H (constants from zeroriscy_defines)
//   signed_mode_i   bit 0: op_a_i is signed, bit 1: op_b_i is signed. This is
//                   the bit assignment used by zeroriscy_mult16_hq and
//                   zeroriscy_multBW33_hq in this commit.
//
// Fixes relative to the previous revision of this block:
//   * op_a_i was always sign-extended to 64 bits ($signed(op_a_i)), which is
//     wrong for an unsigned A in MUL_H. The extension now follows
//     signed_mode_i[0].
//   * the last partial product was subtracted whenever op_b_i[31] was set,
//     which is only right for a signed B. do_sub_o is now gated with
//     signed_mode_i[1].
//   With signed_mode_i == 2'b11 the behaviour is unchanged.
//
// NOTE(review): when do_sub_o is set, the low adder computes
// (partial product - accum_low_q) while the high operands are both inverted.
// That does not obviously form accum - partial product; verify the subtraction
// path together with the ALU before relying on signed MUL_H results.
module zeroriscy_mult33_hq
(
  input  logic        clk,
  input  logic        rst_n,
  input  logic        mult_en_i,
  input  logic        operator_i,
  input  logic [1:0]  signed_mode_i,
  input  logic [31:0] op_a_i,
  input  logic [31:0] op_b_i,
  input  logic [31:0] alu_adder_i,
  output logic        do_sub_o,
  output logic [31:0] alu_operand_a_o,
  output logic [31:0] alu_operand_b_o,
  output logic [31:0] mult_result_o,
  output logic        carry_out_mul_o,
  output logic        ready_o
);

  logic [ 4:0] mult_state_q;
  enum logic [1:0] { MULT_IDLE, MULT_COMP, MULT_FINISH } curr_state_q;

  logic [31:0] accum_high_q, accum_low_q;
  logic [33:0] res_adder_low_ext;
  logic [31:0] res_adder_low;
  logic [31:0] res_adder_high;
  logic [31:0] op_b_shift_q;
  logic [63:0] op_a_shift_q;
  logic [63:0] mult_op_a;

  assign res_adder_high  = alu_adder_i;
  assign carry_out_mul_o = res_adder_low_ext[33];
  assign res_adder_low   = res_adder_low_ext[32:1];

  // Low-word adder. The extra LSB pair (1 + do_sub_o) injects do_sub_o as
  // carry-in of bit 1, i.e. bits [32:1] = pp_low + (accum_low ^ sub) + sub.
  assign res_adder_low_ext = {mult_op_a[31:0],1'b1} + {accum_low_q ^ {32{do_sub_o}}, do_sub_o};

  assign alu_operand_a_o = mult_op_a[63:32] ^ {32{do_sub_o}};
  assign alu_operand_b_o = accum_high_q     ^ {32{do_sub_o}};

  always_ff @(posedge clk or negedge rst_n) begin : proc_mult_state_q
    if (~rst_n) begin
      mult_state_q <= '0;
      accum_low_q  <= '0;
      accum_high_q <= '0;
      op_b_shift_q <= '0;
      op_a_shift_q <= '0;
      curr_state_q <= MULT_IDLE;
    end else begin
      if (mult_en_i) begin
        unique case (curr_state_q)
          MULT_IDLE: begin
            // sign- or zero-extend A to 64 bits depending on signed_mode_i[0]
            op_a_shift_q <= {{32{signed_mode_i[0] & op_a_i[31]}}, op_a_i};
            op_b_shift_q <= op_b_i;
            mult_state_q <= 5'd0;
            accum_low_q  <= '0;
            accum_high_q <= '0;
            curr_state_q <= MULT_COMP;
          end
          MULT_COMP: begin
            op_a_shift_q <= op_a_shift_q << 1;
            op_b_shift_q <= op_b_shift_q >> 1;
            mult_state_q <= mult_state_q + 1;
            accum_low_q  <= res_adder_low;
            accum_high_q <= res_adder_high;
            curr_state_q <= mult_state_q == 5'd31 ? MULT_FINISH : MULT_COMP;
          end
          MULT_FINISH: begin
            curr_state_q <= MULT_IDLE;
          end
          default:;
        endcase // curr_state_q
      end
    end
  end

  // The partial product of bit 31 carries negative weight only for a signed B.
  assign do_sub_o      = signed_mode_i[1] && op_b_shift_q[0] && mult_state_q == 5'd31 && operator_i == MUL_H;
  assign mult_op_a     = op_a_shift_q & {64{op_b_shift_q[0]}};
  assign ready_o       = curr_state_q == MULT_FINISH;
  assign mult_result_o = operator_i == MUL_H ? accum_high_q : accum_low_q;
endmodule // zeroriscy_mult33_hq

140
mult_BW33_hq.sv Normal file
View file

@ -0,0 +1,140 @@
// Copyright 2015 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////
// Engineer: Matthias Baer - baermatt@student.ethz.ch //
// //
// Additional contributions by: //
// Andreas Traber - atraber@student.ethz.ch //
// //
// Design Name: Subword multiplier and MAC //
// Project Name: RI5CY //
// Language: SystemVerilog //
// //
// Description: Advanced MAC unit for PULP. //
// //
////////////////////////////////////////////////////////////////////////////////
import zeroriscy_defines::*;
// zeroriscy_multBW33_hq
//
// Bit-serial 33x33 multiplier that uses an external adder: the two addends are
// exported on alu_operand_a_o / alu_operand_b_o and the 34-bit sum is read back
// on alu_adder_ext_i. The operands are extended to 33 bits according to
// signed_mode_i (bit 0: op_a_i signed, bit 1: op_b_i signed). The partial
// products have their top bit inverted, and the last one (MULT_LASTPP, MUL_H)
// is complemented the other way round -- presumably a Baugh-Wooley style signed
// scheme, going by the "BW"/"bw" naming; TODO confirm.
//
//   MULT_IDLE    captures the operands and forms the first partial product
//   MULT_COMP    one partial product per enabled cycle. For MUL_L operand A is
//                shifted left and the full sum kept; for MUL_H the sum is
//                shifted right by one instead (res_adder_h).
//   MULT_LASTPP  adds the final partial product
//   MULT_FINISH  ready_o is high, then the FSM returns to idle
//
// Fixes relative to the previous revision of this block:
//   * mult_result_o (32 bit) was assigned from the 33-bit accum_window_q by
//     implicit truncation; the slice is now explicit.
//   * alu_operand_b_o was assigned only inside a "unique case" without a
//     default; it now has a default assignment, so no latch is inferred and it
//     is never left unassigned. Assumes MUL_L and MUL_H are distinct -- confirm
//     against zeroriscy_defines.
module zeroriscy_multBW33_hq
(
  input  logic        clk,
  input  logic        rst_n,
  input  logic        mult_en_i,
  input  logic        operator_i,
  input  logic [1:0]  signed_mode_i,
  input  logic [31:0] op_a_i,
  input  logic [31:0] op_b_i,
  input  logic [33:0] alu_adder_ext_i,
  output logic [32:0] alu_operand_a_o,
  output logic [32:0] alu_operand_b_o,
  output logic [31:0] mult_result_o,
  output logic        ready_o
);

  logic [ 4:0] mult_state_q;
  enum logic [1:0] { MULT_IDLE, MULT_COMP, MULT_LASTPP, MULT_FINISH } curr_state_q;

  logic [32:0] accum_window_q;
  logic [32:0] res_adder_l;
  logic [32:0] res_adder_h;
  logic [32:0] op_b_shift_q;
  logic [32:0] op_a_shift_q;
  logic [32:0] op_a_ext, op_b_ext;
  logic [32:0] op_a_bw_pp, op_a_bw_last_pp;
  logic [31:0] b_0;

  //(accum_window_q + op_a_shift_q)
  assign res_adder_l = alu_adder_ext_i[32:0];
  //(accum_window_q + op_a_shift_q)>>1
  assign res_adder_h = alu_adder_ext_i[33:1];

  // Operands for the external adder: the accumulator window plus the current
  // partial product (the complemented form only for the last one of a MUL_H).
  always_comb begin
    alu_operand_a_o = accum_window_q;
    alu_operand_b_o = op_a_bw_pp;
    if (operator_i == MUL_H && curr_state_q == MULT_LASTPP)
      alu_operand_b_o = op_a_bw_last_pp;
  end

  assign b_0 = {32{op_b_shift_q[0]}};

  //build the partial product
  assign op_a_bw_pp      = { ~(op_a_shift_q[32] & op_b_shift_q[0]), op_a_shift_q[31:0] & b_0 };
  assign op_a_bw_last_pp = { op_a_shift_q[32] & op_b_shift_q[0], ~(op_a_shift_q[31:0] & b_0) };

  // 33-bit operands: bit 32 is the sign when the operand is treated as signed
  assign op_a_ext = {op_a_i[31] & signed_mode_i[0], op_a_i};
  assign op_b_ext = {op_b_i[31] & signed_mode_i[1], op_b_i};

  always_ff @(posedge clk or negedge rst_n) begin : proc_mult_state_q
    if (~rst_n) begin
      mult_state_q   <= '0;
      accum_window_q <= '0;
      op_b_shift_q   <= '0;
      op_a_shift_q   <= '0;
      curr_state_q   <= MULT_IDLE;
    end else begin
      if (mult_en_i) begin
        unique case (curr_state_q)
          MULT_IDLE: begin
            // bit 0 of B is consumed here, so the counter starts at 1
            op_a_shift_q   <= operator_i == MUL_H ? op_a_ext : op_a_ext << 1;
            op_b_shift_q   <= op_b_ext >> 1;
            mult_state_q   <= 5'd1;
            accum_window_q <= operator_i == MUL_H ? { 1'b1, ~(op_a_ext[32] & op_b_i[0]), op_a_ext[31:1] & {31{op_b_i[0]}} } : { ~(op_a_ext[32] & op_b_i[0]), op_a_ext[31:0] & {32{op_b_i[0]}} };
            curr_state_q   <= MULT_COMP;
          end
          MULT_COMP: begin
            if (operator_i == MUL_L)
              op_a_shift_q <= op_a_shift_q << 1;
            op_b_shift_q   <= op_b_shift_q >> 1;
            mult_state_q   <= mult_state_q + 1;
            accum_window_q <= operator_i == MUL_H ? res_adder_h : res_adder_l;
            if (mult_state_q == 5'd31)
              //if(operator_i == MUL_H)
              curr_state_q <= MULT_LASTPP;
              //else
              // curr_state_q <= MULT_FINISH;
            else
              curr_state_q <= MULT_COMP;
          end
          MULT_LASTPP: begin
            accum_window_q <= res_adder_l;
            curr_state_q   <= MULT_FINISH;
          end
          MULT_FINISH: begin
            curr_state_q <= MULT_IDLE;
          end
          default:;
        endcase // curr_state_q
      end
    end
  end

  assign ready_o       = curr_state_q == MULT_FINISH;
  // only the low 32 bits of the 33-bit window are the result
  assign mult_result_o = accum_window_q[31:0];
endmodule // zeroriscy_multBW33_hq

286
mult_dir/alu.sv Normal file
View file

@ -0,0 +1,286 @@
// Copyright 2017 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////
// Engineer: Markus Wegmann - markus.wegmann@technokrat.ch //
// //
// Additional contributions by: //
// Davide Schiavone - pschiavo@iis.ee.ethz.ch //
// //
// Design Name: ALU //
// Project Name: zero-riscy //
// Language: SystemVerilog //
// //
// Description: Arithmetic logic unit of the pipelined processor. //
// Reduced in area and ISA (RV32I) for small area //
// and power consumption. Based on ALU by Matthias Baer. //
// //
////////////////////////////////////////////////////////////////////////////////
`include "zeroriscy_config.sv"
import zeroriscy_defines::*;
// Purely combinational ALU: a right shifter (left shifts are done by bit
// reversing the input and the output), a 32-bit adder/subtractor, a comparator
// built on top of the adder, and a final result multiplexer.  All functions
// are selected by operator_i.
//
// mult_en_i modifies the datapath in two places:
//   * the bit-reversed shifter input is taken from operand A masked with bit
//     shifter_amt_i of operand B instead of from plain operand A
//   * the first adder input is the shifter result instead of operand A
module zeroriscy_alu
(
  input  logic [ALU_OP_WIDTH-1:0] operator_i,
  input  logic [31:0]             operand_a_i,
  input  logic [31:0]             operand_b_i,
  input  logic                    mult_en_i,
  input  logic [4:0]              shifter_amt_i,

  output logic [31:0]             adder_result_o,
  output logic [31:0]             result_o,
  output logic                    comparison_result_o
);

  //--------------------------------------------------------------------------
  // Shifter
  //--------------------------------------------------------------------------
  logic [31:0] a_gated;        // operand A AND-ed with the selected bit of B
  logic [31:0] a_mirrored;     // bit-reversed shifter input (left shifts)
  logic        sel_sll;
  logic        sel_sra;
  logic [31:0] shifter_in;
  logic [32:0] shifter_in_sx;  // shifter input plus one fill bit on top
  logic [31:0] shr_out;
  logic [31:0] shl_out;
  logic [31:0] shifter_out;

  assign a_gated = {32{operand_b_i[shifter_amt_i]}} & operand_a_i;

  // mirror the word so that a left shift can reuse the right shifter
  always_comb
  begin
    for (int i = 0; i < 32; i++)
      a_mirrored[i] = mult_en_i ? a_gated[31-i] : operand_a_i[31-i];
  end

  assign sel_sll = (operator_i == ALU_SLL);
  assign sel_sra = (operator_i == ALU_SRA);

  assign shifter_in    = sel_sll ? a_mirrored : operand_a_i;
  // the extra top bit is the sign for arithmetic shifts and 0 otherwise, so
  // a single arithmetic right shift serves both SRL and SRA
  assign shifter_in_sx = {sel_sra & shifter_in[31], shifter_in};
  assign shr_out       = $signed(shifter_in_sx) >>> shifter_amt_i;

  // mirror back to obtain the left-shift result
  always_comb
  begin
    for (int i = 0; i < 32; i++)
      shl_out[i] = shr_out[31-i];
  end

  assign shifter_out = sel_sll ? shl_out : shr_out;

  //--------------------------------------------------------------------------
  // Adder
  //--------------------------------------------------------------------------
  logic        negate_b;
  logic [32:0] addend_a;
  logic [32:0] addend_b;
  logic [33:0] sum_ext;
  logic [31:0] sum;

  // subtraction and every comparison operate on A - B
  always_comb
  begin
    unique case (operator_i)
      ALU_SUB,
      ALU_EQ,    ALU_NE,
      ALU_GTU,   ALU_GEU,
      ALU_LTU,   ALU_LEU,
      ALU_GTS,   ALU_GES,
      ALU_LTS,   ALU_LES,
      ALU_SLTS,  ALU_SLTU,
      ALU_SLETS, ALU_SLETU: negate_b = 1'b1;
      default:              negate_b = 1'b0;
    endcase
  end

  // An extra LSB is appended to both addends: A always gets 1, B gets 0 and is
  // then inverted together with the word when negating.  The carry out of
  // that LSB position supplies the "+1" of the two's complement.
  assign addend_a = {(mult_en_i ? shifter_out : operand_a_i), 1'b1};
  assign addend_b = {operand_b_i, 1'b0} ^ {33{negate_b}};

  assign sum_ext = $signed(addend_a) + $signed(addend_b);
  assign sum     = sum_ext[32:1];

  assign adder_result_o = sum;

  //--------------------------------------------------------------------------
  // Comparator
  //--------------------------------------------------------------------------
  logic eq;
  logic ge;          // greater-or-equal, signed or unsigned
  logic signed_cmp;
  logic cmp_out;

  always_comb
  begin
    unique case (operator_i)
      ALU_GTS,
      ALU_GES,
      ALU_LTS,
      ALU_LES,
      ALU_SLTS,
      ALU_SLETS: signed_cmp = 1'b1;
      default:   signed_cmp = 1'b0;
    endcase
  end

  assign eq = (sum == 32'd0);

  // Same sign bits: the sign of A - B decides.
  // Different sign bits:
  //   unsigned -> the operand with the MSB set is larger  (ge = a[31])
  //   signed   -> the operand with the MSB set is smaller (ge = ~a[31])
  always_comb
  begin
    if (operand_a_i[31] == operand_b_i[31])
      ge = ~sum[31];
    else
      ge = operand_a_i[31] ^ signed_cmp;
  end

  always_comb
  begin
    unique case (operator_i)
      ALU_NE:             cmp_out = ~eq;
      ALU_GTS, ALU_GTU:   cmp_out = ge & ~eq;
      ALU_GES, ALU_GEU:   cmp_out = ge;
      ALU_LTS, ALU_SLTS,
      ALU_LTU, ALU_SLTU:  cmp_out = ~ge;
      ALU_SLETS,
      ALU_SLETU,
      ALU_LES, ALU_LEU:   cmp_out = ~ge | eq;
      default:            cmp_out = eq;   // includes ALU_EQ
    endcase
  end

  assign comparison_result_o = cmp_out;

  //--------------------------------------------------------------------------
  // Result multiplexer
  //--------------------------------------------------------------------------
  always_comb
  begin
    unique case (operator_i)
      // bitwise logic
      ALU_AND: result_o = operand_a_i & operand_b_i;
      ALU_OR:  result_o = operand_a_i | operand_b_i;
      ALU_XOR: result_o = operand_a_i ^ operand_b_i;

      // add / subtract
      ALU_ADD, ALU_SUB: result_o = sum;

      // shifts
      ALU_SLL,
      ALU_SRL, ALU_SRA: result_o = shifter_out;

      // comparisons: single result bit, zero extended
      ALU_EQ,    ALU_NE,
      ALU_GTU,   ALU_GEU,
      ALU_LTU,   ALU_LEU,
      ALU_GTS,   ALU_GES,
      ALU_LTS,   ALU_LES,
      ALU_SLTS,  ALU_SLTU,
      ALU_SLETS, ALU_SLETU: result_o = {31'b0, cmp_out};

      // any other operator leaves the result undefined
      default: result_o = 'x;
    endcase
  end

endmodule

550
mult_dir/decoder.sv Normal file
View file

@ -0,0 +1,550 @@
// Copyright 2017 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////
// Engineer Andreas Traber - atraber@iis.ee.ethz.ch //
// //
// Additional contributions by: //
// Matthias Baer - baermatt@student.ethz.ch //
// Igor Loi - igor.loi@unibo.it //
// Sven Stucki - svstucki@student.ethz.ch //
// Davide Schiavone - pschiavo@iis.ee.ethz.ch //
// Markus Wegmann - markus.wegmann@technokrat.ch //
// //
// Design Name: Decoder //
// Project Name: zero-riscy //
// Language: SystemVerilog //
// //
// Description: Decoder //
// //
////////////////////////////////////////////////////////////////////////////////
`include "zeroriscy_config.sv"
import zeroriscy_defines::*;
// Combinational instruction decoder.
//
// A single always_comb block first assigns a default to every control signal
// and then overrides the relevant ones depending on the major opcode
// instr_rdata_i[6:0] (and funct3 / funct7 where needed).  Statement order
// inside that block matters: later assignments win.
//
// Signals that can change architectural state or redirect the pipeline are
// decoded into internal copies and gated with deassert_we_i at the bottom of
// the module.
module zeroriscy_decoder
(
// signals running to/from controller
input logic deassert_we_i, // deassert we, we are stalled or not active
input logic data_misaligned_i, // misaligned data load/store in progress
input logic branch_2nd_stage_i,
output logic illegal_insn_o, // illegal instruction encountered
output logic ebrk_insn_o, // trap instruction encountered
output logic eret_insn_o, // return from exception instruction encountered
output logic ecall_insn_o, // environment call (syscall) instruction encountered
output logic pipe_flush_o, // pipeline flush is requested
output logic rega_used_o, // rs1 is used by current instruction
output logic regb_used_o, // rs2 is used by current instruction
// from IF/ID pipeline
input logic [31:0] instr_rdata_i, // instruction read from instr memory/cache
input logic illegal_c_insn_i, // compressed instruction decode failed
// ALU signals
output logic [ALU_OP_WIDTH-1:0] alu_operator_o, // ALU operation selection
output logic [2:0] alu_op_a_mux_sel_o, // operand a selection: reg value, PC, immediate or zero
output logic [2:0] alu_op_b_mux_sel_o, // operand b selection: reg value or immediate
output logic [1:0] alu_op_c_mux_sel_o, // operand c selection: reg value or jump target
output logic [0:0] imm_a_mux_sel_o, // immediate selection for operand a
output logic [3:0] imm_b_mux_sel_o, // immediate selection for operand b
// MUL related control signals
output logic mult_int_en_o, // perform integer multiplication
// register file related signals
output logic regfile_mem_we_o, // write enable for regfile
output logic regfile_alu_we_o, // write enable for 2nd regfile port
// CSR manipulation
output logic csr_access_o, // access to CSR
output logic [1:0] csr_op_o, // operation to perform on CSR
// LD/ST unit signals
output logic data_req_o, // start transaction to data memory
output logic data_we_o, // data memory write enable
output logic [1:0] data_type_o, // data type on data memory: byte, half word or word
output logic data_sign_extension_o, // sign extension on read data from data memory
output logic [1:0] data_reg_offset_o, // offset in byte inside register for stores
output logic data_load_event_o, // data request is in the special event range
// jump/branches
output logic [1:0] jump_in_dec_o, // jump_in_id without deassert
output logic [1:0] jump_in_id_o // jump is being calculated in ALU
);
// write enable/request control
// (ungated versions; the *_o outputs are these values gated by deassert_we_i)
logic regfile_mem_we;
logic regfile_alu_we;
logic data_req;
logic ebrk_insn;
logic eret_insn;
logic pipe_flush;
logic [1:0] jump_in_id;
logic [1:0] csr_op;
/////////////////////////////////////////////
// Decoder                                 //
/////////////////////////////////////////////
always_comb
begin
// Defaults: every output of this block is assigned here, so each opcode
// branch below only has to state what differs.
jump_in_id = BRANCH_NONE;
alu_operator_o = ALU_SLTU;
alu_op_a_mux_sel_o = OP_A_REGA_OR_FWD;
alu_op_b_mux_sel_o = OP_B_REGB_OR_FWD;
alu_op_c_mux_sel_o = OP_C_REGC_OR_FWD;
imm_a_mux_sel_o = IMMA_ZERO;
imm_b_mux_sel_o = IMMB_I;
mult_int_en_o = 1'b0;
regfile_mem_we = 1'b0;
regfile_alu_we = 1'b0;
csr_access_o = 1'b0;
csr_op = CSR_OP_NONE;
data_we_o = 1'b0;
data_type_o = 2'b00;
data_sign_extension_o = 1'b0;
data_reg_offset_o = 2'b00;
data_req = 1'b0;
data_load_event_o = 1'b0;
illegal_insn_o = 1'b0;
ebrk_insn = 1'b0;
eret_insn = 1'b0;
ecall_insn_o = 1'b0;
pipe_flush = 1'b0;
rega_used_o = 1'b0;
regb_used_o = 1'b0;
unique case (instr_rdata_i[6:0])
//////////////////////////////////////
// Jumps                            //
//////////////////////////////////////
OPCODE_JAL: begin // Jump and Link
jump_in_id = BRANCH_JAL;
// Calculate jump target in EX
alu_op_a_mux_sel_o = OP_A_CURRPC;
alu_op_b_mux_sel_o = OP_B_IMM;
imm_b_mux_sel_o = IMMB_UJ;
alu_operator_o = ALU_ADD;
regfile_alu_we = 1'b1;
alu_op_c_mux_sel_o = OP_C_RA; // Pipeline return address to EX
end
OPCODE_JALR: begin // Jump and Link Register
jump_in_id = BRANCH_JALR;
// Calculate jump target in EX
// NOTE(review): operand b is selected as OP_B_ZERO here, so the immediate
// selected below would not reach the adder through operand b - confirm how
// the JALR offset is applied.
alu_op_a_mux_sel_o = OP_A_REGA_OR_FWD;
alu_op_b_mux_sel_o = OP_B_ZERO;
imm_b_mux_sel_o = IMMB_SB;
alu_operator_o = ALU_ADD;
regfile_alu_we = 1'b1;
rega_used_o = 1'b1;
// funct3 must be zero; otherwise cancel the jump and the link write
if (instr_rdata_i[14:12] != 3'b0) begin
jump_in_id = BRANCH_NONE;
regfile_alu_we = 1'b0;
illegal_insn_o = 1'b1;
end
alu_op_c_mux_sel_o = OP_C_RA; // Pipeline return address to EX
end
OPCODE_BRANCH: begin // Branch
jump_in_id = BRANCH_COND;
rega_used_o = 1'b1;
regb_used_o = 1'b1;
// A branch is decoded in two passes selected by branch_2nd_stage_i:
// the first pass picks the comparison, the second computes the target.
if (~branch_2nd_stage_i)
begin
unique case (instr_rdata_i[14:12])
3'b000: alu_operator_o = ALU_EQ;
3'b001: alu_operator_o = ALU_NE;
3'b100: alu_operator_o = ALU_LTS;
3'b101: alu_operator_o = ALU_GES;
3'b110: alu_operator_o = ALU_LTU;
3'b111: alu_operator_o = ALU_GEU;
// funct3 010 / 011: compare rs1 against an immediate instead of rs2
// (not part of the base RISC-V branch encodings - presumably a PULP
// extension, confirm)
3'b010: begin
alu_operator_o = ALU_EQ;
regb_used_o = 1'b0;
alu_op_b_mux_sel_o = OP_B_IMM;
imm_b_mux_sel_o = IMMB_BI;
end
3'b011: begin
alu_operator_o = ALU_NE;
regb_used_o = 1'b0;
alu_op_b_mux_sel_o = OP_B_IMM;
imm_b_mux_sel_o = IMMB_BI;
end
default: begin
illegal_insn_o = 1'b1;
end
endcase
end
else begin
// Calculate jump target in EX
alu_op_a_mux_sel_o = OP_A_CURRPC;
alu_op_b_mux_sel_o = OP_B_IMM;
imm_b_mux_sel_o = IMMB_SB;
alu_operator_o = ALU_ADD;
regfile_alu_we = 1'b0;
rega_used_o = 1'b1;
end
end
//////////////////////////////////
// Load / Store                 //
//////////////////////////////////
OPCODE_STORE: begin
data_req = 1'b1;
data_we_o = 1'b1;
rega_used_o = 1'b1;
regb_used_o = 1'b1;
alu_operator_o = ALU_ADD;
// pass write data through ALU operand c
alu_op_c_mux_sel_o = OP_C_REGB_OR_FWD;
if (instr_rdata_i[14] == 1'b0) begin
// offset from immediate
imm_b_mux_sel_o = IMMB_S;
alu_op_b_mux_sel_o = OP_B_IMM;
end
// Register offset is illegal since no register c available
else begin
data_req = 1'b0;
data_we_o = 1'b0;
illegal_insn_o = 1'b1;
end
// store size
unique case (instr_rdata_i[13:12])
2'b00: data_type_o = 2'b10; // SB
2'b01: data_type_o = 2'b01; // SH
2'b10: data_type_o = 2'b00; // SW
default: begin
data_req = 1'b0;
data_we_o = 1'b0;
illegal_insn_o = 1'b1;
end
endcase
end
OPCODE_LOAD: begin
data_req = 1'b1;
regfile_mem_we = 1'b1;
rega_used_o = 1'b1;
data_type_o = 2'b00;
// offset from immediate
alu_operator_o = ALU_ADD;
alu_op_b_mux_sel_o = OP_B_IMM;
imm_b_mux_sel_o = IMMB_I;
// sign/zero extension
data_sign_extension_o = ~instr_rdata_i[14];
// load size
unique case (instr_rdata_i[13:12])
2'b00: data_type_o = 2'b10; // LB
2'b01: data_type_o = 2'b01; // LH
2'b10: data_type_o = 2'b00; // LW
default: data_type_o = 2'b00; // illegal or reg-reg
endcase
// reg-reg load (different encoding)
// overrides the size / sign-extension / operand b choices made above
if (instr_rdata_i[14:12] == 3'b111) begin
// offset from RS2
regb_used_o = 1'b1;
alu_op_b_mux_sel_o = OP_B_REGB_OR_FWD;
// sign/zero extension
data_sign_extension_o = ~instr_rdata_i[30];
// load size
unique case (instr_rdata_i[31:25])
7'b0000_000,
7'b0100_000: data_type_o = 2'b10; // LB, LBU
7'b0001_000,
7'b0101_000: data_type_o = 2'b01; // LH, LHU
7'b0010_000: data_type_o = 2'b00; // LW
default: begin
illegal_insn_o = 1'b1;
end
endcase
end
// special p.elw (event load)
if (instr_rdata_i[14:12] == 3'b110)
data_load_event_o = 1'b1;
// NOTE(review): data_req and regfile_mem_we stay asserted for the illegal
// load encodings flagged in this branch, unlike the store branch above.
if (instr_rdata_i[14:12] == 3'b011) begin
// LD -> RV64 only
illegal_insn_o = 1'b1;
end
end
//////////////////////////
// ALU                  //
//////////////////////////
OPCODE_LUI: begin // Load Upper Immediate
// computed as (zero immediate) + (U immediate)
alu_op_a_mux_sel_o = OP_A_IMM;
alu_op_b_mux_sel_o = OP_B_IMM;
imm_a_mux_sel_o = IMMA_ZERO;
imm_b_mux_sel_o = IMMB_U;
alu_operator_o = ALU_ADD;
regfile_alu_we = 1'b1;
end
OPCODE_AUIPC: begin // Add Upper Immediate to PC
alu_op_a_mux_sel_o = OP_A_CURRPC;
alu_op_b_mux_sel_o = OP_B_IMM;
imm_b_mux_sel_o = IMMB_U;
alu_operator_o = ALU_ADD;
regfile_alu_we = 1'b1;
end
OPCODE_OPIMM: begin // Register-Immediate ALU Operations
alu_op_b_mux_sel_o = OP_B_IMM;
imm_b_mux_sel_o = IMMB_I;
regfile_alu_we = 1'b1;
rega_used_o = 1'b1;
unique case (instr_rdata_i[14:12])
3'b000: alu_operator_o = ALU_ADD; // Add Immediate
3'b010: alu_operator_o = ALU_SLTS; // Set to one if Lower Than Immediate
3'b011: alu_operator_o = ALU_SLTU; // Set to one if Lower Than Immediate Unsigned
3'b100: alu_operator_o = ALU_XOR; // Exclusive Or with Immediate
3'b110: alu_operator_o = ALU_OR; // Or with Immediate
3'b111: alu_operator_o = ALU_AND; // And with Immediate
3'b001: begin
alu_operator_o = ALU_SLL; // Shift Left Logical by Immediate
if (instr_rdata_i[31:25] != 7'b0)
illegal_insn_o = 1'b1;
end
3'b101: begin
if (instr_rdata_i[31:25] == 7'b0)
alu_operator_o = ALU_SRL; // Shift Right Logical by Immediate
else if (instr_rdata_i[31:25] == 7'b010_0000)
alu_operator_o = ALU_SRA; // Shift Right Arithmetically by Immediate
else
illegal_insn_o = 1'b1;
end
default: illegal_insn_o = 1'b1;
endcase
end
OPCODE_OP: begin // Register-Register ALU operation
regfile_alu_we = 1'b1;
rega_used_o = 1'b1;
// encodings with bit 31 set are not supported
if (instr_rdata_i[31]) begin
illegal_insn_o = 1'b1;
end
else
begin // non bit-manipulation instructions
if (~instr_rdata_i[28])
regb_used_o = 1'b1;
// selector is {instr[30:25], funct3}
unique case ({instr_rdata_i[30:25], instr_rdata_i[14:12]})
// RV32I ALU operations
{6'b00_0000, 3'b000}: alu_operator_o = ALU_ADD; // Add
{6'b10_0000, 3'b000}: alu_operator_o = ALU_SUB; // Sub
{6'b00_0000, 3'b010}: alu_operator_o = ALU_SLTS; // Set Lower Than
{6'b00_0000, 3'b011}: alu_operator_o = ALU_SLTU; // Set Lower Than Unsigned
{6'b00_0000, 3'b100}: alu_operator_o = ALU_XOR; // Xor
{6'b00_0000, 3'b110}: alu_operator_o = ALU_OR; // Or
{6'b00_0000, 3'b111}: alu_operator_o = ALU_AND; // And
{6'b00_0000, 3'b001}: alu_operator_o = ALU_SLL; // Shift Left Logical
{6'b00_0000, 3'b101}: alu_operator_o = ALU_SRL; // Shift Right Logical
{6'b10_0000, 3'b101}: alu_operator_o = ALU_SRA; // Shift Right Arithmetic
{6'b00_0010, 3'b010}: alu_operator_o = ALU_SLETS; // Set Lower Equal Than
{6'b00_0010, 3'b011}: alu_operator_o = ALU_SLETU; // Set Lower Equal Than Unsigned
// NOTE(review): only the enable is set for mul; alu_operator_o keeps its
// default (ALU_SLTU) - confirm that is what the EX stage expects.
{6'b00_0001, 3'b000}: mult_int_en_o = 1'b1; // mul
default: begin
illegal_insn_o = 1'b1;
end
endcase
end
end
////////////////////////////////////////////////
// Special (SYSTEM opcode)                    //
////////////////////////////////////////////////
OPCODE_SYSTEM: begin
if (instr_rdata_i[14:12] == 3'b000)
begin
// non CSR related SYSTEM instructions
unique case (instr_rdata_i[31:20])
12'h000: // ECALL
begin
// environment (system) call
ecall_insn_o = 1'b1;
end
12'h001: // ebreak
begin
// debugger trap
ebrk_insn = 1'b1;
end
12'h302: // mret
begin
eret_insn = 1'b1;
end
12'h105: // wfi
begin
// flush pipeline
pipe_flush = 1'b1;
end
default:
begin
illegal_insn_o = 1'b1;
end
endcase
end
else
begin
// instruction to read/modify CSR
csr_access_o = 1'b1;
regfile_alu_we = 1'b1;
alu_op_b_mux_sel_o = OP_B_IMM;
imm_a_mux_sel_o = IMMA_Z;
imm_b_mux_sel_o = IMMB_I; // CSR address is encoded in I imm
if (instr_rdata_i[14] == 1'b1) begin
// rs1 field is used as immediate
alu_op_a_mux_sel_o = OP_A_IMM;
end else begin
rega_used_o = 1'b1;
alu_op_a_mux_sel_o = OP_A_REGA_OR_FWD;
end
// funct3[1:0] selects the CSR operation; 00 cannot occur with funct3[2]
// clear (handled above), so it is only reached for funct3 = 100
unique case (instr_rdata_i[13:12])
2'b01: csr_op = CSR_OP_WRITE;
2'b10: csr_op = CSR_OP_SET;
2'b11: csr_op = CSR_OP_CLEAR;
default: illegal_insn_o = 1'b1;
endcase
end
end
default: begin
illegal_insn_o = 1'b1;
end
endcase
// make sure invalid compressed instruction causes an exception
if (illegal_c_insn_i) begin
illegal_insn_o = 1'b1;
end
// misaligned access was detected by the LSU
// TODO: this section should eventually be moved out of the decoder
if (data_misaligned_i == 1'b1)
begin
// only part of the pipeline is unstalled, make sure that the
// correct operands are sent to the AGU
alu_op_a_mux_sel_o = OP_A_REGA_OR_FWD;
alu_op_b_mux_sel_o = OP_B_IMM;
imm_b_mux_sel_o = IMMB_PCINCR;
// if prepost increments are used, we do not write back the
// second address since the first calculated address was
// the correct one
regfile_alu_we = 1'b0;
end
end
// deassert we signals (in case of stalls)
// Note: illegal_insn_o and ecall_insn_o are driven directly by the block
// above and are not gated by deassert_we_i.
assign regfile_mem_we_o = (deassert_we_i) ? 1'b0 : regfile_mem_we;
assign regfile_alu_we_o = (deassert_we_i) ? 1'b0 : regfile_alu_we;
assign data_req_o = (deassert_we_i) ? 1'b0 : data_req;
assign csr_op_o = (deassert_we_i) ? CSR_OP_NONE : csr_op;
assign jump_in_id_o = (deassert_we_i) ? BRANCH_NONE : jump_in_id;
assign ebrk_insn_o = (deassert_we_i) ? 1'b0 : ebrk_insn;
assign eret_insn_o = (deassert_we_i) ? 1'b0 : eret_insn; // TODO: do not deassert?
assign pipe_flush_o = (deassert_we_i) ? 1'b0 : pipe_flush; // TODO: do not deassert?
assign jump_in_dec_o = jump_in_id;
endmodule // zeroriscy_decoder

97
mult_dir/ex_stage.sv Normal file
View file

@ -0,0 +1,97 @@
// Copyright 2017 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////
// Engineer: Renzo Andri - andrire@student.ethz.ch //
// //
// Additional contributions by: //
// Igor Loi - igor.loi@unibo.it //
// Sven Stucki - svstucki@student.ethz.ch //
// Andreas Traber - atraber@iis.ee.ethz.ch //
// Markus Wegmann - markus.wegmann@technokrat.ch //
// Davide Schiavone - pschiavo@iis.ee.ethz.ch //
// //
// Design Name: Execute stage //
// Project Name: zero-riscy //
// Language: SystemVerilog //
// //
// Description: Execution block: Hosts ALU and MUL unit //
// ALU: computes additions/subtractions/comparisons //
// MAC: //
// //
////////////////////////////////////////////////////////////////////////////////
`include "zeroriscy_config.sv"
import zeroriscy_defines::*;
// Execute block: wraps the ALU and produces the register-file write data,
// the jump/branch target and the branch decision.
//
// There is no stand-alone multiplier instance in this block (the old
// instantiation is kept below as a comment).  The only multiply-specific
// datapath is inside the ALU, which changes its shifter/adder inputs when
// mult_en_i is high.
module zeroriscy_ex_block
(
  // ALU signals from ID stage
  input  logic [ALU_OP_WIDTH-1:0] alu_operator_i,
  input  logic                    mult_en_i,
  input  logic [31:0]             alu_operand_a_i,
  input  logic [31:0]             alu_operand_b_i,
  // input logic [31:0] mult_operand_a_i,
  // input logic [31:0] mult_operand_b_i,
  input  logic [4:0]              shifter_amt_i,

  output logic [31:0]             alu_adder_result_ex_o,
  output logic [31:0]             regfile_wdata_ex_o,

  // To IF: Jump and branch target and decision
  output logic [31:0]             jump_target_o,
  output logic                    branch_decision_o,

  output logic                    ex_ready_o,
  output logic                    ex_valid_o
);

  logic [31:0] alu_result;
  logic [31:0] mult_result;
  logic        alu_cmp_result;

  // mult_result used to be read by the write-back mux without ever being
  // driven.  With mult_en_i set, the ALU routes the multiply datapath into its
  // adder, so the adder output is used as the multiplier result.
  // NOTE(review): confirm this is the intended source once the multiplier
  // integration is finalised.
  assign mult_result = alu_adder_result_ex_o;

  assign regfile_wdata_ex_o = mult_en_i ? mult_result : alu_result;

  // branch handling
  assign branch_decision_o = alu_cmp_result;
  assign jump_target_o     = alu_adder_result_ex_o;

  // Everything in this block is combinational, so a result is available in
  // the same cycle.  Drive the handshake outputs to a defined value instead
  // of leaving them floating.
  // NOTE(review): revisit if a multi-cycle unit is added to this block.
  assign ex_ready_o = 1'b1;
  assign ex_valid_o = 1'b1;

  ////////////////////////////
  // ALU                    //
  ////////////////////////////
  zeroriscy_alu alu_i
  (
    .operator_i          ( alu_operator_i        ),
    .operand_a_i         ( alu_operand_a_i       ),
    .operand_b_i         ( alu_operand_b_i       ),
    .mult_en_i           ( mult_en_i             ),
    .shifter_amt_i       ( shifter_amt_i         ),
    .adder_result_o      ( alu_adder_result_ex_o ),
    .result_o            ( alu_result            ),
    .comparison_result_o ( alu_cmp_result        )
  );

  /*
  zeroriscy_mult mult_i
  (
    .op_a_i   ( mult_operand_a_i ),
    .op_b_i   ( mult_operand_b_i ),
    .result_o ( mult_result      )
  );
  */

endmodule

798
mult_dir/id_stage.sv Normal file
View file

@ -0,0 +1,798 @@
// Copyright 2017 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////
// Engineer: Renzo Andri - andrire@student.ethz.ch //
// //
// Additional contributions by: //
// Igor Loi - igor.loi@unibo.it //
// Andreas Traber - atraber@student.ethz.ch //
// Sven Stucki - svstucki@student.ethz.ch //
// Davide Schiavone - pschiavo@iis.ee.ethz.ch //
// //
// Design Name: Instruction Decode Stage //
// Project Name: zero-riscy //
// Language: SystemVerilog //
// //
// Description: Decode stage of the core. It decodes the instructions //
// and hosts the register file. //
// //
////////////////////////////////////////////////////////////////////////////////
`include "zeroriscy_config.sv"
import zeroriscy_defines::*;
// Source/Destination register instruction index
`define REG_S1 19:15
`define REG_S2 24:20
`define REG_S3 29:25
`define REG_D 11:07
module zeroriscy_id_stage
#(
parameter REG_ADDR_WIDTH = 5
)
(
input logic clk,
input logic rst_n,
input logic test_en_i,
input logic fetch_enable_i,
output logic ctrl_busy_o,
output logic is_decoding_o,
// Interface to IF stage
input logic instr_valid_i,
input logic [31:0] instr_rdata_i, // comes from pipeline of IF stage
output logic instr_req_o,
// Jumps and branches
output logic branch_in_ex_o,
input logic branch_decision_i,
// IF and ID stage signals
output logic clear_instr_valid_o,
output logic pc_set_o,
output logic [2:0] pc_mux_o,
output logic [1:0] exc_pc_mux_o,
input logic illegal_c_insn_i,
input logic is_compressed_i,
input logic [31:0] pc_if_i,
input logic [31:0] pc_id_i,
// Stalls
output logic halt_if_o, // controller requests a halt of the IF stage
input logic if_ready_i, // IF stage is done
output logic id_ready_o, // ID stage is ready for the next instruction
input logic lsu_ready_ex_i,
input logic data_valid_lsu_i,
input logic wb_ready_i,
input logic if_valid_i, // IF stage is done
output logic id_valid_o, // ID stage is done
input logic wb_valid_i, // WB stage is done
// ALU
output logic [ALU_OP_WIDTH-1:0] alu_operator_ex_o,
output logic [31:0] alu_operand_a_ex_o,
output logic [31:0] alu_operand_b_ex_o,
output logic [31:0] alu_operand_c_ex_o, // Still needed if 2r1w reg file used
output logic [4:0] shifter_amt_ex_o,
// MUL
output logic mult_en_ex_o,
output logic [31:0] mult_operand_a_ex_o,
output logic [31:0] mult_operand_b_ex_o,
// CSR ID
output logic csr_access_ex_o,
output logic [1:0] csr_op_ex_o,
// Interface to load store unit
output logic data_req_ex_o,
output logic data_we_ex_o,
output logic [1:0] data_type_ex_o,
output logic data_sign_ext_ex_o,
output logic [1:0] data_reg_offset_ex_o,
output logic data_load_event_ex_o,
input logic data_misaligned_i,
input logic [31:0] misaligned_addr_i,
// Interrupt signals
input logic irq_i,
input logic [4:0] irq_id_i,
input logic irq_enable_i,
output logic irq_ack_o,
output logic [5:0] exc_cause_o,
output logic save_exc_cause_o,
output logic exc_save_if_o,
output logic exc_save_id_o,
output logic exc_save_takenbranch_o,
output logic exc_restore_id_o,
input logic lsu_load_err_i,
input logic lsu_store_err_i,
// Debug Unit Signals
input logic [DBG_SETS_W-1:0] dbg_settings_i,
input logic dbg_req_i,
output logic dbg_ack_o,
input logic dbg_stall_i,
output logic dbg_trap_o,
input logic dbg_reg_rreq_i,
input logic [(REG_ADDR_WIDTH-1):0] dbg_reg_raddr_i,
output logic [31:0] dbg_reg_rdata_o,
input logic dbg_reg_wreq_i,
input logic [(REG_ADDR_WIDTH-1):0] dbg_reg_waddr_i,
input logic [31:0] dbg_reg_wdata_i,
input logic dbg_jump_req_i,
// Write back signal
input logic [31:0] regfile_wdata_wb_i,
input logic [31:0] regfile_wdata_ex_i,
input logic [31:0] csr_rdata_i,
// Performance Counters
output logic perf_jump_o, // we are executing a jump instruction
output logic perf_jr_stall_o, // jump-register-hazard
output logic perf_ld_stall_o // load-use-hazard
);
logic [31:0] instr;
// Decoder/Controller ID stage internal signals
logic deassert_we;
logic illegal_insn_dec;
logic ebrk_insn;
logic eret_insn_dec;
logic ecall_insn_dec;
logic pipe_flush_dec;
logic rega_used_dec;
logic regb_used_dec;
logic branch_taken_ex;
logic [1:0] jump_in_id;
logic [1:0] jump_in_dec;
logic branch_2nd_stage;
logic jr_stall;
logic load_stall;
logic mult_stall;
logic halt_id;
//FSM signals to write back multi cycles instructions
logic regfile_we, regfile_we_q;
logic select_data_lsu;
// Immediate decoding and sign extension
logic [31:0] imm_i_type;
logic [31:0] imm_iz_type;
logic [31:0] imm_s_type;
logic [31:0] imm_sb_type;
logic [31:0] imm_u_type;
logic [31:0] imm_uj_type;
logic [31:0] imm_z_type;
logic [31:0] imm_s2_type;
logic [31:0] imm_bi_type;
logic [31:0] imm_s3_type;
logic [31:0] imm_vs_type;
logic [31:0] imm_vu_type;
logic [31:0] imm_a; // contains the immediate for operand b
logic [31:0] imm_b; // contains the immediate for operand b
// Signals running between controller and exception controller
logic int_req, ext_req, exc_ack; // handshake
// Register file interface
logic [(REG_ADDR_WIDTH-1):0] regfile_addr_ra_id;
logic [(REG_ADDR_WIDTH-1):0] regfile_addr_rb_id;
logic [(REG_ADDR_WIDTH-1):0] regfile_alu_waddr_id;
logic regfile_alu_we_id;
logic [31:0] regfile_data_ra_id;
logic [31:0] regfile_data_rb_id;
// ALU Control
logic [ALU_OP_WIDTH-1:0] alu_operator;
logic [2:0] alu_op_a_mux_sel;
logic [2:0] alu_op_b_mux_sel;
logic [1:0] alu_op_c_mux_sel;
logic [0:0] imm_a_mux_sel;
logic [3:0] imm_b_mux_sel;
// Multiplier Control
logic mult_int_en; // use integer multiplier
// Register Write Control
logic regfile_mem_we_id;
// Data Memory Control
logic data_we_id;
logic [1:0] data_type_id;
logic data_sign_ext_id;
logic [1:0] data_reg_offset_id;
logic data_req_id;
logic data_load_event_id;
// CSR control
logic csr_access;
logic [1:0] csr_op;
// Forwarding
logic [1:0] operand_a_fw_mux_sel;
logic [31:0] operand_a_fw_id;
logic [31:0] operand_b_fw_id;
logic [31:0] operand_b;
logic [31:0] alu_operand_a;
logic [31:0] alu_operand_b;
logic [31:0] alu_operand_c; // Still needed if 2r1w reg file used
assign instr = instr_rdata_i;
// immediate extraction and sign extension
// Every immediate is widened to 32 bits. Sign-extended forms replicate the
// top bit of their field; zero-extended forms are padded with constant zeros.
assign imm_i_type = { {20 {instr[31]}}, instr[31:20] };
assign imm_iz_type = { 20'b0, instr[31:20] };
assign imm_s_type = { {20 {instr[31]}}, instr[31:25], instr[11:7] };
// bit 0 of the SB and UJ immediates is tied to zero
assign imm_sb_type = { {19 {instr[31]}}, instr[31], instr[7], instr[30:25], instr[11:8], 1'b0 };
assign imm_u_type = { instr[31:12], 12'b0 };
assign imm_uj_type = { {12 {instr[31]}}, instr[19:12], instr[20], instr[30:21], 1'b0 };
// immediate for CSR manipulation (zero extended)
assign imm_z_type = { 27'b0, instr[`REG_S1] };
// 5-bit immediates taken from instr[24:20] / instr[29:25]
assign imm_s2_type = { 27'b0, instr[24:20] };
assign imm_bi_type = { {27{instr[24]}}, instr[24:20] };
assign imm_s3_type = { 27'b0, instr[29:25] };
// 6-bit immediates {instr[24:20], instr[25]}: signed and unsigned flavours
assign imm_vs_type = { {26 {instr[24]}}, instr[24:20], instr[25] };
assign imm_vu_type = { 26'b0, instr[24:20], instr[25] };
//---------------------------------------------------------------------------
// source register selection
//---------------------------------------------------------------------------
assign regfile_addr_ra_id = instr[`REG_S1];
assign regfile_addr_rb_id = instr[`REG_S2];
//---------------------------------------------------------------------------
// destination registers
//---------------------------------------------------------------------------
assign regfile_alu_waddr_id = instr[`REG_D];
// kill instruction in the IF/ID stage by setting the instr_valid_id control
// signal to 0 for instructions that are done
assign clear_instr_valid_o = id_ready_o | halt_id;
assign branch_taken_ex = branch_in_ex_o & (branch_decision_i | branch_2nd_stage);
////////////////////////////////////////////////////////
// ___ _ _ //
// / _ \ _ __ ___ _ __ __ _ _ __ __| | / \ //
// | | | | '_ \ / _ \ '__/ _` | '_ \ / _` | / _ \ //
// | |_| | |_) | __/ | | (_| | | | | (_| | / ___ \ //
// \___/| .__/ \___|_| \__,_|_| |_|\__,_| /_/ \_\ //
// |_| //
////////////////////////////////////////////////////////
// Operand A selection for the ALU
always_comb
begin : alu_operand_a_mux
  // The (possibly forwarded) register operand is the result for
  // OP_A_REGA_OR_FWD and for every selector value not decoded below.
  alu_operand_a = operand_a_fw_id;
  case (alu_op_a_mux_sel)
    // OP_A_REGB_OR_FWD (regfile_data_rb_id) is currently not decoded
    OP_A_CURRPC: alu_operand_a = pc_id_i;
    OP_A_IMM:    alu_operand_a = imm_a;
    default: ; // keep operand_a_fw_id
  endcase
end
always_comb
begin : immediate_a_mux
  // Only IMMA_Z selects a non-zero immediate; IMMA_ZERO and any other
  // selector value yield all zeros.
  if (imm_a_mux_sel == IMMA_Z)
    imm_a = imm_z_type;
  else
    imm_a = '0;
end
// Operand A source selection used with LSU instructions
always_comb
begin : operand_a_fw_mux
  // SEL_MISALIGNED substitutes the misaligned address; SEL_REGFILE and any
  // other selector value read register file port A.
  if (operand_a_fw_mux_sel == SEL_MISALIGNED)
    operand_a_fw_id = misaligned_addr_i;
  else
    operand_a_fw_id = regfile_data_ra_id;
end
//////////////////////////////////////////////////////
// ___ _ ____ //
// / _ \ _ __ ___ _ __ __ _ _ __ __| | | __ ) //
// | | | | '_ \ / _ \ '__/ _` | '_ \ / _` | | _ \ //
// | |_| | |_) | __/ | | (_| | | | | (_| | | |_) | //
// \___/| .__/ \___|_| \__,_|_| |_|\__,_| |____/ //
// |_| //
//////////////////////////////////////////////////////
// Immediate selection for operand B
always_comb
begin : immediate_b_mux
  // imm_i_type is the result for IMMB_I and for every selector value that
  // is not decoded below.
  imm_b = imm_i_type;
  unique case (imm_b_mux_sel)
    IMMB_S:      imm_b = imm_s_type;
    IMMB_SB:     imm_b = imm_sb_type;
    IMMB_U:      imm_b = imm_u_type;
    IMMB_UJ:     imm_b = imm_uj_type;
    IMMB_S2:     imm_b = imm_s2_type;
    IMMB_S3:     imm_b = imm_s3_type;
    IMMB_BI:     imm_b = imm_bi_type;
    IMMB_VS:     imm_b = imm_vs_type;
    IMMB_VU:     imm_b = imm_vu_type;
    // PC increment: 2 for a compressed instruction while no misaligned
    // access is in flight, 4 otherwise
    IMMB_PCINCR: imm_b = (is_compressed_i && (~data_misaligned_i)) ? 32'h2 : 32'h4;
    default: ; // keep imm_i_type
  endcase
end
// Operand B selection for the ALU
always_comb
begin : alu_operand_b_mux
  // Register file port B is the result for OP_B_REGB_OR_FWD and for every
  // selector value not decoded below.
  operand_b = regfile_data_rb_id;
  case (alu_op_b_mux_sel)
    // OP_B_REGA_OR_FWD (regfile_data_ra_id) is currently not decoded
    OP_B_IMM:  operand_b = imm_b;
    OP_B_ZERO: operand_b = '0;
    default: ; // keep regfile_data_rb_id
  endcase
end
// The selected operand feeds both the ALU and the forwarded operand B.
assign operand_b_fw_id = operand_b;
assign alu_operand_b   = operand_b;
//////////////////////////////////////////////////////
// ___ _ ____ //
// / _ \ _ __ ___ _ __ __ _ _ __ __| | / ___| //
// | | | | '_ \ / _ \ '__/ _` | '_ \ / _` | | | //
// | |_| | |_) | __/ | | (_| | | | | (_| | | |___ //
// \___/| .__/ \___|_| \__,_|_| |_|\__,_| \____| //
// |_| //
//////////////////////////////////////////////////////
// ALU OP C Mux, jump or store. TODO: Change it
/*
always_comb
begin : alu_operand_c_mux
case (alu_op_c_mux_sel)
OP_C_REGB_OR_FWD: alu_operand_c = regfile_data_rb_id;
OP_C_RA: alu_operand_c = pc_if_i; // this is the return address
default: alu_operand_c = regfile_data_rb_id;
endcase // case (alu_op_c_mux_sel)
end
*/
assign alu_operand_c = regfile_data_rb_id;
/////////////////////////////////////////////////////////
// ____ _____ ____ ___ ____ _____ _____ ____ ____ //
// | _ \| ____/ ___|_ _/ ___|_ _| ____| _ \/ ___| //
// | |_) | _|| | _ | |\___ \ | | | _| | |_) \___ \ //
// | _ <| |__| |_| || | ___) || | | |___| _ < ___) | //
// |_| \_\_____\____|___|____/ |_| |_____|_| \_\____/ //
// //
/////////////////////////////////////////////////////////
// Register file write-port mux.
// Selects write enable, address and data of the single write port between a
// debug write request and the instruction currently in the ID stage.
logic [31:0]                 regfile_wdata_mux;
logic                        regfile_we_mux;
// Sized from REG_ADDR_WIDTH (was a hard-coded [4:0]) so that it always
// matches dbg_reg_waddr_i and regfile_alu_waddr_id, which drive it.
logic [(REG_ADDR_WIDTH-1):0] regfile_waddr_mux;
//TODO: add assertion
always_comb
begin
  if (dbg_reg_wreq_i) begin
    // A debug write takes priority over the pipeline write-back.
    regfile_we_mux    = 1'b1;
    regfile_waddr_mux = dbg_reg_waddr_i;
    regfile_wdata_mux = dbg_reg_wdata_i;
  end else begin
    regfile_we_mux    = regfile_we;
    regfile_waddr_mux = regfile_alu_waddr_id;
    // Write-data priority: LSU result, CSR read data, pc_if_i for
    // JAL/JALR, otherwise the EX-stage result.
    if (select_data_lsu)
      regfile_wdata_mux = regfile_wdata_wb_i;
    else if (csr_access)
      regfile_wdata_mux = csr_rdata_i;
    //TODO: modify this
    else if ((jump_in_id == BRANCH_JALR) || (jump_in_id == BRANCH_JAL))
      regfile_wdata_mux = pc_if_i;
    else
      regfile_wdata_mux = regfile_wdata_ex_i;
  end
end
// Register file instance: two read ports (a, b) and one write port (a).
// Read port b is shared with the debug unit.
zeroriscy_register_file registers_i
(
.clk ( clk ),
.rst_n ( rst_n ),
.test_en_i ( test_en_i ),
// Read port a
.raddr_a_i ( regfile_addr_ra_id ),
.rdata_a_o ( regfile_data_ra_id ),
// Read port b
// While dbg_reg_rreq_i is asserted the address comes from dbg_reg_raddr_i
// instead of the rs2 field of the instruction.
.raddr_b_i ( (dbg_reg_rreq_i == 1'b0) ? regfile_addr_rb_id : dbg_reg_raddr_i ),
.rdata_b_o ( regfile_data_rb_id ),
// write port
.waddr_a_i ( regfile_waddr_mux ),
.wdata_a_i ( regfile_wdata_mux ),
.we_a_i ( regfile_we_mux )
);
// Debug read data is simply whatever read port b currently returns.
assign dbg_reg_rdata_o = regfile_data_rb_id;
///////////////////////////////////////////////
// ____ _____ ____ ___ ____ _____ ____ //
// | _ \| ____/ ___/ _ \| _ \| ____| _ \ //
// | | | | _|| | | | | | | | | _| | |_) | //
// | |_| | |__| |__| |_| | |_| | |___| _ < //
// |____/|_____\____\___/|____/|_____|_| \_\ //
// //
///////////////////////////////////////////////
zeroriscy_decoder decoder_i
(
// controller related signals
.deassert_we_i ( deassert_we ),
.data_misaligned_i ( data_misaligned_i ),
.branch_2nd_stage_i ( branch_2nd_stage ),
.illegal_insn_o ( illegal_insn_dec ),
.ebrk_insn_o ( ebrk_insn ),
.eret_insn_o ( eret_insn_dec ),
.ecall_insn_o ( ecall_insn_dec ),
.pipe_flush_o ( pipe_flush_dec ),
.rega_used_o ( rega_used_dec ),
.regb_used_o ( regb_used_dec ),
// from IF/ID pipeline
.instr_rdata_i ( instr ),
.illegal_c_insn_i ( illegal_c_insn_i ),
// ALU signals
.alu_operator_o ( alu_operator ),
.alu_op_a_mux_sel_o ( alu_op_a_mux_sel ),
.alu_op_b_mux_sel_o ( alu_op_b_mux_sel ),
.alu_op_c_mux_sel_o ( alu_op_c_mux_sel ),
.imm_a_mux_sel_o ( imm_a_mux_sel ),
.imm_b_mux_sel_o ( imm_b_mux_sel ),
.mult_int_en_o ( mult_int_en ),
// Register file control signals
.regfile_mem_we_o ( regfile_mem_we_id ),
.regfile_alu_we_o ( regfile_alu_we_id ),
// CSR control signals
.csr_access_o ( csr_access ),
.csr_op_o ( csr_op ),
// Data bus interface
.data_req_o ( data_req_id ),
.data_we_o ( data_we_id ),
.data_type_o ( data_type_id ),
.data_sign_extension_o ( data_sign_ext_id ),
.data_reg_offset_o ( data_reg_offset_id ),
.data_load_event_o ( data_load_event_id ),
// jump/branches
.jump_in_dec_o ( jump_in_dec ),
.jump_in_id_o ( jump_in_id )
);
////////////////////////////////////////////////////////////////////
// ____ ___ _ _ _____ ____ ___ _ _ _____ ____ //
// / ___/ _ \| \ | |_ _| _ \ / _ \| | | | | ____| _ \ //
// | | | | | | \| | | | | |_) | | | | | | | | _| | |_) | //
// | |__| |_| | |\ | | | | _ <| |_| | |___| |___| |___| _ < //
// \____\___/|_| \_| |_| |_| \_\\___/|_____|_____|_____|_| \_\ //
// //
////////////////////////////////////////////////////////////////////
zeroriscy_controller controller_i
(
.clk ( clk ),
.rst_n ( rst_n ),
.fetch_enable_i ( fetch_enable_i ),
.ctrl_busy_o ( ctrl_busy_o ),
.is_decoding_o ( is_decoding_o ),
// decoder related signals
.deassert_we_o ( deassert_we ),
.illegal_insn_i ( illegal_insn_dec ),
.eret_insn_i ( eret_insn_dec ),
.pipe_flush_i ( pipe_flush_dec ),
// from IF/ID pipeline
.instr_valid_i ( instr_valid_i ),
.instr_rdata_i ( instr ),
// from prefetcher
.instr_req_o ( instr_req_o ),
// to prefetcher
.pc_set_o ( pc_set_o ),
.pc_mux_o ( pc_mux_o ),
// LSU
.data_req_ex_i ( data_req_ex_o ),
.data_misaligned_i ( data_misaligned_i ),
.data_load_event_i ( data_load_event_ex_o ),
// jump/branch control
.branch_taken_ex_i ( branch_taken_ex ),
.jump_in_id_i ( jump_in_id ),
.jump_in_dec_i ( jump_in_dec ),
// Exception Controller Signals
.int_req_i ( int_req ),
.ext_req_i ( ext_req ),
.exc_ack_o ( exc_ack ),
.irq_ack_o ( irq_ack_o ),
.exc_save_if_o ( exc_save_if_o ),
.exc_save_id_o ( exc_save_id_o ),
.exc_save_takenbranch_o ( exc_save_takenbranch_o ),
.exc_restore_id_o ( exc_restore_id_o ),
// Debug Unit Signals
.dbg_req_i ( dbg_req_i ),
.dbg_ack_o ( dbg_ack_o ),
.dbg_stall_i ( dbg_stall_i ),
.dbg_jump_req_i ( dbg_jump_req_i ),
// Forwarding signals
.operand_a_fw_mux_sel_o ( operand_a_fw_mux_sel ),
// Stall signals
.halt_if_o ( halt_if_o ),
.halt_id_o ( halt_id ),
.branch_2nd_stage_o ( branch_2nd_stage ),
.jr_stall_o ( jr_stall ),
.id_ready_i ( id_ready_o ),
.if_valid_i ( if_valid_i ),
.wb_valid_i ( wb_valid_i ),
// Performance Counters
.perf_jump_o ( perf_jump_o ),
.perf_jr_stall_o ( perf_jr_stall_o ),
.perf_ld_stall_o ( perf_ld_stall_o )
);
///////////////////////////////////////////////////////////////////////
// _____ ____ _ _ _ //
// | ____|_ _____ / ___|___ _ __ | |_ _ __ ___ | | | ___ _ __ //
// | _| \ \/ / __| | | / _ \| '_ \| __| '__/ _ \| | |/ _ \ '__| //
// | |___ > < (__ _ | |__| (_) | | | | |_| | | (_) | | | __/ | //
// |_____/_/\_\___(_) \____\___/|_| |_|\__|_| \___/|_|_|\___|_| //
// //
///////////////////////////////////////////////////////////////////////
// Exception controller instance.
// Exchanges the int_req/ext_req/exc_ack handshake with the main controller
// and drives the exception PC mux select and the exception cause outputs.
zeroriscy_exc_controller exc_controller_i
(
.clk ( clk ),
.rst_n ( rst_n ),
// to controller
.int_req_o ( int_req ),
.ext_req_o ( ext_req ),
.ack_i ( exc_ack ),
.trap_o ( dbg_trap_o ),
// to IF stage
.pc_mux_o ( exc_pc_mux_o ),
// Interrupt signals
.irq_i ( irq_i ),
.irq_id_i ( irq_id_i ),
.irq_enable_i ( irq_enable_i ),
// Decoder exception flags are gated with is_decoding_o before being
// forwarded to the exception controller.
.ebrk_insn_i ( is_decoding_o & ebrk_insn ),
.illegal_insn_i ( is_decoding_o & illegal_insn_dec ),
.ecall_insn_i ( is_decoding_o & ecall_insn_dec ),
.eret_insn_i ( is_decoding_o & eret_insn_dec ),
// LSU error flags are passed through ungated
.lsu_load_err_i ( lsu_load_err_i ),
.lsu_store_err_i ( lsu_store_err_i ),
.cause_o ( exc_cause_o ),
.save_cause_o ( save_exc_cause_o ),
.dbg_settings_i ( dbg_settings_i )
);
/////////////////////////////////////
// ___ ____ _______ __ //
// |_ _| _ \ | ____\ \/ / //
// | || | | |_____| _| \ / // - merging network
// | || |_| |_____| |___ / \ //
// |___|____/ |_____/_/\_\ //
// //
/////////////////////////////////////
// ID -> EX merging network.
// Purely combinational: the decoded control signals and selected operands
// are passed straight to the EX-stage outputs.
always_comb
begin
  // ALU operator and operands
  alu_operator_ex_o    = alu_operator;
  alu_operand_a_ex_o   = alu_operand_a;
  alu_operand_b_ex_o   = alu_operand_b;
  alu_operand_c_ex_o   = alu_operand_c;

  // CSR access; the CSR operation is forced to CSR_OP_NONE while the ID
  // stage is not ready
  csr_access_ex_o      = csr_access;
  csr_op_ex_o          = id_ready_o ? csr_op : CSR_OP_NONE;

  // Load/store unit control.
  // data_reg_offset_ex_o used to be assigned 2'b0 first and then
  // unconditionally overwritten; only the effective assignment is kept.
  data_req_ex_o        = data_req_id;
  data_we_ex_o         = data_we_id;
  data_type_ex_o       = data_type_id;
  data_sign_ext_ex_o   = data_sign_ext_id;
  data_reg_offset_ex_o = data_reg_offset_id;
  // a load event is only reported for a data request that is not halted
  data_load_event_ex_o = ((data_req_id & (~halt_id)) ? data_load_event_id : 1'b0);

  // conditional branch currently being decoded
  branch_in_ex_o       = (jump_in_dec == BRANCH_COND);
end
// Multiplier cycle counter (current value / next value)
logic [4:0] mult_cycle_n, mult_cycle_q;
// The counter register is cleared by reset and only takes its next value
// while a multiplication is being decoded.
always_ff @(posedge clk or negedge rst_n) begin : proc_mult_en_ex_o
  if (!rst_n)
    mult_cycle_q <= '0;
  else if (mult_int_en)
    mult_cycle_q <= mult_cycle_n;
end
// The multiplier sees the same operands as the ALU.
assign mult_operand_a_ex_o = alu_operand_a;
assign mult_operand_b_ex_o = alu_operand_b;
assign mult_en_ex_o        = mult_int_en;
// State of the ID-EX/WB write-back FSM (current / next)
enum logic { IDLE, WAIT_LSU } id_wb_fsm_cs, id_wb_fsm_ns;
/////////////////////////////////////////
//     ID-EX/WB Pipeline Register      //
/////////////////////////////////////////
// Registers the FSM state and remembers, across a load or multiply stall,
// whether the stalled instruction asked for a memory write-back.
always_ff @(posedge clk, negedge rst_n)
begin : EX_WB_Pipeline_Register
  if (!rst_n) begin
    id_wb_fsm_cs <= IDLE;
    regfile_we_q <= 1'b0;
  end else begin
    id_wb_fsm_cs <= id_wb_fsm_ns;
    regfile_we_q <= (load_stall | mult_stall) & regfile_mem_we_id;
  end
end
///////////////////////////////////////
//          ID-EX/WB FSM             //
///////////////////////////////////////
// Next-state and output logic of the write-back FSM.
// IDLE     : single-cycle instructions write back immediately; a data
//            request or a multiplication suppresses the write and moves to
//            WAIT_LSU with the matching stall raised.
// WAIT_LSU : waits for data_valid_lsu_i (LSU) or for the multiplier cycle
//            counter to reach 31 (MUL), then writes back using the
//            registered write enable regfile_we_q.
// NOTE(review): WAIT_LSU is shared by LSU and MUL operations. While a
// multiply is in flight in this state mult_stall keeps its default of 0 and
// load_stall is 1 unless data_valid_lsu_i is high, so it is load_stall that
// holds id_ready_o low - confirm this is the intended behaviour.
always_comb
begin
  // defaults
  id_wb_fsm_ns     = id_wb_fsm_cs;
  regfile_we       = regfile_alu_we_id & (~halt_id);
  load_stall       = 1'b0;
  mult_stall       = 1'b0;
  // select_data_lsu is a single bit; the former 2'b0 literal was silently
  // truncated and triggers width-mismatch lint warnings.
  select_data_lsu  = 1'b0;
  shifter_amt_ex_o = alu_operand_b_ex_o[4:0];
  mult_cycle_n     = mult_cycle_q + 5'd1;

  unique case (id_wb_fsm_cs)
    IDLE:
    begin
      //if instr not valid, deassert and so it is 0
      if (data_req_ex_o) begin
        //LSU operation
        regfile_we   = 1'b0;
        id_wb_fsm_ns = WAIT_LSU;
        load_stall   = 1'b1;
      end
      if (mult_int_en) begin
        //MUL operation
        regfile_we       = 1'b0;
        id_wb_fsm_ns     = WAIT_LSU;
        mult_stall       = 1'b1;
        // the shifter amount follows the multiplier cycle counter
        shifter_amt_ex_o = mult_cycle_q;
      end
    end

    WAIT_LSU:
    begin
      shifter_amt_ex_o = mult_cycle_q;
      if (data_valid_lsu_i) begin
        //LSU operation
        regfile_we      = regfile_we_q;
        id_wb_fsm_ns    = IDLE;
        load_stall      = 1'b0;
        select_data_lsu = 1'b1;
      end
      else
        load_stall = 1'b1;
      if (mult_cycle_q == 5'd31) begin
        //MUL operation: last cycle, write back and restart the counter
        regfile_we   = regfile_we_q;
        id_wb_fsm_ns = IDLE;
        mult_stall   = 1'b0;
        mult_cycle_n = 5'd0;
      end
    end
  endcase
end
// stall control
// The ID stage is ready when no stall source is active; it is valid when
// it is ready and not halted.
assign id_ready_o = ~(jr_stall | load_stall | mult_stall);
assign id_valid_o = id_ready_o & (~halt_id);
//----------------------------------------------------------------------------
// Assertions
//----------------------------------------------------------------------------
// All checks are sampled on the rising edge of clk and only print a message
// via $display when they fail.
// make sure that branch decision is valid when jumping
assert property (
@(posedge clk) (branch_in_ex_o) |-> (branch_decision_i !== 1'bx) ) else $display("Branch decision is X");
// optional check, compiled in only when CHECK_MISALIGNED is defined:
// reports every cycle in which data_misaligned_i is high, with the ID-stage PC
`ifdef CHECK_MISALIGNED
assert property (
@(posedge clk) (~data_misaligned_i) ) else $display("Misaligned memory access at %x",pc_id_i);
`endif
// the instruction delivered to the ID stage should always be valid
// (no unknown bits while it is flagged valid and not an illegal compressed
// instruction)
assert property (
@(posedge clk) (instr_valid_i & (~illegal_c_insn_i)) |-> (!$isunknown(instr_rdata_i)) ) else $display("Instruction is valid, but has at least one X");
endmodule

57
mult_dir/mult.sv Normal file
View file

@ -0,0 +1,57 @@
// Copyright 2015 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////
// Engineer: Matthias Baer - baermatt@student.ethz.ch //
// //
// Additional contributions by: //
// Andreas Traber - atraber@student.ethz.ch //
// //
// Design Name: Subword multiplier and MAC //
// Project Name: RI5CY //
// Language: SystemVerilog //
// //
// Description: Advanced MAC unit for PULP. //
// //
////////////////////////////////////////////////////////////////////////////////
import zeroriscy_defines::*;
// Placeholder multiplier: multiplies the least-significant bytes of the two
// operands as signed 8-bit numbers and returns the product sign-extended to
// 32 bits. Purely combinational.
module zeroriscy_mult
(
  // Ports that are currently stubbed out of this version:
  //   clk, rst_n, enable_i, operator_i [2:0],
  //   short_subword_i, short_signed_i [1:0] (integer and short multiplier),
  //   op_c_i [31:0], imm_i [4:0],
  //   multicycle_o, ready_o, ex_ready_i
  input  logic [31:0] op_a_i,   // only bits [7:0] are used
  input  logic [31:0] op_b_i,   // only bits [7:0] are used
  output logic [31:0] result_o
);

  // signed views of the low operand bytes
  logic signed [7:0] byte_a;
  logic signed [7:0] byte_b;

  assign byte_a = op_a_i[7:0];
  assign byte_b = op_b_i[7:0];

  // Both operands are signed, so they are sign-extended to the 32-bit
  // result width before the multiplication.
  assign result_o = byte_a * byte_b;

endmodule // zeroriscy_mult

788
mult_dir/zeroriscy_core.sv Normal file
View file

@ -0,0 +1,788 @@
// Copyright 2017 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////
// Engineer: Matthias Baer - baermatt@student.ethz.ch //
// //
// Additional contributions by: //
// Igor Loi - igor.loi@unibo.it //
// Andreas Traber - atraber@student.ethz.ch //
// Sven Stucki - svstucki@student.ethz.ch //
// Markus Wegmann - markus.wegmann@technokrat.ch //
// Davide Schiavone - pschiavo@iis.ee.ethz.ch //
// //
// Design Name: Top level module //
// Project Name: zero-riscy //
// Language: SystemVerilog //
// //
// Description: Top level module of the RISC-V core. //
// //
////////////////////////////////////////////////////////////////////////////////
`include "zeroriscy_config.sv"
import zeroriscy_defines::*;
module zeroriscy_core
#(
parameter N_EXT_PERF_COUNTERS = 0,
parameter INSTR_RDATA_WIDTH = 32,
parameter REG_ADDR_WIDTH = 5
)
(
// Clock and Reset
input logic clk_i,
input logic rst_ni,
input logic clock_en_i, // enable clock, otherwise it is gated
input logic test_en_i, // enable all clock gates for testing
// Core ID, Cluster ID and boot address are considered more or less static
input logic [ 3:0] core_id_i,
input logic [ 5:0] cluster_id_i,
input logic [31:0] boot_addr_i,
// Instruction memory interface
output logic instr_req_o,
input logic instr_gnt_i,
input logic instr_rvalid_i,
output logic [31:0] instr_addr_o,
input logic [INSTR_RDATA_WIDTH-1:0] instr_rdata_i,
// Data memory interface
output logic data_req_o,
input logic data_gnt_i,
input logic data_rvalid_i,
output logic data_we_o,
output logic [3:0] data_be_o,
output logic [31:0] data_addr_o,
output logic [31:0] data_wdata_o,
input logic [31:0] data_rdata_i,
input logic data_err_i,
// Interrupt inputs
input logic irq_i, // level sensitive IR lines
input logic [4:0] irq_id_i,
output logic irq_ack_o, // irq ack
// Debug Interface
input logic debug_req_i,
output logic debug_gnt_o,
output logic debug_rvalid_o,
input logic [14:0] debug_addr_i,
input logic debug_we_i,
input logic [31:0] debug_wdata_i,
output logic [31:0] debug_rdata_o,
output logic debug_halted_o,
input logic debug_halt_i,
input logic debug_resume_i,
// CPU Control Signals
input logic fetch_enable_i,
output logic core_busy_o,
input logic [N_EXT_PERF_COUNTERS-1:0] ext_perf_counters_i
);
localparam N_HWLP = 2;
localparam N_HWLP_BITS = $clog2(N_HWLP);
// IF/ID signals
logic instr_valid_id;
logic [31:0] instr_rdata_id; // Instruction sampled inside IF stage
logic is_compressed_id;
logic illegal_c_insn_id; // Illegal compressed instruction sent to ID stage
logic [31:0] pc_if; // Program counter in IF stage
logic [31:0] pc_id; // Program counter in ID stage
logic clear_instr_valid;
logic pc_set;
logic [2:0] pc_mux_id; // Mux selector for next PC
logic [1:0] exc_pc_mux_id; // Mux selector for exception PC
logic lsu_load_err;
logic lsu_store_err;
// ID performance counter signals
logic is_decoding;
logic data_misaligned;
logic [31:0] misaligned_addr;
// Jump and branch target and decision (EX->IF)
logic [31:0] jump_target_ex;
logic branch_in_ex;
logic branch_decision;
logic ctrl_busy;
logic if_busy;
logic lsu_busy;
// ALU Control
logic [ALU_OP_WIDTH-1:0] alu_operator_ex;
logic [31:0] alu_operand_a_ex;
logic [31:0] alu_operand_b_ex;
logic [31:0] mult_operand_a_ex;
logic [31:0] mult_operand_b_ex;
logic [31:0] alu_operand_c_ex;
logic [ 4:0] shifter_amt;
logic [31:0] alu_adder_result_ex; // Used to forward computed address to LSU
logic [31:0] regfile_wdata_ex;
// Multiplier Control
logic mult_en_ex;
// CSR control
logic csr_access_ex;
logic [1:0] csr_op_ex;
logic csr_access;
logic [1:0] csr_op;
logic [11:0] csr_addr;
logic [11:0] csr_addr_int;
logic [31:0] csr_rdata;
logic [31:0] csr_wdata;
// Data Memory Control: From ID stage (id-ex pipe) <--> load store unit
logic data_we_ex;
logic [1:0] data_type_ex;
logic data_sign_ext_ex;
logic [1:0] data_reg_offset_ex;
logic data_req_ex;
logic [31:0] data_pc_ex;
logic data_load_event_ex;
logic data_misaligned_ex;
logic [31:0] regfile_wdata_lsu;
// stall control
logic halt_if;
logic if_ready;
logic id_ready;
logic if_valid;
logic id_valid;
logic wb_valid;
logic lsu_ready_ex;
logic lsu_ready_wb;
logic data_valid_lsu;
// Signals between instruction core interface and pipe (if and id stages)
logic instr_req_int; // Id stage asserts a req to instruction core interface
// Interrupts
logic irq_enable;
logic [31:0] mepc;
logic [5:0] exc_cause;
logic save_exc_cause;
logic exc_save_if;
logic exc_save_id;
logic exc_save_takenbranch_ex;
logic exc_restore_id;
// Debug Unit
logic [DBG_SETS_W-1:0] dbg_settings;
logic dbg_req;
logic dbg_ack;
logic dbg_stall;
logic dbg_trap;
// Debug GPR Read Access
logic dbg_reg_rreq;
logic [(REG_ADDR_WIDTH-1):0] dbg_reg_raddr;
logic [31:0] dbg_reg_rdata;
// Debug GPR Write Access
logic dbg_reg_wreq;
logic [(REG_ADDR_WIDTH-1):0] dbg_reg_waddr;
logic [31:0] dbg_reg_wdata;
// Debug CSR Access
logic dbg_csr_req;
logic [11:0] dbg_csr_addr;
logic dbg_csr_we;
logic [31:0] dbg_csr_wdata;
logic [31:0] dbg_jump_addr;
logic dbg_jump_req;
// Performance Counters
logic perf_imiss;
logic perf_jump;
logic perf_jr_stall;
logic perf_ld_stall;
//////////////////////////////////////////////////////////////////////////////////////////////
// ____ _ _ __ __ _ //
// / ___| | ___ ___| | __ | \/ | __ _ _ __ __ _ __ _ ___ _ __ ___ ___ _ __ | |_ //
// | | | |/ _ \ / __| |/ / | |\/| |/ _` | '_ \ / _` |/ _` |/ _ \ '_ ` _ \ / _ \ '_ \| __| //
// | |___| | (_) | (__| < | | | | (_| | | | | (_| | (_| | __/ | | | | | __/ | | | |_ //
// \____|_|\___/ \___|_|\_\ |_| |_|\__,_|_| |_|\__,_|\__, |\___|_| |_| |_|\___|_| |_|\__| //
// |___/ //
//////////////////////////////////////////////////////////////////////////////////////////////
logic clk;
logic clock_en;
logic dbg_busy;
logic sleeping;
// While a load event (sleeping on a barrier) is outstanding on the data
// interface, only the instruction interface decides whether the core is
// busy; otherwise any of IF stage, controller or LSU keeps it busy.
assign core_busy_o = (data_load_event_ex & data_req_o)
                     ? if_busy
                     : (if_busy | ctrl_busy | lsu_busy);
// any pending debug request or debug-side access counts as debug activity
assign dbg_busy    = debug_req_i | dbg_req | dbg_csr_req | dbg_jump_req | dbg_reg_wreq;
// the core clock runs when enabled externally, or while the core or the
// debug side is busy
assign clock_en    = core_busy_o | dbg_busy | clock_en_i;
// sleeping: fetch is disabled and the core is idle
assign sleeping    = ~(fetch_enable_i | core_busy_o);
// main clock gate of the core
// generates all clocks except the one for the debug unit which is
// independent
cluster_clock_gating core_clock_gate_i
(
.clk_i ( clk_i ),
.en_i ( clock_en ),
.test_en_i ( test_en_i ),
.clk_o ( clk )
);
//////////////////////////////////////////////////
// ___ _____ ____ _____ _ ____ _____ //
// |_ _| ___| / ___|_ _|/ \ / ___| ____| //
// | || |_ \___ \ | | / _ \| | _| _| //
// | || _| ___) || |/ ___ \ |_| | |___ //
// |___|_| |____/ |_/_/ \_\____|_____| //
// //
//////////////////////////////////////////////////
zeroriscy_if_stage
#(
.RDATA_WIDTH ( INSTR_RDATA_WIDTH )
)
if_stage_i
(
.clk ( clk ),
.rst_n ( rst_ni ),
// boot address (trap vector location)
.boot_addr_i ( boot_addr_i ),
// instruction request control
.req_i ( instr_req_int ),
// instruction cache interface
.instr_req_o ( instr_req_o ),
.instr_addr_o ( instr_addr_o ),
.instr_gnt_i ( instr_gnt_i ),
.instr_rvalid_i ( instr_rvalid_i ),
.instr_rdata_i ( instr_rdata_i ),
// outputs to ID stage
.instr_valid_id_o ( instr_valid_id ),
.instr_rdata_id_o ( instr_rdata_id ),
.is_compressed_id_o ( is_compressed_id ),
.illegal_c_insn_id_o ( illegal_c_insn_id ),
.pc_if_o ( pc_if ),
.pc_id_o ( pc_id ),
// control signals
.clear_instr_valid_i ( clear_instr_valid ),
.pc_set_i ( pc_set ),
.exception_pc_reg_i ( mepc ), // exception return address
.pc_mux_i ( pc_mux_id ), // sel for pc multiplexer
.exc_pc_mux_i ( exc_pc_mux_id ),
.exc_vec_pc_mux_i ( irq_id_i ),
// from debug unit
.dbg_jump_addr_i ( dbg_jump_addr ),
.dbg_jump_req_i ( dbg_jump_req ),
// Jump targets
.jump_target_ex_i ( jump_target_ex ),
// pipeline stalls
.halt_if_i ( halt_if ),
.if_ready_o ( if_ready ),
.id_ready_i ( id_ready ),
.if_valid_o ( if_valid ),
.if_busy_o ( if_busy ),
.perf_imiss_o ( perf_imiss )
);
/////////////////////////////////////////////////
// ___ ____ ____ _____ _ ____ _____ //
// |_ _| _ \ / ___|_ _|/ \ / ___| ____| //
// | || | | | \___ \ | | / _ \| | _| _| //
// | || |_| | ___) || |/ ___ \ |_| | |___ //
// |___|____/ |____/ |_/_/ \_\____|_____| //
// //
/////////////////////////////////////////////////
zeroriscy_id_stage
#(
)
id_stage_i
(
.clk ( clk ),
.rst_n ( rst_ni ),
.test_en_i ( test_en_i ),
// Processor Enable
.fetch_enable_i ( fetch_enable_i ),
.ctrl_busy_o ( ctrl_busy ),
.is_decoding_o ( is_decoding ),
// Interface to instruction memory
.instr_valid_i ( instr_valid_id ),
.instr_rdata_i ( instr_rdata_id ),
.instr_req_o ( instr_req_int ),
// Jumps and branches
.branch_in_ex_o ( branch_in_ex ),
.branch_decision_i ( branch_decision ),
// IF and ID control signals
.clear_instr_valid_o ( clear_instr_valid ),
.pc_set_o ( pc_set ),
.pc_mux_o ( pc_mux_id ),
.exc_pc_mux_o ( exc_pc_mux_id ),
.illegal_c_insn_i ( illegal_c_insn_id ),
.is_compressed_i ( is_compressed_id ),
.pc_if_i ( pc_if ),
.pc_id_i ( pc_id ),
// Stalls
.halt_if_o ( halt_if ),
.if_ready_i ( if_ready ),
.id_ready_o ( id_ready ),
.lsu_ready_ex_i ( lsu_ready_ex ),
.data_valid_lsu_i ( data_valid_lsu ),
.wb_ready_i ( lsu_ready_wb ),
.if_valid_i ( if_valid ),
.id_valid_o ( id_valid ),
.wb_valid_i ( wb_valid ),
.alu_operator_ex_o ( alu_operator_ex ),
.alu_operand_a_ex_o ( alu_operand_a_ex ),
.alu_operand_b_ex_o ( alu_operand_b_ex ),
//used in LSU for store instructions
//TODO: change name
.alu_operand_c_ex_o ( alu_operand_c_ex ),
.mult_en_ex_o ( mult_en_ex ),
.mult_operand_a_ex_o ( mult_operand_a_ex ),
.mult_operand_b_ex_o ( mult_operand_b_ex ),
.shifter_amt_ex_o ( shifter_amt ),
// CSR ID/EX
.csr_access_ex_o ( csr_access_ex ),
.csr_op_ex_o ( csr_op_ex ),
// LSU
.data_req_ex_o ( data_req_ex ), // to load store unit
.data_we_ex_o ( data_we_ex ), // to load store unit
.data_type_ex_o ( data_type_ex ), // to load store unit
.data_sign_ext_ex_o ( data_sign_ext_ex ), // to load store unit
.data_reg_offset_ex_o ( data_reg_offset_ex ), // to load store unit
.data_load_event_ex_o ( data_load_event_ex ), // to load store unit
.data_misaligned_i ( data_misaligned ),
.misaligned_addr_i ( misaligned_addr ),
// Interrupt Signals
.irq_i ( irq_i ), // incoming interrupts
.irq_id_i ( irq_id_i ),
.irq_enable_i ( irq_enable ), // global interrupt enable
.irq_ack_o ( irq_ack_o ),
.exc_cause_o ( exc_cause ),
.save_exc_cause_o ( save_exc_cause ),
.exc_save_if_o ( exc_save_if ), // control signal to save pc
.exc_save_id_o ( exc_save_id ), // control signal to save pc
.exc_save_takenbranch_o ( exc_save_takenbranch_ex ), // control signal to save target taken branch
.exc_restore_id_o ( exc_restore_id ), // control signal to restore pc
.lsu_load_err_i ( lsu_load_err ),
.lsu_store_err_i ( lsu_store_err ),
// Debug Unit Signals
.dbg_settings_i ( dbg_settings ),
.dbg_req_i ( dbg_req ),
.dbg_ack_o ( dbg_ack ),
.dbg_stall_i ( dbg_stall ),
.dbg_trap_o ( dbg_trap ),
.dbg_reg_rreq_i ( dbg_reg_rreq ),
.dbg_reg_raddr_i ( dbg_reg_raddr ),
.dbg_reg_rdata_o ( dbg_reg_rdata ),
.dbg_reg_wreq_i ( dbg_reg_wreq ),
.dbg_reg_waddr_i ( dbg_reg_waddr ),
.dbg_reg_wdata_i ( dbg_reg_wdata ),
.dbg_jump_req_i ( dbg_jump_req ),
// write data to commit in the register file
.regfile_wdata_wb_i ( regfile_wdata_lsu ),
.regfile_wdata_ex_i ( regfile_wdata_ex ),
.csr_rdata_i ( csr_rdata ),
// Performance Counters
.perf_jump_o ( perf_jump ),
.perf_jr_stall_o ( perf_jr_stall ),
.perf_ld_stall_o ( perf_ld_stall )
);
zeroriscy_ex_block ex_block_i
(
// Alu signals from ID stage
//TODO: hot encoding
.alu_operator_i ( alu_operator_ex ), // from ID/EX pipe registers
.alu_operand_a_i ( alu_operand_a_ex ), // from ID/EX pipe registers
.alu_operand_b_i ( alu_operand_b_ex ), // from ID/EX pipe registers
// Multipler
.mult_en_i ( mult_en_ex ), // from ID/EX pipe registers
//.mult_operand_a_i ( mult_operand_a_ex ),
//.mult_operand_b_i ( mult_operand_b_ex ),
.shifter_amt_i ( shifter_amt ),
.alu_adder_result_ex_o ( alu_adder_result_ex ), // from ALU to LSU
.regfile_wdata_ex_o ( regfile_wdata_ex ),
// To IF: Jump and branch target and decision
.jump_target_o ( jump_target_ex ),
.branch_decision_o ( branch_decision )
);
////////////////////////////////////////////////////////////////////////////////////////
// _ ___ _ ____ ____ _____ ___ ____ _____ _ _ _ _ ___ _____ //
// | | / _ \ / \ | _ \ / ___|_ _/ _ \| _ \| ____| | | | | \ | |_ _|_ _| //
// | | | | | |/ _ \ | | | | \___ \ | || | | | |_) | _| | | | | \| || | | | //
// | |__| |_| / ___ \| |_| | ___) || || |_| | _ <| |___ | |_| | |\ || | | | //
// |_____\___/_/ \_\____/ |____/ |_| \___/|_| \_\_____| \___/|_| \_|___| |_| //
// //
////////////////////////////////////////////////////////////////////////////////////////
zeroriscy_load_store_unit load_store_unit_i
(
.clk ( clk ),
.rst_n ( rst_ni ),
//output to data memory
.data_req_o ( data_req_o ),
.data_gnt_i ( data_gnt_i ),
.data_rvalid_i ( data_rvalid_i ),
.data_err_i ( data_err_i ),
.data_addr_o ( data_addr_o ),
.data_we_o ( data_we_o ),
.data_be_o ( data_be_o ),
.data_wdata_o ( data_wdata_o ),
.data_rdata_i ( data_rdata_i ),
// signal from ex stage
.data_we_ex_i ( data_we_ex ),
.data_type_ex_i ( data_type_ex ),
.data_wdata_ex_i ( alu_operand_c_ex ),
.data_reg_offset_ex_i ( data_reg_offset_ex ),
.data_sign_ext_ex_i ( data_sign_ext_ex ), // sign extension
.data_rdata_ex_o ( regfile_wdata_lsu ),
.data_req_ex_i ( data_req_ex ),
.adder_result_ex_i ( alu_adder_result_ex),
.data_misaligned_o ( data_misaligned ),
.misaligned_addr_o ( misaligned_addr ),
// exception signals
.load_err_o ( lsu_load_err ),
.store_err_o ( lsu_store_err ),
// control signals
.data_valid_o ( data_valid_lsu ),
.lsu_ready_ex_o ( lsu_ready_ex ),
.lsu_ready_wb_o ( lsu_ready_wb ),
.busy_o ( lsu_busy )
);
assign wb_valid = lsu_ready_wb;
//////////////////////////////////////
// ____ ____ ____ //
// / ___/ ___|| _ \ ___ //
// | | \___ \| |_) / __| //
// | |___ ___) | _ <\__ \ //
// \____|____/|_| \_\___/ //
// //
// Control and Status Registers //
//////////////////////////////////////
zeroriscy_cs_registers
#(
.N_EXT_CNT ( N_EXT_PERF_COUNTERS )
)
cs_registers_i
(
.clk ( clk ),
.rst_n ( rst_ni ),
// Core and Cluster ID from outside
.core_id_i ( core_id_i ),
.cluster_id_i ( cluster_id_i ),
// Interface to CSRs (SRAM like)
.csr_access_i ( csr_access ),
.csr_addr_i ( csr_addr ),
.csr_wdata_i ( csr_wdata ),
.csr_op_i ( csr_op ),
.csr_rdata_o ( csr_rdata ),
// Interrupt related control signals
.irq_enable_o ( irq_enable ),
.mepc_o ( mepc ),
.pc_if_i ( pc_if ),
.pc_id_i ( pc_id ), // from IF stage
.branch_target_i ( jump_target_ex ), // from ID/EX pipeline
.data_load_event_ex_i ( data_load_event_ex ), // from ID/EX pipeline
.exc_save_if_i ( exc_save_if ),
.exc_save_id_i ( exc_save_id ),
.exc_save_takenbranch_i ( exc_save_takenbranch_ex ),
.exc_restore_i ( exc_restore_id ),
.exc_cause_i ( exc_cause ),
.save_exc_cause_i ( save_exc_cause ),
// performance counter related signals
.id_valid_i ( id_valid ),
.is_compressed_i ( is_compressed_id ),
.is_decoding_i ( is_decoding ),
.imiss_i ( perf_imiss ),
.pc_set_i ( pc_set ),
.jump_i ( perf_jump ),
.branch_i ( branch_in_ex ),
.branch_taken_i ( branch_decision ),
.ld_stall_i ( perf_ld_stall ),
.jr_stall_i ( perf_jr_stall ),
.mem_load_i ( data_req_o & data_gnt_i & (~data_we_o) ),
.mem_store_i ( data_req_o & data_gnt_i & data_we_o ),
.ext_counters_i ( ext_perf_counters_i )
);
// Mux for CSR access through Debug Unit
assign csr_access = (dbg_csr_req == 1'b0) ? csr_access_ex : 1'b1;
assign csr_addr = (dbg_csr_req == 1'b0) ? csr_addr_int : dbg_csr_addr;
assign csr_wdata = (dbg_csr_req == 1'b0) ? alu_operand_a_ex : dbg_csr_wdata;
assign csr_op = (dbg_csr_req == 1'b0) ? csr_op_ex
: (dbg_csr_we == 1'b1 ? CSR_OP_WRITE
: CSR_OP_NONE );
assign csr_addr_int = csr_access_ex ? alu_operand_b_ex[11:0] : '0;
/////////////////////////////////////////////////////////////
// ____ _____ ____ _ _ ____ _ _ _ _ ___ _____ //
// | _ \| ____| __ )| | | |/ ___| | | | | \ | |_ _|_ _| //
// | | | | _| | _ \| | | | | _ | | | | \| || | | | //
// | |_| | |___| |_) | |_| | |_| | | |_| | |\ || | | | //
// |____/|_____|____/ \___/ \____| \___/|_| \_|___| |_| //
// //
/////////////////////////////////////////////////////////////
zeroriscy_debug_unit debug_unit_i
(
.clk ( clk_i ), // always-running clock for debug
.rst_n ( rst_ni ),
// Debug Interface
.debug_req_i ( debug_req_i ),
.debug_gnt_o ( debug_gnt_o ),
.debug_rvalid_o ( debug_rvalid_o ),
.debug_addr_i ( debug_addr_i ),
.debug_we_i ( debug_we_i ),
.debug_wdata_i ( debug_wdata_i ),
.debug_rdata_o ( debug_rdata_o ),
.debug_halt_i ( debug_halt_i ),
.debug_resume_i ( debug_resume_i ),
.debug_halted_o ( debug_halted_o ),
// To/From Core
.settings_o ( dbg_settings ),
.trap_i ( dbg_trap ),
.exc_cause_i ( exc_cause ),
.stall_o ( dbg_stall ),
.dbg_req_o ( dbg_req ),
.dbg_ack_i ( dbg_ack ),
// register file read port
.regfile_rreq_o ( dbg_reg_rreq ),
.regfile_raddr_o ( dbg_reg_raddr ),
.regfile_rdata_i ( dbg_reg_rdata ),
// register file write port
.regfile_wreq_o ( dbg_reg_wreq ),
.regfile_waddr_o ( dbg_reg_waddr ),
.regfile_wdata_o ( dbg_reg_wdata ),
// CSR read/write port
.csr_req_o ( dbg_csr_req ),
.csr_addr_o ( dbg_csr_addr ),
.csr_we_o ( dbg_csr_we ),
.csr_wdata_o ( dbg_csr_wdata ),
.csr_rdata_i ( csr_rdata ),
// signals for PPC and NPC
.pc_if_i ( pc_if ), // from IF stage
.pc_id_i ( pc_id ), // from IF stage
.data_load_event_i ( data_load_event_ex ),
.instr_valid_id_i ( instr_valid_id ),
.sleeping_i ( sleeping ),
.branch_in_ex_i ( branch_in_ex ),
.branch_taken_i ( branch_decision ),
.jump_addr_o ( dbg_jump_addr ), // PC from debug unit
.jump_req_o ( dbg_jump_req ) // set PC to new value
);
`ifdef TRACE_EXECUTION
zeroriscy_tracer zeroriscy_tracer_i
(
.clk ( clk_i ), // always-running clock for tracing
.rst_n ( rst_ni ),
.fetch_enable ( fetch_enable_i ),
.core_id ( core_id_i ),
.cluster_id ( cluster_id_i ),
.pc ( id_stage_i.pc_id_i ),
.instr ( id_stage_i.instr ),
.compressed ( id_stage_i.is_compressed_i ),
.id_valid ( id_stage_i.id_valid_o ),
.is_decoding ( id_stage_i.is_decoding_o ),
.pipe_flush ( id_stage_i.controller_i.pipe_flush_i ),
.rs1_value ( id_stage_i.operand_a_fw_id ),
.rs2_value ( id_stage_i.operand_b_fw_id ),
.rs3_value ( id_stage_i.alu_operand_c ),
.rs2_value_vec ( id_stage_i.alu_operand_b ),
.ex_valid ( ),
.ex_reg_addr ( id_stage_i.regfile_waddr_mux ),
.ex_reg_we ( id_stage_i.regfile_we_mux ),
.ex_reg_wdata ( id_stage_i.regfile_wdata_mux ),
.data_valid_lsu ( data_valid_lsu ),
.ex_data_addr ( data_addr_o ),
.ex_data_req ( data_req_o ),
.ex_data_gnt ( data_gnt_i ),
.ex_data_we ( data_we_o ),
// use id_stage_i.regfile_wdata_mux
.ex_data_wdata ( data_wdata_o ),
.wb_bypass ( branch_in_ex_o ),
.wb_valid ( ),
.wb_reg_addr ( ),
.wb_reg_we ( ),
.wb_reg_wdata ( regfile_wdata_lsu ),
.imm_u_type ( id_stage_i.imm_u_type ),
.imm_uj_type ( id_stage_i.imm_uj_type ),
.imm_i_type ( id_stage_i.imm_i_type ),
.imm_iz_type ( id_stage_i.imm_iz_type[11:0] ),
.imm_z_type ( id_stage_i.imm_z_type ),
.imm_s_type ( id_stage_i.imm_s_type ),
.imm_sb_type ( id_stage_i.imm_sb_type ),
.imm_s2_type ( id_stage_i.imm_s2_type ),
.imm_s3_type ( id_stage_i.imm_s3_type ),
.imm_vs_type ( id_stage_i.imm_vs_type ),
.imm_vu_type ( id_stage_i.imm_vu_type ),
.imm_clip_type ( id_stage_i.instr_rdata_i[11:7] )
);
`endif
`ifdef SIMCHECKER
logic is_interrupt;
assign is_interrupt = (pc_mux_id == PC_EXCEPTION) && (exc_pc_mux_id == EXC_PC_IRQ);
zeroriscy_simchecker zeroriscy_simchecker_i
(
.clk ( clk_i ), // always-running clock for tracing
.rst_n ( rst_ni ),
.fetch_enable ( fetch_enable_i ),
.boot_addr ( boot_addr_i ),
.core_id ( core_id_i ),
.cluster_id ( cluster_id_i ),
.instr_compressed ( if_stage_i.fetch_rdata[15:0] ),
.pc_set ( pc_set ),
.if_valid ( if_valid ),
.pc ( id_stage_i.pc_id_i ),
.instr ( id_stage_i.instr ),
.is_compressed ( is_compressed_id ),
.id_valid ( id_stage_i.id_valid_o ),
.is_decoding ( id_stage_i.is_decoding_o ),
.is_illegal ( id_stage_i.illegal_insn_dec ),
.is_interrupt ( is_interrupt ),
.irq_no ( irq_id_i ),
.pipe_flush ( id_stage_i.controller_i.pipe_flush_i ),
.ex_valid ( ),
.ex_reg_addr ( id_stage_i.registers_i.waddr_b_i ),
.ex_reg_we ( id_stage_i.registers_i.we_a_i ),
.ex_reg_wdata ( id_stage_i.registers_i.wdata_b_i ),
.ex_data_addr ( data_addr_o ),
.ex_data_req ( data_req_o ),
.ex_data_gnt ( data_gnt_i ),
.ex_data_we ( data_we_o ),
.ex_data_wdata ( data_wdata_o ),
.wb_bypass ( ex_block_i.branch_in_ex_i ),
.lsu_misaligned ( data_misaligned ),
.wb_valid ( wb_valid ),
.wb_reg_addr ( id_stage_i.registers_i.waddr_a_i ),
.wb_reg_we ( id_stage_i.registers_i.we_a_i ),
.wb_reg_wdata ( id_stage_i.registers_i.wdata_a_i ),
.wb_data_rvalid ( data_rvalid_i ),
.wb_data_rdata ( data_rdata_i )
);
`endif
endmodule

422
mult_h.sv Normal file
View file

@ -0,0 +1,422 @@
// Copyright 2015 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////
// Engineer: Matthias Baer - baermatt@student.ethz.ch //
// //
// Additional contributions by: //
// Andreas Traber - atraber@student.ethz.ch //
// //
// Design Name: Subword multiplier and MAC //
// Project Name: RI5CY //
// Language: SystemVerilog //
// //
// Description: Advanced MAC unit for PULP. //
// //
////////////////////////////////////////////////////////////////////////////////
import zeroriscy_defines::*;
// Byte-lane part-selects for a 32-bit operand, used as e.g. op_a_i[`OP_LL].
// Naming: first letter = half-word (L = low, H = high),
//         second letter = byte inside that half-word (L = low, H = high).
// OP_LL = byte 0 (least significant) ... OP_HH = byte 3 (most significant).
`define OP_LL 7:0
`define OP_LH 15:8
`define OP_HL 23:16
`define OP_HH 31:24
////////////////////////////////////////////////////////////////////////////////
// zeroriscy_mult_h
//
// Iterative 32x32 multiplier built around a single 9x9 signed multiplier.
// Both operands are split in four bytes:
//
//   a = { ahh, ahl, alh, all }    b = { bhh, bhl, blh, bll }
//         31:24 23:16 15:8 7:0
//
// Every enabled clock cycle one byte of `a` is multiplied with one byte of
// `b`, the 64-bit (sign-extended) product is shifted to its weight and added
// to a running 64-bit accumulator {accum_high_q, accum_low_q}.
//
//   * operator_i == MUL_L : 10 steps (STEP0..STEP9), only the partial
//     products that reach bits [31:0] are visited. Result = low word.
//   * operator_i == MUL_H : 16 steps (STEP0..STEP15), all partial products
//     are visited. Result = high word.
//
// The low 32 bits are accumulated with a local 33-bit adder. The high
// 32 bits are NOT added in this module: the two addends are exported on
// alu_operand_a_o / alu_operand_b_o together with the carry of the low adder
// (carry_out_mul_o) and the sum is read back on alu_adder_i.
// NOTE(review): presumably the instantiating stage feeds these into the
// shared ALU adder as a + b + carry - confirm against the caller.
//
// Parameters
//   ADD_TYPE, ADD_CYCL : not referenced anywhere inside this module.
//
// Ports
//   clk, rst_n       : clock / asynchronous active-low reset
//   mult_en_i        : registers (state + accumulators) only update while high
//   operator_i       : MUL_L or MUL_H (constants from zeroriscy_defines)
//   signed_mode_i    : currently unused, see NOTE(review) below
//   op_a_i, op_b_i   : 32-bit operands; they are re-read every step
//   alu_adder_i      : externally computed high-word sum
//   alu_operand_a_o  : high word of the shifted partial product
//   alu_operand_b_o  : high-word accumulator (forced to 0 in STEP0 of MUL_H)
//   mult_result_o    : alu_adder_i for MUL_H, local low-word sum otherwise
//   carry_out_mul_o  : carry out (bit 32) of the low-word addition
//   ready_o          : high during the last step (STEP9 for MUL_L, STEP15 for
//                      MUL_H); mult_result_o carries the final value then
//
// NOTE(review): signed_mode_i is never read. The most significant byte of
// each operand is always extended with its own bit 31, i.e. both operands are
// always treated as signed. That is irrelevant for MUL_L but means MUL_H can
// only produce a signed x signed high word - confirm whether unsigned /
// mixed-sign high multiplies are expected from this unit.
////////////////////////////////////////////////////////////////////////////////
module zeroriscy_mult_h
#(
  parameter ADD_TYPE     = 0, //0 shared
  parameter ADD_CYCL     = 1  //if ADD_CYCL is 1, ADD_TYPE must be 0
)
(
  input  logic        clk,
  input  logic        rst_n,
  input  logic        mult_en_i,
  input  logic        operator_i,
  input  logic [1:0]  signed_mode_i,
  input  logic [31:0] op_a_i,
  input  logic [31:0] op_b_i,
  input  logic [31:0] alu_adder_i,
  output logic [31:0] alu_operand_a_o,
  output logic [31:0] alu_operand_b_o,
  output logic [31:0] mult_result_o,
  output logic        carry_out_mul_o,
  output logic        ready_o
);

  // One state per partial product
  enum logic [3:0] { STEP0, STEP1, STEP2, STEP3, STEP4, STEP5, STEP6, STEP7, STEP8, STEP9, STEP10, STEP11, STEP12, STEP13, STEP14, STEP15 } mult_state_q, mult_state_n;
  // Weight (left shift) applied to the partial product of the current step
  enum logic [2:0] { MULT_00_SHIFT, MULT_08_SHIFT, MULT_16_SHIFT, MULT_24_SHIFT, MULT_32_SHIFT, MULT_40_SHIFT, MULT_48_SHIFT } shift_mul;

  logic [31:0] accum_high_q, accum_low_q;   // registered 64-bit accumulator
  logic [31:0] accum_high, accum_low;       // accumulator as seen by the adders (0 in STEP0)
  logic [31:0] res_mul_low, res_mul_high;   // shifted partial product, split in words

  logic [ 7:0] mult_op_a;                   // byte of op_a_i selected for this step
  logic [ 7:0] mult_op_b;                   // byte of op_b_i selected for this step
  logic [63:0] mult_extended;               // 9x9 signed product, sign-extended to 64 bits
  logic [63:0] mult_shifted;                // mult_extended moved to its weight

  logic [32:0] res_adder_low_ext;           // low-word sum including carry out
  logic [31:0] res_adder_low;
  logic [31:0] res_adder_high;

  logic        sign_a,sign_b;               // 9th (sign) bit prepended to the selected bytes
  logic        carry_out_shortadder;

  // (the unused and undriven do_mul_n / do_mul_q signals were removed)

  // Both factors are signed 9-bit values, so the product is sign-extended to
  // the 64-bit width of the assignment target.
  assign mult_extended        = $signed({sign_a,mult_op_a})*$signed({sign_b,mult_op_b});

  assign res_mul_low          = mult_shifted[31:0 ];
  assign res_mul_high         = mult_shifted[63:32];

  // Low word: local 33-bit addition, bit 32 is the carry into the high word
  assign res_adder_low_ext    = res_mul_low + accum_low;
  assign res_adder_low        = res_adder_low_ext[31:0];
  assign carry_out_shortadder = res_adder_low_ext[32];

  // High word: addition is performed outside of this module
  assign res_adder_high       = alu_adder_i;
  assign alu_operand_a_o      = res_mul_high;
  assign alu_operand_b_o      = accum_high;
  assign carry_out_mul_o      = carry_out_shortadder;

  assign mult_result_o        = operator_i == MUL_H ? res_adder_high : res_adder_low;

  // State and accumulator registers. Nothing moves unless mult_en_i is high.
  always_ff @(posedge clk or negedge rst_n) begin : proc_mult_state_q
    if(~rst_n) begin
      mult_state_q <= STEP0;
      accum_high_q <= '0;
      accum_low_q  <= '0;
    end else begin
      if(mult_en_i) begin
        mult_state_q <= mult_state_n;
        accum_low_q  <= res_adder_low;
        // the high word is only tracked for MUL_H
        if(operator_i == MUL_H)
          accum_high_q <= res_adder_high;
      end
    end
  end

  // Step sequencer: selects the two bytes, their sign bits and the weight of
  // the partial product for the current step.
  always_comb
  begin : mult_fsm
    ready_o      = 1'b0;
    accum_low    = accum_low_q;
    accum_high   = accum_high_q;

    // Defaults for every signal driven by this block. All regular branches
    // below overwrite them; they only take effect when operator_i matches
    // neither MUL_L nor MUL_H, and guarantee that the block is purely
    // combinational (no inferred latches, no stale values) in that case.
    mult_op_a    = op_a_i[`OP_LL];
    mult_op_b    = op_b_i[`OP_LL];
    sign_a       = 1'b0;
    sign_b       = 1'b0;
    shift_mul    = MULT_00_SHIFT;
    mult_state_n = mult_state_q;

    unique case (mult_state_q)

      STEP0: begin
        // first step: start from a cleared accumulator
        unique case(operator_i)
          MUL_L: begin
            //all*bll
            mult_op_a  = op_a_i[`OP_LL];
            mult_op_b  = op_b_i[`OP_LL];
            sign_a     = 1'b0;
            sign_b     = 1'b0;
            accum_low  = '0;
            shift_mul  = MULT_00_SHIFT;
          end
          MUL_H: begin
            //ahl*bll<<16
            mult_op_a  = op_a_i[`OP_HL];
            mult_op_b  = op_b_i[`OP_LL];
            sign_a     = 1'b0;
            sign_b     = 1'b0;
            accum_low  = '0;
            accum_high = '0;
            shift_mul  = MULT_16_SHIFT;
          end
        endcase
        mult_state_n = STEP1;
      end

      STEP1: begin
        unique case(operator_i)
          MUL_L: begin
            //all*blh<<8
            mult_op_a = op_a_i[`OP_LL];
            mult_op_b = op_b_i[`OP_LH];
            sign_a    = 1'b0;
            sign_b    = 1'b0;
            shift_mul = MULT_08_SHIFT;
          end
          MUL_H: begin
            //ahh*bll<<24
            mult_op_a = op_a_i[`OP_HH];
            mult_op_b = op_b_i[`OP_LL];
            sign_a    = op_a_i[31];
            sign_b    = 1'b0;
            shift_mul = MULT_24_SHIFT;
          end
        endcase
        mult_state_n = STEP2;
      end

      STEP2: begin
        unique case(operator_i)
          MUL_L: begin
            //all*bhl<<16
            mult_op_a = op_a_i[`OP_LL];
            mult_op_b = op_b_i[`OP_HL];
            sign_a    = 1'b0;
            sign_b    = 1'b0;
            shift_mul = MULT_16_SHIFT;
          end
          MUL_H: begin
            //ahl*blh<<24
            mult_op_a = op_a_i[`OP_HL];
            mult_op_b = op_b_i[`OP_LH];
            sign_a    = 1'b0;
            sign_b    = 1'b0;
            shift_mul = MULT_24_SHIFT;
          end
        endcase
        mult_state_n = STEP3;
      end

      STEP3: begin
        unique case(operator_i)
          MUL_L: begin
            //all*bhh<<24
            mult_op_a = op_a_i[`OP_LL];
            mult_op_b = op_b_i[`OP_HH];
            sign_a    = 1'b0;
            sign_b    = op_b_i[31];
            shift_mul = MULT_24_SHIFT;
          end
          MUL_H: begin
            //ahh*blh<<32
            mult_op_a = op_a_i[`OP_HH];
            mult_op_b = op_b_i[`OP_LH];
            sign_a    = op_a_i[31];
            sign_b    = 1'b0;
            shift_mul = MULT_32_SHIFT;
          end
        endcase
        mult_state_n = STEP4;
      end

      STEP4: begin
        unique case(operator_i)
          MUL_L: begin
            //alh*bll<<8
            mult_op_a = op_a_i[`OP_LH];
            mult_op_b = op_b_i[`OP_LL];
            sign_a    = 1'b0;
            sign_b    = 1'b0;
            shift_mul = MULT_08_SHIFT;
          end
          MUL_H: begin
            //all*bhl<<16
            mult_op_a = op_a_i[`OP_LL];
            mult_op_b = op_b_i[`OP_HL];
            sign_a    = 1'b0;
            sign_b    = 1'b0;
            shift_mul = MULT_16_SHIFT;
          end
        endcase
        mult_state_n = STEP5;
      end

      STEP5: begin
        unique case(operator_i)
          MUL_L: begin
            //alh*blh<<16
            mult_op_a = op_a_i[`OP_LH];
            mult_op_b = op_b_i[`OP_LH];
            sign_a    = 1'b0;
            sign_b    = 1'b0;
            shift_mul = MULT_16_SHIFT;
          end
          MUL_H: begin
            //alh*bhl<<24
            mult_op_a = op_a_i[`OP_LH];
            mult_op_b = op_b_i[`OP_HL];
            sign_a    = 1'b0;
            sign_b    = 1'b0;
            shift_mul = MULT_24_SHIFT;
          end
        endcase
        mult_state_n = STEP6;
      end

      STEP6: begin
        unique case(operator_i)
          MUL_L: begin
            //alh*bhl<<24
            mult_op_a = op_a_i[`OP_LH];
            mult_op_b = op_b_i[`OP_HL];
            sign_a    = 1'b0;
            sign_b    = 1'b0;
            shift_mul = MULT_24_SHIFT;
          end
          MUL_H: begin
            //all*bhh<<24
            mult_op_a = op_a_i[`OP_LL];
            mult_op_b = op_b_i[`OP_HH];
            sign_a    = 1'b0;
            sign_b    = op_b_i[31];
            shift_mul = MULT_24_SHIFT;
          end
        endcase
        mult_state_n = STEP7;
      end

      STEP7: begin
        unique case(operator_i)
          MUL_L: begin
            //ahl*bll<<16
            mult_op_a = op_a_i[`OP_HL];
            mult_op_b = op_b_i[`OP_LL];
            sign_a    = 1'b0;
            sign_b    = 1'b0;
            shift_mul = MULT_16_SHIFT;
          end
          MUL_H: begin
            //alh*bhh<<32
            mult_op_a = op_a_i[`OP_LH];
            mult_op_b = op_b_i[`OP_HH];
            sign_a    = 1'b0;
            sign_b    = op_b_i[31];
            shift_mul = MULT_32_SHIFT;
          end
        endcase
        mult_state_n = STEP8;
      end

      STEP8: begin
        unique case(operator_i)
          MUL_L: begin
            //ahl*blh<<24
            mult_op_a = op_a_i[`OP_HL];
            mult_op_b = op_b_i[`OP_LH];
            sign_a    = 1'b0;
            sign_b    = 1'b0;
            shift_mul = MULT_24_SHIFT;
          end
          MUL_H: begin
            //ahl*bhl<<32
            mult_op_a = op_a_i[`OP_HL];
            mult_op_b = op_b_i[`OP_HL];
            sign_a    = 1'b0;
            sign_b    = 1'b0;
            shift_mul = MULT_32_SHIFT;
          end
        endcase
        mult_state_n = STEP9;
      end

      STEP9: begin
        unique case(operator_i)
          MUL_L: begin
            // last partial product of MUL_L: result is valid in this cycle
            //ahh*bll<<24
            mult_op_a    = op_a_i[`OP_HH];
            mult_op_b    = op_b_i[`OP_LL];
            sign_a       = op_a_i[31];
            sign_b       = 1'b0;
            shift_mul    = MULT_24_SHIFT;
            mult_state_n = STEP0;
            ready_o      = 1'b1;
          end
          MUL_H: begin
            //ahl*bhh<<40
            mult_op_a    = op_a_i[`OP_HL];
            mult_op_b    = op_b_i[`OP_HH];
            sign_a       = 1'b0;
            sign_b       = op_b_i[31];
            shift_mul    = MULT_40_SHIFT;
            mult_state_n = STEP10;
          end
        endcase
      end

      STEP10: begin
        //only MUL_H here
        //ahh*bhl<<40
        mult_op_a    = op_a_i[`OP_HH];
        mult_op_b    = op_b_i[`OP_HL];
        sign_a       = op_a_i[31];
        sign_b       = 1'b0;
        shift_mul    = MULT_40_SHIFT;
        mult_state_n = STEP11;
      end

      STEP11: begin
        //only MUL_H here
        //ahh*bhh<<48
        mult_op_a    = op_a_i[`OP_HH];
        mult_op_b    = op_b_i[`OP_HH];
        sign_a       = op_a_i[31];
        sign_b       = op_b_i[31];
        shift_mul    = MULT_48_SHIFT;
        mult_state_n = STEP12;
      end

      STEP12: begin
        //only MUL_H here
        //all*bll
        mult_op_a    = op_a_i[`OP_LL];
        mult_op_b    = op_b_i[`OP_LL];
        sign_a       = 1'b0;
        sign_b       = 1'b0;
        shift_mul    = MULT_00_SHIFT;
        mult_state_n = STEP13;
      end

      STEP13: begin
        //only MUL_H here
        //all*blh<<8
        mult_op_a    = op_a_i[`OP_LL];
        mult_op_b    = op_b_i[`OP_LH];
        sign_a       = 1'b0;
        sign_b       = 1'b0;
        shift_mul    = MULT_08_SHIFT;
        mult_state_n = STEP14;
      end

      STEP14: begin
        //only MUL_H here
        //alh*bll<<8
        mult_op_a    = op_a_i[`OP_LH];
        mult_op_b    = op_b_i[`OP_LL];
        sign_a       = 1'b0;
        sign_b       = 1'b0;
        shift_mul    = MULT_08_SHIFT;
        mult_state_n = STEP15;
      end

      STEP15: begin
        //only MUL_H here
        // last partial product of MUL_H: result is valid in this cycle
        //alh*blh<<16
        mult_op_a    = op_a_i[`OP_LH];
        mult_op_b    = op_b_i[`OP_LH];
        sign_a       = 1'b0;
        sign_b       = 1'b0;
        shift_mul    = MULT_16_SHIFT;
        mult_state_n = STEP0;
        ready_o      = 1'b1;
      end

      default: begin
        //all*bll
        mult_op_a    = op_a_i[ 7:0 ];
        mult_op_b    = op_b_i[ 7:0 ];
        sign_a       = 1'b0;
        sign_b       = 1'b0;
        shift_mul    = MULT_00_SHIFT;
        mult_state_n = STEP1;
      end
    endcase // mult_state_q
  end

  // Moves the sign-extended partial product to the weight selected by the FSM
  always_comb
  begin
    unique case (shift_mul)
      MULT_00_SHIFT:
        mult_shifted = mult_extended;
      MULT_08_SHIFT:
        mult_shifted = mult_extended << 8;
      MULT_16_SHIFT:
        mult_shifted = mult_extended << 16;
      MULT_24_SHIFT:
        mult_shifted = mult_extended << 24;
      MULT_32_SHIFT:
        mult_shifted = mult_extended << 32;
      MULT_40_SHIFT:
        mult_shifted = mult_extended << 40;
      MULT_48_SHIFT:
        mult_shifted = mult_extended << 48;
      default:
        mult_shifted = mult_extended;
    endcase
  end

endmodule // zeroriscy_mult_h

433
mult_hq.sv Normal file
View file

@ -0,0 +1,433 @@
// Copyright 2015 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////
// Engineer: Matthias Baer - baermatt@student.ethz.ch //
// //
// Additional contributions by: //
// Andreas Traber - atraber@student.ethz.ch //
// //
// Design Name: Subword multiplier and MAC //
// Project Name: RI5CY //
// Language: SystemVerilog //
// //
// Description: Advanced MAC unit for PULP. //
// //
////////////////////////////////////////////////////////////////////////////////
import zeroriscy_defines::*;
// Byte-lane part-selects for a 32-bit operand, used as e.g. op_a_i[`OP_LL].
// Naming: first letter = half-word (L = low, H = high),
//         second letter = byte inside that half-word (L = low, H = high).
// OP_LL = byte 0 (least significant) ... OP_HH = byte 3 (most significant).
`define OP_LL 7:0
`define OP_LH 15:8
`define OP_HL 23:16
`define OP_HH 31:24
////////////////////////////////////////////////////////////////////////////////
// zeroriscy_mult_hq
//
// Iterative 32x32 multiplier built around a single 9x9 signed multiplier,
// with a register between the multiplier/shifter and the adders.
// Both operands are split in four bytes:
//
//   a = { ahh, ahl, alh, all }    b = { bhh, bhl, blh, bll }
//         31:24 23:16 15:8 7:0
//
// Every step of the sequencer takes TWO enabled clock cycles, selected by the
// toggling flag do_mul_q (1 after reset):
//
//   do_mul_q == 1 : the byte product of the current step is computed, shifted
//                   and stored in {res_mul_high_q, res_mul_low_q};
//                   the step counter holds.
//   do_mul_q == 0 : the stored product is added to the 64-bit accumulator
//                   {accum_high_q, accum_low_q} and the step counter advances.
//
//   * operator_i == MUL_L : 10 steps (STEP0..STEP9), only the partial
//     products that reach bits [31:0] are visited. Result = low word.
//   * operator_i == MUL_H : 16 steps (STEP0..STEP15), all partial products
//     are visited. Result = high word.
//
// The low 32 bits are accumulated with a local 33-bit adder. The high
// 32 bits are NOT added in this module: the two addends are exported on
// alu_operand_a_o / alu_operand_b_o together with the carry of the low adder
// (carry_out_mul_o) and the sum is read back on alu_adder_i.
// NOTE(review): presumably the instantiating stage feeds these into the
// shared ALU adder as a + b + carry - confirm against the caller.
//
// Parameters
//   ADD_TYPE : not referenced anywhere inside this module.
//
// Ports
//   clk, rst_n       : clock / asynchronous active-low reset
//   mult_en_i        : registers only update while high
//   operator_i       : MUL_L or MUL_H (constants from zeroriscy_defines)
//   signed_mode_i    : currently unused, see NOTE(review) below
//   op_a_i, op_b_i   : 32-bit operands; they are re-read every step
//   alu_adder_i      : externally computed high-word sum
//   alu_operand_a_o  : high word of the registered partial product
//   alu_operand_b_o  : high-word accumulator (forced to 0 in STEP0 of MUL_H)
//   mult_result_o    : alu_adder_i for MUL_H, local low-word sum otherwise
//   carry_out_mul_o  : carry out (bit 32) of the low-word addition
//   ready_o          : high in the accumulate cycle (do_mul_q == 0) of the
//                      last step (STEP9 for MUL_L, STEP15 for MUL_H)
//
// NOTE(review): signed_mode_i is never read. The most significant byte of
// each operand is always extended with its own bit 31, i.e. both operands are
// always treated as signed. That is irrelevant for MUL_L but means MUL_H can
// only produce a signed x signed high word - confirm whether unsigned /
// mixed-sign high multiplies are expected from this unit.
////////////////////////////////////////////////////////////////////////////////
module zeroriscy_mult_hq
#(
  parameter ADD_TYPE     = 0 //0 shared
)
(
  input  logic        clk,
  input  logic        rst_n,
  input  logic        mult_en_i,
  input  logic        operator_i,
  input  logic [1:0]  signed_mode_i,
  input  logic [31:0] op_a_i,
  input  logic [31:0] op_b_i,
  input  logic [31:0] alu_adder_i,
  output logic [31:0] alu_operand_a_o,
  output logic [31:0] alu_operand_b_o,
  output logic [31:0] mult_result_o,
  output logic        carry_out_mul_o,
  output logic        ready_o
);

  // One state per partial product
  enum logic [3:0] { STEP0, STEP1, STEP2, STEP3, STEP4, STEP5, STEP6, STEP7, STEP8, STEP9, STEP10, STEP11, STEP12, STEP13, STEP14, STEP15 } mult_state_q, mult_state_n;
  // Weight (left shift) applied to the partial product of the current step
  enum logic [2:0] { MULT_00_SHIFT, MULT_08_SHIFT, MULT_16_SHIFT, MULT_24_SHIFT, MULT_32_SHIFT, MULT_40_SHIFT, MULT_48_SHIFT } shift_mul;

  logic [31:0] accum_high_q, accum_low_q;       // registered 64-bit accumulator
  logic [31:0] accum_high, accum_low;           // accumulator as seen by the adders (0 in STEP0)
  logic [31:0] res_mul_low, res_mul_high;       // shifted partial product, split in words
  logic [31:0] res_mul_low_q, res_mul_high_q;   // registered copy feeding the adders

  logic [ 7:0] mult_op_a;                       // byte of op_a_i selected for this step
  logic [ 7:0] mult_op_b;                       // byte of op_b_i selected for this step
  logic [63:0] mult_extended;                   // 9x9 signed product, sign-extended to 64 bits
  logic [63:0] mult_shifted;                    // mult_extended moved to its weight

  logic [32:0] res_adder_low_ext;               // low-word sum including carry out
  logic [31:0] res_adder_low;
  logic [31:0] res_adder_high;

  logic        sign_a,sign_b;                   // 9th (sign) bit prepended to the selected bytes
  logic        do_mul_n, do_mul_q;              // 1: multiply cycle, 0: accumulate cycle
  logic        carry_out_shortadder;

  // Both factors are signed 9-bit values, so the product is sign-extended to
  // the 64-bit width of the assignment target.
  assign mult_extended        = $signed({sign_a,mult_op_a})*$signed({sign_b,mult_op_b});

  assign res_mul_low          = mult_shifted[31:0 ];
  assign res_mul_high         = mult_shifted[63:32];

  // Low word: local 33-bit addition of the REGISTERED product,
  // bit 32 is the carry into the high word
  assign res_adder_low_ext    = res_mul_low_q + accum_low;
  assign res_adder_low        = res_adder_low_ext[31:0];
  assign carry_out_shortadder = res_adder_low_ext[32];

  // High word: addition is performed outside of this module
  assign res_adder_high       = alu_adder_i;
  assign alu_operand_a_o      = res_mul_high_q;
  assign alu_operand_b_o      = accum_high;
  assign carry_out_mul_o      = carry_out_shortadder;

  assign mult_result_o        = operator_i == MUL_H ? res_adder_high : res_adder_low;

  // State, product and accumulator registers.
  // Nothing moves unless mult_en_i is high.
  always_ff @(posedge clk or negedge rst_n) begin : proc_mult_state_q
    if(~rst_n) begin
      mult_state_q   <= STEP0;
      accum_high_q   <= '0;
      accum_low_q    <= '0;
      res_mul_high_q <= '0;
      res_mul_low_q  <= '0;
      do_mul_q       <= 1'b1;
    end else begin
      if(mult_en_i) begin
        do_mul_q     <= do_mul_n;
        // the step counter only advances at the end of the accumulate cycle
        mult_state_q <= do_mul_q ? mult_state_q : mult_state_n;
        // accumulate cycle: update the low accumulator
        if(~do_mul_q)
          accum_low_q <= res_adder_low;
        // multiply cycle: capture the shifted partial product
        if(do_mul_q) begin
          res_mul_high_q <= res_mul_high;
          res_mul_low_q  <= res_mul_low;
        end
        // the high word is only tracked for MUL_H, in the accumulate cycle
        if(operator_i == MUL_H)
          if(~do_mul_q)
            accum_high_q <= res_adder_high;
      end
    end
  end

  // Step sequencer: selects the two bytes, their sign bits and the weight of
  // the partial product for the current step.
  always_comb
  begin : mult_fsm
    ready_o      = 1'b0;
    accum_low    = accum_low_q;
    accum_high   = accum_high_q;
    do_mul_n     = ~do_mul_q;

    // Defaults for every signal driven by this block. All regular branches
    // below overwrite them; they only take effect when operator_i matches
    // neither MUL_L nor MUL_H, and guarantee that the block is purely
    // combinational (no inferred latches, no stale values) in that case.
    mult_op_a    = op_a_i[`OP_LL];
    mult_op_b    = op_b_i[`OP_LL];
    sign_a       = 1'b0;
    sign_b       = 1'b0;
    shift_mul    = MULT_00_SHIFT;
    mult_state_n = mult_state_q;

    unique case (mult_state_q)

      STEP0: begin
        // first step: start from a cleared accumulator
        unique case(operator_i)
          MUL_L: begin
            //all*bll
            mult_op_a  = op_a_i[`OP_LL];
            mult_op_b  = op_b_i[`OP_LL];
            sign_a     = 1'b0;
            sign_b     = 1'b0;
            accum_low  = '0;
            shift_mul  = MULT_00_SHIFT;
          end
          MUL_H: begin
            //ahl*bll<<16
            mult_op_a  = op_a_i[`OP_HL];
            mult_op_b  = op_b_i[`OP_LL];
            sign_a     = 1'b0;
            sign_b     = 1'b0;
            accum_low  = '0;
            accum_high = '0;
            shift_mul  = MULT_16_SHIFT;
          end
        endcase
        mult_state_n = STEP1;
      end

      STEP1: begin
        unique case(operator_i)
          MUL_L: begin
            //all*blh<<8
            mult_op_a = op_a_i[`OP_LL];
            mult_op_b = op_b_i[`OP_LH];
            sign_a    = 1'b0;
            sign_b    = 1'b0;
            shift_mul = MULT_08_SHIFT;
          end
          MUL_H: begin
            //ahh*bll<<24
            mult_op_a = op_a_i[`OP_HH];
            mult_op_b = op_b_i[`OP_LL];
            sign_a    = op_a_i[31];
            sign_b    = 1'b0;
            shift_mul = MULT_24_SHIFT;
          end
        endcase
        mult_state_n = STEP2;
      end

      STEP2: begin
        unique case(operator_i)
          MUL_L: begin
            //all*bhl<<16
            mult_op_a = op_a_i[`OP_LL];
            mult_op_b = op_b_i[`OP_HL];
            sign_a    = 1'b0;
            sign_b    = 1'b0;
            shift_mul = MULT_16_SHIFT;
          end
          MUL_H: begin
            //ahl*blh<<24
            mult_op_a = op_a_i[`OP_HL];
            mult_op_b = op_b_i[`OP_LH];
            sign_a    = 1'b0;
            sign_b    = 1'b0;
            shift_mul = MULT_24_SHIFT;
          end
        endcase
        mult_state_n = STEP3;
      end

      STEP3: begin
        unique case(operator_i)
          MUL_L: begin
            //all*bhh<<24
            mult_op_a = op_a_i[`OP_LL];
            mult_op_b = op_b_i[`OP_HH];
            sign_a    = 1'b0;
            sign_b    = op_b_i[31];
            shift_mul = MULT_24_SHIFT;
          end
          MUL_H: begin
            //ahh*blh<<32
            mult_op_a = op_a_i[`OP_HH];
            mult_op_b = op_b_i[`OP_LH];
            sign_a    = op_a_i[31];
            sign_b    = 1'b0;
            shift_mul = MULT_32_SHIFT;
          end
        endcase
        mult_state_n = STEP4;
      end

      STEP4: begin
        unique case(operator_i)
          MUL_L: begin
            //alh*bll<<8
            mult_op_a = op_a_i[`OP_LH];
            mult_op_b = op_b_i[`OP_LL];
            sign_a    = 1'b0;
            sign_b    = 1'b0;
            shift_mul = MULT_08_SHIFT;
          end
          MUL_H: begin
            //all*bhl<<16
            mult_op_a = op_a_i[`OP_LL];
            mult_op_b = op_b_i[`OP_HL];
            sign_a    = 1'b0;
            sign_b    = 1'b0;
            shift_mul = MULT_16_SHIFT;
          end
        endcase
        mult_state_n = STEP5;
      end

      STEP5: begin
        unique case(operator_i)
          MUL_L: begin
            //alh*blh<<16
            mult_op_a = op_a_i[`OP_LH];
            mult_op_b = op_b_i[`OP_LH];
            sign_a    = 1'b0;
            sign_b    = 1'b0;
            shift_mul = MULT_16_SHIFT;
          end
          MUL_H: begin
            //alh*bhl<<24
            mult_op_a = op_a_i[`OP_LH];
            mult_op_b = op_b_i[`OP_HL];
            sign_a    = 1'b0;
            sign_b    = 1'b0;
            shift_mul = MULT_24_SHIFT;
          end
        endcase
        mult_state_n = STEP6;
      end

      STEP6: begin
        unique case(operator_i)
          MUL_L: begin
            //alh*bhl<<24
            mult_op_a = op_a_i[`OP_LH];
            mult_op_b = op_b_i[`OP_HL];
            sign_a    = 1'b0;
            sign_b    = 1'b0;
            shift_mul = MULT_24_SHIFT;
          end
          MUL_H: begin
            //all*bhh<<24
            mult_op_a = op_a_i[`OP_LL];
            mult_op_b = op_b_i[`OP_HH];
            sign_a    = 1'b0;
            sign_b    = op_b_i[31];
            shift_mul = MULT_24_SHIFT;
          end
        endcase
        mult_state_n = STEP7;
      end

      STEP7: begin
        unique case(operator_i)
          MUL_L: begin
            //ahl*bll<<16
            mult_op_a = op_a_i[`OP_HL];
            mult_op_b = op_b_i[`OP_LL];
            sign_a    = 1'b0;
            sign_b    = 1'b0;
            shift_mul = MULT_16_SHIFT;
          end
          MUL_H: begin
            //alh*bhh<<32
            mult_op_a = op_a_i[`OP_LH];
            mult_op_b = op_b_i[`OP_HH];
            sign_a    = 1'b0;
            sign_b    = op_b_i[31];
            shift_mul = MULT_32_SHIFT;
          end
        endcase
        mult_state_n = STEP8;
      end

      STEP8: begin
        unique case(operator_i)
          MUL_L: begin
            //ahl*blh<<24
            mult_op_a = op_a_i[`OP_HL];
            mult_op_b = op_b_i[`OP_LH];
            sign_a    = 1'b0;
            sign_b    = 1'b0;
            shift_mul = MULT_24_SHIFT;
          end
          MUL_H: begin
            //ahl*bhl<<32
            mult_op_a = op_a_i[`OP_HL];
            mult_op_b = op_b_i[`OP_HL];
            sign_a    = 1'b0;
            sign_b    = 1'b0;
            shift_mul = MULT_32_SHIFT;
          end
        endcase
        mult_state_n = STEP9;
      end

      STEP9: begin
        unique case(operator_i)
          MUL_L: begin
            // last partial product of MUL_L: result is valid in the
            // accumulate cycle of this step
            //ahh*bll<<24
            mult_op_a    = op_a_i[`OP_HH];
            mult_op_b    = op_b_i[`OP_LL];
            sign_a       = op_a_i[31];
            sign_b       = 1'b0;
            shift_mul    = MULT_24_SHIFT;
            mult_state_n = STEP0;
            ready_o      = ~do_mul_q;
          end
          MUL_H: begin
            //ahl*bhh<<40
            mult_op_a    = op_a_i[`OP_HL];
            mult_op_b    = op_b_i[`OP_HH];
            sign_a       = 1'b0;
            sign_b       = op_b_i[31];
            shift_mul    = MULT_40_SHIFT;
            mult_state_n = STEP10;
          end
        endcase
      end

      STEP10: begin
        //only MUL_H here
        //ahh*bhl<<40
        mult_op_a    = op_a_i[`OP_HH];
        mult_op_b    = op_b_i[`OP_HL];
        sign_a       = op_a_i[31];
        sign_b       = 1'b0;
        shift_mul    = MULT_40_SHIFT;
        mult_state_n = STEP11;
      end

      STEP11: begin
        //only MUL_H here
        //ahh*bhh<<48
        mult_op_a    = op_a_i[`OP_HH];
        mult_op_b    = op_b_i[`OP_HH];
        sign_a       = op_a_i[31];
        sign_b       = op_b_i[31];
        shift_mul    = MULT_48_SHIFT;
        mult_state_n = STEP12;
      end

      STEP12: begin
        //only MUL_H here
        //all*bll
        mult_op_a    = op_a_i[`OP_LL];
        mult_op_b    = op_b_i[`OP_LL];
        sign_a       = 1'b0;
        sign_b       = 1'b0;
        shift_mul    = MULT_00_SHIFT;
        mult_state_n = STEP13;
      end

      STEP13: begin
        //only MUL_H here
        //all*blh<<8
        mult_op_a    = op_a_i[`OP_LL];
        mult_op_b    = op_b_i[`OP_LH];
        sign_a       = 1'b0;
        sign_b       = 1'b0;
        shift_mul    = MULT_08_SHIFT;
        mult_state_n = STEP14;
      end

      STEP14: begin
        //only MUL_H here
        //alh*bll<<8
        mult_op_a    = op_a_i[`OP_LH];
        mult_op_b    = op_b_i[`OP_LL];
        sign_a       = 1'b0;
        sign_b       = 1'b0;
        shift_mul    = MULT_08_SHIFT;
        mult_state_n = STEP15;
      end

      STEP15: begin
        //only MUL_H here
        // last partial product of MUL_H: result is valid in the
        // accumulate cycle of this step
        //alh*blh<<16
        mult_op_a    = op_a_i[`OP_LH];
        mult_op_b    = op_b_i[`OP_LH];
        sign_a       = 1'b0;
        sign_b       = 1'b0;
        shift_mul    = MULT_16_SHIFT;
        mult_state_n = STEP0;
        ready_o      = ~do_mul_q;
      end

      default: begin
        //all*bll
        mult_op_a    = op_a_i[ 7:0 ];
        mult_op_b    = op_b_i[ 7:0 ];
        sign_a       = 1'b0;
        sign_b       = 1'b0;
        shift_mul    = MULT_00_SHIFT;
        mult_state_n = STEP1;
      end
    endcase // mult_state_q
  end

  // Moves the sign-extended partial product to the weight selected by the FSM
  always_comb
  begin
    unique case (shift_mul)
      MULT_00_SHIFT:
        mult_shifted = mult_extended;
      MULT_08_SHIFT:
        mult_shifted = mult_extended << 8;
      MULT_16_SHIFT:
        mult_shifted = mult_extended << 16;
      MULT_24_SHIFT:
        mult_shifted = mult_extended << 24;
      MULT_32_SHIFT:
        mult_shifted = mult_extended << 32;
      MULT_40_SHIFT:
        mult_shifted = mult_extended << 40;
      MULT_48_SHIFT:
        mult_shifted = mult_extended << 48;
      default:
        mult_shifted = mult_extended;
    endcase
  end

endmodule // zeroriscy_mult_hq