mirror of
https://github.com/openhwgroup/cva5.git
synced 2025-04-19 11:44:51 -04:00
commit
f0b92a923a
122 changed files with 11241 additions and 4515 deletions
|
@ -26,11 +26,11 @@ build-toolchain:
|
|||
- mv -f taiga taiga-project/
|
||||
- cd taiga-project
|
||||
- source settings.sh
|
||||
- git clone https://github.com/gcc-mirror/gcc.git --branch releases/gcc-10 --single-branch tool-chain/gcc
|
||||
- git clone https://gitlab.com/sfu-rcl/taiga-picolibc.git --branch master --single-branch tool-chain/picolibc
|
||||
- git clone https://github.com/gcc-mirror/gcc.git --branch releases/gcc-11 --single-branch tool-chain/gcc
|
||||
- git clone https://gitlab.com/sfu-rcl/taiga-picolibc.git --branch main --single-branch tool-chain/picolibc
|
||||
- git clone http://git.veripool.org/git/verilator --branch master --single-branch tool-chain/verilator
|
||||
- git clone https://sourceware.org/git/binutils-gdb.git --branch binutils-2_35-branch --single-branch tool-chain/binutils-gdb
|
||||
- git clone https://sourceware.org/git/newlib-cygwin.git --branch master --single-branch tool-chain/newlib-cygwin
|
||||
- git clone https://github.com/bminor/binutils-gdb.git --branch binutils-2_36-branch --single-branch tool-chain/binutils-gdb
|
||||
- git clone https://github.com/mirror/newlib-cygwin.git --branch master --single-branch tool-chain/newlib-cygwin
|
||||
- git clone https://gitlab.com/sfu-rcl/taiga-embench.git --branch taiga-picolibc --single-branch benchmarks/embench
|
||||
- git clone https://gitlab.com/sfu-rcl/taiga-riscv-compliance.git --branch taiga-sim --single-branch benchmarks/riscv-compliance
|
||||
- git clone https://gitlab.com/sfu-rcl/taiga-dhrystone.git --branch master --single-branch benchmarks/taiga-dhrystone
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# CVA5
|
||||
|
||||
CVA5 is a 32-bit RISC-V processor designed for FPGAs supporting the Multiply/Divide and Atomic extensions (RV32IMA). The processor is written in SystemVerilog and has been designed to be both highly extensible and highly configurable.
|
||||
CVA5 is a 32-bit RISC-V processor designed for FPGAs supporting the Multiply/Divide and Double-precision Floating-Point extensions (RV32IMD). The processor is written in SystemVerilog and has been designed to be both highly extensible and highly configurable.
|
||||
|
||||
|
||||
The CVA5 is derived from the Taiga Project from Simon Fraser University.
|
||||
|
|
|
@ -1,93 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2017-2020 Eric Matthews, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Eric Matthews <ematthew@sfu.ca>
|
||||
*/
|
||||
|
||||
module alu_unit

    import cva5_config::*;
    import riscv_types::*;
    import cva5_types::*;

    (
        input logic clk,
        input logic rst,
        unit_issue_interface.unit issue,
        input alu_inputs_t alu_inputs,
        unit_writeback_interface.unit wb
    );

    //Single-cycle integer ALU: add/sub, set-less-than, bitwise logic, shifts
    //and a pre-computed constant. The unit is purely combinational; clk and
    //rst are not used inside this module.

    logic[XLEN:0] add_sub_result;   //XLEN+1 bits: bit XLEN is the bit selected for ALU_SLT
    logic add_sub_carry_in;         //discarded LSB of the widened addition below
    logic[XLEN:0] adder_in1;
    logic[XLEN:0] adder_in2;
    logic[XLEN-1:0] shift_result;
    logic[XLEN-1:0] result;
    //implementation
    ////////////////////////////////////////////////////

    //Logic ops put through the adder carry chain to reduce resources
    always_comb begin
        //First adder operand: the logic-op result, or in1 for arithmetic ops
        case (alu_inputs.logic_op)
            ALU_LOGIC_XOR : adder_in1 = alu_inputs.in1 ^ alu_inputs.in2;
            ALU_LOGIC_OR : adder_in1 = alu_inputs.in1 | alu_inputs.in2;
            ALU_LOGIC_AND : adder_in1 = alu_inputs.in1 & alu_inputs.in2;
            default : adder_in1 = alu_inputs.in1; //ADD/SUB/SLT/SLTU
        endcase
        //Second adder operand: zero for logic ops (the adder then passes
        //adder_in1 through, presumably with subtract low - TODO confirm),
        //otherwise in2, bitwise inverted when subtracting
        case (alu_inputs.logic_op)
            ALU_LOGIC_XOR,
            ALU_LOGIC_OR,
            ALU_LOGIC_AND : adder_in2 = 0;
            //Replication width follows the adder width (XLEN+1) instead of a literal 33
            default : adder_in2 = alu_inputs.in2 ^ {(XLEN+1){alu_inputs.subtract}};
        endcase
    end

    //Add/Sub ops
    //An extra LSB is appended to both operands: 1'b1 + subtract produces the
    //carry-in needed to complete the two's complement (in1 + ~in2 + 1)
    assign {add_sub_result, add_sub_carry_in} = {adder_in1, 1'b1} + {adder_in2, alu_inputs.subtract};

    //Shift ops
    barrel_shifter shifter (
        .shifter_input(alu_inputs.shifter_in),
        .shift_amount(alu_inputs.shift_amount),
        .arith(alu_inputs.arith),
        .lshift(alu_inputs.lshift),
        .shifted_result(shift_result)
    );

    //Final result selection
    always_comb begin
        case (alu_inputs.alu_op)
            ALU_CONSTANT : result = alu_inputs.constant_adder;
            ALU_ADD_SUB : result = add_sub_result[XLEN-1:0];
            //Set-less-than: top adder bit zero-extended to XLEN bits
            ALU_SLT : result = {{(XLEN-1){1'b0}}, add_sub_result[XLEN]};
            default : result = shift_result; //ALU_SHIFT
        endcase
    end

    ////////////////////////////////////////////////////
    //Output
    assign issue.ready = 1;             //always able to accept a request
    assign wb.rd = result;
    assign wb.done = issue.possible_issue;
    assign wb.id = issue.id;

    ////////////////////////////////////////////////////
    //Assertions

endmodule
|
|
@ -1,115 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2017-2019 Eric Matthews, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Eric Matthews <ematthew@sfu.ca>
|
||||
*/
|
||||
|
||||
module binary_occupancy

    import cva5_config::*;
    import cva5_types::*;

    #(parameter DEPTH = 4)
    (
        input logic clk,
        input logic rst,
        input logic push,
        input logic pop,
        output logic almost_full,
        output logic full,
        output logic empty,
        output logic almost_empty,
        output logic valid
    );

    //Push/pop occupancy tracker.
    //  count : binary counter, +1 on a push-only cycle, -1 on a pop-only cycle
    //  valid : set by a push-only cycle, cleared when a pop-only cycle
    //          observes count == 1
    //  empty : complement of valid
    //NOTE(review): full, almost_full and almost_empty are not driven by any
    //active logic in this module (their registers are commented out below),
    //yet the overflow assertion reads 'full'.

    logic[$clog2(DEPTH)-1:0] count;
    logic push_only;
    logic pop_only;

    //Simultaneous push and pop (or neither) leaves the state unchanged
    assign push_only = push & ~pop;
    assign pop_only = pop & ~push;

    //Occupancy Tracking
    always_ff @ (posedge clk) begin
        if (rst) begin
            count <= 0;
            valid <= 0;
        end
        else if (push_only) begin
            count <= count + 1;
            valid <= 1;
        end
        else if (pop_only) begin
            count <= count - 1;
            valid <= (count != 1);
        end
    end

    // always_ff @ (posedge clk) begin
    //     if (rst)
    //         full <= 0;
    //     else begin
    //         case ({push, pop})
    //             2'b10: full <= (count == DEPTH-2);
    //             2'b01: full <= 0;
    //             default : full <= full;
    //         endcase
    //     end
    // end

    // always_ff @ (posedge clk) begin
    //     if (rst)
    //         almost_full <= 0;
    //     else begin
    //         case ({push, pop})
    //             2'b10: almost_full <= (count == DEPTH-3);
    //             2'b01: almost_full <= (count == DEPTH-1);
    //             default : almost_full <= almost_full;
    //         endcase
    //     end
    // end

    // always_ff @ (posedge clk) begin
    //     if (rst)
    //         almost_empty <= 0;
    //     else begin
    //         case ({push, pop})
    //             2'b10: almost_empty <=(count == 0);
    //             2'b01: almost_empty <= (count == 2);
    //             default : almost_empty <= almost_empty;
    //         endcase
    //     end
    // end

    assign empty = ~valid;

    ////////////////////////////////////////////////////
    //Assertions
    //Outside of reset: no push while full, no pop while empty
    always_ff @ (posedge clk) begin
        assert (rst | ~full | ~push) else $error("overflow");
        assert (rst | ~empty | ~pop) else $error("underflow");
    end

endmodule
|
||||
|
||||
|
|
@ -1,64 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2019 Eric Matthews, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Eric Matthews <ematthew@sfu.ca>
|
||||
*/
|
||||
|
||||
module branch_predictor_ram

    import cva5_config::*;
    import cva5_types::*;

    #(
        parameter C_DATA_WIDTH = 20,
        parameter C_DEPTH = 512
    )
    (
        input logic clk,
        input logic rst,
        input logic [$clog2(C_DEPTH)-1:0] write_addr,
        input logic write_en,
        input logic [$clog2(C_DEPTH)-1:0] read_addr,
        input logic read_en,
        input logic [C_DATA_WIDTH-1:0] write_data,
        output logic [C_DATA_WIDTH-1:0] read_data
    );

    //One-write-port / one-read-port synchronous memory, C_DEPTH entries of
    //C_DATA_WIDTH bits. The ram_style attribute requests a block RAM.
    //rst is not used: contents are zeroed by the initial statement only.
    (* ram_style = "block" *) logic [C_DATA_WIDTH-1:0] branch_ram [C_DEPTH-1:0];

    ////////////////////////////////////////////////////
    //Implementation
    initial branch_ram = '{default: 0};

    always_ff @(posedge clk) begin
        //Write port
        if (write_en) begin
            branch_ram[write_addr] <= write_data;
        end
        //Read port: registered output, holds its value while read_en is low
        if (read_en) begin
            read_data <= branch_ram[read_addr];
        end
    end
    ////////////////////////////////////////////////////
    //End of Implementation
    ////////////////////////////////////////////////////

    ////////////////////////////////////////////////////
    //Assertions

    ////////////////////////////////////////////////////
    //Trace Interface

endmodule
|
|
@ -1,174 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2017-2019 Eric Matthews, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Eric Matthews <ematthew@sfu.ca>
|
||||
*/
|
||||
|
||||
module branch_unit

    import cva5_config::*;
    import riscv_types::*;
    import cva5_types::*;

    # (
        parameter cpu_config_t CONFIG = EXAMPLE_CONFIG
    )

    (
        input logic clk,
        input logic rst,

        unit_issue_interface.unit issue,
        input branch_inputs_t branch_inputs,
        output branch_results_t br_results,
        output logic branch_flush,

        exception_interface.unit exception,

        //Trace signals
        output logic tr_branch_correct,
        output logic tr_branch_misspredict,
        output logic tr_return_correct,
        output logic tr_return_misspredict
    );

    //Issue-stage (combinational) signals
    logic compare_result;
    logic branch_taken;
    logic [31:0] jump_base;
    logic [31:0] jump_pc;
    logic [31:0] new_pc;

    //State captured when the branch issues
    logic branch_issued_r;
    logic branch_taken_ex;
    logic jal_jalr_ex;
    logic is_return;
    logic is_call;
    id_t id_ex;
    logic [31:0] new_pc_ex;
    logic [31:0] pc_ex;

    logic instruction_is_completing;
    logic branch_complete; //declared but not referenced in this module

    ////////////////////////////////////////////////////
    //Implementation
    //Only stall condition is if the following instruction is not valid for pc comparisons.
    //If the next instruction isn't valid, no instruction can be issued anyways, so it
    //is safe to hardcode this to one.
    assign issue.ready = 1;

    //Branch new request is held if the following instruction hasn't arrived at decode/issue yet
    set_clr_reg_with_rst #(.SET_OVER_CLR(1), .WIDTH(1), .RST_VALUE(0)) branch_issued_m (
        .clk(clk),
        .rst(rst),
        .set(issue.new_request),
        .clr(branch_inputs.issue_pc_valid | exception.valid),
        .result(branch_issued_r)
    );

    //To determine if the branch was predicted correctly we need to wait until the
    //subsequent instruction has reached the issue stage
    assign instruction_is_completing = branch_issued_r & branch_inputs.issue_pc_valid;

    ////////////////////////////////////////////////////
    //Branch/Jump target determination
    //Branch comparison and final address calculation
    //are performed in the issue stage
    branch_comparator bc (
        .less_than(branch_inputs.fn3[2]),
        .a(branch_inputs.rs1),
        .b(branch_inputs.rs2),
        .xor_result(branch_inputs.fn3[0]),
        .result(compare_result)
    );
    //Jumps (jal/jalr) are always taken
    assign branch_taken = compare_result | branch_inputs.jal_jalr;

    //Target = base + sign-extended offset; the base is rs1 for jalr, otherwise the issue pc
    assign jump_base = branch_inputs.jalr ? branch_inputs.rs1[31:0] : branch_inputs.issue_pc;
    assign jump_pc = jump_base + 32'(signed'(branch_inputs.pc_offset));
    assign new_pc = branch_taken ? jump_pc : branch_inputs.pc_p4;

    always_ff @(posedge clk) begin
        if (issue.new_request) begin
            id_ex <= issue.id;
            jal_jalr_ex <= branch_inputs.jal_jalr;
            branch_taken_ex <= branch_taken;
            new_pc_ex[31:1] <= new_pc[31:1];
            //Bit 0 of the target is cleared for jalr
            new_pc_ex[0] <= new_pc[0] & ~branch_inputs.jalr;
        end
    end

    ////////////////////////////////////////////////////
    //Exception support
    generate if (CONFIG.INCLUDE_M_MODE) begin : gen_branch_exception
        logic new_exception;

        //Raised when a taken branch issues with bit 1 of its target set
        assign new_exception = new_pc[1] & branch_taken & issue.new_request;

        //valid is held until acknowledged; a new exception takes priority over the ack
        always_ff @(posedge clk) begin
            if (rst)
                exception.valid <= 0;
            else
                exception.valid <= new_exception | (exception.valid & ~exception.ack);
        end

        always_ff @(posedge clk) begin
            if (issue.new_request)
                exception.id <= issue.id;
        end

        assign exception.code = INST_ADDR_MISSALIGNED;
        assign exception.tval = new_pc_ex;
    end
    endgenerate

    ////////////////////////////////////////////////////
    //Predictor support
    //Note: these are captured on possible_issue, not new_request
    always_ff @(posedge clk) begin
        if (issue.possible_issue) begin
            pc_ex <= branch_inputs.issue_pc;
            is_call <= branch_inputs.is_call;
            is_return <= branch_inputs.is_return;
        end
    end

    assign br_results.valid = instruction_is_completing;
    assign br_results.id = id_ex;
    assign br_results.pc = pc_ex;
    assign br_results.target_pc = new_pc_ex;
    assign br_results.branch_taken = branch_taken_ex;
    assign br_results.is_branch = ~jal_jalr_ex;
    assign br_results.is_call = is_call;
    assign br_results.is_return = is_return;

    //Flush when the pc now at issue differs (bits 31:1) from the registered target
    assign branch_flush = instruction_is_completing && (branch_inputs.issue_pc[31:1] != new_pc_ex[31:1]);

    ////////////////////////////////////////////////////
    //End of Implementation
    ////////////////////////////////////////////////////

    ////////////////////////////////////////////////////
    //Assertions

    ////////////////////////////////////////////////////
    //Trace Interface
    //The four trace outputs are only driven when ENABLE_TRACE_INTERFACE is set
    generate if (ENABLE_TRACE_INTERFACE) begin
        assign tr_branch_correct = instruction_is_completing & ~is_return & ~branch_flush;
        assign tr_branch_misspredict = instruction_is_completing & ~is_return & branch_flush;
        assign tr_return_correct = instruction_is_completing & is_return & ~branch_flush;
        assign tr_return_misspredict = instruction_is_completing & is_return & branch_flush;
    end
    endgenerate

endmodule
|
64
core/clz.sv
64
core/clz.sv
|
@ -1,64 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2018 Eric Matthews, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Eric Matthews <ematthew@sfu.ca>
|
||||
*/
|
||||
|
||||
module clz
    (
        input logic [31:0] clz_input,
        output logic [4:0] clz
    );

    //////////////////////////////////////////
    /* Count-leading-zeros for a 32-bit input, computed over eight 4-bit groups
     *  Group 0 is bits 31-28, group 7 is bits 3-0
     *  clz[4:2] come directly from the "group is all zero" flags
     *  clz[1:0] are computed per group, muxed between neighbouring groups,
     *  then selected by clz[4:3]
     *  An all-zero input produces 31
     */
    //////////////////////////////////////////

    logic [7:0] group_is_zero;          //per group: all four bits are zero
    logic [1:0] group_low_bits [7:0];   //per group: low two bits of the count
    logic [1:0] pair_low_bits [3:0];    //per pair of groups: low bits of the first non-zero group

    //Low two count bits, indexed by the upper three bits of a group
    const logic [1:0] low_bits_table [8] = '{2'd3, 2'd2, 2'd1, 2'd1, 2'd0, 2'd0, 2'd0, 2'd0};

    always_comb begin
        for (int g = 0; g < 8; g++) begin
            group_is_zero[g] = ~|clz_input[((7-g)*4) +: 4];
            group_low_bits[g] = low_bits_table[clz_input[((7-g)*4) + 1 +: 3]];
        end

        //upper 16 all zero
        clz[4] = &group_is_zero[3:0];
        //upper 24 zero, or first 8 zero
        clz[3] = clz[4] ? &group_is_zero[5:4] : &group_is_zero[1:0];
        //1, 3, 5 or at least 7 leading all-zero groups
        clz[2] =
            (&group_is_zero[6:0]) |
            (&group_is_zero[4:0] & ~group_is_zero[5]) |
            (&group_is_zero[2:0] & ~group_is_zero[3]) |
            (group_is_zero[0] & ~group_is_zero[1]);

        //Within each pair, use the second group when the first one is all zero
        for (int p = 0; p < 4; p++) begin
            pair_low_bits[p] = group_low_bits[{p[1:0], group_is_zero[2*p]}];
        end

        clz[1:0] = pair_low_bits[clz[4:3]];
    end

endmodule
|
|
@ -22,7 +22,7 @@
|
|||
|
||||
|
||||
|
||||
module byte_en_BRAM
|
||||
module byte_en_bram
|
||||
|
||||
import cva5_config::*;
|
||||
import cva5_types::*;
|
78
core/common_components/clz.sv
Normal file
78
core/common_components/clz.sv
Normal file
|
@ -0,0 +1,78 @@
|
|||
/*
|
||||
* Copyright © 2023 Chris Keilbart, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Chris Keilbart <ckeilbar@sfu.ca>
|
||||
*/
|
||||
|
||||
module clz

    #(
        parameter WIDTH = 32
    )
    (
        input logic[WIDTH-1:0] clz_input,
        output logic[$clog2(WIDTH)-1:0] clz,
        output logic zero
    );

    //Recursive count-leading-zeros.
    //  clz  : number of leading zeros of clz_input
    //  zero : set when every bit of clz_input is zero
    //Based on "Design of Leading Zero Counters on FPGAs" by Perri et al. 2022 (which is optimized for 6-input LUTs)

    //It is possible to unroll this and implement it without recursion
    //However, this significantly hurts readability especially with regards to the clz signal

    localparam TREE_WIDTH = 2**$clog2(WIDTH);           //WIDTH rounded up to a power of two
    localparam HALF_TREE_WIDTH = TREE_WIDTH/2;          //width handled by each subtree
    localparam TREE_CLZ_WIDTH = $clog2(WIDTH)-1;        //count width produced by each subtree
    localparam WIDTH_DIFFERENCE = TREE_WIDTH - WIDTH;   //number of padding bits

    generate if (WIDTH == 2) begin : gen_base_case
        //Base case: two input bits
        assign zero = ~|clz_input;
        assign clz[0] = clz_input[0] & ~clz_input[1];
    end
    else begin : gen_recursive
        logic[TREE_WIDTH-1:0] padded_input;
        logic[TREE_CLZ_WIDTH-1:0] upper_clz;
        logic[TREE_CLZ_WIDTH-1:0] lower_clz;
        logic upper_zero;
        logic lower_zero;

        //Pad input on right with zeros if width is not a power of 2
        assign padded_input = TREE_WIDTH'(clz_input) << WIDTH_DIFFERENCE;

        clz #(.WIDTH(HALF_TREE_WIDTH)) upper_tree (
            .clz_input(padded_input[TREE_WIDTH-1:HALF_TREE_WIDTH]),
            .clz(upper_clz),
            .zero(upper_zero)
        );
        clz #(.WIDTH(HALF_TREE_WIDTH)) lower_tree (
            .clz_input(padded_input[HALF_TREE_WIDTH-1:0]),
            .clz(lower_clz),
            .zero(lower_zero)
        );

        //Combine tree outputs
        //The top count bit is set when the whole upper half is zero; the
        //remaining bits come from the lower subtree in that case, otherwise
        //from the upper subtree
        assign zero = upper_zero & lower_zero;
        assign clz[$clog2(WIDTH)-1] = upper_zero;
        assign clz[TREE_CLZ_WIDTH-1:0] =
            ({TREE_CLZ_WIDTH{~upper_zero}} & upper_clz) |
            ({TREE_CLZ_WIDTH{upper_zero}} & lower_clz);
    end
    endgenerate

endmodule
|
|
@ -32,7 +32,7 @@ module cva5_fifo
|
|||
import cva5_types::*;
|
||||
|
||||
#(
|
||||
parameter DATA_WIDTH = 70,
|
||||
parameter type DATA_TYPE = logic,
|
||||
parameter FIFO_DEPTH = 4
|
||||
)
|
||||
(
|
||||
|
@ -63,7 +63,7 @@ module cva5_fifo
|
|||
//connected as a shift reg for the same resources as a LUTRAM FIFO
|
||||
//but with better timing
|
||||
else if (FIFO_DEPTH == 2) begin : gen_width_two
|
||||
logic [DATA_WIDTH-1:0] shift_reg [FIFO_DEPTH];
|
||||
DATA_TYPE shift_reg [FIFO_DEPTH];
|
||||
logic [LOG2_FIFO_DEPTH:0] inflight_count;
|
||||
////////////////////////////////////////////////////
|
||||
//Occupancy Tracking
|
||||
|
@ -87,8 +87,6 @@ module cva5_fifo
|
|||
assign fifo.data_out = shift_reg[~inflight_count[0]];
|
||||
end
|
||||
else begin : gen_width_3_plus
|
||||
//Force FIFO depth to next power of 2
|
||||
(* ramstyle = "MLAB, no_rw_check" *) logic [DATA_WIDTH-1:0] lut_ram [(2**LOG2_FIFO_DEPTH)];
|
||||
logic [LOG2_FIFO_DEPTH-1:0] write_index;
|
||||
logic [LOG2_FIFO_DEPTH-1:0] read_index;
|
||||
logic [LOG2_FIFO_DEPTH:0] inflight_count;
|
||||
|
@ -102,7 +100,7 @@ module cva5_fifo
|
|||
end
|
||||
|
||||
assign fifo.valid = inflight_count[LOG2_FIFO_DEPTH];
|
||||
assign fifo.full = fifo.valid & ~|inflight_count[LOG2_FIFO_DEPTH-1:0];
|
||||
assign fifo.full = inflight_count == (LOG2_FIFO_DEPTH+1)'(-FIFO_DEPTH);
|
||||
|
||||
lfsr #(.WIDTH(LOG2_FIFO_DEPTH), .NEEDS_RESET(1))
|
||||
lfsr_read_index (
|
||||
|
@ -116,8 +114,8 @@ module cva5_fifo
|
|||
.en(fifo.push),
|
||||
.value(write_index)
|
||||
);
|
||||
|
||||
lutram_1w_1r #(.WIDTH(DATA_WIDTH), .DEPTH(FIFO_DEPTH))
|
||||
//Force FIFO depth to next power of 2
|
||||
lutram_1w_1r #(.DATA_TYPE(DATA_TYPE), .DEPTH(2**LOG2_FIFO_DEPTH))
|
||||
write_port (
|
||||
.clk(clk),
|
||||
.waddr(write_index),
|
||||
|
@ -138,4 +136,4 @@ module cva5_fifo
|
|||
fifo_underflow_assertion:
|
||||
assert property (@(posedge clk) disable iff (rst) fifo.pop |-> fifo.valid) else $error("underflow");
|
||||
|
||||
endmodule
|
||||
endmodule
|
47
core/tag_bank.sv → core/common_components/dual_port_bram.sv
Executable file → Normal file
47
core/tag_bank.sv → core/common_components/dual_port_bram.sv
Executable file → Normal file
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2017-2020 Eric Matthews, Lesley Shannon
|
||||
* Copyright © 2023 Eric Matthews, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
@ -20,46 +20,51 @@
|
|||
* Eric Matthews <ematthew@sfu.ca>
|
||||
*/
|
||||
|
||||
module tag_bank
|
||||
#(
|
||||
|
||||
|
||||
module dual_port_bram
|
||||
|
||||
import cva5_config::*;
|
||||
import cva5_types::*;
|
||||
import riscv_types::*;
|
||||
|
||||
#(
|
||||
parameter WIDTH = 32,
|
||||
parameter LINES = 512
|
||||
parameter LINES = 4096
|
||||
)
|
||||
(
|
||||
input logic clk,
|
||||
input logic rst,
|
||||
|
||||
input logic[$clog2(LINES)-1:0] addr_a,
|
||||
input logic[$clog2(LINES)-1:0] addr_b,
|
||||
input logic en_a,
|
||||
input logic en_b,
|
||||
input logic wen_a,
|
||||
input logic[$clog2(LINES)-1:0] addr_a,
|
||||
input logic[WIDTH-1:0] data_in_a,
|
||||
output logic[WIDTH-1:0] data_out_a,
|
||||
|
||||
input logic en_b,
|
||||
input logic wen_b,
|
||||
input logic [WIDTH-1:0] data_in_a,
|
||||
input logic [WIDTH-1:0] data_in_b,
|
||||
output logic [WIDTH-1:0] data_out_a,
|
||||
output logic [WIDTH-1:0] data_out_b
|
||||
input logic[$clog2(LINES)-1:0] addr_b,
|
||||
input logic[WIDTH-1:0] data_in_b,
|
||||
output logic[WIDTH-1:0] data_out_b
|
||||
);
|
||||
|
||||
(* ram_style = "block", ramstyle = "no_rw_check" *) logic [WIDTH-1:0] tag_entry [LINES];
|
||||
initial tag_entry = '{default: 0};
|
||||
(* ram_style = "block", ramstyle = "no_rw_check" *) logic [WIDTH-1:0] ram [LINES];
|
||||
initial ram = '{default: 0};
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
if (en_a) begin
|
||||
if (wen_a)
|
||||
tag_entry[addr_a] <= data_in_a;
|
||||
else
|
||||
data_out_a <= tag_entry[addr_a];
|
||||
ram[addr_a] <= data_in_a;
|
||||
data_out_a <= ram[addr_a];
|
||||
end
|
||||
end
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
if (en_b) begin
|
||||
if (wen_b)
|
||||
tag_entry[addr_b] <= data_in_b;
|
||||
else
|
||||
data_out_b <= tag_entry[addr_b];
|
||||
ram[addr_b] <= data_in_b;
|
||||
data_out_b <= ram[addr_b];
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
endmodule
|
|
@ -22,7 +22,7 @@
|
|||
|
||||
module lutram_1w_1r
|
||||
#(
|
||||
parameter WIDTH = 32,
|
||||
parameter type DATA_TYPE = logic,
|
||||
parameter DEPTH = 32
|
||||
)
|
||||
(
|
||||
|
@ -32,11 +32,11 @@ module lutram_1w_1r
|
|||
input logic[$clog2(DEPTH)-1:0] raddr,
|
||||
|
||||
input logic ram_write,
|
||||
input logic[WIDTH-1:0] new_ram_data,
|
||||
output logic[WIDTH-1:0] ram_data_out
|
||||
input DATA_TYPE new_ram_data,
|
||||
output DATA_TYPE ram_data_out
|
||||
);
|
||||
|
||||
(* ramstyle = "MLAB, no_rw_check", ram_style = "distributed" *) logic [WIDTH-1:0] ram [DEPTH-1:0];
|
||||
(* ramstyle = "MLAB, no_rw_check", ram_style = "distributed" *) logic [$bits(DATA_TYPE)-1:0] ram [DEPTH-1:0];
|
||||
|
||||
initial ram = '{default: 0};
|
||||
always_ff @ (posedge clk) begin
|
|
@ -25,7 +25,7 @@ module lutram_1w_mr
|
|||
import cva5_config::*;
|
||||
|
||||
#(
|
||||
parameter WIDTH = 32,
|
||||
parameter type DATA_TYPE = logic,
|
||||
parameter DEPTH = 32,
|
||||
parameter NUM_READ_PORTS = 2
|
||||
)
|
||||
|
@ -36,14 +36,14 @@ module lutram_1w_mr
|
|||
input logic[$clog2(DEPTH)-1:0] raddr [NUM_READ_PORTS],
|
||||
|
||||
input logic ram_write,
|
||||
input logic[WIDTH-1:0] new_ram_data,
|
||||
output logic[WIDTH-1:0] ram_data_out [NUM_READ_PORTS]
|
||||
input DATA_TYPE new_ram_data,
|
||||
output DATA_TYPE ram_data_out [NUM_READ_PORTS]
|
||||
);
|
||||
|
||||
//For Xilinx with their wider selection of LUTRAMs, infer a multi-read port LUTRAM
|
||||
//For Intel, build the multi-read port ram from simple-dual-port LUTRAMs
|
||||
generate if (FPGA_VENDOR == XILINX) begin : xilinx_gen
|
||||
logic [WIDTH-1:0] ram [DEPTH-1:0];
|
||||
logic [$bits(DATA_TYPE)-1:0] ram [DEPTH-1:0];
|
||||
|
||||
initial ram = '{default: 0};
|
||||
always_ff @ (posedge clk) begin
|
||||
|
@ -61,7 +61,7 @@ end
|
|||
else if (FPGA_VENDOR == INTEL) begin : intel_gen
|
||||
|
||||
for (genvar i = 0; i < NUM_READ_PORTS; i++) begin : lutrams
|
||||
lutram_1w_1r #(.WIDTH(WIDTH), .DEPTH(DEPTH))
|
||||
lutram_1w_1r #(.DATA_TYPE(DATA_TYPE), .DEPTH(DEPTH))
|
||||
write_port (
|
||||
.clk(clk),
|
||||
.waddr(waddr),
|
|
@ -51,7 +51,7 @@ module toggle_memory
|
|||
assign new_ram_data = toggle ^ _read_data[0];
|
||||
|
||||
lutram_1w_mr #(
|
||||
.WIDTH(1),
|
||||
.DATA_TYPE(logic),
|
||||
.DEPTH(DEPTH),
|
||||
.NUM_READ_PORTS(NUM_READ_PORTS+1)
|
||||
)
|
|
@ -28,9 +28,7 @@ module toggle_memory_set
|
|||
# (
|
||||
parameter DEPTH = 64,
|
||||
parameter NUM_WRITE_PORTS = 3,
|
||||
parameter NUM_READ_PORTS = 2,
|
||||
parameter WRITE_INDEX_FOR_RESET = 0,
|
||||
parameter READ_INDEX_FOR_RESET = 0
|
||||
parameter NUM_READ_PORTS = 2
|
||||
)
|
||||
(
|
||||
input logic clk,
|
|
@ -84,7 +84,6 @@ module cva5_wrapper_xilinx
|
|||
avalon_interface m_avalon ();
|
||||
wishbone_interface dwishbone ();
|
||||
wishbone_interface iwishbone ();
|
||||
trace_outputs_t tr;
|
||||
logic timer_interrupt;
|
||||
logic interrupt;
|
||||
|
732
core/csr_unit.sv
732
core/csr_unit.sv
|
@ -1,732 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2017-2020 Eric Matthews, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Eric Matthews <ematthew@sfu.ca>
|
||||
*/
|
||||
|
||||
////////////////////////////////////////////////////
//CSR unit
//
//Accepts one CSR instruction at a time through the issue interface and
//registers its inputs.  The request is committed once its ID matches
//retire_ids[0]; on commit the addressed CSR value is captured for write-back
//and the new CSR value is computed (write / set / clear).  One cycle later
//mwrite/swrite enable the actual register update.
//
//The unit also holds the machine-mode trap state (mstatus, mtvec, mepc,
//mcause, mtval, mip/mie, delegation registers), part of the supervisor state
//(stvec, satp, sscratch) and the mcycle/minstret counters.
module csr_unit

    import cva5_config::*;
    import riscv_types::*;
    import cva5_types::*;
    import csr_types::*;

    # (
        parameter cpu_config_t CONFIG = EXAMPLE_CONFIG
    )

    (
        input logic clk,
        input logic rst,

        //Unit Interfaces
        unit_issue_interface.unit issue,
        input csr_inputs_t csr_inputs,
        unit_writeback_interface.unit wb,

        //Privilege
        output logic [1:0] current_privilege,

        //GC
        input logic interrupt_taken,
        output logic interrupt_pending,
        output logic processing_csr,

        //TLB and MMU
        output logic tlb_on,
        output logic [ASIDLEN-1:0] asid,

        //MMUs
        mmu_interface.csr immu,
        mmu_interface.csr dmmu,

        //CSR exception interface
        input exception_packet_t exception,
        output logic [31:0] exception_target_pc,

        //exception return
        input logic mret,
        input logic sret,
        output logic [31:0] epc,

        //Retire
        input retire_packet_t retire,
        input id_t retire_ids [RETIRE_PORTS],

        //External
        input interrupt_t s_interrupt,
        input interrupt_t m_interrupt
    );

    logic busy;
    logic commit;
    logic commit_in_progress;

    csr_inputs_t csr_inputs_r;

    privilege_t privilege_level;
    privilege_t next_privilege_level;

    //write_logic
    logic supervisor_write;
    logic machine_write;

    logic [XLEN-1:0] selected_csr;
    logic [XLEN-1:0] selected_csr_r;

    logic [31:0] updated_csr;

    logic swrite;
    logic mwrite;

    //True in the cycle a machine-level CSR write is applied and the registered
    //request address has the same sub-address as addr
    function logic mwrite_en (input csr_addr_t addr);
        return mwrite & (csr_inputs_r.addr.sub_addr == addr.sub_addr);
    endfunction
    //Supervisor-level counterpart of mwrite_en
    function logic swrite_en (input csr_addr_t addr);
        return swrite & (csr_inputs_r.addr.sub_addr == addr.sub_addr);
    endfunction
    ////////////////////////////////////////////////////
    //Implementation
    assign processing_csr = busy | issue.new_request;

    assign issue.ready = ~busy;

    //busy is set by a new request and cleared when write-back is acknowledged
    always_ff @(posedge clk) begin
        if (rst)
            busy <= 0;
        else
            busy <= (busy & ~wb.ack) | issue.new_request;
    end

    always_ff @(posedge clk) begin
        if (issue.new_request)
            csr_inputs_r <= csr_inputs;
    end

    //Set by commit and held until the next request, so that commit is
    //asserted for a single cycle per CSR instruction
    always_ff @(posedge clk) begin
        if (rst)
            commit_in_progress <= 0;
        else
            commit_in_progress <= (commit_in_progress & ~issue.new_request) | commit;
    end

    //Waits until CSR instruction is the oldest issued instruction
    assign commit = (retire_ids[0] == wb.id) & busy & (~commit_in_progress);

    ////////////////////////////////////////////////////
    //Output

    always_ff @(posedge clk) begin
        if (rst)
            wb.done <= 0;
        else
            wb.done <= (wb.done & ~wb.ack) | commit;
    end

    always_ff @(posedge clk) begin
        if (issue.new_request)
            wb.id <= issue.id;
    end

    assign wb.rd = selected_csr_r;

    ////////////////////////////////////////////////////
    //Shared logic
    //Write enables are registered, so they are asserted the cycle after commit
    always_ff @(posedge clk) begin
        mwrite <= CONFIG.INCLUDE_M_MODE && commit && (csr_inputs_r.addr.rw_bits != CSR_READ_ONLY && csr_inputs_r.addr.privilege == MACHINE_PRIVILEGE);
        swrite <= CONFIG.INCLUDE_S_MODE && commit && (csr_inputs_r.addr.rw_bits != CSR_READ_ONLY && csr_inputs_r.addr.privilege == SUPERVISOR_PRIVILEGE);
    end

    //New CSR value, captured on commit.
    //Non-blocking assignments: updated_csr is read by other clocked and
    //combinational processes, so a blocking assignment inside always_ff
    //would create a simulation race.
    always_ff @(posedge clk) begin
        if (commit) begin
            case (csr_inputs_r.op)
                CSR_RW : updated_csr <= csr_inputs_r.data;
                CSR_RS : updated_csr <= selected_csr | csr_inputs_r.data;
                CSR_RC : updated_csr <= selected_csr & ~csr_inputs_r.data;
                default : updated_csr <= csr_inputs_r.data;
            endcase
        end
    end

    ////////////////////////////////////////////////////
    //Machine Mode Registers
    ////////////////////////////////////////////////////

    ////////////////////////////////////////////////////
    //Constant Registers

    ////////////////////////////////////////////////////
    //Machine ISA register
    const misa_t misa = '{default:0, mxlen:1, A:(CONFIG.INCLUDE_AMO), I:1, M:(CONFIG.INCLUDE_MUL && CONFIG.INCLUDE_DIV), S:(CONFIG.INCLUDE_S_MODE), U:(CONFIG.INCLUDE_U_MODE)};

    ////////////////////////////////////////////////////
    //Machine Version Registers
    const logic [XLEN-1:0] mvendorid = 0;
    const logic [XLEN-1:0] marchid = 0;
    const logic [XLEN-1:0] mimpid = CONFIG.CSRS.MACHINE_IMPLEMENTATION_ID;
    const logic [XLEN-1:0] mhartid = CONFIG.CSRS.CPU_ID;

    ////////////////////////////////////////////////////
    //MSTATUS
    const logic [XLEN-1:0] mstatush = 0; //Always little endian

    ////////////////////////////////////////////////////
    //Non-Constant Registers
    mstatus_t mstatus;

    logic[XLEN-1:0] mtvec;
    logic[XLEN-1:0] medeleg;
    logic[XLEN-1:0] mideleg;
    mip_t mip, mip_mask, mip_w_mask, mip_new;
    mie_t mie, mie_mask;
    mip_t sip_mask;
    mie_t sie_mask;

    logic[XLEN-1:0] mepc;

    logic[XLEN-1:0] mtimecmp;

    mcause_t mcause;
    logic[XLEN-1:0] mtval;

    logic[XLEN-1:0] mscratch;

    //Virtualization support: TSR, TW, TVM unused
    //Extension context status: SD, FS, XS unused
    const mstatus_t mstatus_mask =
        '{default:0, mprv:(CONFIG.INCLUDE_U_MODE | CONFIG.INCLUDE_S_MODE), mxr:(CONFIG.INCLUDE_S_MODE),
        sum:(CONFIG.INCLUDE_U_MODE & CONFIG.INCLUDE_S_MODE), mpp:'1, spp:(CONFIG.INCLUDE_S_MODE),
        mpie:1, spie:(CONFIG.INCLUDE_S_MODE), mie:1, sie:(CONFIG.INCLUDE_S_MODE)};

    const mstatus_t sstatus_mask = '{default:0, mxr:1, sum:1, spp:1, spie:1, sie:1};


generate if (CONFIG.INCLUDE_M_MODE) begin : gen_csr_m_mode

    privilege_t trap_return_privilege_level;
    privilege_t exception_privilege_level;
    privilege_t interrupt_privilege_level;

    mstatus_t mstatus_exception;
    mstatus_t mstatus_return;
    mstatus_t mstatus_new;

    logic [ECODE_W-1:0] interrupt_cause_r;

    //Interrupt and Exception Delegation
    //Can delegate to supervisor if currently in supervisor or user modes
    always_comb begin
        exception_privilege_level = MACHINE_PRIVILEGE;
        interrupt_privilege_level = MACHINE_PRIVILEGE;
        if (CONFIG.INCLUDE_S_MODE && privilege_level inside {SUPERVISOR_PRIVILEGE, USER_PRIVILEGE}) begin
            if (exception.valid & medeleg[exception.code])
                exception_privilege_level = SUPERVISOR_PRIVILEGE;
            if (interrupt_taken & mideleg[interrupt_cause_r])
                interrupt_privilege_level = SUPERVISOR_PRIVILEGE;
        end
    end

    //return from trap privilege determination
    assign trap_return_privilege_level = mret ? privilege_t'(mstatus.mpp) : privilege_t'({1'b0,mstatus.spp});

    //Priority: trap return, then interrupt, then exception, otherwise hold
    always_comb begin
        if(mret | sret)
            next_privilege_level = trap_return_privilege_level;
        else if (interrupt_taken)
            next_privilege_level = interrupt_privilege_level;
        else if (exception.valid)
            next_privilege_level = exception_privilege_level;
        else
            next_privilege_level = privilege_level;
    end

    //Current privilege level
    always_ff @(posedge clk) begin
        if (rst)
            privilege_level <= MACHINE_PRIVILEGE;
        else
            privilege_level <= next_privilege_level;
    end
    assign current_privilege = privilege_level;

    //mstatus value to load when a trap is taken: the interrupt-enable and
    //previous-privilege stack of the target mode is pushed
    always_comb begin
        mstatus_exception = mstatus;
        case (next_privilege_level)
            SUPERVISOR_PRIVILEGE: begin
                mstatus_exception.spie = (privilege_level == SUPERVISOR_PRIVILEGE) ? mstatus.sie : 0;
                mstatus_exception.sie = 0;
                mstatus_exception.spp = privilege_level[0]; //one if from supervisor-mode, zero if from user-mode
            end
            default: begin
                mstatus_exception.mpie = (privilege_level == MACHINE_PRIVILEGE) ? mstatus.mie : ((privilege_level == SUPERVISOR_PRIVILEGE) ? mstatus.sie : 0);
                mstatus_exception.mie = 0;
                mstatus_exception.mpp = privilege_level; //machine,supervisor or user
            end
        endcase
    end

    //return from trap
    always_comb begin
        mstatus_return = mstatus;
        if (sret) begin
            mstatus_return.sie = mstatus.spie;
            mstatus_return.spie = 1;
            mstatus_return.spp = USER_PRIVILEGE[0];
            mstatus_return.mprv = 0;
        end
        else if (mret) begin
            mstatus_return.mie = mstatus.mpie;
            mstatus_return.mpie = 1;
            mstatus_return.mpp = CONFIG.INCLUDE_U_MODE ? USER_PRIVILEGE : MACHINE_PRIVILEGE;
            if (mstatus.mpp != MACHINE_PRIVILEGE)
                mstatus_return.mprv = 0;
        end
    end

    //SSTATUS writes go to the same register through a narrower mask
    mstatus_t mstatus_write_mask;
    assign mstatus_write_mask = swrite ? sstatus_mask : mstatus_mask;

    //Priority: software write, then trap entry, then trap return
    always_comb begin
        mstatus_new = mstatus;
        if (mwrite_en(MSTATUS) | swrite_en(SSTATUS))
            mstatus_new = (mstatus & ~mstatus_write_mask) | (updated_csr & mstatus_write_mask);
        else if (interrupt_taken | exception.valid)
            mstatus_new = mstatus_exception;
        else if (mret | sret)
            mstatus_new = mstatus_return;
    end

    always_ff @(posedge clk) begin
        if (rst)
            mstatus <= '{default:0, mpp:MACHINE_PRIVILEGE};
        else
            mstatus <= mstatus_new;
    end

    ////////////////////////////////////////////////////
    //MTVEC
    //No vectored mode, mode hard-coded to zero
    //NOTE(review): mtvec is driven by both this initial statement and the
    //always_ff below; some tools reject a second driver on an always_ff
    //variable -- confirm against the supported tool flows.
    initial mtvec[31:2] = CONFIG.CSRS.RESET_MTVEC[31:2];
    always_ff @(posedge clk) begin
        mtvec[1:0] <= '0;
        if (CONFIG.CSRS.NON_STANDARD_OPTIONS.MTVEC_WRITEABLE & mwrite_en(MTVEC))
            mtvec[XLEN-1:2] <= updated_csr[XLEN-1:2];
    end
    assign exception_target_pc = mtvec;

    ////////////////////////////////////////////////////
    //MEDELEG
    //Only the exception codes listed here can be delegated
    logic [31:0] medeleg_mask;
    always_comb begin
        medeleg_mask = 0;
        if (CONFIG.INCLUDE_S_MODE) begin
            medeleg_mask[INST_ADDR_MISSALIGNED] = 1;
            medeleg_mask[INST_ACCESS_FAULT] = 1;
            medeleg_mask[ILLEGAL_INST] = 1;
            medeleg_mask[BREAK] = 1;
            medeleg_mask[LOAD_ADDR_MISSALIGNED] = 1;
            medeleg_mask[LOAD_FAULT] = 1;
            medeleg_mask[STORE_AMO_ADDR_MISSALIGNED] = 1;
            medeleg_mask[STORE_AMO_FAULT] = 1;
            medeleg_mask[ECALL_U] = 1;
            medeleg_mask[INST_PAGE_FAULT] = 1;
            medeleg_mask[LOAD_PAGE_FAULT] = 1;
            medeleg_mask[STORE_OR_AMO_PAGE_FAULT] = 1;
        end
    end

    always_ff @(posedge clk) begin
        if (rst)
            medeleg <= '0;
        else if (mwrite_en(MEDELEG) & CONFIG.INCLUDE_S_MODE)
            medeleg <= (updated_csr & medeleg_mask);
    end

    ////////////////////////////////////////////////////
    //MIDELEG
    logic [31:0] mideleg_mask;
    always_comb begin
        mideleg_mask = 0;
        if (CONFIG.INCLUDE_S_MODE) begin
            mideleg_mask[S_SOFTWARE_INTERRUPT] = CONFIG.INCLUDE_S_MODE;
            mideleg_mask[S_TIMER_INTERRUPT] = CONFIG.INCLUDE_S_MODE;
            mideleg_mask[S_EXTERNAL_INTERRUPT] = CONFIG.INCLUDE_S_MODE;
        end
    end
    always_ff @(posedge clk) begin
        if (rst)
            mideleg <= '0;
        else if (mwrite_en(MIDELEG) & CONFIG.INCLUDE_S_MODE)
            mideleg <= (updated_csr & mideleg_mask);
    end

    ////////////////////////////////////////////////////
    //MIP
    assign mip_mask = '{default:0, meip:1, seip:CONFIG.INCLUDE_S_MODE, mtip:1, stip:CONFIG.INCLUDE_S_MODE, msip:1, ssip:CONFIG.INCLUDE_S_MODE};
    assign mip_w_mask = '{default:0, seip:CONFIG.INCLUDE_S_MODE, stip:CONFIG.INCLUDE_S_MODE, ssip:CONFIG.INCLUDE_S_MODE};

    //Pending bits as currently presented on the external interrupt inputs
    always_comb begin
        mip_new = '0;
        mip_new.ssip = s_interrupt.software;
        mip_new.stip = s_interrupt.timer;
        mip_new.seip = s_interrupt.external;

        mip_new.msip = m_interrupt.software;
        mip_new.mtip = m_interrupt.timer;
        mip_new.meip = m_interrupt.external;

        mip_new &= mip_mask;
    end

    //NOTE(review): when the update is triggered only by mip_new, the
    //software-writable bits are still loaded from updated_csr, which then
    //holds the value of the last committed CSR instruction.  mip is also
    //left unchanged when every external input is low and no write occurs.
    //Confirm both behaviours are intended.
    always_ff @(posedge clk) begin
        if (rst)
            mip <= 0;
        else if (mwrite_en(MIP) | (|mip_new))
            mip <= (updated_csr & mip_w_mask) | mip_new;
    end
    assign interrupt_pending = |(mip & mie) & mstatus.mie;

    ////////////////////////////////////////////////////
    //MIE
    assign mie_mask = '{default:0, meie:1, seie:CONFIG.INCLUDE_S_MODE, mtie:1, stie:CONFIG.INCLUDE_S_MODE, msie:1, ssie:CONFIG.INCLUDE_S_MODE};
    assign sie_mask = '{default:0, seie:1, stie:1, ssie:1};

    always_ff @(posedge clk) begin
        if (rst)
            mie <= '0;
        else if (mwrite_en(MIE) | swrite_en(SIE))
            mie <= updated_csr & (swrite ? sie_mask : mie_mask);
    end

    ////////////////////////////////////////////////////
    //MEPC
    //Can be software written, written on exception with
    //exception causing PC. Lower two bits tied to zero.
    always_ff @(posedge clk) begin
        mepc[1:0] <= '0;
        if (mwrite_en(MEPC) | exception.valid | interrupt_taken)
            mepc[XLEN-1:2] <= (exception.valid | interrupt_taken) ? exception.pc[XLEN-1:2] : updated_csr[XLEN-1:2];
    end
    assign epc = mepc;


    ////////////////////////////////////////////////////
    //MCAUSE
    //As the exception and interrupts codes are sparsely populated,
    //to ensure that only legal values are written, a ROM lookup
    //is used to validate the CSR write operation
    logic M_EXCEPTION_MASKING_ROM [2**ECODE_W];
    logic M_INTERRUPT_MASKING_ROM [2**ECODE_W];
    always_comb begin
        M_EXCEPTION_MASKING_ROM = '{default: 0};
        M_EXCEPTION_MASKING_ROM[INST_ADDR_MISSALIGNED] = 1;
        M_EXCEPTION_MASKING_ROM[INST_ACCESS_FAULT] = CONFIG.INCLUDE_S_MODE;
        M_EXCEPTION_MASKING_ROM[ILLEGAL_INST] = 1;
        M_EXCEPTION_MASKING_ROM[BREAK] = 1;
        M_EXCEPTION_MASKING_ROM[LOAD_ADDR_MISSALIGNED] = 1;
        M_EXCEPTION_MASKING_ROM[LOAD_FAULT] = CONFIG.INCLUDE_S_MODE;
        M_EXCEPTION_MASKING_ROM[STORE_AMO_ADDR_MISSALIGNED] = 1;
        M_EXCEPTION_MASKING_ROM[STORE_AMO_FAULT] = CONFIG.INCLUDE_S_MODE;
        M_EXCEPTION_MASKING_ROM[ECALL_U] = CONFIG.INCLUDE_S_MODE;
        M_EXCEPTION_MASKING_ROM[ECALL_S] = CONFIG.INCLUDE_S_MODE;
        M_EXCEPTION_MASKING_ROM[ECALL_M] = 1;
        M_EXCEPTION_MASKING_ROM[INST_PAGE_FAULT] = CONFIG.INCLUDE_S_MODE;
        M_EXCEPTION_MASKING_ROM[LOAD_PAGE_FAULT] = CONFIG.INCLUDE_S_MODE;
        M_EXCEPTION_MASKING_ROM[STORE_OR_AMO_PAGE_FAULT] = CONFIG.INCLUDE_S_MODE;

        M_INTERRUPT_MASKING_ROM = '{default: 0};
        M_INTERRUPT_MASKING_ROM[S_SOFTWARE_INTERRUPT] = CONFIG.INCLUDE_S_MODE;
        M_INTERRUPT_MASKING_ROM[M_SOFTWARE_INTERRUPT] = 1;
        M_INTERRUPT_MASKING_ROM[S_TIMER_INTERRUPT] = CONFIG.INCLUDE_S_MODE;
        M_INTERRUPT_MASKING_ROM[M_TIMER_INTERRUPT] = 1;
        M_INTERRUPT_MASKING_ROM[S_EXTERNAL_INTERRUPT] = CONFIG.INCLUDE_S_MODE;
        M_INTERRUPT_MASKING_ROM[M_EXTERNAL_INTERRUPT] = 1;
    end

    logic mcause_write_valid;
    always_comb begin
        if (updated_csr[XLEN-1]) //interrupt
            mcause_write_valid = M_INTERRUPT_MASKING_ROM[updated_csr[ECODE_W-1:0]];
        else
            mcause_write_valid = M_EXCEPTION_MASKING_ROM[updated_csr[ECODE_W-1:0]];
    end

    mip_t mip_cause;
    logic [5:0] mip_priority_vector;
    logic [2:0] mip_cause_sel;

    //Cause codes indexed by the priority encoder result
    const logic [ECODE_W-1:0] interruput_code_table [7:0] = '{ 0, 0,
        M_EXTERNAL_INTERRUPT, M_TIMER_INTERRUPT, M_SOFTWARE_INTERRUPT,
        S_EXTERNAL_INTERRUPT, S_TIMER_INTERRUPT, S_SOFTWARE_INTERRUPT
    };
    assign mip_cause = (mip & mie);
    assign mip_priority_vector = '{mip_cause.meip, mip_cause.mtip, mip_cause.msip, mip_cause.seip, mip_cause.stip, mip_cause.ssip};

    priority_encoder #(.WIDTH(6))
    interrupt_cause_encoder (
        .priority_vector (mip_priority_vector),
        .encoded_result (mip_cause_sel)
    );

    //Cause of the pending interrupt, registered for use when it is taken
    always_ff @(posedge clk) begin
        if (interrupt_pending)
            interrupt_cause_r <= interruput_code_table[mip_cause_sel];
    end

    always_ff @(posedge clk) begin
        mcause.zeroes <= '0;
        if (rst) begin
            mcause.is_interrupt <= 0;
            mcause.code <= 0;
        end
        else if (CONFIG.CSRS.NON_STANDARD_OPTIONS.INCLUDE_MCAUSE & ((mcause_write_valid & mwrite_en(MCAUSE)) | exception.valid | interrupt_taken)) begin
            mcause.is_interrupt <= interrupt_taken | (mwrite_en(MCAUSE) & updated_csr[XLEN-1]);
            mcause.code <= interrupt_taken ? interrupt_cause_r : exception.valid ? exception.code : updated_csr[ECODE_W-1:0];
        end
    end

    ////////////////////////////////////////////////////
    //MTVAL
    always_ff @(posedge clk) begin
        if (CONFIG.CSRS.NON_STANDARD_OPTIONS.INCLUDE_MTVAL & (mwrite_en(MTVAL) | exception.valid))
            mtval <= exception.valid ? exception.tval : updated_csr;
    end

    ////////////////////////////////////////////////////
    //MSCRATCH
    always_ff @(posedge clk) begin
        if (CONFIG.CSRS.NON_STANDARD_OPTIONS.INCLUDE_MSCRATCH & mwrite_en(MSCRATCH))
            mscratch <= updated_csr;
    end

end
endgenerate

    ////////////////////////////////////////////////////
    //END OF MACHINE REGS
    ////////////////////////////////////////////////////


    ////////////////////////////////////////////////////
    //BEGIN OF SUPERVISOR REGS
    ////////////////////////////////////////////////////
    //NOTE(review): sepc, stime, stimecmp, scause, stval and sstatus are
    //declared but never assigned anywhere in this module.
    logic[XLEN-1:0] sepc;

    logic[XLEN-1:0] stime;
    logic[XLEN-1:0] stimecmp;

    logic[XLEN-1:0] scause;
    logic[XLEN-1:0] stval;

    logic[XLEN-1:0] sstatus;
    logic[XLEN-1:0] stvec;

    satp_t satp;

    logic[XLEN-1:0] sscratch;

    //TLB status --- used to mux physical/virtual address
    assign tlb_on = CONFIG.INCLUDE_S_MODE & satp.mode;
    assign asid = satp.asid;
    //******************

generate if (CONFIG.INCLUDE_S_MODE) begin : gen_csr_s_mode
    ////////////////////////////////////////////////////
    //MMU interface
    assign immu.mxr = mstatus.mxr;
    assign dmmu.mxr = mstatus.mxr;
    assign immu.sum = mstatus.sum;
    assign dmmu.sum = mstatus.sum;
    assign immu.privilege = privilege_level;
    assign dmmu.privilege = mstatus.mprv ? mstatus.mpp : privilege_level;
    assign immu.satp_ppn = satp.ppn;
    assign dmmu.satp_ppn = satp.ppn;
    ////////////////////////////////////////////////////

    assign sip_mask = '{default:0, seip:1, stip:1, ssip:1};

    ////////////////////////////////////////////////////
    //STVEC
    logic [31:0] stvec_mask = '1;
    always_ff @(posedge clk) begin
        if (rst)
            stvec <= {CONFIG.CSRS.RESET_VEC[XLEN-1:2], 2'b00};
        else if (swrite_en(STVEC))
            stvec <= (updated_csr & stvec_mask);
    end

    ////////////////////////////////////////////////////
    //SATP
    logic[XLEN-1:0] satp_mask;
    assign satp_mask = '1;
    always_ff @(posedge clk) begin
        if (rst)
            satp <= 0;
        else if (swrite_en(SATP))
            satp <= (updated_csr & satp_mask);
    end

    ////////////////////////////////////////////////////
    //SSCRATCH
    always_ff @(posedge clk) begin
        if (swrite_en(SSCRATCH))
            sscratch <= updated_csr;
    end

end
endgenerate

    ////////////////////////////////////////////////////
    //END OF SUPERVISOR REGS
    ////////////////////////////////////////////////////


    ////////////////////////////////////////////////////
    //Timers and Counters
    //Register increment for instructions completed
    //Increments suppressed on writes to these registers
    logic[CONFIG.CSRS.NON_STANDARD_OPTIONS.COUNTER_W-1:0] mcycle;
    logic[CONFIG.CSRS.NON_STANDARD_OPTIONS.COUNTER_W-1:0] mtime;
    logic[CONFIG.CSRS.NON_STANDARD_OPTIONS.COUNTER_W-1:0] minst_ret;

    logic[CONFIG.CSRS.NON_STANDARD_OPTIONS.COUNTER_W-1:0] mcycle_input_next;
    logic[CONFIG.CSRS.NON_STANDARD_OPTIONS.COUNTER_W-1:0] minst_ret_input_next;
    logic[LOG2_RETIRE_PORTS:0] minst_ret_inc;
    logic mcycle_inc;

    always_comb begin
        mcycle_input_next = mcycle;
        if (CONFIG.CSRS.NON_STANDARD_OPTIONS.MCYCLE_WRITEABLE & mwrite_en(MCYCLE))
            mcycle_input_next[31:0] = updated_csr;
        if (CONFIG.CSRS.NON_STANDARD_OPTIONS.MCYCLE_WRITEABLE & mwrite_en(MCYCLEH))
            mcycle_input_next[CONFIG.CSRS.NON_STANDARD_OPTIONS.COUNTER_W-1:32] = updated_csr[CONFIG.CSRS.NON_STANDARD_OPTIONS.COUNTER_W-33:0];
    end

    assign mcycle_inc = ~(CONFIG.CSRS.NON_STANDARD_OPTIONS.MCYCLE_WRITEABLE & (mwrite_en(MCYCLE) | mwrite_en(MCYCLEH)));

    always_ff @(posedge clk) begin
        if (rst)
            mcycle <= 0;
        else
            mcycle <= mcycle_input_next + CONFIG.CSRS.NON_STANDARD_OPTIONS.COUNTER_W'(mcycle_inc);
    end

    always_comb begin
        minst_ret_input_next = minst_ret;
        if (CONFIG.CSRS.NON_STANDARD_OPTIONS.MINSTR_WRITEABLE & mwrite_en(MINSTRET))
            minst_ret_input_next[31:0] = updated_csr;
        if (CONFIG.CSRS.NON_STANDARD_OPTIONS.MINSTR_WRITEABLE & mwrite_en(MINSTRETH))
            minst_ret_input_next[CONFIG.CSRS.NON_STANDARD_OPTIONS.COUNTER_W-1:32] = updated_csr[CONFIG.CSRS.NON_STANDARD_OPTIONS.COUNTER_W-33:0];
    end

    assign minst_ret_inc = {(LOG2_RETIRE_PORTS+1){~(CONFIG.CSRS.NON_STANDARD_OPTIONS.MINSTR_WRITEABLE & (mwrite_en(MINSTRET) | mwrite_en(MINSTRETH)))}} & retire.count;

    always_ff @(posedge clk) begin
        if (rst)
            minst_ret <= 0;
        else
            minst_ret <= minst_ret_input_next + CONFIG.CSRS.NON_STANDARD_OPTIONS.COUNTER_W'(minst_ret_inc);
    end


    ////////////////////////////////////////////////////
    //CSR mux
    //Read value for the registered request address; unlisted addresses read
    //as zero
    always_comb begin
        case (csr_inputs_r.addr) inside
            //Machine info
            MISA : selected_csr = CONFIG.INCLUDE_M_MODE ? misa : 0;
            MVENDORID : selected_csr = CONFIG.INCLUDE_M_MODE ? mvendorid : 0;
            MARCHID : selected_csr = CONFIG.INCLUDE_M_MODE ? marchid : 0;
            MIMPID : selected_csr = CONFIG.INCLUDE_M_MODE ? mimpid : 0;
            MHARTID : selected_csr = CONFIG.INCLUDE_M_MODE ? mhartid : 0;
            //Machine trap setup
            MSTATUS : selected_csr = CONFIG.INCLUDE_M_MODE ? mstatus : 0;
            MEDELEG : selected_csr = CONFIG.INCLUDE_M_MODE ? medeleg : 0;
            MIDELEG : selected_csr = CONFIG.INCLUDE_M_MODE ? mideleg : 0;
            MIE : selected_csr = CONFIG.INCLUDE_M_MODE ? mie : 0;
            MTVEC : selected_csr = CONFIG.INCLUDE_M_MODE ? mtvec : 0;
            MCOUNTEREN : selected_csr = 0;
            //Machine trap handling
            MSCRATCH : selected_csr = CONFIG.INCLUDE_M_MODE ? mscratch : 0;
            MEPC : selected_csr = CONFIG.INCLUDE_M_MODE ? mepc : 0;
            MCAUSE : selected_csr = CONFIG.INCLUDE_M_MODE ? mcause : 0;
            MTVAL : selected_csr = CONFIG.INCLUDE_M_MODE ? mtval : 0;
            MIP : selected_csr = CONFIG.INCLUDE_M_MODE ? mip : 0;
            //Machine Memory Protection
            //Bounds are low:high; the previous high:low form is an empty
            //range, so this entry never matched (reads still returned zero
            //through the default)
            [12'h3A0 : 12'h3EF] : selected_csr = 0;
            //Machine Timers and Counters
            MCYCLE : selected_csr = CONFIG.INCLUDE_M_MODE ? mcycle[XLEN-1:0] : 0;
            MINSTRET : selected_csr = CONFIG.INCLUDE_M_MODE ? minst_ret[XLEN-1:0] : 0;
            [12'hB03 : 12'hB1F] : selected_csr = 0;
            MCYCLEH : selected_csr = CONFIG.INCLUDE_M_MODE ? 32'(mcycle[CONFIG.CSRS.NON_STANDARD_OPTIONS.COUNTER_W-1:XLEN]) : 0;
            MINSTRETH : selected_csr = CONFIG.INCLUDE_M_MODE ? 32'(minst_ret[CONFIG.CSRS.NON_STANDARD_OPTIONS.COUNTER_W-1:XLEN]) : 0;
            [12'hB83 : 12'hB9F] : selected_csr = 0;
            //Machine Counter Setup
            [12'h320 : 12'h33F] : selected_csr = 0;

            //Supervisor Trap Setup
            SSTATUS : selected_csr = CONFIG.INCLUDE_S_MODE ? (mstatus & sstatus_mask) : '0;
            SEDELEG : selected_csr = 0; //No user-level interrupts/exception handling
            SIDELEG : selected_csr = 0;
            SIE : selected_csr = CONFIG.INCLUDE_S_MODE ? (mie & sie_mask) : '0;
            STVEC : selected_csr = CONFIG.INCLUDE_S_MODE ? stvec : '0;
            SCOUNTEREN : selected_csr = 0;
            //Supervisor trap handling
            SSCRATCH : selected_csr = CONFIG.INCLUDE_S_MODE ? sscratch : '0;
            //sepc/scause/stval have no driven backing register in this
            //module, so these addresses read as zero like the other
            //unimplemented CSRs (previously they aliased SSCRATCH)
            SEPC : selected_csr = '0;
            SCAUSE : selected_csr = '0;
            STVAL : selected_csr = '0;
            SIP : selected_csr = CONFIG.INCLUDE_S_MODE ? (mip & sip_mask) : '0;
            //Supervisor Protection and Translation
            SATP : selected_csr = CONFIG.INCLUDE_S_MODE ? satp : '0;

            //User status
            //Floating point
            FFLAGS : selected_csr = 0;
            FRM : selected_csr = 0;
            FCSR : selected_csr = 0;
            //User Counter Timers
            //TIME/TIMEH return the cycle counter (mtime is not driven here)
            CYCLE : selected_csr = mcycle[XLEN-1:0];
            TIME : selected_csr = mcycle[XLEN-1:0];
            INSTRET : selected_csr = minst_ret[XLEN-1:0];
            [12'hC03 : 12'hC1F] : selected_csr = 0;
            CYCLEH : selected_csr = 32'(mcycle[CONFIG.CSRS.NON_STANDARD_OPTIONS.COUNTER_W-1:XLEN]);
            TIMEH : selected_csr = 32'(mcycle[CONFIG.CSRS.NON_STANDARD_OPTIONS.COUNTER_W-1:XLEN]);
            INSTRETH : selected_csr = 32'(minst_ret[CONFIG.CSRS.NON_STANDARD_OPTIONS.COUNTER_W-1:XLEN]);
            [12'hC83 : 12'hC9F] : selected_csr = 0;

            default : selected_csr = 0;
        endcase
    end
    //Value returned through write-back is the CSR content at commit time
    always_ff @(posedge clk) begin
        if (commit)
            selected_csr_r <= selected_csr;
    end

endmodule
|
469
core/cva5.sv
469
core/cva5.sv
|
@ -28,6 +28,7 @@ module cva5
|
|||
import l2_config_and_types::*;
|
||||
import riscv_types::*;
|
||||
import cva5_types::*;
|
||||
import fpu_types::*;
|
||||
|
||||
#(
|
||||
parameter cpu_config_t CONFIG = EXAMPLE_CONFIG
|
||||
|
@ -45,48 +46,11 @@ module cva5
|
|||
wishbone_interface.master dwishbone,
|
||||
wishbone_interface.master iwishbone,
|
||||
|
||||
output trace_outputs_t tr,
|
||||
|
||||
l2_requester_interface.master l2,
|
||||
|
||||
input interrupt_t s_interrupt,
|
||||
input interrupt_t m_interrupt
|
||||
);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Unit ID Assignment
|
||||
//Generate Issue IDs based on configuration options
|
||||
//Then assigned to a struct for ease in passing to sub modules
|
||||
|
||||
//Units with writeback
|
||||
localparam int unsigned ALU_UNIT_ID = 32'd0;
|
||||
localparam int unsigned LS_UNIT_ID = 32'd1;
|
||||
localparam int unsigned CSR_UNIT_ID = LS_UNIT_ID + int'(CONFIG.INCLUDE_CSRS);
|
||||
localparam int unsigned MUL_UNIT_ID = CSR_UNIT_ID + int'(CONFIG.INCLUDE_MUL);
|
||||
localparam int unsigned DIV_UNIT_ID = MUL_UNIT_ID + int'(CONFIG.INCLUDE_DIV);
|
||||
//Non-writeback units
|
||||
localparam int unsigned BRANCH_UNIT_ID = DIV_UNIT_ID + 1;
|
||||
localparam int unsigned IEC_UNIT_ID = BRANCH_UNIT_ID + 1;
|
||||
|
||||
//Total number of units
|
||||
localparam int unsigned NUM_UNITS = IEC_UNIT_ID + 1;
|
||||
|
||||
localparam unit_id_param_t UNIT_IDS = '{
|
||||
ALU : ALU_UNIT_ID,
|
||||
LS : LS_UNIT_ID,
|
||||
CSR : CSR_UNIT_ID,
|
||||
MUL : MUL_UNIT_ID,
|
||||
DIV : DIV_UNIT_ID,
|
||||
BR : BRANCH_UNIT_ID,
|
||||
IEC : IEC_UNIT_ID
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Writeback Port Assignment
|
||||
//
|
||||
localparam int unsigned NUM_WB_UNITS_GROUP_1 = 1;//ALU
|
||||
localparam int unsigned NUM_WB_UNITS_GROUP_2 = 1 + int'(CONFIG.INCLUDE_CSRS) + int'(CONFIG.INCLUDE_MUL) + int'(CONFIG.INCLUDE_DIV);//LS
|
||||
localparam int unsigned NUM_WB_UNITS = NUM_WB_UNITS_GROUP_1 + NUM_WB_UNITS_GROUP_2;
|
||||
);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Connecting Signals
|
||||
|
@ -105,24 +69,22 @@ module cva5
|
|||
ras_interface ras();
|
||||
|
||||
issue_packet_t issue;
|
||||
register_file_issue_interface #(.NUM_WB_GROUPS(CONFIG.NUM_WB_GROUPS)) rf_issue();
|
||||
register_file_issue_interface #(.NUM_WB_GROUPS(CONFIG.NUM_WB_GROUPS), .READ_PORTS(REGFILE_READ_PORTS), .DATA_WIDTH(32)) rf_issue();
|
||||
register_file_issue_interface #(.NUM_WB_GROUPS(2), .READ_PORTS(3), .DATA_WIDTH(FLEN)) fp_rf_issue();
|
||||
|
||||
logic [MAX_NUM_UNITS-1:0] unit_needed;
|
||||
logic [MAX_NUM_UNITS-1:0][REGFILE_READ_PORTS-1:0] unit_uses_rs;
|
||||
logic [1:0][2:0] fp_unit_uses_rs;
|
||||
logic [MAX_NUM_UNITS-1:0] unit_uses_rd;
|
||||
logic [1:0] fp_unit_uses_rd;
|
||||
|
||||
alu_inputs_t alu_inputs;
|
||||
load_store_inputs_t ls_inputs;
|
||||
branch_inputs_t branch_inputs;
|
||||
mul_inputs_t mul_inputs;
|
||||
div_inputs_t div_inputs;
|
||||
gc_inputs_t gc_inputs;
|
||||
csr_inputs_t csr_inputs;
|
||||
logic [31:0] constant_alu;
|
||||
|
||||
unit_issue_interface unit_issue [NUM_UNITS-1:0]();
|
||||
unit_issue_interface unit_issue [MAX_NUM_UNITS-1:0]();
|
||||
|
||||
exception_packet_t ls_exception;
|
||||
logic ls_exception_is_store;
|
||||
|
||||
unit_writeback_interface unit_wb [NUM_WB_UNITS]();
|
||||
|
||||
mmu_interface immu();
|
||||
mmu_interface dmmu();
|
||||
|
||||
|
@ -148,24 +110,39 @@ module cva5
|
|||
logic decode_advance;
|
||||
decode_packet_t decode;
|
||||
logic decode_uses_rd;
|
||||
logic fp_decode_uses_rd;
|
||||
rs_addr_t decode_rd_addr;
|
||||
exception_sources_t decode_exception_unit;
|
||||
logic decode_is_store;
|
||||
phys_addr_t decode_phys_rd_addr;
|
||||
phys_addr_t fp_decode_phys_rd_addr;
|
||||
phys_addr_t decode_phys_rs_addr [REGFILE_READ_PORTS];
|
||||
phys_addr_t fp_decode_phys_rs_addr [3];
|
||||
logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] decode_rs_wb_group [REGFILE_READ_PORTS];
|
||||
logic fp_decode_rs_wb_group [3];
|
||||
logic [2:0] dyn_rm;
|
||||
|
||||
//ID freeing
|
||||
retire_packet_t retire;
|
||||
retire_packet_t wb_retire;
|
||||
retire_packet_t fp_wb_retire;
|
||||
retire_packet_t store_retire;
|
||||
id_t retire_ids [RETIRE_PORTS];
|
||||
id_t retire_ids_next [RETIRE_PORTS];
|
||||
logic retire_port_valid [RETIRE_PORTS];
|
||||
logic [LOG2_RETIRE_PORTS : 0] retire_count;
|
||||
//Writeback
|
||||
unit_writeback_interface #(.DATA_WIDTH(32)) unit_wb [MAX_NUM_UNITS]();
|
||||
unit_writeback_interface #(.DATA_WIDTH(FLEN)) fp_unit_wb [2]();
|
||||
wb_packet_t wb_packet [CONFIG.NUM_WB_GROUPS];
|
||||
commit_packet_t commit_packet [CONFIG.NUM_WB_GROUPS];
|
||||
fp_wb_packet_t fp_wb_packet [2];
|
||||
phys_addr_t wb_phys_addr [CONFIG.NUM_WB_GROUPS];
|
||||
phys_addr_t fp_wb_phys_addr [2];
|
||||
logic [4:0] fflag_wmask;
|
||||
//Exception
|
||||
logic [31:0] oldest_pc;
|
||||
|
||||
renamer_interface #(.NUM_WB_GROUPS(CONFIG.NUM_WB_GROUPS)) decode_rename_interface ();
|
||||
renamer_interface #(.NUM_WB_GROUPS(CONFIG.NUM_WB_GROUPS), .READ_PORTS(REGFILE_READ_PORTS)) decode_rename_interface ();
|
||||
renamer_interface #(.NUM_WB_GROUPS(2), .READ_PORTS(3)) fp_decode_rename_interface ();
|
||||
|
||||
//Global Control
|
||||
exception_interface exception [NUM_EXCEPTION_SOURCES]();
|
||||
|
@ -186,47 +163,16 @@ module cva5
|
|||
logic processing_csr;
|
||||
|
||||
//Decode Unit and Fetch Unit
|
||||
logic issue_stage_ready;
|
||||
phys_addr_t issue_phys_rs_addr [REGFILE_READ_PORTS];
|
||||
phys_addr_t fp_issue_phys_rs_addr [3];
|
||||
rs_addr_t issue_rs_addr [REGFILE_READ_PORTS];
|
||||
logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] issue_rd_wb_group;
|
||||
logic fp_issue_rd_wb_group;
|
||||
logic illegal_instruction;
|
||||
logic instruction_issued;
|
||||
logic instruction_issued_with_rd;
|
||||
|
||||
//LS
|
||||
wb_packet_t wb_snoop;
|
||||
|
||||
//Trace Interface Signals
|
||||
logic tr_early_branch_correction;
|
||||
logic tr_operand_stall;
|
||||
logic tr_unit_stall;
|
||||
logic tr_no_id_stall;
|
||||
logic tr_no_instruction_stall;
|
||||
logic tr_other_stall;
|
||||
logic tr_branch_operand_stall;
|
||||
logic tr_alu_operand_stall;
|
||||
logic tr_ls_operand_stall;
|
||||
logic tr_div_operand_stall;
|
||||
|
||||
logic tr_alu_op;
|
||||
logic tr_branch_or_jump_op;
|
||||
logic tr_load_op;
|
||||
logic tr_store_op;
|
||||
logic tr_mul_op;
|
||||
logic tr_div_op;
|
||||
logic tr_misc_op;
|
||||
|
||||
logic tr_instruction_issued_dec;
|
||||
logic [31:0] tr_instruction_pc_dec;
|
||||
logic [31:0] tr_instruction_data_dec;
|
||||
|
||||
logic tr_branch_correct;
|
||||
logic tr_branch_misspredict;
|
||||
logic tr_return_correct;
|
||||
logic tr_return_misspredict;
|
||||
|
||||
logic tr_load_conflict_delay;
|
||||
|
||||
logic tr_rs1_forwarding_needed;
|
||||
logic tr_rs2_forwarding_needed;
|
||||
logic tr_rs1_and_rs2_forwarding_needed;
|
||||
logic fp_instruction_issued_with_rd;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
|
@ -267,18 +213,27 @@ module cva5
|
|||
.decode (decode),
|
||||
.decode_advance (decode_advance),
|
||||
.decode_uses_rd (decode_uses_rd),
|
||||
.fp_decode_uses_rd (fp_decode_uses_rd),
|
||||
.decode_rd_addr (decode_rd_addr),
|
||||
.decode_phys_rd_addr (decode_phys_rd_addr),
|
||||
.fp_decode_phys_rd_addr (fp_decode_phys_rd_addr),
|
||||
.decode_exception_unit (decode_exception_unit),
|
||||
.decode_is_store (decode_is_store),
|
||||
.issue (issue),
|
||||
.instruction_issued (instruction_issued),
|
||||
.instruction_issued_with_rd (instruction_issued_with_rd),
|
||||
.fp_instruction_issued_with_rd (fp_instruction_issued_with_rd),
|
||||
.wb_packet (wb_packet),
|
||||
.commit_packet (commit_packet),
|
||||
.retire (retire),
|
||||
.fp_wb_packet (fp_wb_packet),
|
||||
.wb_phys_addr (wb_phys_addr),
|
||||
.fp_wb_phys_addr (fp_wb_phys_addr),
|
||||
.wb_retire (wb_retire),
|
||||
.fp_wb_retire (fp_wb_retire),
|
||||
.store_retire (store_retire),
|
||||
.retire_ids (retire_ids),
|
||||
.retire_ids_next (retire_ids_next),
|
||||
.retire_port_valid(retire_port_valid),
|
||||
.retire_count (retire_count),
|
||||
.post_issue_count(post_issue_count),
|
||||
.oldest_pc (oldest_pc),
|
||||
.current_exception_unit (current_exception_unit)
|
||||
|
@ -307,11 +262,9 @@ module cva5
|
|||
.iwishbone (iwishbone),
|
||||
.icache_on ('1),
|
||||
.tlb (itlb),
|
||||
.tlb_on (tlb_on),
|
||||
.l1_request (l1_request[L1_ICACHE_ID]),
|
||||
.l1_response (l1_response[L1_ICACHE_ID]),
|
||||
.exception (1'b0),
|
||||
.tr_early_branch_correction (tr_early_branch_correction)
|
||||
.exception (1'b0)
|
||||
);
|
||||
|
||||
branch_predictor #(.CONFIG(CONFIG))
|
||||
|
@ -364,7 +317,7 @@ module cva5
|
|||
|
||||
////////////////////////////////////////////////////
|
||||
//Renamer
|
||||
renamer #(.CONFIG(CONFIG))
|
||||
renamer #(.NUM_WB_GROUPS(CONFIG.NUM_WB_GROUPS), .READ_PORTS(REGFILE_READ_PORTS), .RENAME_ZERO(0))
|
||||
renamer_block (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
|
@ -373,68 +326,57 @@ module cva5
|
|||
.decode (decode_rename_interface),
|
||||
.issue (issue), //packet
|
||||
.instruction_issued_with_rd (instruction_issued_with_rd),
|
||||
.retire (retire) //packet
|
||||
.wb_retire (wb_retire)
|
||||
);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Decode/Issue
|
||||
decode_and_issue #(
|
||||
.CONFIG (CONFIG),
|
||||
.NUM_UNITS (NUM_UNITS),
|
||||
.UNIT_IDS (UNIT_IDS)
|
||||
)
|
||||
decode_and_issue #(.CONFIG(CONFIG))
|
||||
decode_and_issue_block (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.pc_id_available (pc_id_available),
|
||||
.decode (decode),
|
||||
.decode_advance (decode_advance),
|
||||
.unit_needed (unit_needed),
|
||||
.unit_uses_rs (unit_uses_rs),
|
||||
.fp_unit_uses_rs (fp_unit_uses_rs),
|
||||
.unit_uses_rd (unit_uses_rd),
|
||||
.fp_unit_uses_rd (fp_unit_uses_rd),
|
||||
.renamer (decode_rename_interface),
|
||||
.fp_renamer (fp_decode_rename_interface),
|
||||
.decode_uses_rd (decode_uses_rd),
|
||||
.fp_decode_uses_rd (fp_decode_uses_rd),
|
||||
.decode_rd_addr (decode_rd_addr),
|
||||
.decode_exception_unit (decode_exception_unit),
|
||||
.decode_phys_rd_addr (decode_phys_rd_addr),
|
||||
.fp_decode_phys_rd_addr (fp_decode_phys_rd_addr),
|
||||
.decode_phys_rs_addr (decode_phys_rs_addr),
|
||||
.fp_decode_phys_rs_addr (fp_decode_phys_rs_addr),
|
||||
.decode_rs_wb_group (decode_rs_wb_group),
|
||||
.fp_decode_rs_wb_group (fp_decode_rs_wb_group),
|
||||
.instruction_issued (instruction_issued),
|
||||
.instruction_issued_with_rd (instruction_issued_with_rd),
|
||||
.fp_instruction_issued_with_rd (fp_instruction_issued_with_rd),
|
||||
.issue (issue),
|
||||
.issue_rs_addr (issue_rs_addr),
|
||||
.issue_stage_ready (issue_stage_ready),
|
||||
.issue_phys_rs_addr (issue_phys_rs_addr),
|
||||
.fp_issue_phys_rs_addr (fp_issue_phys_rs_addr),
|
||||
.issue_rd_wb_group (issue_rd_wb_group),
|
||||
.fp_issue_rd_wb_group (fp_issue_rd_wb_group),
|
||||
.rf (rf_issue),
|
||||
.alu_inputs (alu_inputs),
|
||||
.ls_inputs (ls_inputs),
|
||||
.branch_inputs (branch_inputs),
|
||||
.gc_inputs (gc_inputs),
|
||||
.csr_inputs (csr_inputs),
|
||||
.mul_inputs (mul_inputs),
|
||||
.div_inputs (div_inputs),
|
||||
.fp_rf (fp_rf_issue),
|
||||
.constant_alu (constant_alu),
|
||||
.unit_issue (unit_issue),
|
||||
.gc (gc),
|
||||
.current_privilege (current_privilege),
|
||||
.exception (exception[PRE_ISSUE_EXCEPTION]),
|
||||
.tr_operand_stall (tr_operand_stall),
|
||||
.tr_unit_stall (tr_unit_stall),
|
||||
.tr_no_id_stall (tr_no_id_stall),
|
||||
.tr_no_instruction_stall (tr_no_instruction_stall),
|
||||
.tr_other_stall (tr_other_stall),
|
||||
.tr_branch_operand_stall (tr_branch_operand_stall),
|
||||
.tr_alu_operand_stall (tr_alu_operand_stall),
|
||||
.tr_ls_operand_stall (tr_ls_operand_stall),
|
||||
.tr_div_operand_stall (tr_div_operand_stall),
|
||||
.tr_alu_op (tr_alu_op),
|
||||
.tr_branch_or_jump_op (tr_branch_or_jump_op),
|
||||
.tr_load_op (tr_load_op),
|
||||
.tr_store_op (tr_store_op),
|
||||
.tr_mul_op (tr_mul_op),
|
||||
.tr_div_op (tr_div_op),
|
||||
.tr_misc_op (tr_misc_op),
|
||||
.tr_instruction_issued_dec (tr_instruction_issued_dec),
|
||||
.tr_instruction_pc_dec (tr_instruction_pc_dec),
|
||||
.tr_instruction_data_dec (tr_instruction_data_dec)
|
||||
.exception (exception[PRE_ISSUE_EXCEPTION])
|
||||
);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Register File
|
||||
register_file #(.CONFIG(CONFIG))
|
||||
register_file #(.NUM_WB_GROUPS(CONFIG.NUM_WB_GROUPS), .READ_PORTS(REGFILE_READ_PORTS), .PORT_ZERO_ABSENT(0), .USE_ZERO(0), .WB_PACKET_TYPE(wb_packet_t))
|
||||
register_file_block (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
|
@ -444,8 +386,10 @@ module cva5
|
|||
.decode_rs_wb_group (decode_rs_wb_group),
|
||||
.decode_advance (decode_advance),
|
||||
.decode_uses_rd (decode_uses_rd),
|
||||
.decode_rd_addr (decode_rd_addr),
|
||||
.rf_issue (rf_issue),
|
||||
.commit (commit_packet)
|
||||
.commit (wb_packet),
|
||||
.wb_phys_addr (wb_phys_addr)
|
||||
);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
|
@ -453,25 +397,36 @@ module cva5
|
|||
branch_unit #(.CONFIG(CONFIG))
|
||||
branch_unit_block (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.issue (unit_issue[UNIT_IDS.BR]),
|
||||
.branch_inputs (branch_inputs),
|
||||
.rst (rst),
|
||||
.decode_stage (decode),
|
||||
.issue_stage (issue),
|
||||
.issue_stage_ready (issue_stage_ready),
|
||||
.unit_needed (unit_needed[BR_ID]),
|
||||
.uses_rs (unit_uses_rs[BR_ID]),
|
||||
.uses_rd (unit_uses_rd[BR_ID]),
|
||||
.rf (rf_issue.data),
|
||||
.constant_alu (constant_alu),
|
||||
.issue (unit_issue[BR_ID]),
|
||||
.br_results (br_results),
|
||||
.branch_flush (branch_flush),
|
||||
.exception (exception[BR_EXCEPTION]),
|
||||
.tr_branch_correct (tr_branch_correct),
|
||||
.tr_branch_misspredict (tr_branch_misspredict),
|
||||
.tr_return_correct (tr_return_correct),
|
||||
.tr_return_misspredict (tr_return_misspredict)
|
||||
.exception (exception[BR_EXCEPTION])
|
||||
);
|
||||
|
||||
|
||||
alu_unit alu_unit_block (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.alu_inputs (alu_inputs),
|
||||
.issue (unit_issue[UNIT_IDS.ALU]),
|
||||
.wb (unit_wb[UNIT_IDS.ALU])
|
||||
.decode_stage (decode),
|
||||
.issue_stage (issue),
|
||||
.issue_stage_ready (issue_stage_ready),
|
||||
.unit_needed (unit_needed[ALU_ID]),
|
||||
.uses_rs (unit_uses_rs[ALU_ID]),
|
||||
.uses_rd (unit_uses_rd[ALU_ID]),
|
||||
.rf (rf_issue.data),
|
||||
.constant_alu (constant_alu),
|
||||
.issue_rs_addr (issue_rs_addr),
|
||||
.issue (unit_issue[ALU_ID]),
|
||||
.wb (unit_wb[ALU_ID])
|
||||
);
|
||||
|
||||
load_store_unit #(.CONFIG(CONFIG))
|
||||
|
@ -479,8 +434,25 @@ module cva5
|
|||
.clk (clk),
|
||||
.rst (rst),
|
||||
.gc (gc),
|
||||
.ls_inputs (ls_inputs),
|
||||
.issue (unit_issue[UNIT_IDS.LS]),
|
||||
.decode_stage (decode),
|
||||
.issue_stage (issue),
|
||||
.issue_stage_ready (issue_stage_ready),
|
||||
.unit_needed (unit_needed[LS_ID]),
|
||||
.uses_rs (unit_uses_rs[LS_ID]),
|
||||
.fp_uses_rs (fp_unit_uses_rs[0]),
|
||||
.uses_rd (unit_uses_rd[LS_ID]),
|
||||
.fp_uses_rd (fp_unit_uses_rd[0]),
|
||||
.decode_is_store (decode_is_store),
|
||||
.instruction_issued_with_rd (instruction_issued_with_rd),
|
||||
.fp_instruction_issued_with_rd (fp_instruction_issued_with_rd),
|
||||
.issue_rs_addr (issue_rs_addr),
|
||||
.issue_rd_wb_group (issue_rd_wb_group),
|
||||
.fp_issue_rd_wb_group (fp_issue_rd_wb_group),
|
||||
.rs2_inuse (rf_issue.inuse[RS2]),
|
||||
.fp_rs2_inuse (fp_rf_issue.inuse[RS2]),
|
||||
.rf (rf_issue.data),
|
||||
.fp_rf (fp_rf_issue.data),
|
||||
.issue (unit_issue[LS_ID]),
|
||||
.dcache_on (1'b1),
|
||||
.clear_reservation (1'b0),
|
||||
.tlb (dtlb),
|
||||
|
@ -493,13 +465,13 @@ module cva5
|
|||
.m_avalon (m_avalon),
|
||||
.dwishbone (dwishbone),
|
||||
.data_bram (data_bram),
|
||||
.wb_snoop (wb_snoop),
|
||||
.retire_ids (retire_ids),
|
||||
.retire_port_valid(retire_port_valid),
|
||||
.wb_packet (wb_packet),
|
||||
.fp_wb_packet (fp_wb_packet),
|
||||
.store_retire (store_retire),
|
||||
.exception (exception[LS_EXCEPTION]),
|
||||
.load_store_status(load_store_status),
|
||||
.wb (unit_wb[UNIT_IDS.LS]),
|
||||
.tr_load_conflict_delay (tr_load_conflict_delay)
|
||||
.wb (unit_wb[LS_ID]),
|
||||
.fp_wb (fp_unit_wb[0])
|
||||
);
|
||||
|
||||
generate if (CONFIG.INCLUDE_S_MODE) begin : gen_dtlb_dmmu
|
||||
|
@ -530,15 +502,24 @@ module cva5
|
|||
end
|
||||
endgenerate
|
||||
|
||||
generate if (CONFIG.INCLUDE_CSRS) begin : gen_csrs
|
||||
generate if (CONFIG.INCLUDE_UNIT.CSR) begin : gen_csrs
|
||||
csr_unit # (.CONFIG(CONFIG))
|
||||
csr_unit_block (
|
||||
.clk(clk),
|
||||
.rst(rst),
|
||||
.csr_inputs (csr_inputs),
|
||||
.issue (unit_issue[UNIT_IDS.CSR]),
|
||||
.wb (unit_wb[UNIT_IDS.CSR]),
|
||||
.decode_stage (decode),
|
||||
.issue_stage (issue),
|
||||
.issue_stage_ready (issue_stage_ready),
|
||||
.issue_rs_addr (issue_rs_addr),
|
||||
.unit_needed (unit_needed[CSR_ID]),
|
||||
.uses_rs (unit_uses_rs[CSR_ID]),
|
||||
.uses_rd (unit_uses_rd[CSR_ID]),
|
||||
.rf (rf_issue.data),
|
||||
.issue (unit_issue[CSR_ID]),
|
||||
.wb (unit_wb[CSR_ID]),
|
||||
.current_privilege(current_privilege),
|
||||
.fflag_wmask (fflag_wmask),
|
||||
.dyn_rm (dyn_rm),
|
||||
.interrupt_taken(interrupt_taken),
|
||||
.interrupt_pending(interrupt_pending),
|
||||
.processing_csr(processing_csr),
|
||||
|
@ -551,8 +532,8 @@ module cva5
|
|||
.mret(mret),
|
||||
.sret(sret),
|
||||
.epc(epc),
|
||||
.retire(retire),
|
||||
.retire_ids(retire_ids),
|
||||
.retire_count (retire_count),
|
||||
.s_interrupt(s_interrupt),
|
||||
.m_interrupt(m_interrupt)
|
||||
);
|
||||
|
@ -562,8 +543,15 @@ module cva5
|
|||
gc_unit_block (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.issue (unit_issue[UNIT_IDS.IEC]),
|
||||
.gc_inputs (gc_inputs),
|
||||
.decode_stage (decode),
|
||||
.issue_stage (issue),
|
||||
.issue_stage_ready (issue_stage_ready),
|
||||
.unit_needed (unit_needed[IEC_ID]),
|
||||
.uses_rs (unit_uses_rs[IEC_ID]),
|
||||
.uses_rd (unit_uses_rd[IEC_ID]),
|
||||
.constant_alu (constant_alu),
|
||||
.rf (rf_issue.data),
|
||||
.issue (unit_issue[IEC_ID]),
|
||||
.branch_flush (branch_flush),
|
||||
.exception (exception),
|
||||
.exception_target_pc (exception_target_pc),
|
||||
|
@ -573,8 +561,6 @@ module cva5
|
|||
.mret(mret),
|
||||
.sret(sret),
|
||||
.epc(epc),
|
||||
.retire (retire),
|
||||
.retire_ids (retire_ids),
|
||||
.retire_ids_next (retire_ids_next),
|
||||
.interrupt_taken(interrupt_taken),
|
||||
.interrupt_pending(interrupt_pending),
|
||||
|
@ -583,43 +569,129 @@ module cva5
|
|||
.post_issue_count (post_issue_count)
|
||||
);
|
||||
|
||||
generate if (CONFIG.INCLUDE_MUL) begin : gen_mul
|
||||
generate if (CONFIG.INCLUDE_UNIT.MUL) begin : gen_mul
|
||||
mul_unit mul_unit_block (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.mul_inputs (mul_inputs),
|
||||
.issue (unit_issue[UNIT_IDS.MUL]),
|
||||
.wb (unit_wb[UNIT_IDS.MUL])
|
||||
.decode_stage (decode),
|
||||
.issue_stage (issue),
|
||||
.issue_stage_ready (issue_stage_ready),
|
||||
.unit_needed (unit_needed[MUL_ID]),
|
||||
.uses_rs (unit_uses_rs[MUL_ID]),
|
||||
.uses_rd (unit_uses_rd[MUL_ID]),
|
||||
.rf (rf_issue.data),
|
||||
.issue (unit_issue[MUL_ID]),
|
||||
.wb (unit_wb[MUL_ID])
|
||||
);
|
||||
end endgenerate
|
||||
|
||||
generate if (CONFIG.INCLUDE_DIV) begin : gen_div
|
||||
generate if (CONFIG.INCLUDE_UNIT.DIV) begin : gen_div
|
||||
div_unit div_unit_block (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.div_inputs (div_inputs),
|
||||
.issue (unit_issue[UNIT_IDS.DIV]),
|
||||
.wb (unit_wb[UNIT_IDS.DIV])
|
||||
.gc (gc),
|
||||
.instruction_issued_with_rd (instruction_issued_with_rd),
|
||||
.decode_stage (decode),
|
||||
.issue_stage (issue),
|
||||
.issue_stage_ready (issue_stage_ready),
|
||||
.issue_rs_addr (issue_rs_addr),
|
||||
.unit_needed (unit_needed[DIV_ID]),
|
||||
.uses_rs (unit_uses_rs[DIV_ID]),
|
||||
.uses_rd (unit_uses_rd[DIV_ID]),
|
||||
.rf (rf_issue.data),
|
||||
.issue (unit_issue[DIV_ID]),
|
||||
.wb (unit_wb[DIV_ID])
|
||||
);
|
||||
end endgenerate
|
||||
|
||||
|
||||
generate if (CONFIG.INCLUDE_UNIT.CUSTOM) begin : gen_custom
|
||||
custom_unit custom_unit_block (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.decode_stage (decode),
|
||||
.unit_needed (unit_needed[CUSTOM_ID]),
|
||||
.uses_rs (unit_uses_rs[CUSTOM_ID]),
|
||||
.uses_rd (unit_uses_rd[CUSTOM_ID]),
|
||||
.issue_stage (issue),
|
||||
.issue_stage_ready (issue_stage_ready),
|
||||
.rf (rf_issue.data),
|
||||
.issue (unit_issue[CUSTOM_ID]),
|
||||
.wb (unit_wb[CUSTOM_ID])
|
||||
);
|
||||
end endgenerate
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Writeback
|
||||
//First writeback port: ALU
|
||||
//Second writeback port: LS, CSR, [MUL], [DIV]
|
||||
localparam int unsigned NUM_UNITS_PER_PORT [CONFIG.NUM_WB_GROUPS] = '{NUM_WB_UNITS_GROUP_1, NUM_WB_UNITS_GROUP_2};
|
||||
writeback #(
|
||||
.CONFIG (CONFIG),
|
||||
.NUM_UNITS (NUM_UNITS_PER_PORT),
|
||||
.NUM_WB_UNITS (NUM_WB_UNITS)
|
||||
)
|
||||
writeback_block (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.wb_packet (wb_packet),
|
||||
.unit_wb (unit_wb),
|
||||
.wb_snoop (wb_snoop)
|
||||
);
|
||||
generate for (genvar i = 0; i < CONFIG.NUM_WB_GROUPS; i++) begin : gen_wb
|
||||
writeback #(
|
||||
.NUM_WB_UNITS (get_num_wb_units(CONFIG.WB_GROUP[i])),
|
||||
.WB_INDEX (CONFIG.WB_GROUP[i])
|
||||
)
|
||||
writeback_block (
|
||||
.wb_packet (wb_packet[i]),
|
||||
.unit_wb (unit_wb)
|
||||
);
|
||||
end endgenerate
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//FPU
|
||||
generate if (CONFIG.INCLUDE_UNIT.FPU) begin : gen_fpu
|
||||
|
||||
fp_writeback fp_writeback_block (
|
||||
.unit_wb (fp_unit_wb),
|
||||
.wb_packet (fp_wb_packet)
|
||||
);
|
||||
|
||||
fpu_top #(.CONFIG(CONFIG))
|
||||
fpu_block (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.decode_stage (decode),
|
||||
.unit_needed (unit_needed[FPU_ID]),
|
||||
.uses_rs (unit_uses_rs[FPU_ID]),
|
||||
.fp_uses_rs (fp_unit_uses_rs[1]),
|
||||
.uses_rd (unit_uses_rd[FPU_ID]),
|
||||
.fp_uses_rd (fp_unit_uses_rd[1]),
|
||||
.issue_stage_ready (issue_stage_ready),
|
||||
.dyn_rm (dyn_rm),
|
||||
.int_rf (rf_issue.data),
|
||||
.fp_rf (fp_rf_issue.data),
|
||||
.issue (unit_issue[FPU_ID]),
|
||||
.int_wb (unit_wb[FPU_ID]),
|
||||
.fp_wb (fp_unit_wb[1]),
|
||||
.fflags (fflag_wmask)
|
||||
);
|
||||
|
||||
register_file #(.NUM_WB_GROUPS(2), .READ_PORTS(3), .USE_ZERO(1), .PORT_ZERO_ABSENT(1), .WB_PACKET_TYPE(fp_wb_packet_t))
|
||||
fp_register_file_block (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.gc (gc),
|
||||
.decode_phys_rs_addr (fp_decode_phys_rs_addr),
|
||||
.decode_phys_rd_addr (fp_decode_phys_rd_addr),
|
||||
.decode_rs_wb_group (fp_decode_rs_wb_group),
|
||||
.decode_advance (decode_advance),
|
||||
.decode_uses_rd (fp_decode_uses_rd),
|
||||
.decode_rd_addr ('x),
|
||||
.rf_issue (fp_rf_issue),
|
||||
.commit (fp_wb_packet),
|
||||
.wb_phys_addr (fp_wb_phys_addr)
|
||||
);
|
||||
|
||||
renamer #(.NUM_WB_GROUPS(2), .READ_PORTS(3), .RENAME_ZERO(1))
|
||||
fp_renamer_block (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.gc (gc),
|
||||
.decode_advance (decode_advance),
|
||||
.decode (fp_decode_rename_interface),
|
||||
.issue (issue),
|
||||
.instruction_issued_with_rd (fp_instruction_issued_with_rd),
|
||||
.wb_retire (fp_wb_retire)
|
||||
);
|
||||
|
||||
end endgenerate
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//End of Implementation
|
||||
|
@ -635,40 +707,5 @@ module cva5
|
|||
////////////////////////////////////////////////////
|
||||
//Assertions
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Trace Interface
|
||||
generate if (ENABLE_TRACE_INTERFACE) begin : gen_cva5_trace
|
||||
always_ff @(posedge clk) begin
|
||||
tr.events.early_branch_correction <= tr_early_branch_correction;
|
||||
tr.events.operand_stall <= tr_operand_stall;
|
||||
tr.events.unit_stall <= tr_unit_stall;
|
||||
tr.events.no_id_stall <= tr_no_id_stall;
|
||||
tr.events.no_instruction_stall <= tr_no_instruction_stall;
|
||||
tr.events.other_stall <= tr_other_stall;
|
||||
tr.events.instruction_issued_dec <= tr_instruction_issued_dec;
|
||||
tr.events.branch_operand_stall <= tr_branch_operand_stall;
|
||||
tr.events.alu_operand_stall <= tr_alu_operand_stall;
|
||||
tr.events.ls_operand_stall <= tr_ls_operand_stall;
|
||||
tr.events.div_operand_stall <= tr_div_operand_stall;
|
||||
tr.events.alu_op <= tr_alu_op;
|
||||
tr.events.branch_or_jump_op <= tr_branch_or_jump_op;
|
||||
tr.events.load_op <= tr_load_op;
|
||||
tr.events.store_op <= tr_store_op;
|
||||
tr.events.mul_op <= tr_mul_op;
|
||||
tr.events.div_op <= tr_div_op;
|
||||
tr.events.misc_op <= tr_misc_op;
|
||||
tr.events.branch_correct <= tr_branch_correct;
|
||||
tr.events.branch_misspredict <= tr_branch_misspredict;
|
||||
tr.events.return_correct <= tr_return_correct;
|
||||
tr.events.return_misspredict <= tr_return_misspredict;
|
||||
tr.events.load_conflict_delay <= tr_load_conflict_delay;
|
||||
tr.events.rs1_forwarding_needed <= tr_rs1_forwarding_needed;
|
||||
tr.events.rs2_forwarding_needed <= tr_rs2_forwarding_needed;
|
||||
tr.events.rs1_and_rs2_forwarding_needed <= tr_rs1_and_rs2_forwarding_needed;
|
||||
tr.instruction_pc_dec <= tr_instruction_pc_dec;
|
||||
tr.instruction_data_dec <= tr_instruction_data_dec;
|
||||
end
|
||||
end
|
||||
endgenerate
|
||||
|
||||
endmodule
|
||||
|
|
345
core/dcache.sv
345
core/dcache.sv
|
@ -1,345 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2017-2020 Eric Matthews, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Eric Matthews <ematthew@sfu.ca>
|
||||
*/
|
||||
|
||||
module dcache
|
||||
|
||||
import cva5_config::*;
|
||||
import riscv_types::*;
|
||||
import cva5_types::*;
|
||||
|
||||
# (
|
||||
parameter cpu_config_t CONFIG = EXAMPLE_CONFIG
|
||||
)
|
||||
|
||||
(
|
||||
input logic clk,
|
||||
input logic rst,
|
||||
input logic dcache_on,
|
||||
l1_arbiter_request_interface.master l1_request,
|
||||
l1_arbiter_return_interface.master l1_response,
|
||||
input logic sc_complete,
|
||||
input logic sc_success,
|
||||
input logic clear_reservation,
|
||||
input amo_details_t amo,
|
||||
input logic uncacheable,
|
||||
memory_sub_unit_interface.responder ls
|
||||
);
|
||||
|
||||
localparam DCACHE_SIZE_IN_WORDS = CONFIG.DCACHE.LINES*CONFIG.DCACHE.LINE_W*CONFIG.DCACHE.WAYS;
|
||||
localparam derived_cache_config_t SCONFIG = get_derived_cache_params(CONFIG, CONFIG.DCACHE, CONFIG.DCACHE_ADDR);
|
||||
localparam LOG2_DCACHE_WAYS = (CONFIG.DCACHE.WAYS == 1) ? 1 : $clog2(CONFIG.DCACHE.WAYS);
|
||||
|
||||
typedef struct packed{
|
||||
logic [29:0] addr;
|
||||
logic [3:0] be;
|
||||
logic load;
|
||||
logic store;
|
||||
logic [31:0] data;
|
||||
amo_details_t amo;
|
||||
logic uncacheable;
|
||||
} stage2_t;
|
||||
|
||||
logic [$clog2(DCACHE_SIZE_IN_WORDS)-1:0] data_bank_addr_a;
|
||||
logic [$clog2(DCACHE_SIZE_IN_WORDS)-1:0] data_bank_addr_b;
|
||||
|
||||
logic tag_hit;
|
||||
logic [CONFIG.DCACHE.WAYS-1:0] tag_hit_way;
|
||||
|
||||
logic [LOG2_DCACHE_WAYS-1:0] tag_hit_way_int;
|
||||
|
||||
logic tag_update;
|
||||
logic [CONFIG.DCACHE.WAYS-1:0] tag_update_way;
|
||||
logic [CONFIG.DCACHE.WAYS-1:0] replacement_way;
|
||||
|
||||
logic [LOG2_DCACHE_WAYS-1:0] replacement_way_int;
|
||||
logic [LOG2_DCACHE_WAYS-1:0] tag_update_way_int;
|
||||
|
||||
logic [SCONFIG.SUB_LINE_ADDR_W-1:0] word_count;
|
||||
logic [SCONFIG.SUB_LINE_ADDR_W-1:0] sc_write_index;
|
||||
logic [SCONFIG.SUB_LINE_ADDR_W-1:0] update_word_index;
|
||||
|
||||
logic line_complete;
|
||||
logic reservation;
|
||||
|
||||
stage2_t stage2;
|
||||
|
||||
logic [31:0] dbank_data_out;
|
||||
logic [31:0] hit_data;
|
||||
logic [31:0] miss_data;
|
||||
logic [31:0] new_line_data;
|
||||
logic [31:0] amo_result;
|
||||
logic [31:0] amo_rs2;
|
||||
|
||||
logic[3:0] write_hit_be;
|
||||
|
||||
logic second_cycle;
|
||||
|
||||
logic new_arb_request;
|
||||
logic arb_request_r;
|
||||
|
||||
logic is_target_word;
|
||||
|
||||
logic hit_allowed;
|
||||
logic read_hit_allowed;
|
||||
logic read_hit_data_valid;
|
||||
logic read_hit;
|
||||
|
||||
logic address_range_valid;
|
||||
|
||||
logic idle;
|
||||
logic read_miss_complete;
|
||||
|
||||
logic store_complete;
|
||||
amo_alu_inputs_t amo_alu_inputs;
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//2nd Cycle Control Signals
|
||||
always_ff @ (posedge clk) begin
|
||||
if (ls.new_request) begin
|
||||
stage2.addr <= ls.addr[31:2];
|
||||
stage2.be <= ls.be;
|
||||
stage2.load <= ls.re;
|
||||
stage2.store <= ls.we;
|
||||
stage2.data <= ls.data_in;
|
||||
stage2.amo <= amo;
|
||||
stage2.uncacheable <= uncacheable;
|
||||
end
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//General Control Logic
|
||||
//LR and AMO ops are forced misses (if there is a tag hit they will reuse the same way)
|
||||
//Signal is valid for a single cycle, RAM enables are used to hold outputs in case of pipeline stalls
|
||||
always_ff @ (posedge clk) begin
|
||||
read_hit_allowed <= ls.new_request & ls.re & dcache_on & ~(amo.is_lr | amo.is_amo) & ~uncacheable;
|
||||
read_hit_data_valid <= read_hit_allowed;
|
||||
second_cycle <= ls.new_request;
|
||||
tag_update <= second_cycle & dcache_on & stage2.load & ~tag_hit & ~stage2.uncacheable;//Cache enabled, read miss
|
||||
end
|
||||
|
||||
assign read_hit = tag_hit & read_hit_allowed;
|
||||
|
||||
//LR reservation, cleared on exceptions
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
reservation <= 0;
|
||||
else if (second_cycle)
|
||||
reservation <= stage2.amo.is_lr;
|
||||
else if (sc_complete | clear_reservation)
|
||||
reservation <= 0;
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//L1 Arbiter Interface
|
||||
assign l1_request.addr = {stage2.addr, 2'b0} ;//Memory interface aligns request to burst size (done there to support AMO line-read word-write)
|
||||
assign l1_request.data = stage2.data;
|
||||
assign l1_request.rnw = ~stage2.store;
|
||||
assign l1_request.be = stage2.be;
|
||||
assign l1_request.size = (stage2.load & ~stage2.uncacheable) ? 5'(CONFIG.DCACHE.LINE_W-1) : 0;//LR and AMO ops are included in load
|
||||
assign l1_request.is_amo = (stage2.amo.is_amo | stage2.amo.is_lr | stage2.amo.is_sc);
|
||||
assign l1_request.amo = stage2.amo.op;
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
if (rst | line_complete)
|
||||
word_count <= 0;
|
||||
else if (l1_response.data_valid)
|
||||
word_count <= word_count + 1;
|
||||
end
|
||||
assign is_target_word = (stage2.addr[SCONFIG.SUB_LINE_ADDR_W-1:0] == word_count) | stage2.uncacheable;
|
||||
|
||||
assign new_arb_request = second_cycle & (~read_hit);
|
||||
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
arb_request_r <= 0;
|
||||
else if (second_cycle & ~l1_request.ack)
|
||||
arb_request_r <= new_arb_request;
|
||||
else if (l1_request.ack)
|
||||
arb_request_r <= 0;
|
||||
end
|
||||
assign l1_request.request = new_arb_request | arb_request_r;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Replacement policy (free runing one-hot cycler, i.e. pseudo random)
|
||||
cycler #(CONFIG.DCACHE.WAYS) replacement_policy (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.en (1'b1),
|
||||
.one_hot (replacement_way)
|
||||
);
|
||||
|
||||
//One-hot tag hit / update logic to binary int
|
||||
one_hot_to_integer #(CONFIG.DCACHE.WAYS)
|
||||
hit_way_conv (
|
||||
.one_hot(tag_hit_way),
|
||||
.int_out(tag_hit_way_int)
|
||||
);
|
||||
one_hot_to_integer #(CONFIG.DCACHE.WAYS)
|
||||
update_way_conv (
|
||||
.one_hot (replacement_way),
|
||||
.int_out (replacement_way_int)
|
||||
);
|
||||
|
||||
|
||||
//If atomic load (LR or AMO op) and there's a tag hit reuse same line
|
||||
logic stage2_amo_with_load;
|
||||
assign stage2_amo_with_load = stage2.amo.is_amo | stage2.amo.is_lr;
|
||||
always_ff @ (posedge clk) begin
|
||||
if (second_cycle) begin
|
||||
tag_update_way<= (stage2_amo_with_load & tag_hit) ? tag_hit_way : replacement_way;
|
||||
tag_update_way_int <= (stage2_amo_with_load & tag_hit) ? tag_hit_way_int : replacement_way_int;
|
||||
end
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Tag banks
|
||||
dtag_banks #(.CONFIG(CONFIG), .SCONFIG(SCONFIG))
|
||||
dcache_tag_banks (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.stage1_addr (ls.addr[31:2]),
|
||||
.stage2_addr (stage2.addr),
|
||||
.inv_addr (l1_response.inv_addr),
|
||||
.update_way (tag_update_way),
|
||||
.update (tag_update),
|
||||
.stage1_adv (ls.new_request),
|
||||
.stage1_inv (1'b0),//For software invalidation
|
||||
.extern_inv (l1_response.inv_valid),
|
||||
.extern_inv_complete (l1_response.inv_ack),
|
||||
.tag_hit (tag_hit),
|
||||
.tag_hit_way (tag_hit_way)
|
||||
);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//AMO logic
|
||||
always_ff @ (posedge clk) begin
|
||||
amo_rs2 <= stage2.data;
|
||||
end
|
||||
|
||||
assign amo_alu_inputs.rs1_load = l1_response.data;
|
||||
assign amo_alu_inputs.rs2 = amo_rs2;
|
||||
assign amo_alu_inputs.op = stage2.amo.op;
|
||||
|
||||
generate if (CONFIG.INCLUDE_AMO)
|
||||
amo_alu amo_unit (
|
||||
.amo_alu_inputs (amo_alu_inputs),
|
||||
.result (amo_result)
|
||||
);
|
||||
endgenerate
|
||||
|
||||
always_comb begin
|
||||
if (stage2.amo.is_amo & is_target_word)
|
||||
new_line_data = amo_result;
|
||||
else if (stage2.amo.is_sc)
|
||||
new_line_data = stage2.data;
|
||||
else
|
||||
new_line_data = l1_response.data;
|
||||
end
|
||||
|
||||
assign sc_write_index = stage2.addr[SCONFIG.SUB_LINE_ADDR_W-1:0];
|
||||
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Data Bank(s)
|
||||
//Tag bank selection done with upper address bits
|
||||
//On miss, word index in line provided by: update_word_index
|
||||
assign write_hit_be = stage2.be & {4{tag_hit}};
|
||||
assign update_word_index = stage2.amo.is_sc ? sc_write_index : word_count;
|
||||
|
||||
generate if (CONFIG.DCACHE.WAYS == 1) begin : bank_sel_gen
|
||||
assign data_bank_addr_a = stage2.addr[SCONFIG.LINE_ADDR_W+SCONFIG.SUB_LINE_ADDR_W-1:0];
|
||||
assign data_bank_addr_b = {stage2.addr[SCONFIG.LINE_ADDR_W+SCONFIG.SUB_LINE_ADDR_W-1:SCONFIG.SUB_LINE_ADDR_W], update_word_index};
|
||||
end else begin
|
||||
assign data_bank_addr_a = {tag_hit_way_int, stage2.addr[SCONFIG.LINE_ADDR_W+SCONFIG.SUB_LINE_ADDR_W-1:0]};
|
||||
assign data_bank_addr_b = {tag_update_way_int, stage2.addr[SCONFIG.LINE_ADDR_W+SCONFIG.SUB_LINE_ADDR_W-1:SCONFIG.SUB_LINE_ADDR_W], update_word_index};
|
||||
end endgenerate
|
||||
|
||||
ddata_bank #(.LINES(DCACHE_SIZE_IN_WORDS)) data_bank (
|
||||
.clk(clk),
|
||||
.addr_a(data_bank_addr_a),
|
||||
.addr_b(data_bank_addr_b),
|
||||
.en_a(second_cycle),
|
||||
.en_b((l1_response.data_valid & ~stage2.uncacheable) | (sc_complete & sc_success)),
|
||||
.be_a(write_hit_be),
|
||||
.data_in_a(stage2.data),
|
||||
.data_in_b(new_line_data),
|
||||
.data_out_a(dbank_data_out)
|
||||
);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Output
|
||||
always_ff @ (posedge clk) begin
|
||||
if (l1_response.data_valid & is_target_word)
|
||||
miss_data <= l1_response.data;
|
||||
else if (sc_complete)
|
||||
miss_data <= {31'b0, sc_success};
|
||||
end
|
||||
|
||||
assign ls.data_out = read_hit_data_valid ? dbank_data_out : miss_data;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Pipeline Advancement
|
||||
assign line_complete = l1_response.data_valid & ((word_count == $clog2(CONFIG.DCACHE.LINE_W)'(CONFIG.DCACHE.LINE_W-1)) | stage2.uncacheable); //covers load, LR, AMO
|
||||
assign store_complete = l1_request.ack & stage2.store & ~stage2.amo.is_sc;
|
||||
|
||||
//read miss complete includes store conditional complete
|
||||
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
read_miss_complete <= 0;
|
||||
else
|
||||
read_miss_complete <= line_complete | sc_complete;
|
||||
end
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
ls.data_valid <= 0;
|
||||
else
|
||||
ls.data_valid <= (l1_response.data_valid & is_target_word) | read_hit | sc_complete;
|
||||
end
|
||||
|
||||
assign ls.ready = read_hit | store_complete | read_miss_complete | idle;
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
idle <= 1;
|
||||
else if (ls.new_request)
|
||||
idle <= 0;
|
||||
else if (ls.ready)
|
||||
idle <= 1;
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//End of Implementation
|
||||
////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Assertions
|
||||
//Concurrent assertion sampled on posedge clk and disabled while rst is high:
//whenever ls.new_request is asserted, ls.ready must be asserted in the same cycle
//(overlapping implication |->). Reports an error otherwise.
dcache_request_when_not_ready_assertion:
|
||||
assert property (@(posedge clk) disable iff (rst) ls.new_request |-> ls.ready)
|
||||
else $error("dcache received request when not ready");
|
||||
|
||||
endmodule
|
|
@ -1,47 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2017 Eric Matthews, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Eric Matthews <ematthew@sfu.ca>
|
||||
*/
|
||||
|
||||
//ddata_bank: thin wrapper around a single byte_en_BRAM instance.
//Exposes port A with byte enables (be_a), write data and read data, and
//port B as a write-only port with a single enable and no byte enables or read data.
//LINES sets the address width of both ports ($clog2(LINES) bits); data is 32 bits wide.
module ddata_bank
|
||||
|
||||
import cva5_config::*;
|
||||
import cva5_types::*;
|
||||
|
||||
#(
|
||||
parameter LINES = 2048
|
||||
)
|
||||
(
|
||||
input logic clk,
|
||||
input logic[$clog2(LINES)-1:0] addr_a,
|
||||
input logic en_a,
|
||||
input logic[3:0] be_a,
|
||||
input logic[31:0] data_in_a,
|
||||
output logic[31:0] data_out_a,
|
||||
|
||||
//write only port
|
||||
input logic[$clog2(LINES)-1:0] addr_b,
|
||||
input logic en_b,
|
||||
input logic[31:0] data_in_b
|
||||
);
|
||||
|
||||
//Same-named ports are connected implicitly through .*
//Port B: all four byte enables are driven together from en_b, and its read data output is left unconnected.
//NOTE(review): the positional parameters (LINES, "", 0) are defined by byte_en_BRAM, which is not visible here - confirm their meaning.
byte_en_BRAM #(LINES, "", 0) ram_block (.*, .be_b({4{en_b}}), .data_out_b());
|
||||
|
||||
endmodule
|
|
@ -26,11 +26,10 @@ module decode_and_issue
|
|||
import riscv_types::*;
|
||||
import cva5_types::*;
|
||||
import csr_types::*;
|
||||
import opcodes::*;
|
||||
|
||||
# (
|
||||
parameter cpu_config_t CONFIG = EXAMPLE_CONFIG,
|
||||
parameter NUM_UNITS = 7,
|
||||
parameter unit_id_param_t UNIT_IDS = EXAMPLE_UNIT_IDS
|
||||
parameter cpu_config_t CONFIG = EXAMPLE_CONFIG
|
||||
)
|
||||
|
||||
(
|
||||
|
@ -45,161 +44,148 @@ module decode_and_issue
|
|||
|
||||
//Renamer
|
||||
renamer_interface.decode renamer,
|
||||
renamer_interface.decode fp_renamer,
|
||||
|
||||
input logic [MAX_NUM_UNITS-1:0] unit_needed,
|
||||
input logic [MAX_NUM_UNITS-1:0][REGFILE_READ_PORTS-1:0] unit_uses_rs,
|
||||
input logic [1:0][2:0] fp_unit_uses_rs,
|
||||
input logic [MAX_NUM_UNITS-1:0] unit_uses_rd,
|
||||
input logic [1:0] fp_unit_uses_rd,
|
||||
|
||||
output logic decode_uses_rd,
|
||||
output logic fp_decode_uses_rd,
|
||||
output rs_addr_t decode_rd_addr,
|
||||
output phys_addr_t decode_phys_rd_addr,
|
||||
output phys_addr_t fp_decode_phys_rd_addr,
|
||||
output phys_addr_t decode_phys_rs_addr [REGFILE_READ_PORTS],
|
||||
output phys_addr_t fp_decode_phys_rs_addr [3],
|
||||
output logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] decode_rs_wb_group [REGFILE_READ_PORTS],
|
||||
output logic fp_decode_rs_wb_group [3],
|
||||
|
||||
output logic instruction_issued,
|
||||
output logic instruction_issued_with_rd,
|
||||
output logic fp_instruction_issued_with_rd,
|
||||
output issue_packet_t issue,
|
||||
output rs_addr_t issue_rs_addr [REGFILE_READ_PORTS],
|
||||
output phys_addr_t issue_phys_rs_addr [REGFILE_READ_PORTS],
|
||||
output phys_addr_t fp_issue_phys_rs_addr [3],
|
||||
output logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] issue_rd_wb_group,
|
||||
output logic fp_issue_rd_wb_group,
|
||||
output logic issue_stage_ready,
|
||||
|
||||
//Register File
|
||||
register_file_issue_interface.issue rf,
|
||||
register_file_issue_interface.issue fp_rf,
|
||||
|
||||
output alu_inputs_t alu_inputs,
|
||||
output load_store_inputs_t ls_inputs,
|
||||
output branch_inputs_t branch_inputs,
|
||||
output gc_inputs_t gc_inputs,
|
||||
output csr_inputs_t csr_inputs,
|
||||
output mul_inputs_t mul_inputs,
|
||||
output div_inputs_t div_inputs,
|
||||
output logic [31:0] constant_alu,
|
||||
|
||||
unit_issue_interface.decode unit_issue [NUM_UNITS-1:0],
|
||||
unit_issue_interface.decode unit_issue [MAX_NUM_UNITS-1:0],
|
||||
|
||||
input gc_outputs_t gc,
|
||||
input logic [1:0] current_privilege,
|
||||
|
||||
exception_interface.unit exception,
|
||||
exception_interface.unit exception
|
||||
);
|
||||
|
||||
//Trace signals
|
||||
output logic tr_operand_stall,
|
||||
output logic tr_unit_stall,
|
||||
output logic tr_no_id_stall,
|
||||
output logic tr_no_instruction_stall,
|
||||
output logic tr_other_stall,
|
||||
output logic tr_branch_operand_stall,
|
||||
output logic tr_alu_operand_stall,
|
||||
output logic tr_ls_operand_stall,
|
||||
output logic tr_div_operand_stall,
|
||||
|
||||
output logic tr_alu_op,
|
||||
output logic tr_branch_or_jump_op,
|
||||
output logic tr_load_op,
|
||||
output logic tr_store_op,
|
||||
output logic tr_mul_op,
|
||||
output logic tr_div_op,
|
||||
output logic tr_misc_op,
|
||||
common_instruction_t decode_instruction;//rs1_addr, rs2_addr, fn3, fn7, rd_addr, upper/lower opcode
|
||||
|
||||
output logic tr_instruction_issued_dec,
|
||||
output logic [31:0] tr_instruction_pc_dec,
|
||||
output logic [31:0] tr_instruction_data_dec
|
||||
);
|
||||
logic decode_uses_rs [REGFILE_READ_PORTS];
|
||||
logic fp_decode_uses_rs [3];
|
||||
|
||||
logic [2:0] fn3;
|
||||
logic [6:0] opcode;
|
||||
logic [4:0] opcode_trim;
|
||||
rs_addr_t decode_rs_addr [REGFILE_READ_PORTS];
|
||||
rs_addr_t fp_decode_rs_addr [3];
|
||||
logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] decode_wb_group;
|
||||
logic fp_decode_wb_group;
|
||||
|
||||
logic uses_rs [REGFILE_READ_PORTS];
|
||||
logic uses_rd;
|
||||
logic issue_hold;
|
||||
logic [REGFILE_READ_PORTS-1:0] operand_ready;
|
||||
logic [2:0] fp_operand_ready;
|
||||
logic [MAX_NUM_UNITS-1:0] unit_needed_issue_stage;
|
||||
logic [MAX_NUM_UNITS-1:0] issue_to;
|
||||
|
||||
rs_addr_t rs_addr [REGFILE_READ_PORTS];
|
||||
rs_addr_t rd_addr;
|
||||
|
||||
logic is_csr;
|
||||
logic is_fence;
|
||||
logic is_ifence;
|
||||
logic csr_imm_op;
|
||||
logic environment_op;
|
||||
|
||||
logic issue_valid;
|
||||
logic operands_ready;
|
||||
logic mult_div_op;
|
||||
|
||||
logic [NUM_UNITS-1:0] unit_needed;
|
||||
logic [NUM_UNITS-1:0] unit_needed_issue_stage;
|
||||
logic [NUM_UNITS-1:0] unit_ready;
|
||||
logic [NUM_UNITS-1:0] issue_ready;
|
||||
logic [NUM_UNITS-1:0] issue_to;
|
||||
|
||||
rs_addr_t issue_rs_addr [REGFILE_READ_PORTS];
|
||||
phys_addr_t issue_phys_rs_addr [REGFILE_READ_PORTS];
|
||||
logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] issue_rs_wb_group [REGFILE_READ_PORTS];
|
||||
logic fp_issue_rs_wb_group [3];
|
||||
logic issue_uses_rs [REGFILE_READ_PORTS];
|
||||
logic fp_issue_uses_rs [3];
|
||||
|
||||
logic pre_issue_exception_pending;
|
||||
logic illegal_instruction_pattern;
|
||||
|
||||
logic issue_stage_ready;
|
||||
|
||||
logic [REGFILE_READ_PORTS-1:0] rs_conflict;
|
||||
|
||||
genvar i;
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
|
||||
|
||||
//Can move data into issue stage if:
|
||||
// there is no instruction currently in the issue stage, or
|
||||
// an instruction could issue (issue_flush, issue_hold and whether the instruction is valid are not needed in this check)
|
||||
assign issue_stage_ready = ((~issue.stage_valid) | (issue_valid & |issue_ready)) & ~gc.issue_hold;
|
||||
// an instruction could issue (ignoring gc.fetch_flush)
|
||||
assign issue_stage_ready = (~issue.stage_valid) | (|issue_to);
|
||||
assign decode_advance = decode.valid & issue_stage_ready;
|
||||
|
||||
//Instruction aliases
|
||||
assign opcode = decode.instruction[6:0];
|
||||
assign opcode_trim = opcode[6:2];
|
||||
assign fn3 = decode.instruction[14:12];
|
||||
assign rs_addr[RS1] = decode.instruction[19:15];
|
||||
assign rs_addr[RS2] = decode.instruction[24:20];
|
||||
assign rd_addr = decode.instruction[11:7];
|
||||
|
||||
assign is_csr = CONFIG.INCLUDE_CSRS & (opcode_trim == SYSTEM_T) & (fn3 != 0);
|
||||
assign is_fence = (opcode_trim == FENCE_T) & ~fn3[0];
|
||||
assign is_ifence = CONFIG.INCLUDE_IFENCE & (opcode_trim == FENCE_T) & fn3[0];
|
||||
assign csr_imm_op = (opcode_trim == SYSTEM_T) & fn3[2];
|
||||
assign environment_op = (opcode_trim == SYSTEM_T) & (fn3 == 0);
|
||||
|
||||
assign decode_instruction = decode.instruction;
|
||||
always_comb begin
|
||||
decode_rs_addr = '{default: '0};
|
||||
decode_rs_addr[RS1] = decode_instruction.rs1_addr;
|
||||
decode_rs_addr[RS2] = decode_instruction.rs2_addr;
|
||||
fp_decode_rs_addr = '{default: '0};
|
||||
fp_decode_rs_addr[RS1] = decode_instruction.rs1_addr;
|
||||
fp_decode_rs_addr[RS2] = decode_instruction.rs2_addr;
|
||||
fp_decode_rs_addr[RS3] = decode_instruction.fn7[6:2];
|
||||
end
|
||||
////////////////////////////////////////////////////
|
||||
//Register File Support
|
||||
assign uses_rs[RS1] = opcode_trim inside {JALR_T, BRANCH_T, LOAD_T, STORE_T, ARITH_IMM_T, ARITH_T, AMO_T} | is_csr;
|
||||
assign uses_rs[RS2] = opcode_trim inside {BRANCH_T, ARITH_T, AMO_T};//Stores are exempted due to store forwarding
|
||||
assign uses_rd = opcode_trim inside {LUI_T, AUIPC_T, JAL_T, JALR_T, LOAD_T, ARITH_IMM_T, ARITH_T} | is_csr;
|
||||
always_comb begin
|
||||
decode_uses_rd = |unit_uses_rd;
|
||||
fp_decode_uses_rd = |fp_unit_uses_rd;
|
||||
decode_uses_rs = '{default: 0};
|
||||
for (int i = 0; i < MAX_NUM_UNITS; i++)
|
||||
for (int j = 0; j < REGFILE_READ_PORTS; j++)
|
||||
decode_uses_rs[j] |= unit_uses_rs[i][j];
|
||||
fp_decode_uses_rs = '{default: 0};
|
||||
for (int i = 0; i < 2; i++)
|
||||
for (int j = 0; j < 3; j++)
|
||||
fp_decode_uses_rs[j] |= fp_unit_uses_rs[i][j];
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Unit Determination
|
||||
assign unit_needed[UNIT_IDS.BR] = opcode_trim inside {BRANCH_T, JAL_T, JALR_T};
|
||||
assign unit_needed[UNIT_IDS.ALU] = (opcode_trim inside {ARITH_T, ARITH_IMM_T, AUIPC_T, LUI_T, JAL_T, JALR_T}) & ~mult_div_op;
|
||||
assign unit_needed[UNIT_IDS.LS] = opcode_trim inside {LOAD_T, STORE_T, AMO_T} | is_fence;
|
||||
generate if (CONFIG.INCLUDE_CSRS)
|
||||
assign unit_needed[UNIT_IDS.CSR] = is_csr;
|
||||
endgenerate
|
||||
assign unit_needed[UNIT_IDS.IEC] = (opcode_trim inside {SYSTEM_T} & ~is_csr & CONFIG.INCLUDE_M_MODE) | is_ifence;
|
||||
//WB Group Determination
|
||||
localparam units_t [MAX_NUM_UNITS-1:0] WB_UNITS_TYPE_REP = get_wb_units_type_representation(CONFIG.WB_GROUP);
|
||||
logic [CONFIG.NUM_WB_GROUPS-1:0] uses_wb_group;
|
||||
|
||||
always_comb begin
|
||||
for (int i = 0; i < CONFIG.NUM_WB_GROUPS; i++)
|
||||
uses_wb_group[i] = |(unit_needed & WB_UNITS_TYPE_REP[i]);
|
||||
end
|
||||
|
||||
assign mult_div_op = (opcode_trim == ARITH_T) && decode.instruction[25];
|
||||
generate if (CONFIG.INCLUDE_MUL)
|
||||
assign unit_needed[UNIT_IDS.MUL] = mult_div_op && ~fn3[2];
|
||||
endgenerate
|
||||
one_hot_to_integer #(.C_WIDTH(CONFIG.NUM_WB_GROUPS))
|
||||
wb_group_one_hot_block (
|
||||
.one_hot (uses_wb_group),
|
||||
.int_out (decode_wb_group)
|
||||
);
|
||||
|
||||
generate if (CONFIG.INCLUDE_DIV)
|
||||
assign unit_needed[UNIT_IDS.DIV] = mult_div_op && fn3[2];
|
||||
endgenerate
|
||||
assign fp_decode_wb_group = unit_needed[FPU_ID];
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Renamer Support
|
||||
assign renamer.rd_addr = rd_addr;
|
||||
assign renamer.rs_addr = rs_addr;
|
||||
assign renamer.uses_rd = uses_rd;
|
||||
assign renamer.rd_wb_group = ~unit_needed[UNIT_IDS.ALU];//TODO: automate generation of wb group logic
|
||||
assign renamer.rd_addr = decode_instruction.rd_addr;
|
||||
assign fp_renamer.rd_addr = decode_instruction.rd_addr;
|
||||
assign renamer.rs_addr = decode_rs_addr;
|
||||
assign fp_renamer.rs_addr = fp_decode_rs_addr;
|
||||
assign renamer.uses_rd = decode_uses_rd;
|
||||
assign fp_renamer.uses_rd = fp_decode_uses_rd;
|
||||
assign renamer.rd_wb_group = decode_wb_group;
|
||||
assign fp_renamer.rd_wb_group = fp_decode_wb_group;
|
||||
assign renamer.id = decode.id;
|
||||
assign fp_renamer.id = decode.id;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Decode ID Support
|
||||
assign decode_uses_rd = uses_rd;
|
||||
assign decode_rd_addr = rd_addr;
|
||||
assign decode_rd_addr = decode_instruction.rd_addr;
|
||||
assign decode_phys_rd_addr = renamer.phys_rd_addr;
|
||||
assign fp_decode_phys_rd_addr = fp_renamer.phys_rd_addr;
|
||||
assign decode_phys_rs_addr = renamer.phys_rs_addr;
|
||||
assign fp_decode_phys_rs_addr = fp_renamer.phys_rs_addr;
|
||||
assign decode_rs_wb_group = renamer.rs_wb_group;
|
||||
assign fp_decode_rs_wb_group = fp_renamer.rs_wb_group;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Issue
|
||||
|
@ -208,18 +194,25 @@ module decode_and_issue
|
|||
issue.pc <= decode.pc;
|
||||
issue.instruction <= decode.instruction;
|
||||
issue.fetch_metadata <= decode.fetch_metadata;
|
||||
issue.fn3 <= fn3;
|
||||
issue.opcode <= opcode;
|
||||
issue_rs_addr <= rs_addr;
|
||||
issue.fn3 <= decode_instruction.fn3;
|
||||
issue.opcode <= decode.instruction[6:0];
|
||||
issue_rs_addr <= decode_rs_addr;
|
||||
issue_phys_rs_addr <= renamer.phys_rs_addr;
|
||||
fp_issue_phys_rs_addr <= fp_renamer.phys_rs_addr;
|
||||
issue_rs_wb_group <= renamer.rs_wb_group;
|
||||
issue.rd_addr <= rd_addr;
|
||||
fp_issue_rs_wb_group <= fp_renamer.rs_wb_group;
|
||||
issue.rd_addr <= decode_instruction.rd_addr;
|
||||
issue.phys_rd_addr <= renamer.phys_rd_addr;
|
||||
issue.is_multicycle <= ~unit_needed[UNIT_IDS.ALU];
|
||||
issue.fp_phys_rd_addr <= fp_renamer.phys_rd_addr;
|
||||
issue_rd_wb_group <= decode_wb_group;
|
||||
fp_issue_rd_wb_group <= fp_decode_wb_group;
|
||||
issue.is_multicycle <= ~unit_needed[ALU_ID];
|
||||
issue.id <= decode.id;
|
||||
issue.exception_unit <= decode_exception_unit;
|
||||
issue_uses_rs <= uses_rs;
|
||||
issue.uses_rd <= uses_rd;
|
||||
issue_uses_rs <= decode_uses_rs;
|
||||
fp_issue_uses_rs <= fp_decode_uses_rs;
|
||||
issue.uses_rd <= decode_uses_rd;
|
||||
issue.fp_uses_rd <= fp_decode_uses_rd;
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -236,379 +229,105 @@ module decode_and_issue
|
|||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Unit ready
|
||||
generate for (i=0; i<NUM_UNITS; i++)
|
||||
assign unit_ready[i] = unit_issue[i].ready;
|
||||
//Issue Determination
|
||||
assign issue_hold = gc.issue_hold | pre_issue_exception_pending;
|
||||
|
||||
generate for (genvar i=0; i<REGFILE_READ_PORTS; i++)
|
||||
assign operand_ready[i] = ~rf.inuse[i] | (rf.inuse[i] & ~issue_uses_rs[i]);
|
||||
endgenerate
|
||||
|
||||
generate for (genvar i=0; i<3; i++)
|
||||
assign fp_operand_ready[i] = ~fp_rf.inuse[i] | (fp_rf.inuse[i] & ~fp_issue_uses_rs[i]);
|
||||
endgenerate
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Issue Determination
|
||||
generate for (i=0; i<REGFILE_READ_PORTS; i++)
|
||||
assign rs_conflict[i] = rf.inuse[i] & issue_uses_rs[i];
|
||||
endgenerate
|
||||
assign operands_ready = ~|rs_conflict;
|
||||
//Unit EX signals
|
||||
generate for (genvar i = 0; i < MAX_NUM_UNITS; i++) begin : gen_unit_issue_signals
|
||||
assign unit_issue[i].possible_issue = issue.stage_valid & unit_needed_issue_stage[i] & unit_issue[i].ready;
|
||||
assign issue_to[i] = unit_issue[i].possible_issue & (&operand_ready) & (&fp_operand_ready) & ~issue_hold;
|
||||
assign unit_issue[i].new_request = issue_to[i] & ~gc.fetch_flush;
|
||||
assign unit_issue[i].id = issue.id;
|
||||
end endgenerate
|
||||
|
||||
assign issue_ready = unit_needed_issue_stage & unit_ready;
|
||||
assign issue_valid = issue.stage_valid & operands_ready & ~gc.issue_hold & ~pre_issue_exception_pending;
|
||||
|
||||
assign issue_to = {NUM_UNITS{issue_valid & ~gc.fetch_flush}} & issue_ready;
|
||||
|
||||
assign instruction_issued = issue_valid & ~gc.fetch_flush & |issue_ready;
|
||||
assign instruction_issued = |issue_to & ~gc.fetch_flush;
|
||||
assign instruction_issued_with_rd = instruction_issued & issue.uses_rd;
|
||||
assign fp_instruction_issued_with_rd = instruction_issued & issue.fp_uses_rd;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Register File Issue Interface
|
||||
assign rf.phys_rs_addr = issue_phys_rs_addr;
|
||||
assign fp_rf.phys_rs_addr = fp_issue_phys_rs_addr;
|
||||
assign rf.phys_rd_addr = issue.phys_rd_addr;
|
||||
assign fp_rf.phys_rd_addr = issue.fp_phys_rd_addr;
|
||||
assign rf.rs_wb_group = issue_rs_wb_group;
|
||||
assign fp_rf.rs_wb_group = fp_issue_rs_wb_group;
|
||||
|
||||
assign rf.single_cycle_or_flush = (instruction_issued_with_rd & |issue.rd_addr & ~issue.is_multicycle) | (issue.stage_valid & issue.uses_rd & |issue.rd_addr & gc.fetch_flush);
|
||||
assign fp_rf.single_cycle_or_flush = issue.stage_valid & issue.fp_uses_rd & gc.fetch_flush;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//ALU unit inputs
|
||||
logic [XLEN-1:0] alu_rs2_data;
|
||||
logic alu_imm_type;
|
||||
logic [31:0] constant_alu;
|
||||
alu_op_t alu_op;
|
||||
alu_op_t alu_op_r;
|
||||
alu_logic_op_t alu_logic_op;
|
||||
alu_logic_op_t alu_logic_op_r;
|
||||
logic alu_subtract;
|
||||
logic sub_instruction;
|
||||
|
||||
always_comb begin
|
||||
case (opcode_trim) inside
|
||||
LUI_T, AUIPC_T, JAL_T, JALR_T : alu_op = ALU_CONSTANT;
|
||||
default :
|
||||
case (fn3) inside
|
||||
SLTU_fn3, SLT_fn3 : alu_op = ALU_SLT;
|
||||
SLL_fn3, SRA_fn3 : alu_op = ALU_SHIFT;
|
||||
default : alu_op = ALU_ADD_SUB;
|
||||
endcase
|
||||
endcase
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
case (fn3)
|
||||
XOR_fn3 : alu_logic_op = ALU_LOGIC_XOR;
|
||||
OR_fn3 : alu_logic_op = ALU_LOGIC_OR;
|
||||
AND_fn3 : alu_logic_op = ALU_LOGIC_AND;
|
||||
default : alu_logic_op = ALU_LOGIC_ADD;//ADD/SUB/SLT/SLTU
|
||||
endcase
|
||||
end
|
||||
|
||||
assign sub_instruction = (fn3 == ADD_SUB_fn3) && decode.instruction[30] && opcode[5];//If ARITH instruction
|
||||
|
||||
//Constant ALU:
|
||||
// provides LUI, AUIPC, JAL, JALR results for ALU
|
||||
// provides PC+4 for BRANCH unit and ifence in GC unit
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue_stage_ready) begin
|
||||
constant_alu <= ((opcode_trim inside {LUI_T}) ? '0 : decode.pc) + ((opcode_trim inside {LUI_T, AUIPC_T}) ? {decode.instruction[31:12], 12'b0} : 4);
|
||||
alu_imm_type <= opcode_trim inside {ARITH_IMM_T};
|
||||
alu_op_r <= alu_op;
|
||||
alu_subtract <= (fn3 inside {SLTU_fn3, SLT_fn3}) || sub_instruction;
|
||||
alu_logic_op_r <= alu_logic_op;
|
||||
end
|
||||
if (issue_stage_ready)
|
||||
constant_alu <= ((decode_instruction.upper_opcode inside {LUI_T}) ? '0 : decode.pc) + ((decode_instruction.upper_opcode inside {LUI_T, AUIPC_T}) ? {decode.instruction[31:12], 12'b0} : 4);
|
||||
end
|
||||
|
||||
//Shifter related
|
||||
assign alu_inputs.lshift = ~issue.fn3[2];
|
||||
assign alu_inputs.shift_amount = alu_imm_type ? issue_rs_addr[RS2] : rf.data[RS2][4:0];
|
||||
assign alu_inputs.arith = rf.data[RS1][XLEN-1] & issue.instruction[30];//shift in bit
|
||||
assign alu_inputs.shifter_in = rf.data[RS1];
|
||||
|
||||
//LUI, AUIPC, JAL, JALR
|
||||
assign alu_inputs.constant_adder = constant_alu;
|
||||
|
||||
//logic and adder
|
||||
assign alu_inputs.subtract = alu_subtract;
|
||||
assign alu_inputs.logic_op = alu_logic_op_r;
|
||||
assign alu_inputs.in1 = {(rf.data[RS1][XLEN-1] & ~issue.fn3[0]), rf.data[RS1]};//(fn3[0] is SLTU_fn3);
|
||||
assign alu_rs2_data = alu_imm_type ? 32'(signed'(issue.instruction[31:20])) : rf.data[RS2];
|
||||
assign alu_inputs.in2 = {(alu_rs2_data[XLEN-1] & ~issue.fn3[0]), alu_rs2_data};
|
||||
|
||||
assign alu_inputs.alu_op = alu_op_r;
|
||||
////////////////////////////////////////////////////
|
||||
//Load Store unit inputs
|
||||
logic is_load;
|
||||
logic is_store;
|
||||
logic amo_op;
|
||||
logic store_conditional;
|
||||
logic load_reserve;
|
||||
logic [4:0] amo_type;
|
||||
|
||||
assign amo_op = CONFIG.INCLUDE_AMO ? (opcode_trim == AMO_T) : 1'b0;
|
||||
assign amo_type = decode.instruction[31:27];
|
||||
assign store_conditional = (amo_type == AMO_SC_FN5);
|
||||
assign load_reserve = (amo_type == AMO_LR_FN5);
|
||||
|
||||
generate if (CONFIG.INCLUDE_AMO) begin : gen_decode_ls_amo
|
||||
assign ls_inputs.amo.is_lr = load_reserve;
|
||||
assign ls_inputs.amo.is_sc = store_conditional;
|
||||
assign ls_inputs.amo.is_amo = amo_op & ~(load_reserve | store_conditional);
|
||||
assign ls_inputs.amo.op = amo_type;
|
||||
end
|
||||
else begin
|
||||
assign ls_inputs.amo = '0;
|
||||
end
|
||||
endgenerate
|
||||
|
||||
assign is_load = (opcode_trim inside {LOAD_T, AMO_T}) && !(amo_op & store_conditional); //LR and AMO_ops perform a read operation as well
|
||||
assign is_store = (opcode_trim == STORE_T) || (amo_op && store_conditional);//Used for LS unit and for ID tracking
|
||||
|
||||
logic [11:0] ls_offset;
|
||||
logic is_load_r;
|
||||
logic is_store_r;
|
||||
logic is_fence_r;
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue_stage_ready) begin
|
||||
ls_offset <= opcode[5] ? {decode.instruction[31:25], decode.instruction[11:7]} : decode.instruction[31:20];
|
||||
is_load_r <= is_load;
|
||||
is_store_r <= is_store;
|
||||
is_fence_r <= is_fence;
|
||||
end
|
||||
end
|
||||
|
||||
(* ramstyle = "MLAB, no_rw_check" *) id_t rd_to_id_table [32];
|
||||
always_ff @ (posedge clk) begin
|
||||
if (instruction_issued_with_rd)
|
||||
rd_to_id_table[issue.rd_addr] <= issue.id;
|
||||
end
|
||||
|
||||
assign ls_inputs.offset = ls_offset;
|
||||
assign ls_inputs.load = is_load_r;
|
||||
assign ls_inputs.store = is_store_r;
|
||||
assign ls_inputs.fence = is_fence_r;
|
||||
assign ls_inputs.fn3 = amo_op ? LS_W_fn3 : issue.fn3;
|
||||
assign ls_inputs.rs1 = rf.data[RS1];
|
||||
assign ls_inputs.rs2 = rf.data[RS2];
|
||||
assign ls_inputs.forwarded_store = rf.inuse[RS2];
|
||||
assign ls_inputs.store_forward_id = rd_to_id_table[issue_rs_addr[RS2]];
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Branch unit inputs
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//RAS Support
|
||||
logic rs1_link;
|
||||
logic rd_link;
|
||||
logic rs1_eq_rd;
|
||||
logic is_return;
|
||||
logic is_call;
|
||||
assign rs1_link = (rs_addr[RS1] inside {1,5});
|
||||
assign rd_link = (rd_addr inside {1,5});
|
||||
assign rs1_eq_rd = (rs_addr[RS1] == rd_addr);
|
||||
|
||||
logic br_use_signed;
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue_stage_ready) begin
|
||||
is_return <= (opcode_trim == JALR_T) && ((rs1_link & ~rd_link) | (rs1_link & rd_link & ~rs1_eq_rd));
|
||||
is_call <= (opcode_trim inside {JAL_T, JALR_T}) && rd_link;
|
||||
br_use_signed <= !(fn3 inside {BLTU_fn3, BGEU_fn3});
|
||||
end
|
||||
end
|
||||
|
||||
logic[19:0] jal_imm;
|
||||
logic[11:0] jalr_imm;
|
||||
logic[11:0] br_imm;
|
||||
|
||||
logic [20:0] pc_offset;
|
||||
logic [20:0] pc_offset_r;
|
||||
assign jal_imm = {decode.instruction[31], decode.instruction[19:12], decode.instruction[20], decode.instruction[30:21]};
|
||||
assign jalr_imm = decode.instruction[31:20];
|
||||
assign br_imm = {decode.instruction[31], decode.instruction[7], decode.instruction[30:25], decode.instruction[11:8]};
|
||||
|
||||
|
||||
always_comb begin
|
||||
case (opcode[3:2])
|
||||
2'b11 : pc_offset = 21'(signed'({jal_imm, 1'b0}));
|
||||
2'b01 : pc_offset = 21'(signed'(jalr_imm));
|
||||
default : pc_offset = 21'(signed'({br_imm, 1'b0}));
|
||||
endcase
|
||||
end
|
||||
|
||||
logic jalr;
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue_stage_ready) begin
|
||||
pc_offset_r <= pc_offset;
|
||||
jalr <= (~opcode[3] & opcode[2]);
|
||||
end
|
||||
end
|
||||
|
||||
assign branch_inputs.is_return = is_return;
|
||||
assign branch_inputs.is_call = is_call;
|
||||
assign branch_inputs.fn3 = issue.fn3;
|
||||
assign branch_inputs.pc_offset = pc_offset_r;
|
||||
assign branch_inputs.jal = issue.opcode[3];//(opcode == JAL);
|
||||
assign branch_inputs.jalr = jalr;
|
||||
assign branch_inputs.jal_jalr = issue.opcode[2];
|
||||
|
||||
assign branch_inputs.issue_pc = issue.pc;
|
||||
assign branch_inputs.issue_pc_valid = issue.stage_valid;
|
||||
assign branch_inputs.rs1 = {(rf.data[RS1][31] & br_use_signed), rf.data[RS1]};
|
||||
assign branch_inputs.rs2 = {(rf.data[RS2][31] & br_use_signed), rf.data[RS2]};
|
||||
assign branch_inputs.pc_p4 = constant_alu;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Global Control unit inputs
|
||||
logic is_ecall_r;
|
||||
logic is_ebreak_r;
|
||||
logic is_mret_r;
|
||||
logic is_sret_r;
|
||||
logic is_ifence_r;
|
||||
|
||||
logic [7:0] sys_op_match;
|
||||
typedef enum logic [2:0] {
|
||||
ECALL_i = 0,
|
||||
EBREAK_i = 1,
|
||||
URET_i = 2,
|
||||
SRET_i = 3,
|
||||
MRET_i = 4,
|
||||
SFENCE_i = 5
|
||||
} sys_op_index_t;
|
||||
|
||||
always_comb begin
|
||||
sys_op_match = '0;
|
||||
case (decode.instruction[31:20]) inside
|
||||
ECALL_imm : sys_op_match[ECALL_i] = CONFIG.INCLUDE_M_MODE;
|
||||
EBREAK_imm : sys_op_match[EBREAK_i] = CONFIG.INCLUDE_M_MODE;
|
||||
SRET_imm : sys_op_match[SRET_i] = CONFIG.INCLUDE_S_MODE;
|
||||
MRET_imm : sys_op_match[MRET_i] = CONFIG.INCLUDE_M_MODE;
|
||||
SFENCE_imm : sys_op_match[SFENCE_i] = CONFIG.INCLUDE_S_MODE;
|
||||
default : sys_op_match = '0;
|
||||
endcase
|
||||
end
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue_stage_ready) begin
|
||||
is_ecall_r <= sys_op_match[ECALL_i];
|
||||
is_ebreak_r <= sys_op_match[EBREAK_i];
|
||||
is_mret_r <= sys_op_match[MRET_i];
|
||||
is_sret_r <= sys_op_match[SRET_i];
|
||||
is_ifence_r <= is_ifence;
|
||||
end
|
||||
end
|
||||
|
||||
assign gc_inputs.pc_p4 = constant_alu;
|
||||
assign gc_inputs.is_ifence = is_ifence_r;
|
||||
assign gc_inputs.is_mret = is_mret_r;
|
||||
assign gc_inputs.is_sret = is_sret_r;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//CSR unit inputs
|
||||
generate if (CONFIG.INCLUDE_CSRS) begin : gen_decode_csr_inputs
|
||||
assign csr_inputs.addr = issue.instruction[31:20];
|
||||
assign csr_inputs.op = issue.fn3[1:0];
|
||||
assign csr_inputs.data = issue.fn3[2] ? {27'b0, issue_rs_addr[RS1]} : rf.data[RS1];
|
||||
assign csr_inputs.reads = ~((issue.fn3[1:0] == CSR_RW) && (issue.rd_addr == 0));
|
||||
assign csr_inputs.writes = ~((issue.fn3[1:0] == CSR_RC) && (issue_rs_addr[RS1] == 0));
|
||||
end endgenerate
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Mul unit inputs
|
||||
generate if (CONFIG.INCLUDE_MUL) begin : gen_decode_mul_inputs
|
||||
assign mul_inputs.rs1 = rf.data[RS1];
|
||||
assign mul_inputs.rs2 = rf.data[RS2];
|
||||
assign mul_inputs.op = issue.fn3[1:0];
|
||||
end endgenerate
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Div unit inputs
|
||||
generate if (CONFIG.INCLUDE_DIV) begin : gen_decode_div_inputs
|
||||
phys_addr_t prev_div_rs_addr [2];
|
||||
logic [1:0] div_rd_match;
|
||||
logic prev_div_result_valid;
|
||||
logic div_rs_overwrite;
|
||||
logic div_op_reuse;
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue_to[UNIT_IDS.DIV])
|
||||
prev_div_rs_addr <= issue_phys_rs_addr[RS1:RS2];
|
||||
end
|
||||
|
||||
assign div_op_reuse = {prev_div_result_valid, prev_div_rs_addr[RS1], prev_div_rs_addr[RS2]} == {1'b1, issue_phys_rs_addr[RS1],issue_phys_rs_addr[RS2]};
|
||||
|
||||
//Clear if prev div inputs are overwritten by another instruction
|
||||
assign div_rd_match[RS1] = (issue.phys_rd_addr == prev_div_rs_addr[RS1]);
|
||||
assign div_rd_match[RS2] = (issue.phys_rd_addr == prev_div_rs_addr[RS2]);
|
||||
assign div_rs_overwrite = |div_rd_match;
|
||||
|
||||
set_clr_reg_with_rst #(.SET_OVER_CLR(1), .WIDTH(1), .RST_VALUE(0)) prev_div_result_valid_m (
|
||||
.clk, .rst,
|
||||
.set(instruction_issued & unit_needed_issue_stage[UNIT_IDS.DIV]),
|
||||
.clr((instruction_issued & issue.uses_rd & div_rs_overwrite) | gc.writeback_supress), //No instructions will be issued while gc.writeback_supress is asserted
|
||||
.result(prev_div_result_valid)
|
||||
);
|
||||
|
||||
assign div_inputs.rs1 = rf.data[RS1];
|
||||
assign div_inputs.rs2 = rf.data[RS2];
|
||||
assign div_inputs.op = issue.fn3[1:0];
|
||||
assign div_inputs.reuse_result = div_op_reuse;
|
||||
end endgenerate
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Unit EX signals
|
||||
generate for (i = 0; i < NUM_UNITS; i++) begin : gen_unit_issue_signals
|
||||
assign unit_issue[i].possible_issue = issue.stage_valid & unit_needed_issue_stage[i] & unit_issue[i].ready;
|
||||
assign unit_issue[i].new_request = issue_to[i];
|
||||
assign unit_issue[i].id = issue.id;
|
||||
end endgenerate
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Illegal Instruction check
|
||||
logic illegal_instruction_pattern_r;
|
||||
generate if (CONFIG.INCLUDE_M_MODE) begin : gen_decode_exceptions
|
||||
illegal_instruction_checker # (.CONFIG(CONFIG))
|
||||
illegal_op_check (
|
||||
.instruction(decode.instruction), .illegal_instruction(illegal_instruction_pattern)
|
||||
);
|
||||
always_ff @(posedge clk) begin
|
||||
if (rst)
|
||||
illegal_instruction_pattern_r <= 0;
|
||||
else if (issue_stage_ready)
|
||||
illegal_instruction_pattern_r <= illegal_instruction_pattern;
|
||||
end
|
||||
logic new_exception;
|
||||
exception_code_t ecode;
|
||||
exception_code_t ecall_code;
|
||||
|
||||
//ECALL and EBREAK captured here, but separated out when ecode is set
|
||||
assign illegal_instruction_pattern = ~|unit_needed;
|
||||
|
||||
//TODO: Consider ways of parameterizing so that any exception generating unit
|
||||
//can be automatically added to this expression
|
||||
always_comb begin
|
||||
unique case (1'b1)
|
||||
unit_needed[UNIT_IDS.LS] : decode_exception_unit = LS_EXCEPTION;
|
||||
unit_needed[UNIT_IDS.BR] : decode_exception_unit = BR_EXCEPTION;
|
||||
unit_needed[LS_ID] : decode_exception_unit = LS_EXCEPTION;
|
||||
unit_needed[BR_ID] : decode_exception_unit = BR_EXCEPTION;
|
||||
default : decode_exception_unit = PRE_ISSUE_EXCEPTION;
|
||||
endcase
|
||||
if (illegal_instruction_pattern)
|
||||
if (~decode.fetch_metadata.ok)
|
||||
decode_exception_unit = PRE_ISSUE_EXCEPTION;
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//ECALL/EBREAK
|
||||
//The type of call instruction is dependent on the current privilege level
|
||||
exception_code_t ecall_code;
|
||||
always_comb begin
|
||||
case (current_privilege)
|
||||
USER_PRIVILEGE : ecall_code = ECALL_U;
|
||||
SUPERVISOR_PRIVILEGE : ecall_code = ECALL_S;
|
||||
SUPERVISOR_PRIVILEGE : ecall_code = ECALL_S;
|
||||
MACHINE_PRIVILEGE : ecall_code = ECALL_M;
|
||||
default : ecall_code = ECALL_U;
|
||||
endcase
|
||||
end
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue_stage_ready) begin
|
||||
ecode <=
|
||||
decode.instruction inside {ECALL} ? ecall_code :
|
||||
decode.instruction inside {EBREAK} ? BREAK :
|
||||
illegal_instruction_pattern ? ILLEGAL_INST :
|
||||
decode.fetch_metadata.error_code; //(~decode.fetch_metadata.ok)
|
||||
end
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Exception generation (ecall/ebreak/illegal instruction/propagated fetch error)
|
||||
logic new_exception;
|
||||
exception_code_t ecode;
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (rst)
|
||||
pre_issue_exception_pending <= 0;
|
||||
else if (issue_stage_ready)
|
||||
pre_issue_exception_pending <= illegal_instruction_pattern | (opcode_trim inside {SYSTEM_T} & ~is_csr & (sys_op_match[ECALL_i] | sys_op_match[EBREAK_i])) | ~decode.fetch_metadata.ok;
|
||||
pre_issue_exception_pending <= illegal_instruction_pattern | (~decode.fetch_metadata.ok);
|
||||
end
|
||||
|
||||
assign new_exception = issue.stage_valid & pre_issue_exception_pending & ~(gc.issue_hold | gc.fetch_flush);
|
||||
assign new_exception = issue.stage_valid & pre_issue_exception_pending & ~(gc.issue_hold | gc.fetch_flush | exception.valid);
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (rst)
|
||||
|
@ -617,12 +336,6 @@ module decode_and_issue
|
|||
exception.valid <= (exception.valid | new_exception) & ~exception.ack;
|
||||
end
|
||||
|
||||
assign ecode =
|
||||
illegal_instruction_pattern_r ? ILLEGAL_INST :
|
||||
is_ecall_r ? ecall_code :
|
||||
~issue.fetch_metadata.ok ? issue.fetch_metadata.error_code :
|
||||
BREAK;
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (new_exception) begin
|
||||
exception.code <= ecode;
|
||||
|
@ -639,33 +352,4 @@ module decode_and_issue
|
|||
////////////////////////////////////////////////////
|
||||
//Assertions
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Trace Interface
|
||||
generate if (ENABLE_TRACE_INTERFACE) begin : gen_decode_trace
    //Stall classification for the instruction held in the issue stage

    //Nothing to issue: fetch is being flushed, or an ID is available but no
    //instruction has reached the issue stage yet
    assign tr_no_instruction_stall = gc.fetch_flush | (pc_id_available & ~issue.stage_valid);

    //All instructions in execution pipeline
    assign tr_no_id_stall = ~issue.stage_valid & ~pc_id_available & ~gc.fetch_flush;

    //The needed unit is not ready
    assign tr_unit_stall = issue_valid & ~gc.fetch_flush & ~|issue_ready;

    //A unit is ready but the operands are not (and nothing else is blocking issue)
    assign tr_operand_stall =
        issue.stage_valid &
        (|issue_ready) &
        ~(gc.fetch_flush | gc.issue_hold | pre_issue_exception_pending | operands_ready);

    //Valid instruction that did not issue for a reason not counted above
    assign tr_other_stall =
        issue.stage_valid &
        ~instruction_issued &
        ~(tr_operand_stall | tr_unit_stall | tr_no_id_stall | tr_no_instruction_stall);

    //Operand stalls broken down by the unit the stalled instruction needs.
    //An instruction needing both BR and ALU is counted as a branch stall only.
    assign tr_branch_operand_stall = tr_operand_stall & unit_needed_issue_stage[UNIT_IDS.BR];
    assign tr_alu_operand_stall = tr_operand_stall & unit_needed_issue_stage[UNIT_IDS.ALU] & ~unit_needed_issue_stage[UNIT_IDS.BR];
    assign tr_ls_operand_stall = tr_operand_stall & unit_needed_issue_stage[UNIT_IDS.LS];
    assign tr_div_operand_stall = tr_operand_stall & unit_needed_issue_stage[UNIT_IDS.DIV];

    //Instruction Mix (registered one cycle after issue)
    always_ff @(posedge clk) begin
        tr_alu_op <= issue_to[UNIT_IDS.ALU];
        tr_branch_or_jump_op <= issue_to[UNIT_IDS.BR];
        tr_mul_op <= issue_to[UNIT_IDS.MUL];
        tr_div_op <= issue_to[UNIT_IDS.DIV];
        tr_load_op <= issue_to[UNIT_IDS.LS] & is_load_r;
        tr_store_op <= issue_to[UNIT_IDS.LS] & is_store_r;
        tr_misc_op <= issue_to[UNIT_IDS.CSR] | issue_to[UNIT_IDS.IEC];
    end

    //Issued instruction details
    assign tr_instruction_issued_dec = instruction_issued;
    assign tr_instruction_pc_dec = issue.pc;
    assign tr_instruction_data_dec = issue.instruction;
end endgenerate
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,137 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2017 Eric Matthews, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Eric Matthews <ematthew@sfu.ca>
|
||||
*/
|
||||
|
||||
//Data cache tag storage: one dual-port tag_bank instance per cache way.
//  Port A : read for the stage-1 lookup (enabled by stage1_adv); its write
//           enable is stage1_inv with all-zero write data.
//  Port B : tag update for stage2_addr and, when
//           CONFIG.DCACHE.USE_EXTERNAL_INVALIDATIONS is set, read/clear for
//           inv_addr.
//NOTE(review): the 30-bit addresses look like word addresses (byte offset
//already removed) - confirm against the instantiating module.
module dtag_banks

    import cva5_config::*;
    import cva5_types::*;

    # (
        parameter cpu_config_t CONFIG = EXAMPLE_CONFIG,
        parameter derived_cache_config_t SCONFIG = '{default: 0}
    )

    (
        input logic clk,
        input logic rst,

        input logic[29:0] stage1_addr,  //Lookup address (port A)
        input logic[29:0] stage2_addr,  //Address compared for hits and written on update
        input logic[29:0] inv_addr,     //External invalidation address

        input logic[CONFIG.DCACHE.WAYS-1:0] update_way, //Per-way write enable for an update
        input logic update,                             //Written entry is valid only when set

        input logic stage1_adv,
        input logic stage1_inv,

        input logic extern_inv,
        output logic extern_inv_complete,

        output logic tag_hit,
        output logic[CONFIG.DCACHE.WAYS-1:0] tag_hit_way
    );

    typedef struct packed{
        logic valid;
        logic [SCONFIG.TAG_W-1:0] tag;
    } dtag_entry_t;

    //Tag field: the TAG_W bits directly above the line and sub-line index bits
    function logic[SCONFIG.TAG_W-1:0] getTag(logic[29:0] addr);
        return addr[SCONFIG.SUB_LINE_ADDR_W + SCONFIG.LINE_ADDR_W +: SCONFIG.TAG_W];
    endfunction

    //Line index: the LINE_ADDR_W bits directly above the sub-line index bits
    function logic[SCONFIG.LINE_ADDR_W-1:0] getLineAddr(logic[29:0] addr);
        return addr[SCONFIG.LINE_ADDR_W + SCONFIG.SUB_LINE_ADDR_W - 1 : SCONFIG.SUB_LINE_ADDR_W];
    endfunction

    dtag_entry_t tag_line [CONFIG.DCACHE.WAYS-1:0];
    dtag_entry_t inv_tag_line [CONFIG.DCACHE.WAYS-1:0];

    dtag_entry_t new_tagline;

    logic [CONFIG.DCACHE.WAYS-1:0] update_tag_way;

    logic inv_tags_accessed;

    logic[CONFIG.DCACHE.WAYS-1:0] inv_hit_way;
    logic[CONFIG.DCACHE.WAYS-1:0] inv_hit_way_r;

    logic [SCONFIG.LINE_ADDR_W-1:0] update_port_addr;

    dtag_entry_t stage2_hit_comparison_tagline;
    dtag_entry_t inv_hit_comparison_tagline;
    ////////////////////////////////////////////////////
    //Implementation

    ////////////////////////////////////////////////////
    //Muxing of cache miss or invalidation control logic and tags
    //An update always takes priority over an external invalidation on port B
    assign update_port_addr =
        CONFIG.DCACHE.USE_EXTERNAL_INVALIDATIONS ?
            ((update) ? getLineAddr(stage2_addr) : getLineAddr(inv_addr)) :
            getLineAddr(stage2_addr);

    //Entry written through port B: valid for an update, cleared (valid = 0)
    //when the write is caused by an external invalidation
    assign new_tagline = '{valid: update, tag: getTag(stage2_addr)};

    //Set for the cycle after port B was addressed with inv_addr
    //(extern_inv requested and not pre-empted by an update)
    always_ff @ (posedge clk) begin
        if (rst)
            inv_tags_accessed <= 0;
        else
            inv_tags_accessed <= extern_inv & ~update;
    end

    //The invalidation completes in the second consecutive cycle in which it
    //owns port B; inv_hit_way is used as the write enable in that cycle
    assign extern_inv_complete = (extern_inv & ~update) & inv_tags_accessed;

    ////////////////////////////////////////////////////
    //Memory instantiation and hit detection
    //A way hits only when its stored entry is valid and the tags are equal
    assign stage2_hit_comparison_tagline = '{valid: 1, tag: getTag(stage2_addr)};
    assign inv_hit_comparison_tagline = '{valid: 1, tag: getTag(inv_addr)};

    generate for (genvar i=0; i < CONFIG.DCACHE.WAYS; i++) begin : dtag_bank_gen
        assign update_tag_way[i] = update_way[i] | (inv_hit_way[i] & extern_inv_complete);

        tag_bank #($bits(dtag_entry_t), CONFIG.DCACHE.LINES) dtag_bank (
            .clk (clk),
            .rst (rst),
            .en_a (stage1_adv),
            .wen_a (stage1_inv),
            .addr_a (getLineAddr(stage1_addr)),
            .data_in_a ('0),
            .data_out_a (tag_line[i]),
            .en_b (update | extern_inv),
            .wen_b (update_tag_way[i]),
            .addr_b (update_port_addr),
            .data_in_b (new_tagline),
            .data_out_b(inv_tag_line[i])
        );

        assign inv_hit_way[i] = (inv_hit_comparison_tagline == inv_tag_line[i]);
        assign tag_hit_way[i] = (stage2_hit_comparison_tagline == tag_line[i]);

    end endgenerate

    assign tag_hit = |tag_hit_way;
    ////////////////////////////////////////////////////
    //Assertions

endmodule
|
184
core/execution_units/alu_unit.sv
Executable file
184
core/execution_units/alu_unit.sv
Executable file
|
@ -0,0 +1,184 @@
|
|||
/*
|
||||
* Copyright © 2017-2020 Eric Matthews, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Eric Matthews <ematthew@sfu.ca>
|
||||
*/
|
||||
|
||||
//Integer ALU. Control signals are derived from the decode-stage instruction
//and registered when issue_stage_ready is set; the result is computed
//combinationally from the register-file operands in the issue stage.
module alu_unit

    import cva5_config::*;
    import riscv_types::*;
    import cva5_types::*;
    import opcodes::*;

    (
        input logic clk,
        input logic rst,

        input decode_packet_t decode_stage,
        output logic unit_needed,
        output logic [REGFILE_READ_PORTS-1:0] uses_rs,
        output logic uses_rd,

        input issue_packet_t issue_stage,
        input logic issue_stage_ready,
        //Constant ALU:
        // provides LUI, AUIPC, JAL, JALR results for ALU
        // provides PC+4 for BRANCH unit and ifence in GC unit
        input logic [31:0] constant_alu,
        input rs_addr_t issue_rs_addr [REGFILE_READ_PORTS],
        input logic [31:0] rf [REGFILE_READ_PORTS],

        unit_issue_interface.unit issue,
        unit_writeback_interface.unit wb
    );
    typedef enum logic [1:0] {
        LOGIC_XOR = 2'b00,
        LOGIC_OR = 2'b01,
        LOGIC_AND = 2'b10,
        LOGIC_OTHER = 2'b11
    } logic_op_t;

    common_instruction_t instruction;//rs1_addr, rs2_addr, fn3, fn7, rd_addr, upper/lower opcode

    logic [31:0] rs2_data;
    logic imm_type;
    alu_op_t alu_op;
    alu_op_t alu_op_r;
    logic_op_t logic_op;
    logic_op_t logic_op_r;
    logic subtract;
    logic is_slt;

    logic[32:0] add_sub_result;
    logic add_sub_carry_in;
    logic[31:0] logic_and_upper_slt;
    logic[32:0] sign_ext_adder1;
    logic[32:0] sign_ext_adder2;
    logic[31:0] shift_result;
    logic[31:0] result;
    ////////////////////////////////////////////////////
    //Implementation

    ////////////////////////////////////////////////////
    //Decode
    assign instruction = decode_stage.instruction;

    assign unit_needed = decode_stage.instruction inside {
        LUI, AUIPC, JAL, JALR,
        ADDI, SLLI, SLTI, SLTIU, XORI, SRLI, SRAI, ORI, ANDI,
        ADD, SUB, SLL, SLT, SLTU, XOR, SRL, SRA, OR, AND
    };
    always_comb begin
        uses_rs = '0;
        uses_rs[RS1] = decode_stage.instruction inside {
            JALR,
            ADDI, SLLI, SLTI, SLTIU, XORI, SRLI, SRAI, ORI, ANDI,
            ADD, SUB, SLL, SLT, SLTU, XOR, SRL, SRA, OR, AND
        };
        uses_rs[RS2] = decode_stage.instruction inside {
            ADD, SUB, SLL, SLT, SLTU, XOR, SRL, SRA, OR, AND
        };
        uses_rd = decode_stage.instruction inside {
            LUI, AUIPC, JAL, JALR,
            ADDI, SLLI, SLTI, SLTIU, XORI, SRLI, SRAI, ORI, ANDI,
            ADD, SUB, SLL, SLT, SLTU, XOR, SRL, SRA, OR, AND
        };
    end

    //Result source selection. Logic ops share the ALU_SLT path: their result
    //comes from logic_and_upper_slt, which is zero for SLT/SLTU.
    always_comb begin
        case (instruction.upper_opcode) inside
            LUI_T, AUIPC_T, JAL_T, JALR_T : alu_op = ALU_CONSTANT;
            default :
                case (instruction.fn3) inside
                    XOR_fn3, OR_fn3, AND_fn3, SLTU_fn3, SLT_fn3 : alu_op = ALU_SLT;
                    SLL_fn3, SRA_fn3 : alu_op = ALU_SHIFT;
                    default : alu_op = ALU_ADD_SUB;
                endcase
        endcase
    end

    always_comb begin
        case (instruction.fn3) inside
            XOR_fn3 : logic_op = LOGIC_XOR;
            OR_fn3 : logic_op = LOGIC_OR;
            AND_fn3 : logic_op = LOGIC_AND;
            default : logic_op = LOGIC_OTHER;
        endcase
    end

    //Decode-stage control, registered for use in the issue stage
    always_ff @(posedge clk) begin
        if (issue_stage_ready) begin
            imm_type <= instruction.upper_opcode inside {ARITH_IMM_T};
            alu_op_r <= alu_op;
            logic_op_r <= logic_op;
            //Comparisons are implemented as a subtraction
            subtract <= decode_stage.instruction inside {SUB, SLTI, SLTIU, SLT, SLTU};
            is_slt <= instruction.fn3 inside {SLT_fn3, SLTU_fn3};
        end
    end

    ////////////////////////////////////////////////////
    //Issue
    //Logic ops put through the adder carry chain to reduce resources
    //TODO: explore moving this mux into the regfile bypass mux
    assign rs2_data = imm_type ? 32'(signed'(issue_stage.instruction[31:20])) : rf[RS2];
    always_comb begin
        case (logic_op_r)
            LOGIC_XOR : logic_and_upper_slt = rf[RS1] ^ rs2_data;
            LOGIC_OR : logic_and_upper_slt = rf[RS1] | rs2_data;
            LOGIC_AND : logic_and_upper_slt = rf[RS1] & rs2_data;
            default : logic_and_upper_slt = 0; //ADD/SUB/SLT/SLTU
        endcase
    end

    //Add/Sub ops
    //Operands are extended to 33 bits: sign-extended when fn3[0] is clear,
    //zero-extended when it is set (SLTU/SLTIU), so add_sub_result[32] holds
    //the less-than outcome. The second operand is inverted for subtraction.
    assign sign_ext_adder1 = {(rf[RS1][31] & ~issue_stage.fn3[0]), rf[RS1]};
    assign sign_ext_adder2 = {(rs2_data[31] & ~issue_stage.fn3[0]) ^ subtract, rs2_data ^ {32{subtract}}};

    //The appended LSBs (1 + subtract) generate the +1 carry-in that completes
    //the two's complement when subtracting
    assign {add_sub_result, add_sub_carry_in} = {sign_ext_adder1, 1'b1} + {sign_ext_adder2, subtract};

    //Shift ops
    //Immediate shifts take the amount from the rs2 address field of the instruction
    barrel_shifter shifter (
        .shifter_input(rf[RS1]),
        .shift_amount(imm_type ? issue_rs_addr[RS2] : rf[RS2][4:0]),
        .arith(rf[RS1][31] & issue_stage.instruction[30]),
        .lshift(~issue_stage.fn3[2]),
        .shifted_result(shift_result)
    );

    always_comb begin
        case (alu_op_r)
            ALU_CONSTANT : result = constant_alu;//LUI, AUIPC, JAL, JALR
            ALU_ADD_SUB : result = add_sub_result[31:0];
            //Bit 0 is the comparison outcome for SLT/SLTU, otherwise the logic result
            ALU_SLT : result = {logic_and_upper_slt[31:1], is_slt ? add_sub_result[32] : logic_and_upper_slt[0]};
            default : result = shift_result; //ALU_SHIFT
        endcase
    end

    ////////////////////////////////////////////////////
    //Output
    //Single-cycle unit: always ready, result presented in the issue cycle
    assign issue.ready = 1;
    assign wb.rd = result;
    assign wb.done = issue.possible_issue;
    assign wb.id = issue.id;

    ////////////////////////////////////////////////////
    //Assertions

endmodule
|
253
core/execution_units/branch_unit.sv
Executable file
253
core/execution_units/branch_unit.sv
Executable file
|
@ -0,0 +1,253 @@
|
|||
/*
|
||||
* Copyright © 2017-2019 Eric Matthews, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Eric Matthews <ematthew@sfu.ca>
|
||||
*/
|
||||
|
||||
//Branch/jump unit. Resolves the branch condition and target in the issue
//stage, then compares the resolved target against the PC of the next
//instruction to reach the issue stage to detect a misprediction.
module branch_unit

    import cva5_config::*;
    import riscv_types::*;
    import cva5_types::*;
    import opcodes::*;

    # (
        parameter cpu_config_t CONFIG = EXAMPLE_CONFIG
    )

    (
        input logic clk,
        input logic rst,

        //Decode stage
        input decode_packet_t decode_stage,
        output logic unit_needed,
        output logic [REGFILE_READ_PORTS-1:0] uses_rs,
        output logic uses_rd,

        //Issue stage
        input issue_packet_t issue_stage,
        input logic issue_stage_ready,
        input logic [31:0] constant_alu,
        input logic [31:0] rf [REGFILE_READ_PORTS],

        unit_issue_interface.unit issue,
        output branch_results_t br_results,
        output logic branch_flush,

        exception_interface.unit exception
    );

    ////////////////////////////////////////////////////
    //Declarations
    common_instruction_t instruction;//rs1_addr, rs2_addr, fn3, fn7, rd_addr, upper/lower opcode

    //RAS support (decode stage)
    logic rs1_link;
    logic rd_link;
    logic rs1_eq_rd;

    //PC offset (decode stage)
    logic[19:0] jal_imm;
    logic[11:0] jalr_imm;
    logic[11:0] br_imm;
    logic [20:0] pc_offset;

    //Decode-stage values registered for the issue stage
    logic is_return;
    logic is_call;
    logic [20:0] pc_offset_r;
    logic jalr;
    logic jal_or_jalr;
    logic br_use_signed;

    //Issue-stage resolution
    logic branch_issued_r;
    logic instruction_is_completing;
    logic [32:0] rs1;
    logic [32:0] rs2;
    logic result;
    logic branch_taken;
    logic [31:0] jump_pc;
    logic [31:0] new_pc;

    //Values held for the issued branch until it completes
    logic branch_taken_ex;
    logic [31:0] new_pc_ex;
    id_t id_ex;
    logic jal_or_jalr_ex;
    logic is_return_ex;
    logic is_call_ex;
    logic [31:0] pc_ex;

    logic branch_complete;

    ////////////////////////////////////////////////////
    //Implementation

    ////////////////////////////////////////////////////
    //Decode
    assign instruction = decode_stage.instruction;

    assign unit_needed = decode_stage.instruction inside {
        BEQ, BNE, BLT, BGE, BLTU, BGEU, JALR, JAL
    };

    always_comb begin
        uses_rs = '0;
        uses_rs[RS1] = decode_stage.instruction inside {
            BEQ, BNE, BLT, BGE, BLTU, BGEU, JALR
        };
        uses_rs[RS2] = decode_stage.instruction inside {
            BEQ, BNE, BLT, BGE, BLTU, BGEU
        };
    end

    //JALR/JAL writeback handled by ALU
    assign uses_rd = 0;

    ////////////////////////////////////////////////////
    //RAS Support
    //x1 and x5 are treated as link registers
    assign rs1_link = instruction.rs1_addr inside {1,5};
    assign rd_link = instruction.rd_addr inside {1,5};
    assign rs1_eq_rd = (instruction.rs1_addr == instruction.rd_addr);

    ////////////////////////////////////////////////////
    //PC Offset
    assign br_imm = {decode_stage.instruction[31], decode_stage.instruction[7], decode_stage.instruction[30:25], decode_stage.instruction[11:8]};
    assign jalr_imm = decode_stage.instruction[31:20];
    assign jal_imm = {decode_stage.instruction[31], decode_stage.instruction[19:12], decode_stage.instruction[20], decode_stage.instruction[30:21]};

    //Opcode bits [3:2] distinguish JAL (11), JALR (01) and conditional branches
    always_comb begin
        case (decode_stage.instruction[3:2])
            2'b01 : pc_offset = 21'(signed'(jalr_imm));
            2'b11 : pc_offset = 21'(signed'({jal_imm, 1'b0}));
            default : pc_offset = 21'(signed'({br_imm, 1'b0}));
        endcase
    end

    ////////////////////////////////////////////////////
    //Decode -> issue registers
    always_ff @(posedge clk) begin
        if (issue_stage_ready) begin
            //Return-address-stack hints
            is_return <= (instruction.upper_opcode inside {JALR_T}) & ((rs1_link & ~rd_link) | (rs1_link & rd_link & ~rs1_eq_rd));
            is_call <= (instruction.upper_opcode inside {JAL_T, JALR_T}) & rd_link;

            //Target offset
            pc_offset_r <= pc_offset;

            //Instruction class
            jalr <= (decode_stage.instruction[2] & ~decode_stage.instruction[3]);
            jal_or_jalr <= decode_stage.instruction[2];
            br_use_signed <= !(instruction.fn3 inside {BLTU_fn3, BGEU_fn3});
        end
    end

    ////////////////////////////////////////////////////
    //Issue

    //Only stall condition is if the following instruction is not valid for pc comparisons.
    //If the next instruction isn't valid, no instruction can be issued anyways, so it
    //is safe to hardcode this to one.
    assign issue.ready = 1;

    //Branch new request is held if the following instruction hasn't arrived at decode/issue yet
    set_clr_reg_with_rst #(.SET_OVER_CLR(1), .WIDTH(1), .RST_VALUE(0)) branch_issued_m (
        .clk, .rst,
        .set(issue.new_request),
        .clr(issue_stage.stage_valid | exception.valid),
        .result(branch_issued_r)
    );

    //To determine if the branch was predicted correctly we need to wait until the
    //subsequent instruction has reached the issue stage
    assign instruction_is_completing = branch_issued_r & issue_stage.stage_valid;

    //Operands widened to 33 bits: sign-extended for signed comparisons,
    //zero-extended for BLTU/BGEU
    assign rs1 = {(rf[RS1][31] & br_use_signed), rf[RS1]};
    assign rs2 = {(rf[RS2][31] & br_use_signed), rf[RS2]};

    ////////////////////////////////////////////////////
    //Branch/Jump target determination
    //Branch comparison and final address calculation
    //are performed in the issue stage
    branch_comparator bc (
        .less_than(issue_stage.fn3[2]),
        .a(rs1),
        .b(rs2),
        .xor_result(issue_stage.fn3[0]),
        .result(result)
    );

    //Jumps are always taken
    assign branch_taken = jal_or_jalr | result;

    //JALR is relative to rs1, everything else is relative to the PC.
    //A not-taken branch continues at constant_alu.
    assign jump_pc = (jalr ? rs1[31:0] : issue_stage.pc) + 32'(signed'(pc_offset_r));
    assign new_pc = branch_taken ? jump_pc : constant_alu;

    always_ff @(posedge clk) begin
        if (issue.new_request) begin
            id_ex <= issue.id;
            jal_or_jalr_ex <= jal_or_jalr;
            branch_taken_ex <= branch_taken;
            //Bit 0 of a JALR target is cleared
            new_pc_ex <= {new_pc[31:1], new_pc[0] & ~jalr};
        end
    end

    ////////////////////////////////////////////////////
    //Exception support
    generate if (CONFIG.INCLUDE_M_MODE) begin : gen_branch_exception
        logic new_exception;

        //A taken branch/jump to a target with bit 1 set is misaligned
        assign new_exception = issue.new_request & branch_taken & new_pc[1];

        always_ff @(posedge clk) begin
            if (rst)
                exception.valid <= 0;
            else
                exception.valid <= new_exception | (exception.valid & ~exception.ack);
        end

        always_ff @(posedge clk) begin
            if (issue.new_request)
                exception.id <= issue.id;
        end

        assign exception.code = INST_ADDR_MISSALIGNED;
        assign exception.tval = new_pc_ex;
    end
    endgenerate

    ////////////////////////////////////////////////////
    //Predictor support
    always_ff @(posedge clk) begin
        if (issue.possible_issue) begin
            pc_ex <= issue_stage.pc;
            is_call_ex <= is_call;
            is_return_ex <= is_return;
        end
    end

    assign br_results.valid = instruction_is_completing;
    assign br_results.id = id_ex;
    assign br_results.pc = pc_ex;
    assign br_results.target_pc = new_pc_ex;
    assign br_results.branch_taken = branch_taken_ex;
    assign br_results.is_branch = ~jal_or_jalr_ex;
    assign br_results.is_call = is_call_ex;
    assign br_results.is_return = is_return_ex;

    //Flush when the next instruction in the issue stage is not at the resolved target
    assign branch_flush = instruction_is_completing & (issue_stage.pc[31:1] != new_pc_ex[31:1]);

    ////////////////////////////////////////////////////
    //End of Implementation
    ////////////////////////////////////////////////////

    ////////////////////////////////////////////////////
    //Assertions

endmodule
|
842
core/execution_units/csr_unit.sv
Executable file
842
core/execution_units/csr_unit.sv
Executable file
|
@ -0,0 +1,842 @@
|
|||
/*
|
||||
* Copyright © 2017-2020 Eric Matthews, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Eric Matthews <ematthew@sfu.ca>
|
||||
*/
|
||||
|
||||
module csr_unit
|
||||
|
||||
import cva5_config::*;
|
||||
import riscv_types::*;
|
||||
import cva5_types::*;
|
||||
import csr_types::*;
|
||||
import opcodes::*;
|
||||
|
||||
# (
|
||||
parameter cpu_config_t CONFIG = EXAMPLE_CONFIG
|
||||
)
|
||||
|
||||
(
|
||||
input logic clk,
|
||||
input logic rst,
|
||||
|
||||
input decode_packet_t decode_stage,
|
||||
output logic unit_needed,
|
||||
output logic [REGFILE_READ_PORTS-1:0] uses_rs,
|
||||
output logic uses_rd,
|
||||
|
||||
input issue_packet_t issue_stage,
|
||||
input logic issue_stage_ready,
|
||||
input rs_addr_t issue_rs_addr [REGFILE_READ_PORTS],
|
||||
input logic [31:0] rf [REGFILE_READ_PORTS],
|
||||
|
||||
//Unit Interfaces
|
||||
unit_issue_interface.unit issue,
|
||||
unit_writeback_interface.unit wb,
|
||||
|
||||
//Privilege
|
||||
output logic [1:0] current_privilege,
|
||||
|
||||
//FP
|
||||
input logic [4:0] fflag_wmask, //Always valid
|
||||
output logic [2:0] dyn_rm,
|
||||
|
||||
//GC
|
||||
input logic interrupt_taken,
|
||||
output logic interrupt_pending,
|
||||
output logic processing_csr,
|
||||
|
||||
//TLB and MMU
|
||||
output logic tlb_on,
|
||||
output logic [ASIDLEN-1:0] asid,
|
||||
|
||||
//MMUs
|
||||
mmu_interface.csr immu,
|
||||
mmu_interface.csr dmmu,
|
||||
|
||||
//CSR exception interface
|
||||
input exception_packet_t exception,
|
||||
output logic [31:0] exception_target_pc,
|
||||
|
||||
//exception return
|
||||
input logic mret,
|
||||
input logic sret,
|
||||
output logic [31:0] epc,
|
||||
|
||||
//Retire
|
||||
input id_t retire_ids [RETIRE_PORTS],
|
||||
input logic [LOG2_RETIRE_PORTS : 0] retire_count,
|
||||
|
||||
//External
|
||||
input interrupt_t s_interrupt,
|
||||
input interrupt_t m_interrupt
|
||||
);
|
||||
|
||||
typedef struct packed{
|
||||
csr_addr_t addr;
|
||||
logic[1:0] op;
|
||||
logic reads;
|
||||
logic writes;
|
||||
logic [31:0] data;
|
||||
} csr_inputs_t;
|
||||
|
||||
typedef enum logic [2:0] {
|
||||
MSTATUS_UNCHANGED = 0,
|
||||
MSTATUS_WRITE = 1,
|
||||
MSTATUS_INTERRUPT = 2,
|
||||
MSTATUS_EXCEPTION = 3,
|
||||
MSTATUS_MRET = 4,
|
||||
MSTATUS_SRET = 5
|
||||
} mstatus_cases_t;
|
||||
mstatus_cases_t mstatus_case;
|
||||
|
||||
logic busy;
|
||||
logic commit;
|
||||
logic commit_in_progress;
|
||||
|
||||
csr_inputs_t csr_inputs;
|
||||
csr_inputs_t csr_inputs_r;
|
||||
|
||||
privilege_t privilege_level;
|
||||
privilege_t next_privilege_level;
|
||||
|
||||
//write_logic
|
||||
logic swrite;
|
||||
logic mwrite;
|
||||
logic [255:0] sub_write_en;
|
||||
|
||||
logic [31:0] selected_csr;
|
||||
logic [31:0] selected_csr_r;
|
||||
|
||||
logic [31:0] updated_csr;
|
||||
logic [31:0] next_csr;
|
||||
|
||||
//Write strobe for the machine-level CSR at addr: the machine write is active
//and the registered sub-address select bit for addr.sub_addr is set
function logic mwrite_en (input csr_addr_t addr);
    mwrite_en = sub_write_en[addr.sub_addr] & mwrite;
endfunction
|
||||
//Write strobe for the supervisor-level CSR at addr: the supervisor write is
//active and the registered sub-address select bit for addr.sub_addr is set
function logic swrite_en (input csr_addr_t addr);
    swrite_en = sub_write_en[addr.sub_addr] & swrite;
endfunction
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Legalization Functions
|
||||
//Builds the medeleg mask: one bit per exception code listed below.
//All zero when S-mode is not included.
function logic [31:0] init_medeleg_mask();
    logic [31:0] mask;
    mask = 0;
    if (CONFIG.INCLUDE_S_MODE) begin
        //Instruction fetch
        mask[INST_ADDR_MISSALIGNED] = 1;
        mask[INST_ACCESS_FAULT] = 1;
        mask[INST_PAGE_FAULT] = 1;
        //Decode
        mask[ILLEGAL_INST] = 1;
        mask[BREAK] = 1;
        mask[ECALL_U] = 1;
        //Loads
        mask[LOAD_ADDR_MISSALIGNED] = 1;
        mask[LOAD_FAULT] = 1;
        mask[LOAD_PAGE_FAULT] = 1;
        //Stores and AMOs
        mask[STORE_AMO_ADDR_MISSALIGNED] = 1;
        mask[STORE_AMO_FAULT] = 1;
        mask[STORE_OR_AMO_PAGE_FAULT] = 1;
    end
    return mask;
endfunction
|
||||
|
||||
//Builds the mideleg mask: only the three supervisor interrupt bits can be
//set, and only when S-mode is included.
function logic [31:0] init_mideleg_mask();
    logic [31:0] mask;
    mask = 0;
    if (CONFIG.INCLUDE_S_MODE) begin
        mask[S_EXTERNAL_INTERRUPT] = CONFIG.INCLUDE_S_MODE;
        mask[S_TIMER_INTERRUPT] = CONFIG.INCLUDE_S_MODE;
        mask[S_SOFTWARE_INTERRUPT] = CONFIG.INCLUDE_S_MODE;
    end
    return mask;
endfunction
|
||||
|
||||
//Builds a per-exception-code bit table. Codes set to 1 are always marked;
//the remaining listed codes are marked only when S-mode is included.
function logic [2**ECODE_W-1:0] init_exception_masking_rom();
    logic [2**ECODE_W-1:0] rom;
    rom = '{default: 0};

    //Always marked
    rom[INST_ADDR_MISSALIGNED] = 1;
    rom[ILLEGAL_INST] = 1;
    rom[BREAK] = 1;
    rom[LOAD_ADDR_MISSALIGNED] = 1;
    rom[STORE_AMO_ADDR_MISSALIGNED] = 1;
    rom[ECALL_M] = 1;

    //Marked only with S-mode
    rom[INST_ACCESS_FAULT] = CONFIG.INCLUDE_S_MODE;
    rom[LOAD_FAULT] = CONFIG.INCLUDE_S_MODE;
    rom[STORE_AMO_FAULT] = CONFIG.INCLUDE_S_MODE;
    rom[ECALL_U] = CONFIG.INCLUDE_S_MODE;
    rom[ECALL_S] = CONFIG.INCLUDE_S_MODE;
    rom[INST_PAGE_FAULT] = CONFIG.INCLUDE_S_MODE;
    rom[LOAD_PAGE_FAULT] = CONFIG.INCLUDE_S_MODE;
    rom[STORE_OR_AMO_PAGE_FAULT] = CONFIG.INCLUDE_S_MODE;

    return rom;
endfunction
|
||||
|
||||
//Builds a per-interrupt-code bit table. Machine interrupts are always marked;
//supervisor interrupts are marked only when S-mode is included.
function logic [2**ECODE_W-1:0] init_interrupt_masking_rom();
    logic [2**ECODE_W-1:0] rom;
    rom = '{default: 0};

    //Machine level
    rom[M_SOFTWARE_INTERRUPT] = 1;
    rom[M_TIMER_INTERRUPT] = 1;
    rom[M_EXTERNAL_INTERRUPT] = 1;

    //Supervisor level
    rom[S_SOFTWARE_INTERRUPT] = CONFIG.INCLUDE_S_MODE;
    rom[S_TIMER_INTERRUPT] = CONFIG.INCLUDE_S_MODE;
    rom[S_EXTERNAL_INTERRUPT] = CONFIG.INCLUDE_S_MODE;

    return rom;
endfunction
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Decode
|
||||
//All six CSR instructions use this unit and all of them write rd
assign unit_needed = decode_stage.instruction inside {
    CSRRW, CSRRS, CSRRC,
    CSRRWI, CSRRSI, CSRRCI
};
assign uses_rd = unit_needed;

//Only the register forms read rs1; the immediate forms read no registers
always_comb begin
    uses_rs = '0;
    uses_rs[RS1] = decode_stage.instruction inside {CSRRW, CSRRS, CSRRC};
end
|
||||
////////////////////////////////////////////////////
|
||||
//Issue
|
||||
//Request assembled from the issue-stage instruction
//  data   : zero-extended 5-bit immediate (rs1 address field) when fn3[2] is
//           set, otherwise the rs1 register value
//  reads  : cleared only for a CSR_RW operation with rd = x0
//  writes : cleared only for a CSR_RC operation with an rs1 field of zero
assign csr_inputs = '{
    addr : issue_stage.instruction[31:20],
    op : issue_stage.fn3[1:0],
    reads : (issue_stage.fn3[1:0] != CSR_RW) || (issue_stage.rd_addr != 0),
    writes : (issue_stage.fn3[1:0] != CSR_RC) || (issue_rs_addr[RS1] != 0),
    data : issue_stage.fn3[2] ? {27'b0, issue_rs_addr[RS1]} : rf[RS1]
};
|
||||
|
||||
//The unit accepts one CSR instruction at a time: it is occupied from issue
//until the writeback is acknowledged
assign issue.ready = ~busy;
assign processing_csr = issue.new_request | busy;

//Waits until CSR instruction is the oldest issued instruction
//commit_in_progress blocks further commits until the next request is issued
assign commit = busy & (retire_ids[0] == wb.id) & (~commit_in_progress);

always_ff @(posedge clk) begin
    if (rst) begin
        busy <= 0;
        commit_in_progress <= 0;
    end
    else begin
        busy <= issue.new_request | (busy & ~wb.ack);
        commit_in_progress <= commit | (commit_in_progress & ~issue.new_request);
    end
end

//Hold the request for the duration of the operation
always_ff @(posedge clk) begin
    if (issue.new_request)
        csr_inputs_r <= csr_inputs;
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Output
|
||||
|
||||
//Writeback request: raised the cycle after commit, held until acknowledged
always_ff @(posedge clk) begin
    if (rst) begin
        wb.done <= 0;
    end
    else begin
        wb.done <= commit | (wb.done & ~wb.ack);
    end
end

//ID of the CSR instruction currently held by the unit
always_ff @(posedge clk) begin
    if (issue.new_request) begin
        wb.id <= issue.id;
    end
end

//Registered value of the selected CSR
assign wb.rd = selected_csr_r;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Shared logic
|
||||
//One-hot decode of the sub-address of the held request
always_ff @(posedge clk) begin
    sub_write_en <= (1 << csr_inputs_r.addr.sub_addr);
end

//Registered write strobes, one per privilege group.
//Asserted the cycle after commit when the address is not read-only and belongs to that group.
always_ff @(posedge clk) begin
    mwrite <= CONFIG.INCLUDE_M_MODE && commit &&
        (csr_inputs_r.addr.rw_bits != CSR_READ_ONLY) &&
        (csr_inputs_r.addr.privilege == MACHINE_PRIVILEGE);
    swrite <= CONFIG.INCLUDE_S_MODE && commit &&
        (csr_inputs_r.addr.rw_bits != CSR_READ_ONLY) &&
        (csr_inputs_r.addr.privilege == SUPERVISOR_PRIVILEGE);
end

//Value to be written: set/clear ops combine with the currently selected CSR value,
//every other op (including CSR_RW) passes the request data through
always_comb begin
    case (csr_inputs_r.op)
        CSR_RS : next_csr = selected_csr | csr_inputs_r.data;
        CSR_RC : next_csr = selected_csr & ~csr_inputs_r.data;
        default : next_csr = csr_inputs_r.data;
    endcase
end

//Captured at commit so that it lines up with the registered write strobes
always_ff @(posedge clk) begin
    if (commit)
        updated_csr <= next_csr;
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Machine Mode Registers
|
||||
////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Constant Registers
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Machine ISA register
|
||||
//Extension bits follow the core configuration; all other fields are zero
localparam misa_t misa = '{
    default:0,
    mxlen:1,
    I:1,
    M:(CONFIG.INCLUDE_UNIT.MUL && CONFIG.INCLUDE_UNIT.DIV),
    A:(CONFIG.INCLUDE_AMO),
    F:(CONFIG.INCLUDE_UNIT.FPU),
    D:(CONFIG.INCLUDE_UNIT.FPU),
    S:(CONFIG.INCLUDE_S_MODE),
    U:(CONFIG.INCLUDE_U_MODE)
};

////////////////////////////////////////////////////
//Machine Version Registers
//Vendor and architecture IDs read as zero; implementation and hart IDs come from the configuration
localparam logic [31:0] mvendorid = '0;
localparam logic [31:0] marchid = '0;
localparam logic [31:0] mimpid = CONFIG.CSRS.MACHINE_IMPLEMENTATION_ID;
localparam logic [31:0] mhartid = CONFIG.CSRS.CPU_ID;

////////////////////////////////////////////////////
//MSTATUS
localparam logic [31:0] mstatush = '0; //Always little endian
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Non-Constant Registers
|
||||
//Structured registers
mstatus_t mstatus;
mip_t mip;
mip_t mip_new;
mie_t mie;
mcause_t mcause;

//Plain 32-bit registers
logic [31:0] mtvec;
logic [31:0] medeleg;
logic [31:0] mideleg;
logic [31:0] mepc;
logic [31:0] mtval;
logic [31:0] mscratch;

//Writeable mstatus fields for a machine-level write
//Virtualization support: TSR, TW, TVM unused
//Extension context status: SD, FS, XS unused
localparam mstatus_t mstatus_mask = '{
    default:0,
    sie:(CONFIG.INCLUDE_S_MODE),
    mie:1,
    spie:(CONFIG.INCLUDE_S_MODE),
    mpie:1,
    spp:(CONFIG.INCLUDE_S_MODE),
    mpp:'1,
    sum:(CONFIG.INCLUDE_U_MODE & CONFIG.INCLUDE_S_MODE),
    mxr:(CONFIG.INCLUDE_S_MODE),
    mprv:(CONFIG.INCLUDE_U_MODE | CONFIG.INCLUDE_S_MODE)
};

//Subset of mstatus exposed through sstatus
localparam mstatus_t sstatus_mask = '{
    default:0,
    sie:1,
    spie:1,
    spp:1,
    sum:1,
    mxr:1
};

//Supervisor views of mip/mie: only the supervisor-level bits, and only with S-mode included
localparam mip_t sip_mask = '{
    default:0,
    ssip:CONFIG.INCLUDE_S_MODE,
    stip:CONFIG.INCLUDE_S_MODE,
    seip:CONFIG.INCLUDE_S_MODE
};
localparam mie_t sie_mask = '{
    default:0,
    ssie:CONFIG.INCLUDE_S_MODE,
    stie:CONFIG.INCLUDE_S_MODE,
    seie:CONFIG.INCLUDE_S_MODE
};
|
||||
|
||||
|
||||
generate if (CONFIG.INCLUDE_M_MODE) begin : gen_csr_m_mode
|
||||
mstatus_t mstatus_new;
mstatus_t mstatus_write_mask;
logic [ECODE_W-1:0] interrupt_cause_r;

//Interrupt and Exception Delegation
//Can delegate to supervisor if currently in supervisor or user modes
logic can_delegate;
logic exception_delegated;
logic interrupt_delegated;

assign can_delegate = CONFIG.INCLUDE_S_MODE & (privilege_level inside {SUPERVISOR_PRIVILEGE, USER_PRIVILEGE});
//A trap is delegated only when its bit is set in the matching delegation register
assign exception_delegated = can_delegate & exception.valid & medeleg[exception.code];
assign interrupt_delegated = can_delegate & interrupt_taken & mideleg[interrupt_cause_r];

//Encode which (if any) mstatus-modifying event is active this cycle.
//Bit order of the one-hot vector, MSB first: sret, mret, exception, interrupt, CSR write, (unused)
one_hot_to_integer #(6)
mstatus_case_one_hot (
    .one_hot ({
        sret,
        mret,
        exception.valid,
        interrupt_taken,
        (mwrite_en(MSTATUS) | swrite_en(SSTATUS)),
        1'b0
    }),
    .int_out (mstatus_case)
);
|
||||
|
||||
//Next privilege level
//Returns restore the saved previous privilege; traps enter supervisor mode when delegated
//and machine mode otherwise. With no event the level is unchanged.
always_comb begin
    next_privilege_level = privilege_level;
    case (mstatus_case) inside
        MSTATUS_MRET : next_privilege_level = privilege_t'(mstatus.mpp);
        MSTATUS_SRET : next_privilege_level = privilege_t'({1'b0,mstatus.spp});
        MSTATUS_INTERRUPT : begin
            if (interrupt_delegated)
                next_privilege_level = SUPERVISOR_PRIVILEGE;
            else
                next_privilege_level = MACHINE_PRIVILEGE;
        end
        MSTATUS_EXCEPTION : begin
            if (exception_delegated)
                next_privilege_level = SUPERVISOR_PRIVILEGE;
            else
                next_privilege_level = MACHINE_PRIVILEGE;
        end
        default : ;
    endcase
end

//Current privilege level, machine mode out of reset
always_ff @(posedge clk) begin
    if (rst)
        privilege_level <= MACHINE_PRIVILEGE;
    else
        privilege_level <= next_privilege_level;
end
assign current_privilege = privilege_level;
|
||||
|
||||
//A supervisor-level write (sstatus) may only touch the sstatus subset of fields
assign mstatus_write_mask = swrite ? sstatus_mask : mstatus_mask;

//Next mstatus value. Starts from the current value; only the active event modifies fields.
always_comb begin
    mstatus_new = mstatus;
    case (mstatus_case) inside
        //Software write: masked fields take the new value, the rest are preserved
        MSTATUS_WRITE : mstatus_new = (mstatus & ~mstatus_write_mask) | (updated_csr & mstatus_write_mask);
        //Return from machine-level trap: pop the interrupt-enable stack
        MSTATUS_MRET : begin
            mstatus_new.mie = mstatus.mpie;
            mstatus_new.mpie = 1;
            mstatus_new.mpp = CONFIG.INCLUDE_U_MODE ? USER_PRIVILEGE : MACHINE_PRIVILEGE;
            if (mstatus.mpp != MACHINE_PRIVILEGE)
                mstatus_new.mprv = 0;
        end
        //Return from supervisor-level trap
        MSTATUS_SRET : begin
            mstatus_new.sie = mstatus.spie;
            mstatus_new.spie = 1;
            mstatus_new.spp = USER_PRIVILEGE[0];
            mstatus_new.mprv = 0;
        end
        //Trap entry: push the interrupt-enable stack of the level that takes the trap
        MSTATUS_INTERRUPT, MSTATUS_EXCEPTION : begin
            if (next_privilege_level == SUPERVISOR_PRIVILEGE) begin
                if (privilege_level == SUPERVISOR_PRIVILEGE)
                    mstatus_new.spie = mstatus.sie;
                else
                    mstatus_new.spie = 0;
                mstatus_new.sie = 0;
                mstatus_new.spp = privilege_level[0]; //one if from supervisor-mode, zero if from user-mode
            end
            else begin
                case (privilege_level)
                    MACHINE_PRIVILEGE : mstatus_new.mpie = mstatus.mie;
                    SUPERVISOR_PRIVILEGE : mstatus_new.mpie = mstatus.sie;
                    default : mstatus_new.mpie = 0;
                endcase
                mstatus_new.mie = 0;
                mstatus_new.mpp = privilege_level; //machine,supervisor or user
            end
        end
        default : ;
    endcase
end

//All fields reset to zero except mpp, which resets to machine privilege
always_ff @(posedge clk) begin
    if (rst)
        mstatus <= '{default:0, mpp:MACHINE_PRIVILEGE};
    else
        mstatus <= mstatus_new;
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//MTVEC
|
||||
//No vectored mode, mode hard-coded to zero
|
||||
//Trap handler base address
//The base is initialized (not reset) from the configuration and is software writeable
//only when MTVEC_WRITEABLE is set. The mode bits [1:0] are held at zero.
assign exception_target_pc = mtvec;

initial mtvec[31:2] = CONFIG.CSRS.RESET_MTVEC[31:2];
always_ff @(posedge clk) begin
    if (CONFIG.CSRS.NON_STANDARD_OPTIONS.MTVEC_WRITEABLE & mwrite_en(MTVEC))
        mtvec[31:2] <= updated_csr[31:2];
    mtvec[1:0] <= '0;
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//MEDELEG
|
||||
//Exception delegation: writeable only when S-mode is included, restricted to the masked bits
localparam logic [31:0] medeleg_mask = init_medeleg_mask();
always_ff @(posedge clk) begin
    if (rst)
        medeleg <= '0;
    else if (CONFIG.INCLUDE_S_MODE & mwrite_en(MEDELEG))
        medeleg <= (medeleg_mask & updated_csr);
end

////////////////////////////////////////////////////
//MIDELEG
//Interrupt delegation: same write rules as medeleg
localparam logic [31:0] mideleg_mask = init_mideleg_mask();
always_ff @(posedge clk) begin
    if (rst)
        mideleg <= '0;
    else if (CONFIG.INCLUDE_S_MODE & mwrite_en(MIDELEG))
        mideleg <= (mideleg_mask & updated_csr);
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//MIP
|
||||
//mip_mask   : implemented pending bits
//mip_w_mask : bits that a software write is allowed to set (supervisor-level bits only)
localparam mip_t mip_mask = '{
    default:0,
    ssip:CONFIG.INCLUDE_S_MODE,
    msip:1,
    stip:CONFIG.INCLUDE_S_MODE,
    mtip:1,
    seip:CONFIG.INCLUDE_S_MODE,
    meip:1
};
localparam mip_t mip_w_mask = '{
    default:0,
    ssip:CONFIG.INCLUDE_S_MODE,
    stip:CONFIG.INCLUDE_S_MODE,
    seip:CONFIG.INCLUDE_S_MODE
};

//Incoming interrupt lines mapped onto the mip layout, restricted to implemented bits
always_comb begin
    mip_new = '0;

    mip_new.msip = m_interrupt.software;
    mip_new.mtip = m_interrupt.timer;
    mip_new.meip = m_interrupt.external;

    mip_new.ssip = s_interrupt.software;
    mip_new.stip = s_interrupt.timer;
    mip_new.seip = s_interrupt.external;

    mip_new &= mip_mask;
end

//NOTE(review): mip is only updated on a MIP write or while at least one interrupt line is high,
//so it keeps its previous value once every line has dropped. The update also merges updated_csr
//even when the update is not caused by a MIP write. Confirm both are intended.
always_ff @(posedge clk) begin
    if (rst)
        mip <= 0;
    else if ((|mip_new) | mwrite_en(MIP))
        mip <= mip_new | (updated_csr & mip_w_mask);
end
assign interrupt_pending = |(mip & mie) & mstatus.mie;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//MIE
|
||||
//Implemented interrupt-enable bits
localparam mie_t mie_mask = '{
    default:0,
    ssie:CONFIG.INCLUDE_S_MODE,
    msie:1,
    stie:CONFIG.INCLUDE_S_MODE,
    mtie:1,
    seie:CONFIG.INCLUDE_S_MODE,
    meie:1
};

//Written through either MIE or SIE; the mask applied depends on which write strobe is active
always_ff @(posedge clk) begin
    if (rst)
        mie <= '0;
    else if (mwrite_en(MIE) | swrite_en(SIE)) begin
        if (swrite)
            mie <= updated_csr & sie_mask;
        else
            mie <= updated_csr & mie_mask;
    end
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//MEPC
|
||||
//Can be software written, written on exception with
|
||||
//exception causing PC. Lower two bits tied to zero.
|
||||
//A trap (exception or taken interrupt) records exception.pc and has priority over a
//software write to MEPC. The lower two bits are always zero.
always_ff @(posedge clk) begin
    if (exception.valid | interrupt_taken)
        mepc[31:2] <= exception.pc[31:2];
    else if (mwrite_en(MEPC))
        mepc[31:2] <= updated_csr[31:2];
    mepc[1:0] <= '0;
end
assign epc = mepc;
|
||||
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//MCAUSE
|
||||
//As the exception and interrupts codes are sparsely populated,
|
||||
//to ensure that only legal values are written, a ROM lookup
|
||||
//is used to validate the CSR write operation
|
||||
//One bit per cause code: set when that code is a legal value for a software write
localparam logic [2**ECODE_W-1:0] M_EXCEPTION_MASKING_ROM = init_exception_masking_rom();
localparam logic [2**ECODE_W-1:0] M_INTERRUPT_MASKING_ROM = init_interrupt_masking_rom();

//Bit 31 of the written value selects which ROM validates the code
logic mcause_write_valid;
assign mcause_write_valid = updated_csr[31] ?
    M_INTERRUPT_MASKING_ROM[updated_csr[ECODE_W-1:0]] :
    M_EXCEPTION_MASKING_ROM[updated_csr[ECODE_W-1:0]];

mip_t mip_cause;
logic [5:0] mip_priority_vector;
logic [2:0] mip_cause_sel;

//Cause code for each position of mip_priority_vector (index 5 = meip ... index 0 = ssip)
localparam logic [ECODE_W-1:0] interrupt_code_table [7:0] = '{ 0, 0,
    M_EXTERNAL_INTERRUPT, M_TIMER_INTERRUPT, M_SOFTWARE_INTERRUPT,
    S_EXTERNAL_INTERRUPT, S_TIMER_INTERRUPT, S_SOFTWARE_INTERRUPT
};

//Pending and enabled interrupts, arranged MSB first as:
//machine external, timer, software, then supervisor external, timer, software
assign mip_cause = (mip & mie);
assign mip_priority_vector = {
    mip_cause.meip, mip_cause.mtip, mip_cause.msip,
    mip_cause.seip, mip_cause.stip, mip_cause.ssip
};

priority_encoder #(.WIDTH(6))
interrupt_cause_encoder (
    .priority_vector (mip_priority_vector),
    .encoded_result (mip_cause_sel)
);

//Cause of the pending interrupt, registered while an interrupt is pending
always_ff @(posedge clk) begin
    if (interrupt_pending)
        interrupt_cause_r <= interrupt_code_table[mip_cause_sel];
end

//mcause is updated by a taken interrupt, an exception, or a validated software write,
//in that priority order for the code field
always_ff @(posedge clk) begin
    mcause.zeroes <= '0;
    if (rst) begin
        mcause.is_interrupt <= 0;
        mcause.code <= 0;
    end
    else if (CONFIG.CSRS.NON_STANDARD_OPTIONS.INCLUDE_MCAUSE & ((mcause_write_valid & mwrite_en(MCAUSE)) | exception.valid | interrupt_taken)) begin
        mcause.is_interrupt <= interrupt_taken | (mwrite_en(MCAUSE) & updated_csr[31]);
        if (interrupt_taken)
            mcause.code <= interrupt_cause_r;
        else if (exception.valid)
            mcause.code <= exception.code;
        else
            mcause.code <= updated_csr[ECODE_W-1:0];
    end
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//MTVAL
|
||||
//Optional register: an exception records its trap value, otherwise software may write it
always_ff @(posedge clk) begin
    if (CONFIG.CSRS.NON_STANDARD_OPTIONS.INCLUDE_MTVAL) begin
        if (exception.valid)
            mtval <= exception.tval;
        else if (mwrite_en(MTVAL))
            mtval <= updated_csr;
    end
end

////////////////////////////////////////////////////
//MSCRATCH
//Optional scratch register, software write only
always_ff @(posedge clk) begin
    if (CONFIG.CSRS.NON_STANDARD_OPTIONS.INCLUDE_MSCRATCH & mwrite_en(MSCRATCH))
        mscratch <= updated_csr;
end
|
||||
|
||||
end
|
||||
endgenerate
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//END OF MACHINE REGS
|
||||
////////////////////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//BEGIN OF SUPERVISOR REGS
|
||||
////////////////////////////////////////////////////
|
||||
//Supervisor register storage
//NOTE(review): no driver for sepc, stime, stimecmp, scause, stval or sstatus is visible in
//this part of the file -- confirm whether they are placeholders
logic [31:0] sepc;
logic [31:0] scause;
logic [31:0] stval;

logic [31:0] stime;
logic [31:0] stimecmp;

logic [31:0] sstatus;
logic [31:0] stvec;
logic [31:0] sscratch;

satp_t satp;

//TLB status --- used to mux physical/virtual address
assign asid = satp.asid;
assign tlb_on = CONFIG.INCLUDE_S_MODE & satp.mode;
|
||||
//******************
|
||||
|
||||
generate if (CONFIG.INCLUDE_S_MODE) begin : gen_csr_s_mode
    ////////////////////////////////////////////////////
    //MMU interface
    //Both MMUs receive the same mxr/sum/satp_ppn values. The data MMU uses mstatus.mpp
    //as its privilege while mstatus.mprv is set.
    assign immu.privilege = privilege_level;
    assign immu.mxr = mstatus.mxr;
    assign immu.sum = mstatus.sum;
    assign immu.satp_ppn = satp.ppn;

    assign dmmu.privilege = mstatus.mprv ? mstatus.mpp : privilege_level;
    assign dmmu.mxr = mstatus.mxr;
    assign dmmu.sum = mstatus.sum;
    assign dmmu.satp_ppn = satp.ppn;

    ////////////////////////////////////////////////////
    //STVEC
    //Resets to the configured reset vector with the low two bits cleared
    logic [31:0] stvec_mask = '1;
    always_ff @(posedge clk) begin
        if (rst)
            stvec <= {CONFIG.CSRS.RESET_VEC[31:2], 2'b00};
        else if (swrite_en(STVEC))
            stvec <= (stvec_mask & updated_csr);
    end

    ////////////////////////////////////////////////////
    //SATP
    logic[31:0] satp_mask;
    assign satp_mask = '1;
    always_ff @(posedge clk) begin
        if (rst)
            satp <= 0;
        else if (swrite_en(SATP))
            satp <= (satp_mask & updated_csr);
    end

    ////////////////////////////////////////////////////
    //SSCRATCH
    //No reset value, software write only
    always_ff @(posedge clk) begin
        if (swrite_en(SSCRATCH))
            sscratch <= updated_csr;
    end

end
endgenerate
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//END OF SUPERVISOR REGS
|
||||
////////////////////////////////////////////////////
|
||||
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Timers and Counters
|
||||
//Register increment for instructions completed
|
||||
//Increments suppressed on writes to these registers
|
||||
//Counter width and software writeability come from the non-standard options
localparam COUNTER_W = CONFIG.CSRS.NON_STANDARD_OPTIONS.COUNTER_W;
localparam MCYCLE_WRITEABLE = CONFIG.CSRS.NON_STANDARD_OPTIONS.MCYCLE_WRITEABLE;
localparam MINSTR_WRITEABLE = CONFIG.CSRS.NON_STANDARD_OPTIONS.MINSTR_WRITEABLE;

logic[COUNTER_W-1:0] mcycle;
logic[COUNTER_W-1:0] mtime;
logic[COUNTER_W-1:0] minst_ret;

logic[COUNTER_W-1:0] mcycle_input_next;
logic[COUNTER_W-1:0] minst_ret_input_next;
logic[LOG2_RETIRE_PORTS:0] minst_ret_inc;
logic mcycle_inc;

//Cycle counter
//The low word is loaded by a write to MCYCLE and the high word by a write to MCYCLEH.
//(Previously both halves were loaded on a MCYCLE write, so a low-word write overwrote the
//high word and a MCYCLEH write was discarded.)
assign mcycle_input_next[31:0] = (MCYCLE_WRITEABLE & mwrite_en(MCYCLE)) ? updated_csr : mcycle[31:0];
assign mcycle_input_next[COUNTER_W-1:32] = (MCYCLE_WRITEABLE & mwrite_en(MCYCLEH)) ? updated_csr[COUNTER_W-33:0] : mcycle[COUNTER_W-1:32];
//The increment is suppressed in any cycle that writes either half
assign mcycle_inc = ~(MCYCLE_WRITEABLE & (mwrite_en(MCYCLE) | mwrite_en(MCYCLEH)));

always_ff @(posedge clk) begin
    if (rst)
        mcycle <= 0;
    else
        mcycle <= mcycle_input_next + COUNTER_W'(mcycle_inc);
end

//Retired instruction counter
//Same write rules as the cycle counter: MINSTRET loads the low word, MINSTRETH the high word.
//Otherwise the counter advances by the number of instructions retired this cycle.
assign minst_ret_input_next[31:0] = (MINSTR_WRITEABLE & mwrite_en(MINSTRET)) ? updated_csr : minst_ret[31:0];
assign minst_ret_input_next[COUNTER_W-1:32] = (MINSTR_WRITEABLE & mwrite_en(MINSTRETH)) ? updated_csr[COUNTER_W-33:0] : minst_ret[COUNTER_W-1:32];
assign minst_ret_inc = (MINSTR_WRITEABLE & (mwrite_en(MINSTRET) | mwrite_en(MINSTRETH))) ? '0 : retire_count;

always_ff @(posedge clk) begin
    if (rst)
        minst_ret <= 0;
    else
        minst_ret <= minst_ret_input_next + COUNTER_W'(minst_ret_inc);
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Floating-Point status register
|
||||
//Contains 5 exception flags (invalid, inexact, overflow, underflow, divide by zero)
|
||||
//Also contains dynamic rounding mode (round to zero, round to +infinity, round to -infinity, round to nearest ties to even, round to nearest ties away)
|
||||
//These fields can be accessed individually or simultaneously through different addresses
|
||||
logic[4:0] fflags;
logic[2:0] frm;
//The dynamic rounding mode is taken directly from frm
assign dyn_rm = frm;

generate if (CONFIG.INCLUDE_UNIT.FPU) begin : gen_csr_fp
    //Which of the two fields the addressed CSR covers
    typedef enum logic[1:0] {
        WRITE_NONE = 2'b00,
        WRITE_FFLAGS = 2'b01,
        WRITE_FRM = 2'b10,
        WRITE_BOTH = 2'b11
    } fcsr_write_t;
    fcsr_write_t fcsr_write_type;

    always_comb begin
        fcsr_write_type = WRITE_NONE;
        case (csr_inputs_r.addr) inside
            FFLAGS : fcsr_write_type = WRITE_FFLAGS;
            FRM : fcsr_write_type = WRITE_FRM;
            FCSR : fcsr_write_type = WRITE_BOTH;
            default : ;
        endcase
    end

    //Older versions of the spec mandated an illegal instruction exception if an instruction
    //with the dynamic rounding mode was issued and the frm register contained an invalid
    //rounding mode. This has since been changed to "reserved" behaviour, meaning we do not
    //have to do anything special. In this case, fp_roundup would default to rne

    always_ff @(posedge clk) begin
        if (rst) begin
            fflags <= '0;
            frm <= '0;
        end
        else if (commit) begin
            //Explicit writes: FCSR holds frm in bits [7:5] and fflags in bits [4:0]
            case (fcsr_write_type)
                WRITE_FFLAGS : fflags <= next_csr[4:0];
                WRITE_FRM : frm <= next_csr[2:0];
                WRITE_BOTH : begin
                    frm <= next_csr[7:5];
                    fflags <= next_csr[4:0];
                end
                default;
            endcase
        end
        else begin
            //Implicit writes (can never overlap explicit writes): flags accumulate
            fflags <= fflags | fflag_wmask;
        end
    end

end endgenerate
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//CSR mux
|
||||
//Read mask
//The supervisor views (SSTATUS/SIE/SIP) share storage with their machine-level registers,
//so only the supervisor-visible bits are returned for those addresses.
logic [31:0] read_mask;
always_comb begin
    case (csr_inputs_r.addr) inside
        SSTATUS : read_mask = CONFIG.INCLUDE_S_MODE ? sstatus_mask : '1;
        SIE : read_mask = CONFIG.INCLUDE_S_MODE ? sie_mask : '1;
        SIP : read_mask = CONFIG.INCLUDE_S_MODE ? sip_mask : '1;
        default : read_mask = '1;
    endcase
end

//Read mux: selects the CSR addressed by the held request.
//Registers of a mode or unit that is not included, and any unlisted address, read as zero.
always_comb begin
    case (csr_inputs_r.addr) inside
        //Machine info
        MISA : selected_csr = CONFIG.INCLUDE_M_MODE ? misa : '0;
        MVENDORID : selected_csr = CONFIG.INCLUDE_M_MODE ? mvendorid : '0;
        MARCHID : selected_csr = CONFIG.INCLUDE_M_MODE ? marchid : '0;
        MIMPID : selected_csr = CONFIG.INCLUDE_M_MODE ? mimpid : '0;
        MHARTID : selected_csr = CONFIG.INCLUDE_M_MODE ? mhartid : '0;
        //Machine trap setup
        MSTATUS : selected_csr = CONFIG.INCLUDE_M_MODE ? mstatus : '0;
        MEDELEG : selected_csr = CONFIG.INCLUDE_M_MODE ? medeleg : '0;
        MIDELEG : selected_csr = CONFIG.INCLUDE_M_MODE ? mideleg : '0;
        MIE : selected_csr = CONFIG.INCLUDE_M_MODE ? mie : '0;
        MTVEC : selected_csr = CONFIG.INCLUDE_M_MODE ? mtvec : '0;
        MCOUNTEREN : selected_csr = '0;
        //Machine trap handling
        MSCRATCH : selected_csr = CONFIG.INCLUDE_M_MODE ? mscratch : '0;
        MEPC : selected_csr = CONFIG.INCLUDE_M_MODE ? mepc : '0;
        MCAUSE : selected_csr = CONFIG.INCLUDE_M_MODE ? mcause : '0;
        MTVAL : selected_csr = CONFIG.INCLUDE_M_MODE ? mtval : '0;
        MIP : selected_csr = CONFIG.INCLUDE_M_MODE ? mip : '0;
        //Machine Memory Protection
        //Range bounds must be written low:high; a reversed range is empty and never matches
        [12'h3A0 : 12'h3EF] : selected_csr = '0;
        //Machine Timers and Counters
        MCYCLE : selected_csr = CONFIG.INCLUDE_M_MODE ? mcycle[31:0] : '0;
        MINSTRET : selected_csr = CONFIG.INCLUDE_M_MODE ? minst_ret[31:0] : '0;
        [12'hB03 : 12'hB1F] : selected_csr = '0;
        MCYCLEH : selected_csr = CONFIG.INCLUDE_M_MODE ? 32'(mcycle[COUNTER_W-1:32]) : '0;
        MINSTRETH : selected_csr = CONFIG.INCLUDE_M_MODE ? 32'(minst_ret[COUNTER_W-1:32]) : '0;
        [12'hB83 : 12'hB9F] : selected_csr = '0;
        //Machine Counter Setup
        [12'h320 : 12'h33F] : selected_csr = '0;

        //Supervisor Trap Setup
        SSTATUS : selected_csr = CONFIG.INCLUDE_S_MODE ? mstatus : '0;
        SEDELEG : selected_csr = '0; //No user-level interrupts/exception handling
        SIDELEG : selected_csr = '0;
        SIE : selected_csr = CONFIG.INCLUDE_S_MODE ? mie : '0;
        STVEC : selected_csr = CONFIG.INCLUDE_S_MODE ? stvec : '0;
        SCOUNTEREN : selected_csr = '0;
        //Supervisor trap handling
        SSCRATCH : selected_csr = CONFIG.INCLUDE_S_MODE ? sscratch : '0;
        //NOTE(review): SEPC, SCAUSE and STVAL return sscratch rather than sepc/scause/stval.
        //This looks like a placeholder -- confirm before relying on these reads.
        SEPC : selected_csr = CONFIG.INCLUDE_S_MODE ? sscratch : '0;
        SCAUSE : selected_csr = CONFIG.INCLUDE_S_MODE ? sscratch : '0;
        STVAL : selected_csr = CONFIG.INCLUDE_S_MODE ? sscratch : '0;
        SIP : selected_csr = CONFIG.INCLUDE_S_MODE ? mip : '0;
        //Supervisor Protection and Translation
        SATP : selected_csr = CONFIG.INCLUDE_S_MODE ? satp : '0;

        //User status
        //Floating point
        FFLAGS : selected_csr = CONFIG.INCLUDE_UNIT.FPU ? {27'b0, fflags} : '0;
        FRM : selected_csr = CONFIG.INCLUDE_UNIT.FPU ? {29'b0, frm} : '0;
        FCSR : selected_csr = CONFIG.INCLUDE_UNIT.FPU ? {24'b0, frm, fflags} : '0;
        //User Counter Timers
        //TIME/TIMEH return the cycle counter
        CYCLE : selected_csr = mcycle[31:0];
        TIME : selected_csr = mcycle[31:0];
        INSTRET : selected_csr = minst_ret[31:0];
        [12'hC03 : 12'hC1F] : selected_csr = '0;
        CYCLEH : selected_csr = 32'(mcycle[COUNTER_W-1:32]);
        TIMEH : selected_csr = 32'(mcycle[COUNTER_W-1:32]);
        INSTRETH : selected_csr = 32'(minst_ret[COUNTER_W-1:32]);
        [12'hC83 : 12'hC9F] : selected_csr = '0;

        default : selected_csr = '0;
    endcase
end

//The masked read value is captured at commit and drives the writeback data
always_ff @(posedge clk) begin
    if (commit)
        selected_csr_r <= selected_csr & read_mask;
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Assertions
|
||||
//At most one mstatus-modifying event may be active in any cycle
mstatus_update_assertion:
    assert property (
        @(posedge clk) disable iff (rst)
        $onehot0({
            mret,
            sret,
            interrupt_taken,
            exception.valid,
            (mwrite_en(MSTATUS) | swrite_en(SSTATUS))
        })
    ) else $error("multiple write to mstatus");
|
||||
|
||||
endmodule
|
93
core/execution_units/custom_unit.sv
Normal file
93
core/execution_units/custom_unit.sv
Normal file
|
@ -0,0 +1,93 @@
|
|||
/*
|
||||
* Copyright © 2022 Eric Matthews
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Eric Matthews <ematthew@sfu.ca>
|
||||
*/
|
||||
|
||||
//Example execution unit for the CUSTOM instruction encoding.
//Registers rf[RS1] + rf[RS2] when a request is accepted and presents the sum on the
//writeback interface from the following cycle until it is acknowledged.
module custom_unit

    import cva5_config::*;
    import riscv_types::*;
    import cva5_types::*;
    import opcodes::*;

    (
        input logic clk,
        input logic rst,

        //Decode stage: instruction in, unit selection and register usage out
        input decode_packet_t decode_stage,
        output logic unit_needed,
        output logic [REGFILE_READ_PORTS-1:0] uses_rs,
        output logic uses_rd,

        //Issue stage: instruction attributes and register file read data
        input issue_packet_t issue_stage,
        input logic issue_stage_ready,
        input logic [31:0] rf [REGFILE_READ_PORTS],

        unit_issue_interface.unit issue,
        unit_writeback_interface.unit wb
    );

    common_instruction_t instruction;//rs1_addr, rs2_addr, fn3, fn7, rd_addr, upper/lower opcode
    logic [31:0] result;
    logic done;
    id_t id;

    ////////////////////////////////////////////////////
    //Implementation
    //Simple 2-cycle adder that adds rs1 and rs2
    //NOTE(review): the original header claims a throughput of 1 (so long as the result is
    //accepted by the writeback stage), but issue.ready is low for every cycle in which
    //wb.done is high, including the ack cycle -- confirm the intended throughput

    ////////////////////////////////////////////////////
    //Decode
    assign instruction = decode_stage.instruction;

    //The following signals should be asserted when the decoded instruction
    //is handled by this execution unit.
    assign unit_needed = decode_stage.instruction inside {CUSTOM};

    //A CUSTOM instruction reads both source registers and writes rd
    always_comb begin
        uses_rs = '0;
        uses_rs[RS1] = unit_needed;
        uses_rs[RS2] = unit_needed;
        uses_rd = unit_needed;
    end

    ////////////////////////////////////////////////////
    //Issue
    //No new request is accepted while a result is waiting for writeback
    assign issue.ready = ~wb.done;

    //Capture the instruction ID and compute the sum when a request is accepted
    always_ff @(posedge clk) begin
        if (issue.new_request) begin
            id <= issue.id;
            result <= rf[RS1] + rf[RS2];
        end
    end

    ////////////////////////////////////////////////////
    //Write-back
    assign wb.id = id;
    assign wb.rd = result;

    //done is set by an accepted request and cleared by the ack (set has priority)
    always_ff @ (posedge clk) begin
        if (rst)
            wb.done <= 0;
        else if (issue.new_request)
            wb.done <= 1;
        else if (wb.ack)
            wb.done <= 0;
    end

endmodule
|
|
@ -25,15 +25,30 @@ module div_unit
|
|||
import cva5_config::*;
|
||||
import riscv_types::*;
|
||||
import cva5_types::*;
|
||||
import opcodes::*;
|
||||
|
||||
(
|
||||
input logic clk,
|
||||
input logic rst,
|
||||
input gc_outputs_t gc,
|
||||
|
||||
input logic instruction_issued_with_rd,
|
||||
|
||||
input decode_packet_t decode_stage,
|
||||
output logic unit_needed,
|
||||
output logic [REGFILE_READ_PORTS-1:0] uses_rs,
|
||||
output logic uses_rd,
|
||||
|
||||
input issue_packet_t issue_stage,
|
||||
input logic issue_stage_ready,
|
||||
input rs_addr_t issue_rs_addr [REGFILE_READ_PORTS],
|
||||
input logic [31:0] rf [REGFILE_READ_PORTS],
|
||||
|
||||
input div_inputs_t div_inputs,
|
||||
unit_issue_interface.unit issue,
|
||||
unit_writeback_interface.unit wb
|
||||
);
|
||||
common_instruction_t instruction;//rs1_addr, rs2_addr, fn3, fn7, rd_addr, upper/lower opcode
|
||||
logic mult_div_op;
|
||||
|
||||
logic signed_divop;
|
||||
logic negate_quotient;
|
||||
|
@ -52,74 +67,106 @@ module div_unit
|
|||
typedef struct packed{
|
||||
logic remainder_op;
|
||||
logic negate_result;
|
||||
logic divisor_is_zero;
|
||||
logic reuse_result;
|
||||
id_t id;
|
||||
} div_attributes_t;
|
||||
div_attributes_t wb_attr;
|
||||
|
||||
typedef struct packed{
|
||||
logic [XLEN-1:0] unsigned_dividend;
|
||||
logic [XLEN-1:0] unsigned_divisor;
|
||||
logic [$clog2(32)-1:0] dividend_CLZ;
|
||||
logic [$clog2(32)-1:0] divisor_CLZ;
|
||||
logic divisor_is_zero;
|
||||
logic reuse_result;
|
||||
div_attributes_t attr;
|
||||
} div_fifo_inputs_t;
|
||||
|
||||
div_fifo_inputs_t issue_fifo_inputs;
|
||||
div_fifo_inputs_t div_stage;
|
||||
div_attributes_t wb_attr;
|
||||
|
||||
unsigned_division_interface #(.DATA_WIDTH(32)) div_core();
|
||||
|
||||
logic in_progress;
|
||||
logic div_done;
|
||||
|
||||
fifo_interface #(.DATA_WIDTH($bits(div_fifo_inputs_t))) input_fifo();
|
||||
fifo_interface #(.DATA_WIDTH(XLEN)) wb_fifo();
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
fifo_interface #(.DATA_TYPE(div_fifo_inputs_t)) input_fifo();
|
||||
|
||||
function logic [31:0] negate_if (input logic [31:0] a, logic b);
|
||||
return ({32{b}} ^ a) + 32'(b);
|
||||
endfunction
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Decode
|
||||
assign unit_needed = decode_stage.instruction inside {DIV, DIVU, REM, REMU};
|
||||
always_comb begin
|
||||
uses_rs = '0;
|
||||
uses_rs[RS1] = unit_needed;
|
||||
uses_rs[RS2] = unit_needed;
|
||||
uses_rd = unit_needed;
|
||||
end
|
||||
////////////////////////////////////////////////////
|
||||
//Issue
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Result reuse (for div/rem pairs)
|
||||
rs_addr_t prev_div_rs_addr [2];
|
||||
logic [1:0] div_rd_match;
|
||||
logic prev_div_result_valid;
|
||||
logic div_rs_overwrite;
|
||||
logic div_op_reuse;
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue.new_request)
|
||||
prev_div_rs_addr <= issue_rs_addr[RS1:RS2];
|
||||
end
|
||||
|
||||
assign div_op_reuse = {prev_div_result_valid, prev_div_rs_addr[RS1], prev_div_rs_addr[RS2]} == {1'b1, issue_rs_addr[RS1],issue_rs_addr[RS2]};
|
||||
|
||||
//Clear if prev div inputs are overwritten by another instruction
|
||||
assign div_rd_match[RS1] = (issue_stage.rd_addr == prev_div_rs_addr[RS1]);
|
||||
assign div_rd_match[RS2] = (issue_stage.rd_addr == prev_div_rs_addr[RS2]);
|
||||
assign div_rs_overwrite = |div_rd_match;
|
||||
|
||||
set_clr_reg_with_rst #(.SET_OVER_CLR(1), .WIDTH(1), .RST_VALUE(0)) prev_div_result_valid_m (
|
||||
.clk, .rst,
|
||||
.set(issue.new_request & ~((issue_stage.rd_addr == issue_rs_addr[RS1]) | (issue_stage.rd_addr == issue_rs_addr[RS2]))),
|
||||
.clr((instruction_issued_with_rd & div_rs_overwrite) | gc.writeback_supress), //No instructions will be issued while gc.writeback_supress is asserted
|
||||
.result(prev_div_result_valid)
|
||||
);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Input and output sign determination
|
||||
assign signed_divop = ~div_inputs.op[0];
|
||||
assign signed_divop = ~ issue_stage.fn3[0];
|
||||
|
||||
assign negate_dividend = signed_divop & div_inputs.rs1[31];
|
||||
assign negate_divisor = signed_divop & div_inputs.rs2[31];
|
||||
assign negate_dividend = signed_divop & rf[RS1][31];
|
||||
assign negate_divisor = signed_divop & rf[RS2][31];
|
||||
|
||||
assign negate_quotient = signed_divop & (div_inputs.rs1[31] ^ div_inputs.rs2[31]);
|
||||
assign negate_remainder = signed_divop & (div_inputs.rs1[31]);
|
||||
assign negate_quotient = signed_divop & (rf[RS1][31] ^ rf[RS2][31]);
|
||||
assign negate_remainder = signed_divop & (rf[RS1][31]);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Input Processing
|
||||
assign unsigned_dividend = negate_if (div_inputs.rs1, negate_dividend);
|
||||
assign unsigned_divisor = negate_if (div_inputs.rs2, negate_divisor);
|
||||
assign unsigned_dividend = negate_if (rf[RS1], negate_dividend);
|
||||
assign unsigned_divisor = negate_if (rf[RS2], negate_divisor);
|
||||
|
||||
//Note: If this becomes the critical path, we can use the one's complemented input instead.
|
||||
//It will potentially overestimate (only when the input is a negative power-of-two), and
|
||||
//the divisor width will need to be increased by one to safely handle the case where the divisor CLZ is overestimated
|
||||
clz dividend_clz_block (.clz_input(unsigned_dividend), .clz(dividend_CLZ));
|
||||
clz divisor_clz_block (.clz_input(unsigned_divisor), .clz(divisor_CLZ));
|
||||
assign divisor_is_zero = (&divisor_CLZ) & ~div_inputs.rs2[0];
|
||||
|
||||
assign issue_fifo_inputs.unsigned_dividend = unsigned_dividend;
|
||||
assign issue_fifo_inputs.unsigned_divisor = unsigned_divisor;
|
||||
assign issue_fifo_inputs.dividend_CLZ = divisor_is_zero ? '0 : dividend_CLZ;
|
||||
assign issue_fifo_inputs.divisor_CLZ = divisor_CLZ;
|
||||
|
||||
assign issue_fifo_inputs.attr.remainder_op = div_inputs.op[1];
|
||||
assign issue_fifo_inputs.attr.negate_result = div_inputs.op[1] ? negate_remainder : (negate_quotient & ~divisor_is_zero);
|
||||
assign issue_fifo_inputs.attr.divisor_is_zero = divisor_is_zero;
|
||||
assign issue_fifo_inputs.attr.reuse_result = div_inputs.reuse_result;
|
||||
assign issue_fifo_inputs.attr.id = issue.id;
|
||||
clz #(.WIDTH(32)) dividend_clz_block (
|
||||
.clz_input(unsigned_dividend),
|
||||
.clz(dividend_CLZ),
|
||||
.zero()
|
||||
);
|
||||
clz #(.WIDTH(32)) divisor_clz_block (
|
||||
.clz_input(unsigned_divisor),
|
||||
.clz(divisor_CLZ),
|
||||
.zero(divisor_is_zero)
|
||||
);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Input FIFO
|
||||
//Currently just a register (DEPTH=1). As one div instruction can be in-progress
|
||||
//and one in this input "fifo," we can support two in-flight div ops.
|
||||
cva5_fifo #(.DATA_WIDTH($bits(div_fifo_inputs_t)), .FIFO_DEPTH(1))
|
||||
cva5_fifo #(.DATA_TYPE(div_fifo_inputs_t), .FIFO_DEPTH(1))
|
||||
div_input_fifo (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
|
@ -129,17 +176,28 @@ module div_unit
|
|||
logic div_ready;
|
||||
assign div_ready = (~in_progress) | wb.ack;
|
||||
|
||||
assign input_fifo.data_in = issue_fifo_inputs;
|
||||
assign input_fifo.data_in = '{
|
||||
unsigned_dividend : unsigned_dividend,
|
||||
unsigned_divisor : unsigned_divisor,
|
||||
dividend_CLZ : divisor_is_zero ? '0 : dividend_CLZ,
|
||||
divisor_CLZ : divisor_CLZ,
|
||||
divisor_is_zero : divisor_is_zero,
|
||||
reuse_result : div_op_reuse,
|
||||
attr : '{
|
||||
remainder_op : issue_stage.fn3[1],
|
||||
negate_result : (issue_stage.fn3[1] ? negate_remainder : (negate_quotient & ~divisor_is_zero)),
|
||||
id : issue.id
|
||||
}
|
||||
};
|
||||
assign input_fifo.push = issue.new_request;
|
||||
assign input_fifo.potential_push = issue.possible_issue;
|
||||
assign issue.ready = ~input_fifo.full | (~in_progress);
|
||||
assign input_fifo.pop = input_fifo.valid & div_ready;
|
||||
assign div_stage = input_fifo.data_out;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Control Signals
|
||||
assign div_core.start = input_fifo.pop & ~div_stage.attr.reuse_result;
|
||||
assign div_done = div_core.done | (input_fifo.pop & div_stage.attr.reuse_result);
|
||||
assign div_core.start = input_fifo.pop & ~input_fifo.data_out.reuse_result;
|
||||
assign div_done = div_core.done | (input_fifo.pop & input_fifo.data_out.reuse_result);
|
||||
|
||||
//If more than one cycle, set in_progress so that multiple div.start signals are not sent to the div unit.
|
||||
set_clr_reg_with_rst #(.SET_OVER_CLR(1), .WIDTH(1), .RST_VALUE('0))
|
||||
|
@ -151,16 +209,16 @@ module div_unit
|
|||
);
|
||||
always_ff @ (posedge clk) begin
|
||||
if (input_fifo.pop)
|
||||
wb_attr <= div_stage.attr;
|
||||
wb_attr <= input_fifo.data_out.attr;
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Div core
|
||||
assign div_core.dividend = div_stage.unsigned_dividend;
|
||||
assign div_core.divisor = div_stage.unsigned_divisor;
|
||||
assign div_core.dividend_CLZ = div_stage.dividend_CLZ;
|
||||
assign div_core.divisor_CLZ = div_stage.divisor_CLZ;
|
||||
assign div_core.divisor_is_zero = div_stage.attr.divisor_is_zero;
|
||||
assign div_core.dividend = input_fifo.data_out.unsigned_dividend;
|
||||
assign div_core.divisor = input_fifo.data_out.unsigned_divisor;
|
||||
assign div_core.dividend_CLZ = input_fifo.data_out.dividend_CLZ;
|
||||
assign div_core.divisor_CLZ = input_fifo.data_out.divisor_CLZ;
|
||||
assign div_core.divisor_is_zero = input_fifo.data_out.divisor_is_zero;
|
||||
|
||||
div_core #(.DIV_WIDTH(32))
|
||||
divider_block (
|
73
core/execution_units/fp_unit/divider/carry_save_shift.sv
Normal file
73
core/execution_units/fp_unit/divider/carry_save_shift.sv
Normal file
|
@ -0,0 +1,73 @@
|
|||
/*
|
||||
* Copyright © 2023 Chris Keilbart, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Chris Keilbart <ckeilbar@sfu.ca>
|
||||
*/
|
||||
|
||||
module carry_save_shift

    import fpu_types::*;

    #(
        parameter WIDTH = 32 //Includes the integer bit
    )(
        input logic[WIDTH-1:0] four_wsum, //Shifted twice by the CALLER (because of special initialization)
        input logic[WIDTH-3:0] wcarry,
        input logic[WIDTH-4:0] divisor,

        output logic[WIDTH-3:0] next_wsum,
        output logic[WIDTH-3:0] next_wcarry,
        output q_t next_q,
        output logic not_in_table //Only used for assertion
    );

    ////////////////////////////////////////////////////
    //Single carry-save step
    //Looks up the next quotient digit from the upper bits of the shifted sum/carry words,
    //then adds the matching (possibly inverted) divisor multiple to them with a row of full adders.

    //Positive digits subtract q*d: the multiple is bitwise inverted below and this bit supplies
    //the +1 that completes the 2s complement
    logic subtract_qd;
    assign subtract_qd = (next_q == POS_ONE || next_q == POS_TWO);

    //Carry word widened to line up with four_wsum; the freed LSB carries the +1 above
    logic[WIDTH-1:0] four_wcarry;
    assign four_wcarry = {wcarry, 1'b0, subtract_qd};

    //Divisor multiple selected by the digit: 2d, d or 0, inverted for positive digits
    logic[WIDTH-3:0] neg_q_d;
    always_comb begin
        case (next_q)
            POS_TWO : neg_q_d = ~{divisor, 1'b0};
            NEG_TWO : neg_q_d = {divisor, 1'b0};
            POS_ONE : neg_q_d = ~{1'b0, divisor};
            ZERO : neg_q_d = '0;
            default : neg_q_d = {1'b0, divisor}; //NEG_ONE and any remaining encoding
        endcase
    end

    //Digit selection from the 3 upper divisor bits and the 7 upper bits of each residual word
    q_lookup lut (
        .d(divisor[WIDTH-5 -: 3]),
        .ws(four_wsum[WIDTH-1 -: 7]),
        .wc(four_wcarry[WIDTH-1 -: 7]),
        .q(next_q),
        .not_in_table(not_in_table)
    );

    //One full adder per bit; the carry out of bit N lands in carry bit N+1
    generate for (genvar bit_idx = 0; bit_idx < WIDTH-3; bit_idx++) begin : gen_carry_save_adder
        assign {next_wcarry[bit_idx+1], next_wsum[bit_idx]} = four_wsum[bit_idx] + four_wcarry[bit_idx] + neg_q_d[bit_idx];
    end endgenerate

    //Top bit only needs the sum; its carry out is discarded
    assign next_wsum[WIDTH-3] = four_wsum[WIDTH-3] ^ four_wcarry[WIDTH-3] ^ neg_q_d[WIDTH-3];

    //Nothing carries into the lowest position
    assign next_wcarry[0] = 1'b0;

endmodule
|
174
core/execution_units/fp_unit/divider/fp_div_core.sv
Normal file
174
core/execution_units/fp_unit/divider/fp_div_core.sv
Normal file
|
@ -0,0 +1,174 @@
|
|||
/*
|
||||
* Copyright © 2023 Chris Keilbart, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Chris Keilbart <ckeilbar@sfu.ca>
|
||||
*/
|
||||
|
||||
module fp_div_core

    import fpu_types::*;

    (
        input logic clk,
        input logic rst,
        unsigned_division_interface.divider div
    );

    localparam DIV_WIDTH = div.DATA_WIDTH;
    localparam HALF_DIV_WIDTH = (1+DIV_WIDTH)/2; //DIV_WIDTH/2 rounded up
    localparam COUNTER_WIDTH = $clog2(HALF_DIV_WIDTH+3);
    localparam QUOTIENT_WIDTH = 2*HALF_DIV_WIDTH+2;
    localparam DECIMAL_WIDTH = DIV_WIDTH+1;
    localparam RESIDUE_WIDTH = DIV_WIDTH+3;

    ////////////////////////////////////////////////////
    //Radix 4 divider
    //Follows the design in "Digital Arithmetic" by Ercegovac and Lang
    //Uses the digit set {-2, -1, 0, 1, 2}
    //The residual is held in carry-save form (four_wsum / wcarry)
    logic[RESIDUE_WIDTH-1:0] four_wsum; //Shifted left twice
    logic[DECIMAL_WIDTH-1:0] wcarry;
    logic[DECIMAL_WIDTH-2:0] divisor_r;
    logic[DECIMAL_WIDTH-1:0] next_wsum;
    logic[DECIMAL_WIDTH-1:0] next_wcarry;

    q_t current_q;
    q_t next_q;
    q_t muxed_q;

    logic[QUOTIENT_WIDTH-1:0] quotient;
    logic[QUOTIENT_WIDTH-1:0] quotient_m;
    logic[QUOTIENT_WIDTH-1:0] next_quotient;
    logic[QUOTIENT_WIDTH-1:0] next_quotient_m;

    //Assertions
    logic decremented_invalid;
    logic bad_quotient_digit;
    logic not_in_table;

    ////////////////////////////////////////////////////
    //Control logic
    //The counter leaves zero on div.start, counts up to HALF_DIV_WIDTH+2 and then wraps to zero.
    //div.done is the registered counter_full, so it is high in the cycle after the terminal count.
    logic [COUNTER_WIDTH-1:0] counter;
    logic counter_full;
    logic counter_nonzero;

    assign counter_full = (counter == COUNTER_WIDTH'(HALF_DIV_WIDTH+2));
    assign counter_nonzero = |counter;

    always_ff @(posedge clk) begin
        if (rst) begin
            counter <= '0;
            div.done <= 0;
        end
        else begin
            div.done <= counter_full;
            if (counter_full)
                counter <= '0;
            else if (div.start | counter_nonzero)
                counter <= counter + 1;
        end
    end

    ////////////////////////////////////////////////////
    //Iterate over the digits
    //div.start loads the operands and clears the accumulated state;
    //every following cycle with a non-zero counter retires one digit.
    always_ff @(posedge clk) begin
        if (rst) begin
            divisor_r <= '0;
            four_wsum <= '0;
            wcarry <= '0;
            quotient <= '0;
            quotient_m <= '0;
            current_q <= ZERO;
        end
        else if (div.start) begin
            divisor_r <= div.divisor;
            four_wsum <= {3'b0, div.dividend}; //First iteration doesn't shift the inputs
            wcarry <= '0;
            quotient <= '0;
            quotient_m <= '0;
            current_q <= ZERO;
        end
        else if (counter_nonzero) begin
            four_wsum <= {next_wsum, 2'b0};
            wcarry <= next_wcarry;
            quotient <= next_quotient;
            quotient_m <= next_quotient_m;
            current_q <= next_q;
        end
    end

    assign div.quotient = quotient[QUOTIENT_WIDTH-2 -: DIV_WIDTH]; //Shift only once instead of twice because inputs are in the range 0.1X but the output can be X.XX

    //Carry save adder operating on shifted input
    carry_save_shift #(.WIDTH(RESIDUE_WIDTH)) partial_sum (
        .four_wsum(four_wsum),
        .wcarry(wcarry),
        .divisor(divisor_r),
        .next_wsum(next_wsum),
        .next_wcarry(next_wcarry),
        .next_q(next_q),
        .not_in_table(not_in_table)
    );

    //Digit conversion
    on_the_fly #(.WIDTH(QUOTIENT_WIDTH)) quotient_conv (
        .current_Q(quotient),
        .current_QM(quotient_m),
        .q(muxed_q),
        .next_Q(next_quotient),
        .next_QM(next_quotient_m),
        .bad_quotient_digit(bad_quotient_digit)
    );

    ////////////////////////////////////////////////////
    //Sign/zero detection using an adder
    //The alternative is a tree of generate/propagate blocks (see page 265 of "Digital Arithmetic" by Ercegovac and Lang)
    //For a 55 bit width, both have very similar delays but the tree uses slightly more resources
    logic is_negative;
    logic final_correction;
    logic[DECIMAL_WIDTH-1:0] sz_sum;

    assign sz_sum = four_wsum[RESIDUE_WIDTH-1:2] + wcarry; //Undo the double shift and resolve the carry-save pair
    assign is_negative = sz_sum[DECIMAL_WIDTH-1];
    assign final_correction = counter_full & is_negative;

    //On the terminal count a negative resolved residual lowers the digit handed to the converter by one
    always_comb begin
        div.remainder = sz_sum[DIV_WIDTH-1:0];

        muxed_q = current_q;
        decremented_invalid = 0;
        if (final_correction) begin //Subtract 1
            unique case (current_q)
                POS_TWO : muxed_q = POS_ONE;
                POS_ONE : muxed_q = ZERO;
                ZERO : muxed_q = NEG_ONE;
                NEG_ONE : muxed_q = NEG_TWO;
                NEG_TWO : muxed_q = NEG_THREE;
                default : decremented_invalid = 1; //For assertions only
            endcase
        end
    end

    ////////////////////////////////////////////////////
    //Assertions
    decrement_bad_quotient_assertion:
        assert property (@(posedge clk) disable iff (rst) (!decremented_invalid))
        else $error("Invalid decrement of quotient digit");

    decoding_bad_digit_assertion:
        assert property (@(posedge clk) disable iff (rst) (!(counter_nonzero & bad_quotient_digit)))
        else $error("Bad quotient digit for decoding");

    missed_lut_assertion:
        assert property (@(posedge clk) disable iff (rst) (!(counter_nonzero & not_in_table)))
        else $error("Sum out of range of quotient lookup");

endmodule
|
81
core/execution_units/fp_unit/divider/on_the_fly.sv
Normal file
81
core/execution_units/fp_unit/divider/on_the_fly.sv
Normal file
|
@ -0,0 +1,81 @@
|
|||
/*
|
||||
* Copyright © 2023 Chris Keilbart, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Chris Keilbart <ckeilbar@sfu.ca>
|
||||
*/
|
||||
|
||||
module on_the_fly

    import fpu_types::*;

    #(
        parameter WIDTH = 32
    )(
        input logic[WIDTH-1:0] current_Q,
        input logic[WIDTH-1:0] current_QM,
        input q_t q,
        output logic[WIDTH-1:0] next_Q,
        output logic[WIDTH-1:0] next_QM,
        output logic bad_quotient_digit //Only used for assertion
    );

    ////////////////////////////////////////////////////
    //On-the-fly conversion of signed radix 4 digits
    //current_Q is the quotient accumulated so far and current_QM is that value minus one.
    //Each step appends two bits:
    //  next_Q  = 4*Q + q      taken from Q when q >= 0, otherwise from QM with (4 + q) appended
    //  next_QM = 4*Q + q - 1  taken from Q when q >  0, otherwise from QM with (3 + q) appended
    //Both outputs are purely combinational.

    //Two bit encodings appended for the digit: qin for next_Q, qmin for next_QM
    logic[1:0] qin;
    logic[1:0] qmin;
    always_comb begin
        bad_quotient_digit = 0;
        unique case (q)
            POS_TWO,
            NEG_TWO: begin
                qin = 2'b10;
                qmin = 2'b01;
            end
            NEG_ONE: begin
                qin = 2'b11;
                qmin = 2'b10;
            end
            ZERO: begin
                qin = 2'b00;
                qmin = 2'b11;
            end
            NEG_THREE,
            POS_ONE: begin
                qin = 2'b01;
                qmin = 2'b00;
            end
            default: begin
                qin = 2'bXX;
                qmin = 2'bXX;
                bad_quotient_digit = 1;
            end
        endcase
    end

    always_comb begin
        //Negative digits borrow from the previous position, so they extend QM
        if (q == NEG_TWO || q == NEG_ONE || q == NEG_THREE)
            next_Q = {current_QM[WIDTH-3:0], qin};
        else
            next_Q = {current_Q[WIDTH-3:0], qin};

        //q-1 is negative for every non-positive digit, so those extend QM as well.
        //NEG_THREE is included: 4*Q - 4 == 4*(Q-1) == {QM, 2'b00}. Extending Q instead
        //would yield 4*Q and break the next_QM == next_Q - 1 relationship.
        if (q == NEG_TWO || q == NEG_ONE || q == ZERO || q == NEG_THREE)
            next_QM = {current_QM[WIDTH-3:0], qmin};
        else
            next_QM = {current_Q[WIDTH-3:0], qmin};
    end

endmodule
|
745
core/execution_units/fp_unit/divider/q_lookup.sv
Normal file
745
core/execution_units/fp_unit/divider/q_lookup.sv
Normal file
|
@ -0,0 +1,745 @@
|
|||
/*
|
||||
* Copyright © 2023 Chris Keilbart, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Chris Keilbart <ckeilbar@sfu.ca>
|
||||
*/
|
||||
|
||||
module q_lookup
|
||||
|
||||
import fpu_types::*;
|
||||
|
||||
(
|
||||
input logic[2:0] d,
|
||||
input logic[6:0] ws,
|
||||
input logic[6:0] wc,
|
||||
output q_t q,
|
||||
output logic not_in_table //Only used for assertion
|
||||
);
|
||||
|
||||
logic[6:0] combined;
|
||||
assign combined = ws + wc;
|
||||
|
||||
always_comb begin
|
||||
not_in_table = 0;
|
||||
//Table contents from "Digital Arithmetic" by Ercegovac and Lang
|
||||
unique case ({d, combined})
|
||||
10'b0001010100: q = NEG_TWO;
|
||||
10'b0001010101: q = NEG_TWO;
|
||||
10'b0001010110: q = NEG_TWO;
|
||||
10'b0001010111: q = NEG_TWO;
|
||||
10'b0001011000: q = NEG_TWO;
|
||||
10'b0001011001: q = NEG_TWO;
|
||||
10'b0001011010: q = NEG_TWO;
|
||||
10'b0001011011: q = NEG_TWO;
|
||||
10'b0001011100: q = NEG_TWO;
|
||||
10'b0001011101: q = NEG_TWO;
|
||||
10'b0001011110: q = NEG_TWO;
|
||||
10'b0001011111: q = NEG_TWO;
|
||||
10'b0001100000: q = NEG_TWO;
|
||||
10'b0001100001: q = NEG_TWO;
|
||||
10'b0001100010: q = NEG_TWO;
|
||||
10'b0001100011: q = NEG_TWO;
|
||||
10'b0001100100: q = NEG_TWO;
|
||||
10'b0001100101: q = NEG_TWO;
|
||||
10'b0001100110: q = NEG_TWO;
|
||||
10'b0001100111: q = NEG_TWO;
|
||||
10'b0001101000: q = NEG_TWO;
|
||||
10'b0001101001: q = NEG_TWO;
|
||||
10'b0001101010: q = NEG_TWO;
|
||||
10'b0001101011: q = NEG_TWO;
|
||||
10'b0001101100: q = NEG_TWO;
|
||||
10'b0001101101: q = NEG_TWO;
|
||||
10'b0001101110: q = NEG_TWO;
|
||||
10'b0001101111: q = NEG_TWO;
|
||||
10'b0001110000: q = NEG_TWO;
|
||||
10'b0001110001: q = NEG_TWO;
|
||||
10'b0001110010: q = NEG_TWO;
|
||||
10'b0001110011: q = NEG_ONE;
|
||||
10'b0001110100: q = NEG_ONE;
|
||||
10'b0001110101: q = NEG_ONE;
|
||||
10'b0001110110: q = NEG_ONE;
|
||||
10'b0001110111: q = NEG_ONE;
|
||||
10'b0001111000: q = NEG_ONE;
|
||||
10'b0001111001: q = NEG_ONE;
|
||||
10'b0001111010: q = NEG_ONE;
|
||||
10'b0001111011: q = NEG_ONE;
|
||||
10'b0001111100: q = ZERO;
|
||||
10'b0001111101: q = ZERO;
|
||||
10'b0001111110: q = ZERO;
|
||||
10'b0001111111: q = ZERO;
|
||||
10'b0000000000: q = ZERO;
|
||||
10'b0000000001: q = ZERO;
|
||||
10'b0000000010: q = ZERO;
|
||||
10'b0000000011: q = ZERO;
|
||||
10'b0000000100: q = POS_ONE;
|
||||
10'b0000000101: q = POS_ONE;
|
||||
10'b0000000110: q = POS_ONE;
|
||||
10'b0000000111: q = POS_ONE;
|
||||
10'b0000001000: q = POS_ONE;
|
||||
10'b0000001001: q = POS_ONE;
|
||||
10'b0000001010: q = POS_ONE;
|
||||
10'b0000001011: q = POS_ONE;
|
||||
10'b0000001100: q = POS_TWO;
|
||||
10'b0000001101: q = POS_TWO;
|
||||
10'b0000001110: q = POS_TWO;
|
||||
10'b0000001111: q = POS_TWO;
|
||||
10'b0000010000: q = POS_TWO;
|
||||
10'b0000010001: q = POS_TWO;
|
||||
10'b0000010010: q = POS_TWO;
|
||||
10'b0000010011: q = POS_TWO;
|
||||
10'b0000010100: q = POS_TWO;
|
||||
10'b0000010101: q = POS_TWO;
|
||||
10'b0000010110: q = POS_TWO;
|
||||
10'b0000010111: q = POS_TWO;
|
||||
10'b0000011000: q = POS_TWO;
|
||||
10'b0000011001: q = POS_TWO;
|
||||
10'b0000011010: q = POS_TWO;
|
||||
10'b0000011011: q = POS_TWO;
|
||||
10'b0000011100: q = POS_TWO;
|
||||
10'b0000011101: q = POS_TWO;
|
||||
10'b0000011110: q = POS_TWO;
|
||||
10'b0000011111: q = POS_TWO;
|
||||
10'b0000100000: q = POS_TWO;
|
||||
10'b0000100001: q = POS_TWO;
|
||||
10'b0000100010: q = POS_TWO;
|
||||
10'b0000100011: q = POS_TWO;
|
||||
10'b0000100100: q = POS_TWO;
|
||||
10'b0000100101: q = POS_TWO;
|
||||
10'b0000100110: q = POS_TWO;
|
||||
10'b0000100111: q = POS_TWO;
|
||||
10'b0000101000: q = POS_TWO;
|
||||
10'b0000101001: q = POS_TWO;
|
||||
10'b0000101010: q = POS_TWO;
|
||||
10'b0011010100: q = NEG_TWO;
|
||||
10'b0011010101: q = NEG_TWO;
|
||||
10'b0011010110: q = NEG_TWO;
|
||||
10'b0011010111: q = NEG_TWO;
|
||||
10'b0011011000: q = NEG_TWO;
|
||||
10'b0011011001: q = NEG_TWO;
|
||||
10'b0011011010: q = NEG_TWO;
|
||||
10'b0011011011: q = NEG_TWO;
|
||||
10'b0011011100: q = NEG_TWO;
|
||||
10'b0011011101: q = NEG_TWO;
|
||||
10'b0011011110: q = NEG_TWO;
|
||||
10'b0011011111: q = NEG_TWO;
|
||||
10'b0011100000: q = NEG_TWO;
|
||||
10'b0011100001: q = NEG_TWO;
|
||||
10'b0011100010: q = NEG_TWO;
|
||||
10'b0011100011: q = NEG_TWO;
|
||||
10'b0011100100: q = NEG_TWO;
|
||||
10'b0011100101: q = NEG_TWO;
|
||||
10'b0011100110: q = NEG_TWO;
|
||||
10'b0011100111: q = NEG_TWO;
|
||||
10'b0011101000: q = NEG_TWO;
|
||||
10'b0011101001: q = NEG_TWO;
|
||||
10'b0011101010: q = NEG_TWO;
|
||||
10'b0011101011: q = NEG_TWO;
|
||||
10'b0011101100: q = NEG_TWO;
|
||||
10'b0011101101: q = NEG_TWO;
|
||||
10'b0011101110: q = NEG_TWO;
|
||||
10'b0011101111: q = NEG_TWO;
|
||||
10'b0011110000: q = NEG_TWO;
|
||||
10'b0011110001: q = NEG_ONE;
|
||||
10'b0011110010: q = NEG_ONE;
|
||||
10'b0011110011: q = NEG_ONE;
|
||||
10'b0011110100: q = NEG_ONE;
|
||||
10'b0011110101: q = NEG_ONE;
|
||||
10'b0011110110: q = NEG_ONE;
|
||||
10'b0011110111: q = NEG_ONE;
|
||||
10'b0011111000: q = NEG_ONE;
|
||||
10'b0011111001: q = NEG_ONE;
|
||||
10'b0011111010: q = ZERO;
|
||||
10'b0011111011: q = ZERO;
|
||||
10'b0011111100: q = ZERO;
|
||||
10'b0011111101: q = ZERO;
|
||||
10'b0011111110: q = ZERO;
|
||||
10'b0011111111: q = ZERO;
|
||||
10'b0010000000: q = ZERO;
|
||||
10'b0010000001: q = ZERO;
|
||||
10'b0010000010: q = ZERO;
|
||||
10'b0010000011: q = ZERO;
|
||||
10'b0010000100: q = POS_ONE;
|
||||
10'b0010000101: q = POS_ONE;
|
||||
10'b0010000110: q = POS_ONE;
|
||||
10'b0010000111: q = POS_ONE;
|
||||
10'b0010001000: q = POS_ONE;
|
||||
10'b0010001001: q = POS_ONE;
|
||||
10'b0010001010: q = POS_ONE;
|
||||
10'b0010001011: q = POS_ONE;
|
||||
10'b0010001100: q = POS_ONE;
|
||||
10'b0010001101: q = POS_ONE;
|
||||
10'b0010001110: q = POS_TWO;
|
||||
10'b0010001111: q = POS_TWO;
|
||||
10'b0010010000: q = POS_TWO;
|
||||
10'b0010010001: q = POS_TWO;
|
||||
10'b0010010010: q = POS_TWO;
|
||||
10'b0010010011: q = POS_TWO;
|
||||
10'b0010010100: q = POS_TWO;
|
||||
10'b0010010101: q = POS_TWO;
|
||||
10'b0010010110: q = POS_TWO;
|
||||
10'b0010010111: q = POS_TWO;
|
||||
10'b0010011000: q = POS_TWO;
|
||||
10'b0010011001: q = POS_TWO;
|
||||
10'b0010011010: q = POS_TWO;
|
||||
10'b0010011011: q = POS_TWO;
|
||||
10'b0010011100: q = POS_TWO;
|
||||
10'b0010011101: q = POS_TWO;
|
||||
10'b0010011110: q = POS_TWO;
|
||||
10'b0010011111: q = POS_TWO;
|
||||
10'b0010100000: q = POS_TWO;
|
||||
10'b0010100001: q = POS_TWO;
|
||||
10'b0010100010: q = POS_TWO;
|
||||
10'b0010100011: q = POS_TWO;
|
||||
10'b0010100100: q = POS_TWO;
|
||||
10'b0010100101: q = POS_TWO;
|
||||
10'b0010100110: q = POS_TWO;
|
||||
10'b0010100111: q = POS_TWO;
|
||||
10'b0010101000: q = POS_TWO;
|
||||
10'b0010101001: q = POS_TWO;
|
||||
10'b0010101010: q = POS_TWO;
|
||||
10'b0101010100: q = NEG_TWO;
|
||||
10'b0101010101: q = NEG_TWO;
|
||||
10'b0101010110: q = NEG_TWO;
|
||||
10'b0101010111: q = NEG_TWO;
|
||||
10'b0101011000: q = NEG_TWO;
|
||||
10'b0101011001: q = NEG_TWO;
|
||||
10'b0101011010: q = NEG_TWO;
|
||||
10'b0101011011: q = NEG_TWO;
|
||||
10'b0101011100: q = NEG_TWO;
|
||||
10'b0101011101: q = NEG_TWO;
|
||||
10'b0101011110: q = NEG_TWO;
|
||||
10'b0101011111: q = NEG_TWO;
|
||||
10'b0101100000: q = NEG_TWO;
|
||||
10'b0101100001: q = NEG_TWO;
|
||||
10'b0101100010: q = NEG_TWO;
|
||||
10'b0101100011: q = NEG_TWO;
|
||||
10'b0101100100: q = NEG_TWO;
|
||||
10'b0101100101: q = NEG_TWO;
|
||||
10'b0101100110: q = NEG_TWO;
|
||||
10'b0101100111: q = NEG_TWO;
|
||||
10'b0101101000: q = NEG_TWO;
|
||||
10'b0101101001: q = NEG_TWO;
|
||||
10'b0101101010: q = NEG_TWO;
|
||||
10'b0101101011: q = NEG_TWO;
|
||||
10'b0101101100: q = NEG_TWO;
|
||||
10'b0101101101: q = NEG_TWO;
|
||||
10'b0101101110: q = NEG_TWO;
|
||||
10'b0101101111: q = NEG_TWO;
|
||||
10'b0101110000: q = NEG_ONE;
|
||||
10'b0101110001: q = NEG_ONE;
|
||||
10'b0101110010: q = NEG_ONE;
|
||||
10'b0101110011: q = NEG_ONE;
|
||||
10'b0101110100: q = NEG_ONE;
|
||||
10'b0101110101: q = NEG_ONE;
|
||||
10'b0101110110: q = NEG_ONE;
|
||||
10'b0101110111: q = NEG_ONE;
|
||||
10'b0101111000: q = NEG_ONE;
|
||||
10'b0101111001: q = NEG_ONE;
|
||||
10'b0101111010: q = ZERO;
|
||||
10'b0101111011: q = ZERO;
|
||||
10'b0101111100: q = ZERO;
|
||||
10'b0101111101: q = ZERO;
|
||||
10'b0101111110: q = ZERO;
|
||||
10'b0101111111: q = ZERO;
|
||||
10'b0100000000: q = ZERO;
|
||||
10'b0100000001: q = ZERO;
|
||||
10'b0100000010: q = ZERO;
|
||||
10'b0100000011: q = ZERO;
|
||||
10'b0100000100: q = POS_ONE;
|
||||
10'b0100000101: q = POS_ONE;
|
||||
10'b0100000110: q = POS_ONE;
|
||||
10'b0100000111: q = POS_ONE;
|
||||
10'b0100001000: q = POS_ONE;
|
||||
10'b0100001001: q = POS_ONE;
|
||||
10'b0100001010: q = POS_ONE;
|
||||
10'b0100001011: q = POS_ONE;
|
||||
10'b0100001100: q = POS_ONE;
|
||||
10'b0100001101: q = POS_ONE;
|
||||
10'b0100001110: q = POS_ONE;
|
||||
10'b0100001111: q = POS_TWO;
|
||||
10'b0100010000: q = POS_TWO;
|
||||
10'b0100010001: q = POS_TWO;
|
||||
10'b0100010010: q = POS_TWO;
|
||||
10'b0100010011: q = POS_TWO;
|
||||
10'b0100010100: q = POS_TWO;
|
||||
10'b0100010101: q = POS_TWO;
|
||||
10'b0100010110: q = POS_TWO;
|
||||
10'b0100010111: q = POS_TWO;
|
||||
10'b0100011000: q = POS_TWO;
|
||||
10'b0100011001: q = POS_TWO;
|
||||
10'b0100011010: q = POS_TWO;
|
||||
10'b0100011011: q = POS_TWO;
|
||||
10'b0100011100: q = POS_TWO;
|
||||
10'b0100011101: q = POS_TWO;
|
||||
10'b0100011110: q = POS_TWO;
|
||||
10'b0100011111: q = POS_TWO;
|
||||
10'b0100100000: q = POS_TWO;
|
||||
10'b0100100001: q = POS_TWO;
|
||||
10'b0100100010: q = POS_TWO;
|
||||
10'b0100100011: q = POS_TWO;
|
||||
10'b0100100100: q = POS_TWO;
|
||||
10'b0100100101: q = POS_TWO;
|
||||
10'b0100100110: q = POS_TWO;
|
||||
10'b0100100111: q = POS_TWO;
|
||||
10'b0100101000: q = POS_TWO;
|
||||
10'b0100101001: q = POS_TWO;
|
||||
10'b0100101010: q = POS_TWO;
|
||||
10'b0111010100: q = NEG_TWO;
|
||||
10'b0111010101: q = NEG_TWO;
|
||||
10'b0111010110: q = NEG_TWO;
|
||||
10'b0111010111: q = NEG_TWO;
|
||||
10'b0111011000: q = NEG_TWO;
|
||||
10'b0111011001: q = NEG_TWO;
|
||||
10'b0111011010: q = NEG_TWO;
|
||||
10'b0111011011: q = NEG_TWO;
|
||||
10'b0111011100: q = NEG_TWO;
|
||||
10'b0111011101: q = NEG_TWO;
|
||||
10'b0111011110: q = NEG_TWO;
|
||||
10'b0111011111: q = NEG_TWO;
|
||||
10'b0111100000: q = NEG_TWO;
|
||||
10'b0111100001: q = NEG_TWO;
|
||||
10'b0111100010: q = NEG_TWO;
|
||||
10'b0111100011: q = NEG_TWO;
|
||||
10'b0111100100: q = NEG_TWO;
|
||||
10'b0111100101: q = NEG_TWO;
|
||||
10'b0111100110: q = NEG_TWO;
|
||||
10'b0111100111: q = NEG_TWO;
|
||||
10'b0111101000: q = NEG_TWO;
|
||||
10'b0111101001: q = NEG_TWO;
|
||||
10'b0111101010: q = NEG_TWO;
|
||||
10'b0111101011: q = NEG_TWO;
|
||||
10'b0111101100: q = NEG_TWO;
|
||||
10'b0111101101: q = NEG_TWO;
|
||||
10'b0111101110: q = NEG_ONE;
|
||||
10'b0111101111: q = NEG_ONE;
|
||||
10'b0111110000: q = NEG_ONE;
|
||||
10'b0111110001: q = NEG_ONE;
|
||||
10'b0111110010: q = NEG_ONE;
|
||||
10'b0111110011: q = NEG_ONE;
|
||||
10'b0111110100: q = NEG_ONE;
|
||||
10'b0111110101: q = NEG_ONE;
|
||||
10'b0111110110: q = NEG_ONE;
|
||||
10'b0111110111: q = NEG_ONE;
|
||||
10'b0111111000: q = NEG_ONE;
|
||||
10'b0111111001: q = NEG_ONE;
|
||||
10'b0111111010: q = ZERO;
|
||||
10'b0111111011: q = ZERO;
|
||||
10'b0111111100: q = ZERO;
|
||||
10'b0111111101: q = ZERO;
|
||||
10'b0111111110: q = ZERO;
|
||||
10'b0111111111: q = ZERO;
|
||||
10'b0110000000: q = ZERO;
|
||||
10'b0110000001: q = ZERO;
|
||||
10'b0110000010: q = ZERO;
|
||||
10'b0110000011: q = ZERO;
|
||||
10'b0110000100: q = POS_ONE;
|
||||
10'b0110000101: q = POS_ONE;
|
||||
10'b0110000110: q = POS_ONE;
|
||||
10'b0110000111: q = POS_ONE;
|
||||
10'b0110001000: q = POS_ONE;
|
||||
10'b0110001001: q = POS_ONE;
|
||||
10'b0110001010: q = POS_ONE;
|
||||
10'b0110001011: q = POS_ONE;
|
||||
10'b0110001100: q = POS_ONE;
|
||||
10'b0110001101: q = POS_ONE;
|
||||
10'b0110001110: q = POS_ONE;
|
||||
10'b0110001111: q = POS_ONE;
|
||||
10'b0110010000: q = POS_TWO;
|
||||
10'b0110010001: q = POS_TWO;
|
||||
10'b0110010010: q = POS_TWO;
|
||||
10'b0110010011: q = POS_TWO;
|
||||
10'b0110010100: q = POS_TWO;
|
||||
10'b0110010101: q = POS_TWO;
|
||||
10'b0110010110: q = POS_TWO;
|
||||
10'b0110010111: q = POS_TWO;
|
||||
10'b0110011000: q = POS_TWO;
|
||||
10'b0110011001: q = POS_TWO;
|
||||
10'b0110011010: q = POS_TWO;
|
||||
10'b0110011011: q = POS_TWO;
|
||||
10'b0110011100: q = POS_TWO;
|
||||
10'b0110011101: q = POS_TWO;
|
||||
10'b0110011110: q = POS_TWO;
|
||||
10'b0110011111: q = POS_TWO;
|
||||
10'b0110100000: q = POS_TWO;
|
||||
10'b0110100001: q = POS_TWO;
|
||||
10'b0110100010: q = POS_TWO;
|
||||
10'b0110100011: q = POS_TWO;
|
||||
10'b0110100100: q = POS_TWO;
|
||||
10'b0110100101: q = POS_TWO;
|
||||
10'b0110100110: q = POS_TWO;
|
||||
10'b0110100111: q = POS_TWO;
|
||||
10'b0110101000: q = POS_TWO;
|
||||
10'b0110101001: q = POS_TWO;
|
||||
10'b0110101010: q = POS_TWO;
|
||||
10'b1001010100: q = NEG_TWO;
|
||||
10'b1001010101: q = NEG_TWO;
|
||||
10'b1001010110: q = NEG_TWO;
|
||||
10'b1001010111: q = NEG_TWO;
|
||||
10'b1001011000: q = NEG_TWO;
|
||||
10'b1001011001: q = NEG_TWO;
|
||||
10'b1001011010: q = NEG_TWO;
|
||||
10'b1001011011: q = NEG_TWO;
|
||||
10'b1001011100: q = NEG_TWO;
|
||||
10'b1001011101: q = NEG_TWO;
|
||||
10'b1001011110: q = NEG_TWO;
|
||||
10'b1001011111: q = NEG_TWO;
|
||||
10'b1001100000: q = NEG_TWO;
|
||||
10'b1001100001: q = NEG_TWO;
|
||||
10'b1001100010: q = NEG_TWO;
|
||||
10'b1001100011: q = NEG_TWO;
|
||||
10'b1001100100: q = NEG_TWO;
|
||||
10'b1001100101: q = NEG_TWO;
|
||||
10'b1001100110: q = NEG_TWO;
|
||||
10'b1001100111: q = NEG_TWO;
|
||||
10'b1001101000: q = NEG_TWO;
|
||||
10'b1001101001: q = NEG_TWO;
|
||||
10'b1001101010: q = NEG_TWO;
|
||||
10'b1001101011: q = NEG_TWO;
|
||||
10'b1001101100: q = NEG_ONE;
|
||||
10'b1001101101: q = NEG_ONE;
|
||||
10'b1001101110: q = NEG_ONE;
|
||||
10'b1001101111: q = NEG_ONE;
|
||||
10'b1001110000: q = NEG_ONE;
|
||||
10'b1001110001: q = NEG_ONE;
|
||||
10'b1001110010: q = NEG_ONE;
|
||||
10'b1001110011: q = NEG_ONE;
|
||||
10'b1001110100: q = NEG_ONE;
|
||||
10'b1001110101: q = NEG_ONE;
|
||||
10'b1001110110: q = NEG_ONE;
|
||||
10'b1001110111: q = NEG_ONE;
|
||||
10'b1001111000: q = ZERO;
|
||||
10'b1001111001: q = ZERO;
|
||||
10'b1001111010: q = ZERO;
|
||||
10'b1001111011: q = ZERO;
|
||||
10'b1001111100: q = ZERO;
|
||||
10'b1001111101: q = ZERO;
|
||||
10'b1001111110: q = ZERO;
|
||||
10'b1001111111: q = ZERO;
|
||||
10'b1000000000: q = ZERO;
|
||||
10'b1000000001: q = ZERO;
|
||||
10'b1000000010: q = ZERO;
|
||||
10'b1000000011: q = ZERO;
|
||||
10'b1000000100: q = ZERO;
|
||||
10'b1000000101: q = ZERO;
|
||||
10'b1000000110: q = POS_ONE;
|
||||
10'b1000000111: q = POS_ONE;
|
||||
10'b1000001000: q = POS_ONE;
|
||||
10'b1000001001: q = POS_ONE;
|
||||
10'b1000001010: q = POS_ONE;
|
||||
10'b1000001011: q = POS_ONE;
|
||||
10'b1000001100: q = POS_ONE;
|
||||
10'b1000001101: q = POS_ONE;
|
||||
10'b1000001110: q = POS_ONE;
|
||||
10'b1000001111: q = POS_ONE;
|
||||
10'b1000010000: q = POS_ONE;
|
||||
10'b1000010001: q = POS_ONE;
|
||||
10'b1000010010: q = POS_TWO;
|
||||
10'b1000010011: q = POS_TWO;
|
||||
10'b1000010100: q = POS_TWO;
|
||||
10'b1000010101: q = POS_TWO;
|
||||
10'b1000010110: q = POS_TWO;
|
||||
10'b1000010111: q = POS_TWO;
|
||||
10'b1000011000: q = POS_TWO;
|
||||
10'b1000011001: q = POS_TWO;
|
||||
10'b1000011010: q = POS_TWO;
|
||||
10'b1000011011: q = POS_TWO;
|
||||
10'b1000011100: q = POS_TWO;
|
||||
10'b1000011101: q = POS_TWO;
|
||||
10'b1000011110: q = POS_TWO;
|
||||
10'b1000011111: q = POS_TWO;
|
||||
10'b1000100000: q = POS_TWO;
|
||||
10'b1000100001: q = POS_TWO;
|
||||
10'b1000100010: q = POS_TWO;
|
||||
10'b1000100011: q = POS_TWO;
|
||||
10'b1000100100: q = POS_TWO;
|
||||
10'b1000100101: q = POS_TWO;
|
||||
10'b1000100110: q = POS_TWO;
|
||||
10'b1000100111: q = POS_TWO;
|
||||
10'b1000101000: q = POS_TWO;
|
||||
10'b1000101001: q = POS_TWO;
|
||||
10'b1000101010: q = POS_TWO;
|
||||
10'b1011010100: q = NEG_TWO;
|
||||
10'b1011010101: q = NEG_TWO;
|
||||
10'b1011010110: q = NEG_TWO;
|
||||
10'b1011010111: q = NEG_TWO;
|
||||
10'b1011011000: q = NEG_TWO;
|
||||
10'b1011011001: q = NEG_TWO;
|
||||
10'b1011011010: q = NEG_TWO;
|
||||
10'b1011011011: q = NEG_TWO;
|
||||
10'b1011011100: q = NEG_TWO;
|
||||
10'b1011011101: q = NEG_TWO;
|
||||
10'b1011011110: q = NEG_TWO;
|
||||
10'b1011011111: q = NEG_TWO;
|
||||
10'b1011100000: q = NEG_TWO;
|
||||
10'b1011100001: q = NEG_TWO;
|
||||
10'b1011100010: q = NEG_TWO;
|
||||
10'b1011100011: q = NEG_TWO;
|
||||
10'b1011100100: q = NEG_TWO;
|
||||
10'b1011100101: q = NEG_TWO;
|
||||
10'b1011100110: q = NEG_TWO;
|
||||
10'b1011100111: q = NEG_TWO;
|
||||
10'b1011101000: q = NEG_TWO;
|
||||
10'b1011101001: q = NEG_TWO;
|
||||
10'b1011101010: q = NEG_TWO;
|
||||
10'b1011101011: q = NEG_TWO;
|
||||
10'b1011101100: q = NEG_ONE;
|
||||
10'b1011101101: q = NEG_ONE;
|
||||
10'b1011101110: q = NEG_ONE;
|
||||
10'b1011101111: q = NEG_ONE;
|
||||
10'b1011110000: q = NEG_ONE;
|
||||
10'b1011110001: q = NEG_ONE;
|
||||
10'b1011110010: q = NEG_ONE;
|
||||
10'b1011110011: q = NEG_ONE;
|
||||
10'b1011110100: q = NEG_ONE;
|
||||
10'b1011110101: q = NEG_ONE;
|
||||
10'b1011110110: q = NEG_ONE;
|
||||
10'b1011110111: q = NEG_ONE;
|
||||
10'b1011111000: q = ZERO;
|
||||
10'b1011111001: q = ZERO;
|
||||
10'b1011111010: q = ZERO;
|
||||
10'b1011111011: q = ZERO;
|
||||
10'b1011111100: q = ZERO;
|
||||
10'b1011111101: q = ZERO;
|
||||
10'b1011111110: q = ZERO;
|
||||
10'b1011111111: q = ZERO;
|
||||
10'b1010000000: q = ZERO;
|
||||
10'b1010000001: q = ZERO;
|
||||
10'b1010000010: q = ZERO;
|
||||
10'b1010000011: q = ZERO;
|
||||
10'b1010000100: q = ZERO;
|
||||
10'b1010000101: q = ZERO;
|
||||
10'b1010000110: q = POS_ONE;
|
||||
10'b1010000111: q = POS_ONE;
|
||||
10'b1010001000: q = POS_ONE;
|
||||
10'b1010001001: q = POS_ONE;
|
||||
10'b1010001010: q = POS_ONE;
|
||||
10'b1010001011: q = POS_ONE;
|
||||
10'b1010001100: q = POS_ONE;
|
||||
10'b1010001101: q = POS_ONE;
|
||||
10'b1010001110: q = POS_ONE;
|
||||
10'b1010001111: q = POS_ONE;
|
||||
10'b1010010000: q = POS_ONE;
|
||||
10'b1010010001: q = POS_ONE;
|
||||
10'b1010010010: q = POS_ONE;
|
||||
10'b1010010011: q = POS_ONE;
|
||||
10'b1010010100: q = POS_TWO;
|
||||
10'b1010010101: q = POS_TWO;
|
||||
10'b1010010110: q = POS_TWO;
|
||||
10'b1010010111: q = POS_TWO;
|
||||
10'b1010011000: q = POS_TWO;
|
||||
10'b1010011001: q = POS_TWO;
|
||||
10'b1010011010: q = POS_TWO;
|
||||
10'b1010011011: q = POS_TWO;
|
||||
10'b1010011100: q = POS_TWO;
|
||||
10'b1010011101: q = POS_TWO;
|
||||
10'b1010011110: q = POS_TWO;
|
||||
10'b1010011111: q = POS_TWO;
|
||||
10'b1010100000: q = POS_TWO;
|
||||
10'b1010100001: q = POS_TWO;
|
||||
10'b1010100010: q = POS_TWO;
|
||||
10'b1010100011: q = POS_TWO;
|
||||
10'b1010100100: q = POS_TWO;
|
||||
10'b1010100101: q = POS_TWO;
|
||||
10'b1010100110: q = POS_TWO;
|
||||
10'b1010100111: q = POS_TWO;
|
||||
10'b1010101000: q = POS_TWO;
|
||||
10'b1010101001: q = POS_TWO;
|
||||
10'b1010101010: q = POS_TWO;
|
||||
10'b1101010100: q = NEG_TWO;
|
||||
10'b1101010101: q = NEG_TWO;
|
||||
10'b1101010110: q = NEG_TWO;
|
||||
10'b1101010111: q = NEG_TWO;
|
||||
10'b1101011000: q = NEG_TWO;
|
||||
10'b1101011001: q = NEG_TWO;
|
||||
10'b1101011010: q = NEG_TWO;
|
||||
10'b1101011011: q = NEG_TWO;
|
||||
10'b1101011100: q = NEG_TWO;
|
||||
10'b1101011101: q = NEG_TWO;
|
||||
10'b1101011110: q = NEG_TWO;
|
||||
10'b1101011111: q = NEG_TWO;
|
||||
10'b1101100000: q = NEG_TWO;
|
||||
10'b1101100001: q = NEG_TWO;
|
||||
10'b1101100010: q = NEG_TWO;
|
||||
10'b1101100011: q = NEG_TWO;
|
||||
10'b1101100100: q = NEG_TWO;
|
||||
10'b1101100101: q = NEG_TWO;
|
||||
10'b1101100110: q = NEG_TWO;
|
||||
10'b1101100111: q = NEG_TWO;
|
||||
10'b1101101000: q = NEG_TWO;
|
||||
10'b1101101001: q = NEG_TWO;
|
||||
10'b1101101010: q = NEG_ONE;
|
||||
10'b1101101011: q = NEG_ONE;
|
||||
10'b1101101100: q = NEG_ONE;
|
||||
10'b1101101101: q = NEG_ONE;
|
||||
10'b1101101110: q = NEG_ONE;
|
||||
10'b1101101111: q = NEG_ONE;
|
||||
10'b1101110000: q = NEG_ONE;
|
||||
10'b1101110001: q = NEG_ONE;
|
||||
10'b1101110010: q = NEG_ONE;
|
||||
10'b1101110011: q = NEG_ONE;
|
||||
10'b1101110100: q = NEG_ONE;
|
||||
10'b1101110101: q = NEG_ONE;
|
||||
10'b1101110110: q = NEG_ONE;
|
||||
10'b1101110111: q = NEG_ONE;
|
||||
10'b1101111000: q = ZERO;
|
||||
10'b1101111001: q = ZERO;
|
||||
10'b1101111010: q = ZERO;
|
||||
10'b1101111011: q = ZERO;
|
||||
10'b1101111100: q = ZERO;
|
||||
10'b1101111101: q = ZERO;
|
||||
10'b1101111110: q = ZERO;
|
||||
10'b1101111111: q = ZERO;
|
||||
10'b1100000000: q = ZERO;
|
||||
10'b1100000001: q = ZERO;
|
||||
10'b1100000010: q = ZERO;
|
||||
10'b1100000011: q = ZERO;
|
||||
10'b1100000100: q = ZERO;
|
||||
10'b1100000101: q = ZERO;
|
||||
10'b1100000110: q = ZERO;
|
||||
10'b1100000111: q = ZERO;
|
||||
10'b1100001000: q = POS_ONE;
|
||||
10'b1100001001: q = POS_ONE;
|
||||
10'b1100001010: q = POS_ONE;
|
||||
10'b1100001011: q = POS_ONE;
|
||||
10'b1100001100: q = POS_ONE;
|
||||
10'b1100001101: q = POS_ONE;
|
||||
10'b1100001110: q = POS_ONE;
|
||||
10'b1100001111: q = POS_ONE;
|
||||
10'b1100010000: q = POS_ONE;
|
||||
10'b1100010001: q = POS_ONE;
|
||||
10'b1100010010: q = POS_ONE;
|
||||
10'b1100010011: q = POS_ONE;
|
||||
10'b1100010100: q = POS_TWO;
|
||||
10'b1100010101: q = POS_TWO;
|
||||
10'b1100010110: q = POS_TWO;
|
||||
10'b1100010111: q = POS_TWO;
|
||||
10'b1100011000: q = POS_TWO;
|
||||
10'b1100011001: q = POS_TWO;
|
||||
10'b1100011010: q = POS_TWO;
|
||||
10'b1100011011: q = POS_TWO;
|
||||
10'b1100011100: q = POS_TWO;
|
||||
10'b1100011101: q = POS_TWO;
|
||||
10'b1100011110: q = POS_TWO;
|
||||
10'b1100011111: q = POS_TWO;
|
||||
10'b1100100000: q = POS_TWO;
|
||||
10'b1100100001: q = POS_TWO;
|
||||
10'b1100100010: q = POS_TWO;
|
||||
10'b1100100011: q = POS_TWO;
|
||||
10'b1100100100: q = POS_TWO;
|
||||
10'b1100100101: q = POS_TWO;
|
||||
10'b1100100110: q = POS_TWO;
|
||||
10'b1100100111: q = POS_TWO;
|
||||
10'b1100101000: q = POS_TWO;
|
||||
10'b1100101001: q = POS_TWO;
|
||||
10'b1100101010: q = POS_TWO;
|
||||
10'b1111010100: q = NEG_TWO;
|
||||
10'b1111010101: q = NEG_TWO;
|
||||
10'b1111010110: q = NEG_TWO;
|
||||
10'b1111010111: q = NEG_TWO;
|
||||
10'b1111011000: q = NEG_TWO;
|
||||
10'b1111011001: q = NEG_TWO;
|
||||
10'b1111011010: q = NEG_TWO;
|
||||
10'b1111011011: q = NEG_TWO;
|
||||
10'b1111011100: q = NEG_TWO;
|
||||
10'b1111011101: q = NEG_TWO;
|
||||
10'b1111011110: q = NEG_TWO;
|
||||
10'b1111011111: q = NEG_TWO;
|
||||
10'b1111100000: q = NEG_TWO;
|
||||
10'b1111100001: q = NEG_TWO;
|
||||
10'b1111100010: q = NEG_TWO;
|
||||
10'b1111100011: q = NEG_TWO;
|
||||
10'b1111100100: q = NEG_TWO;
|
||||
10'b1111100101: q = NEG_TWO;
|
||||
10'b1111100110: q = NEG_TWO;
|
||||
10'b1111100111: q = NEG_TWO;
|
||||
10'b1111101000: q = NEG_ONE;
|
||||
10'b1111101001: q = NEG_ONE;
|
||||
10'b1111101010: q = NEG_ONE;
|
||||
10'b1111101011: q = NEG_ONE;
|
||||
10'b1111101100: q = NEG_ONE;
|
||||
10'b1111101101: q = NEG_ONE;
|
||||
10'b1111101110: q = NEG_ONE;
|
||||
10'b1111101111: q = NEG_ONE;
|
||||
10'b1111110000: q = NEG_ONE;
|
||||
10'b1111110001: q = NEG_ONE;
|
||||
10'b1111110010: q = NEG_ONE;
|
||||
10'b1111110011: q = NEG_ONE;
|
||||
10'b1111110100: q = NEG_ONE;
|
||||
10'b1111110101: q = NEG_ONE;
|
||||
10'b1111110110: q = NEG_ONE;
|
||||
10'b1111110111: q = NEG_ONE;
|
||||
10'b1111111000: q = ZERO;
|
||||
10'b1111111001: q = ZERO;
|
||||
10'b1111111010: q = ZERO;
|
||||
10'b1111111011: q = ZERO;
|
||||
10'b1111111100: q = ZERO;
|
||||
10'b1111111101: q = ZERO;
|
||||
10'b1111111110: q = ZERO;
|
||||
10'b1111111111: q = ZERO;
|
||||
10'b1110000000: q = ZERO;
|
||||
10'b1110000001: q = ZERO;
|
||||
10'b1110000010: q = ZERO;
|
||||
10'b1110000011: q = ZERO;
|
||||
10'b1110000100: q = ZERO;
|
||||
10'b1110000101: q = ZERO;
|
||||
10'b1110000110: q = ZERO;
|
||||
10'b1110000111: q = ZERO;
|
||||
10'b1110001000: q = POS_ONE;
|
||||
10'b1110001001: q = POS_ONE;
|
||||
10'b1110001010: q = POS_ONE;
|
||||
10'b1110001011: q = POS_ONE;
|
||||
10'b1110001100: q = POS_ONE;
|
||||
10'b1110001101: q = POS_ONE;
|
||||
10'b1110001110: q = POS_ONE;
|
||||
10'b1110001111: q = POS_ONE;
|
||||
10'b1110010000: q = POS_ONE;
|
||||
10'b1110010001: q = POS_ONE;
|
||||
10'b1110010010: q = POS_ONE;
|
||||
10'b1110010011: q = POS_ONE;
|
||||
10'b1110010100: q = POS_ONE;
|
||||
10'b1110010101: q = POS_ONE;
|
||||
10'b1110010110: q = POS_ONE;
|
||||
10'b1110010111: q = POS_ONE;
|
||||
10'b1110011000: q = POS_TWO;
|
||||
10'b1110011001: q = POS_TWO;
|
||||
10'b1110011010: q = POS_TWO;
|
||||
10'b1110011011: q = POS_TWO;
|
||||
10'b1110011100: q = POS_TWO;
|
||||
10'b1110011101: q = POS_TWO;
|
||||
10'b1110011110: q = POS_TWO;
|
||||
10'b1110011111: q = POS_TWO;
|
||||
10'b1110100000: q = POS_TWO;
|
||||
10'b1110100001: q = POS_TWO;
|
||||
10'b1110100010: q = POS_TWO;
|
||||
10'b1110100011: q = POS_TWO;
|
||||
10'b1110100100: q = POS_TWO;
|
||||
10'b1110100101: q = POS_TWO;
|
||||
10'b1110100110: q = POS_TWO;
|
||||
10'b1110100111: q = POS_TWO;
|
||||
10'b1110101000: q = POS_TWO;
|
||||
10'b1110101001: q = POS_TWO;
|
||||
10'b1110101010: q = POS_TWO;
|
||||
default: begin
|
||||
q = q_t'(3'bXXX); //This prevents the tool from creating potentially costly default behaviour
|
||||
not_in_table = 1; //For assertions only
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
endmodule
|
293
core/execution_units/fp_unit/fp_add.sv
Normal file
293
core/execution_units/fp_unit/fp_add.sv
Normal file
|
@ -0,0 +1,293 @@
|
|||
/*
|
||||
* Copyright © 2019-2023 Yuhui Gao, Chris Keilbart, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Yuhui Gao <yuhuig@sfu.ca>
|
||||
* Chris Keilbart <ckeilbar@sfu.ca>
|
||||
*/
|
||||
|
||||
module fp_add

    import cva5_config::*;
    import cva5_types::*;
    import fpu_types::*;

    (
        input logic clk,
        input logic rst,
        input fp_add_inputs_t args,
        unit_issue_interface.unit issue,
        fp_intermediate_wb_interface.unit wb
    );

    //Floating-point adder/subtractor organised as three steps:
    //  Cycle 1: operand swap, alignment shift and special case detection (combinational on args)
    //  Cycle 2: sign-magnitude mantissa addition (operates on the first register stage)
    //  Cycle 3: leading zero count, exponent/sign selection and writeback (operates on the second register stage)
    //Signals declared as arrays carry one element per step; element 0 is combinational,
    //higher elements are the registered copies used by the later steps.

    //A stage accepts new data when it holds nothing valid or when the stage after it advances
    logic advance_to_add;
    logic advance_to_final;

    /////////////////////////////////////////////
    //Cycle 1
    //Swap and align arguments
    //Also detect special cases

    //Effective sign of rs2: unchanged for an add, inverted otherwise
    logic temp_rs2_sign;
    assign temp_rs2_sign = args.add ? args.rs2.d.sign : ~args.rs2.d.sign;

    //Special case handling
    logic nv[2:0];
    logic inf[2:0];
    logic qnan[1:0];
    logic subtract[2:0];
    logic zero_result_sign[2:0];
    logic inf_sign[2:0];

    logic input_snan;
    logic input_qnan;
    logic inf_minus_inf;

    //Operands of differing effective sign require a magnitude subtraction
    assign subtract[0] = args.rs1.d.sign ^ temp_rs2_sign;

    assign input_snan = args.rs1_special_case.snan | args.rs2_special_case.snan;
    assign input_qnan = args.rs1_special_case.qnan | args.rs2_special_case.qnan;
    assign inf_minus_inf = args.rs1_special_case.inf & args.rs2_special_case.inf & subtract[0];

    //Invalid on SNAN or "magnitude subtraction of infinities"
    assign nv[0] = input_snan | inf_minus_inf;
    //Any NaN input or any invalid operation produces a NaN
    assign qnan[0] = nv[0] | input_qnan;
    //An infinite input gives an infinite result unless a NaN takes precedence
    assign inf[0] = ~qnan[0] & (args.rs1_special_case.inf | args.rs2_special_case.inf);
    assign inf_sign[0] = args.rs1_special_case.inf ? args.rs1.d.sign : temp_rs2_sign;
    //Sign used for a zero result of a subtraction: set only for rounding mode 3'b010
    //NOTE(review): 3'b010 should be RDN in the RISC-V frm encoding - confirm against the spec
    assign zero_result_sign[0] = args.rm == 3'b010;

    //Swap arguments, moving input with larger expo to rs1
    logic rs1_sign[2:0];
    expo_d_t rs1_expo[2:0];
    logic rs1_expo_overflow[2:0];
    logic[FRAC_WIDTH+1:0] rs1_frac[1:0];
    logic[FRAC_WIDTH+1:0] rs2_frac[0:0];
    grs_t rs1_grs[1:0];
    grs_t temp_rs2_grs;

    //Mantissas extended with their safe and hidden bits
    logic[FRAC_WIDTH+1:0] rs1_frac_ext;
    logic[FRAC_WIDTH+1:0] rs2_frac_ext;
    assign rs1_frac_ext = {args.rs1_safe, args.rs1_hidden, args.rs1.d.frac};
    assign rs2_frac_ext = {args.rs2_safe, args.rs2_hidden, args.rs2.d.frac};

    always_comb begin
        //Swapped arrangement: rs2 takes the "rs1" role and the incoming GRS bits follow rs1 into the shifter
        rs1_sign[0] = temp_rs2_sign;
        rs1_expo_overflow[0] = 1'b0;
        rs1_expo[0] = args.rs2.d.expo;
        rs1_frac[0] = rs2_frac_ext;
        rs1_grs[0] = '0;
        rs2_frac[0] = rs1_frac_ext;
        temp_rs2_grs = args.fp_add_grs;

        //Unswapped arrangement: operands keep their roles
        if (~args.swap) begin
            rs1_sign[0] = args.rs1.d.sign;
            rs1_expo_overflow[0] = args.rs1_expo_overflow;
            rs1_expo[0] = args.rs1.d.expo;
            rs1_frac[0] = rs1_frac_ext;
            rs1_grs[0] = args.fp_add_grs;
            rs2_frac[0] = rs2_frac_ext;
            temp_rs2_grs = '0;
        end
    end

    //Alignment through shifting
    logic shift_sticky[1:0];
    logic[FRAC_WIDTH+1:0] rs2_frac_aligned[1:0];
    grs_t rs2_grs[1:0];
    logic[FRAC_WIDTH+GRS_WIDTH+1:0] shifter_input;

    assign shifter_input = {rs2_frac[0], temp_rs2_grs};
    assign {rs2_frac_aligned[0], rs2_grs[0]} = shifter_input >> args.expo_diff;

    //If the shift amount is too large, bits might get shifted out so this checks for them
    fp_sticky_tracking #(.INPUT_WIDTH($bits(shifter_input)), .SHIFT_WIDTH(EXPO_WIDTH+1)) sticky_tracking (
        .shifter_input(shifter_input),
        .shift_amount(args.expo_diff),
        .sticky_bit(shift_sticky[0])
    );

    //Pipeline to next stage
    logic valid_r;
    rm_t rm_r;
    id_t id_r;
    logic d2s_r;

    assign advance_to_add = ~valid_r | advance_to_final;

    //Valid bit of the first register stage (the only resettable state of this stage)
    always_ff @(posedge clk) begin
        if (rst)
            valid_r <= 0;
        else if (advance_to_add)
            valid_r <= issue.new_request;
    end

    //Data of the first register stage; captured whenever the stage advances
    always_ff @(posedge clk) begin
        if (advance_to_add) begin
            //Request metadata
            id_r <= issue.id;
            rm_r <= args.rm;
            d2s_r <= args.single;

            //Special case flags
            nv[1] <= nv[0];
            qnan[1] <= qnan[0];
            inf[1] <= inf[0];
            inf_sign[1] <= inf_sign[0];
            subtract[1] <= subtract[0];
            zero_result_sign[1] <= zero_result_sign[0];

            //Operand with the larger exponent
            rs1_sign[1] <= rs1_sign[0];
            rs1_expo[1] <= rs1_expo[0];
            rs1_expo_overflow[1] <= rs1_expo_overflow[0];
            rs1_frac[1] <= rs1_frac[0];
            rs1_grs[1] <= rs1_grs[0];

            //Aligned operand
            rs2_frac_aligned[1] <= rs2_frac_aligned[0];
            rs2_grs[1] <= rs2_grs[0];
            shift_sticky[1] <= shift_sticky[0];
        end
    end


    /////////////////////////////////////////////
    //Cycle 2
    //Perform the sign-magnitude mantissa addition
    //Coded as an adder followed by negation, but the tools will transform this into two parallel additions with a muxing of the result
    //Negation is only required for different sign addition that returns a negative result
    logic[FRAC_WIDTH+GRS_WIDTH+2:0] adder_in1;
    logic[FRAC_WIDTH+GRS_WIDTH+2:0] adder_in2, adder_in2_1s;
    logic carry_add;
    logic negate_sum;
    grs_t grs_add;
    logic[FRAC_WIDTH+1:0] frac_add;
    logic sticky_add;
    logic[1+GRS_WIDTH+FRAC_WIDTH+2-1:0] sum;
    logic[1+GRS_WIDTH+FRAC_WIDTH+2-1:0] sum_final;

    //Both operands are laid out as {frac, grs, sticky}; only the shifted operand can have a sticky bit
    assign adder_in1 = {rs1_frac[1], rs1_grs[1], 1'b0};
    assign adder_in2 = {rs2_frac_aligned[1], rs2_grs[1], shift_sticky[1]};
    //For subtraction in2 is inverted here and the +1 is supplied as the carry-in of the addition below
    assign adder_in2_1s = adder_in2 ^ {(FRAC_WIDTH+GRS_WIDTH+3){subtract[1]}};

    assign {carry_add, sum} = adder_in1 + adder_in2_1s + {{(FRAC_WIDTH+GRS_WIDTH+2){1'b0}}, subtract[1]};

    //During a subtraction the absence of a carry-out means adder_in1 < adder_in2,
    //i.e. the difference is negative and must be negated to recover its magnitude
    assign negate_sum = subtract[1] & ~carry_add;
    assign sum_final = negate_sum ? -sum : sum;
    assign {frac_add, grs_add, sticky_add} = sum_final;


    //Pipeline to next stage
    logic[FRAC_WIDTH+1:0] result_frac;
    grs_t result_grs;
    logic result_carry_out;
    logic output_special;
    logic result_expo_zero;

    assign advance_to_final = wb.ack | ~wb.done;

    //Valid bit of the second register stage doubles as the writeback done signal
    always_ff @(posedge clk) begin
        if (rst)
            wb.done <= 0;
        else if (advance_to_final)
            wb.done <= valid_r;
    end

    //Data of the second register stage
    always_ff @(posedge clk) begin
        if (advance_to_final) begin
            //Request metadata
            wb.id <= id_r;
            wb.rm <= rm_r;
            wb.d2s <= d2s_r;

            //Special case flags
            nv[2] <= nv[1];
            inf[2] <= inf[1];
            inf_sign[2] <= inf_sign[1];
            subtract[2] <= subtract[1];
            zero_result_sign[2] <= zero_result_sign[1];
            output_special <= inf[1] | qnan[1];

            //Larger-exponent operand information
            rs1_sign[2] <= rs1_sign[1];
            rs1_expo[2] <= rs1_expo[1];
            rs1_expo_overflow[2] <= rs1_expo_overflow[1];
            result_expo_zero <= ~|rs1_expo[1];

            //Adder result
            result_carry_out <= carry_add;
            result_frac <= frac_add;
            result_grs[GRS_WIDTH-1:1] <= grs_add[GRS_WIDTH-1:1];
            result_grs[0] <= grs_add[0] | sticky_add; //Don't lose the sticky
        end
    end


    /////////////////////////////////////////////
    //Cycle 3
    //Find CLZ and determine shift amount
    //Override on special case and drive outputs
    logic result_zero;
    logic[$clog2(FRAC_WIDTH+1+GRS_WIDTH)-1:0] clz_count;
    clz #(.WIDTH(FRAC_WIDTH+1+GRS_WIDTH)) shift_clz (
        .clz_input({result_frac[FRAC_WIDTH:0], result_grs}),
        .clz(clz_count),
        .zero(result_zero)
    );

    //Determine exponent and sign
    logic carry_set;
    logic output_zero;
    logic result_sign;
    logic result_expo_overflow;
    logic subnormal_promotion;
    logic subnormal_demotion;
    expo_d_t result_expo;
    fp_shift_amt_t clz_shift_amt;

    //A carry-out only indicates mantissa growth for same-sign addition
    assign carry_set = result_carry_out & ~subtract[2];
    //Zero only if nothing is set in the counted bits, the safe bit or the carry
    assign output_zero = result_zero & ~result_frac[FRAC_WIDTH+1] & ~carry_set;
    //A zero produced by subtraction takes the rounding-mode dependent sign;
    //otherwise the sign follows rs1, flipped when the difference had to be negated
    assign result_sign = output_zero & subtract[2] ? zero_result_sign[2] : (~result_carry_out & subtract[2]) ^ rs1_sign[2];
    assign result_expo_overflow = rs1_expo_overflow[2] & ~output_zero;

    //CLZ count zero-extended to the shift amount width
    always_comb begin
        clz_shift_amt = '0;
        clz_shift_amt[$bits(clz_count)-1:0] = clz_count;
    end

    //Zero input exponent but the result reaches the hidden bit position or above
    assign subnormal_promotion = result_expo_zero & (result_frac[FRAC_WIDTH] | carry_set | result_frac[FRAC_WIDTH+1]);
    //Nonzero input exponent but the required left shift is at least as large as the exponent
    assign subnormal_demotion = (clz_shift_amt >= rs1_expo[2]) & ~result_expo_zero & ~result_frac[FRAC_WIDTH+1] & ~carry_set;

    always_comb begin
        if (output_zero)
            result_expo = '0;
        else if (subnormal_promotion)
            result_expo = 1; //Will be added to the right shift amount to get the correct exponent
        else if (subnormal_demotion)
            result_expo = rs1_expo[2] - 1;
        else
            result_expo = rs1_expo[2];
    end

    //Result for special cases: canonical NaN unless the infinity case applies
    fp_t special_result;
    always_comb begin
        special_result.raw = CANONICAL_NAN;
        if (inf[2]) begin
            special_result.d.sign = inf_sign[2];
            special_result.d.expo = '1;
            special_result.d.frac = '0;
        end
    end

    //Writeback
    assign issue.ready = advance_to_add;

    //Only the invalid flag is produced here
    assign wb.fflags.nv = nv[2];
    assign wb.fflags.of = 0;
    assign wb.fflags.uf = 0;
    assign wb.fflags.dz = 0;
    assign wb.fflags.nx = 0; //Will be set by normalization

    //Mantissa extension bits; special results suppress carry/safe/grs and force the hidden bit
    assign wb.carry = carry_set & ~output_special;
    assign wb.safe = result_frac[FRAC_WIDTH+1] & ~output_special;
    assign wb.hidden = result_frac[FRAC_WIDTH] | output_special;
    assign wb.grs = output_special ? '0 : result_grs;

    //Left shift request: only for nonzero, non-special results
    always_comb begin
        wb.clz = '0;
        if (~(output_zero | output_special))
            wb.clz[$bits(clz_count)-1:0] = clz_count;
    end

    //Result value
    always_comb begin
        if (output_special)
            wb.rd = special_result;
        else begin
            wb.rd.d.sign = result_sign;
            wb.rd.d.expo = result_expo;
            wb.rd.d.frac = result_frac[FRAC_WIDTH-1:0];
        end
    end

    //Right shift request when the mantissa grew past the hidden bit position
    assign wb.right_shift = ~output_special & (result_frac[FRAC_WIDTH+1] | carry_set);
    assign wb.right_shift_amt = {{(EXPO_WIDTH-2){1'b0}}, carry_set, result_frac[FRAC_WIDTH+1] & ~carry_set}; //Either 1 or 2

    assign wb.expo_overflow = result_expo_overflow & ~output_special;
    assign wb.subnormal = ~|result_expo & ~output_special & ~wb.right_shift & ~result_expo_overflow;
    assign wb.ignore_max_expo = output_special;

endmodule
|
203
core/execution_units/fp_unit/fp_div.sv
Normal file
203
core/execution_units/fp_unit/fp_div.sv
Normal file
|
@ -0,0 +1,203 @@
|
|||
/*
|
||||
* Copyright © 2019-2023 Yuhui Gao, Chris Keilbart, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Yuhui Gao <yuhuig@sfu.ca>
|
||||
* Chris Keilbart <ckeilbar@sfu.ca>
|
||||
*/
|
||||
|
||||
module fp_div

    import cva5_config::*;
    import fpu_types::*;

    (
        input logic clk,
        input logic rst,
        input fp_div_inputs_t args,
        unit_issue_interface.unit issue,
        fp_intermediate_wb_interface.unit wb
    );

    //Floating-point divider.
    //Special cases (NaN, infinity, zero) bypass the iterative mantissa divider and are flagged by early_exit;
    //all other requests start the divider core and complete when it reports done.
    //A result is held until wb.ack, and a new request may be accepted in the same cycle as that ack.

    unsigned_division_interface #(.DATA_WIDTH(FRAC_WIDTH+3)) div();

    ////////////////////////////////////////////////////
    //Implementation
    //Iterative divider core, bypassed on special cases
    logic result_sign;
    logic busy;
    //Ready when idle, or when the outstanding result is being acknowledged this cycle
    assign issue.ready = ~busy | wb.ack;

    always_ff @(posedge clk) begin
        if (rst)
            busy <= 0;
        else begin
            if (wb.ack)
                busy <= 0;
            if (issue.new_request) //Takes priority: a request accepted on the ack cycle keeps the unit busy
                busy <= 1;
        end
        if (issue.new_request)
            result_sign <= args.rs1.d.sign ^ args.rs2.d.sign;
    end

    ////////////////////////////////////////////////////
    //Special cases
    //Edge cases like NaN, infinity, and zero don't require division so return immediately
    logic nv, nv_r;
    logic dz, dz_r;
    logic qnan, qnan_r;
    logic inf;
    logic zero, zero_r;
    logic early_exit;
    fp_t special_result;

    //Special case handling
    //Invalid: 0/0, inf/inf, or any signalling NaN
    assign nv = (args.rs1_special_case.zero & args.rs2_special_case.zero) | (args.rs1_special_case.inf & args.rs2_special_case.inf) | args.rs1_special_case.snan | args.rs2_special_case.snan;
    //Divide by zero: rs1 has no special case flag set and rs2 is zero
    assign dz = ~|args.rs1_special_case & args.rs2_special_case.zero;
    assign qnan = nv | args.rs1_special_case.qnan | args.rs2_special_case.qnan;
    assign inf = ~qnan & (dz | args.rs1_special_case.inf);
    assign zero = ~qnan & (args.rs1_special_case.zero | args.rs2_special_case.inf);

    always_ff @(posedge clk) begin
        if (rst)
            early_exit <= 0;
        //A new request must take priority over the ack of the previous result: issue.ready allows both
        //in the same cycle, and clearing early_exit then would leave a special-case request (which never
        //starts the divider) without any source for wb.done. This mirrors the priority of busy.
        else if (issue.new_request)
            early_exit <= qnan | inf | zero;
        else if (wb.ack)
            early_exit <= 0;

        if (issue.new_request) begin
            nv_r <= nv;
            dz_r <= dz;
            qnan_r <= qnan;
            zero_r <= zero;
        end
    end

    //Result returned on an early exit: signed zero, canonical NaN, or signed infinity
    always_comb begin
        if (zero_r) begin
            special_result.d.sign = result_sign;
            special_result.raw[FLEN-2:0] = '0;
        end
        else if (qnan_r)
            special_result.raw = CANONICAL_NAN;
        else begin
            special_result.d.sign = result_sign;
            special_result.d.expo = '1;
            special_result.d.frac = '0;
        end
    end

    ////////////////////////////////////////////////////
    //Mantissa division core
    //Designed to be swappable (though note that only a subset of the division interface ports are used)
    //Operates on normalized values and width is extended to compute guard/round/sticky
    logic result_hidden;
    frac_d_t result_frac;
    logic[1:0] result_gr;
    fp_shift_amt_t left_shift_amt;

    assign div.dividend = {1'b1, args.rs1.d.frac, 2'b0};
    assign div.divisor = {1'b1, args.rs2.d.frac, 2'b0};
    assign div.start = issue.new_request & ~(qnan | inf | zero); //start div only if no special cases
    assign {result_hidden, result_frac, result_gr} = div.quotient;
    fp_div_core div_core (
        .div(div),
    .*);

    //Calculate CLZ: because 0.5 < result < 2, the shift amount is either 0 or 1
    assign left_shift_amt[EXPO_WIDTH-1:1] = '0;
    assign left_shift_amt[0] = ~result_hidden;

    ////////////////////////////////////////////////////
    //Exponent handling
    //Subtract exponents
    //Special considerations for subnormal numbers
    logic right_shift;
    fp_shift_amt_t right_shift_amt;
    logic[EXPO_WIDTH+1:0] expo_intermediate;
    logic[EXPO_WIDTH+1:0] expo_intermediate_r;

    //Each operand's exponent is adjusted by its missing hidden bit and its prenormalization shift
    //before the difference is taken and the bias is added back
    assign expo_intermediate =
        ({1'b0, args.rs1.d.expo} + {{EXPO_WIDTH{1'b0}}, ~args.rs1_hidden} - {1'b0, args.rs1_prenormalize_shift_amt}) -
        ({1'b0, args.rs2.d.expo} + {{EXPO_WIDTH{1'b0}}, ~args.rs2_hidden} - {1'b0, args.rs2_prenormalize_shift_amt})
        + BIAS;

    //Right shift when the top bit of the intermediate exponent is set, or when all bits above bit 0 are
    //clear and either bit 0 is clear or the quotient has no hidden bit
    assign right_shift = expo_intermediate_r[EXPO_WIDTH+1] | (~|expo_intermediate_r[EXPO_WIDTH:1] & ((~result_hidden & expo_intermediate_r[0]) | ~expo_intermediate_r[0]));
    assign right_shift_amt = ~expo_intermediate_r[EXPO_WIDTH-1:0] + 2;

    always_ff @(posedge clk) begin
        if (issue.new_request)
            expo_intermediate_r <= expo_intermediate;
    end


    ////////////////////////////////////////////////////
    //Output management
    //Either return the early exit values starting on cycle 1, or the regular values once the divider finishes
    //Both are held until acknowledged: div_hold remembers a finished division, early_exit remembers a special case
    logic div_hold;
    assign wb.done = div.done | div_hold | early_exit;

    always_ff @(posedge clk) begin
        if (rst)
            div_hold <= 0;
        else
            div_hold <= ~wb.ack & (div.done | div_hold);
    end

    always_ff @(posedge clk) begin
        if (issue.new_request) begin
            wb.id <= issue.id;
            wb.rm <= args.rm;
            wb.d2s <= args.single;
        end
    end

    //Output selection is keyed on early_exit rather than on a one-cycle delayed copy of new_request:
    //early_exit stays set for as long as wb.done is held for a special case, so the special result
    //remains on the outputs even if wb.ack is delayed past the first cycle
    always_comb begin
        if (early_exit)
            wb.rd = special_result;
        else begin
            wb.rd.d.sign = result_sign;
            wb.rd.d.expo = expo_intermediate_r[EXPO_WIDTH-1:0];
            wb.rd.d.frac = result_frac;
        end
    end
    //Note that this overflow detection also captures subnormal numbers but they are ignored when subnormal is set
    assign wb.expo_overflow = expo_intermediate_r[EXPO_WIDTH] & ~early_exit;
    assign wb.fflags.nv = nv_r;
    assign wb.fflags.dz = dz_r;
    //Set in writeback
    assign wb.fflags.of = 0;
    assign wb.fflags.uf = 0;
    assign wb.fflags.nx = 0;
    assign wb.carry = 0;
    assign wb.safe = 0;
    //Special results carry a hidden bit unless they are zero
    assign wb.hidden = (early_exit & ~zero_r) | (~early_exit & result_hidden);
    //Guard/round bits from the quotient followed by the remainder, zero padded to the GRS width
    assign wb.grs = early_exit ? '0 : {result_gr, div.remainder, {(GRS_WIDTH-FRAC_WIDTH-5){1'b0}}};
    assign wb.clz = early_exit ? '0 : left_shift_amt;
    assign wb.subnormal = ~early_exit & right_shift;
    assign wb.right_shift = ~early_exit & right_shift;
    assign wb.right_shift_amt = right_shift_amt;
    assign wb.ignore_max_expo = early_exit;

endmodule
|
106
core/execution_units/fp_unit/fp_div_sqrt_wrapper.sv
Normal file
106
core/execution_units/fp_unit/fp_div_sqrt_wrapper.sv
Normal file
|
@ -0,0 +1,106 @@
|
|||
/*
|
||||
* Copyright © 2019-2023 Yuhui Gao, Chris Keilbart, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Yuhui Gao <yuhuig@sfu.ca>
|
||||
* Chris Keilbart <ckeilbar@sfu.ca>
|
||||
*/
|
||||
|
||||
module fp_div_sqrt_wrapper

    import cva5_config::*;
    import fpu_types::*;

    (
        input logic clk,
        input logic rst,
        input fp_div_inputs_t div_inputs,
        input fp_sqrt_inputs_t sqrt_inputs,
        unit_issue_interface.unit div_issue,
        unit_issue_interface.unit sqrt_issue,
        fp_intermediate_wb_interface.unit wb
    );

    //Per-unit writeback interfaces that are merged onto the single wb port below
    fp_intermediate_wb_interface div_wb();
    fp_intermediate_wb_interface sqrt_wb();

    ////////////////////////////////////////////////////
    //Implementation
    //Div/Sqrt with distinct issue
    //Shared writeback
    fp_div div (
        .args(div_inputs),
        .issue(div_issue),
        .wb(div_wb),
    .*);

    fp_sqrt sqrt (
        .args(sqrt_inputs),
        .issue(sqrt_issue),
        .wb(sqrt_wb),
    .*);

    //SQRT has higher priority on ties because of longer latency
    //The shared ack is steered to sqrt whenever it is done, and to div otherwise
    assign sqrt_wb.ack = wb.ack & sqrt_wb.done;
    assign div_wb.ack = wb.ack & ~sqrt_wb.done;

    always_comb begin
        //Forward the divider by default
        wb.id = div_wb.id;
        wb.done = div_wb.done;
        wb.rd = div_wb.rd;
        wb.rm = div_wb.rm;
        wb.d2s = div_wb.d2s;
        wb.fflags = div_wb.fflags;
        wb.expo_overflow = div_wb.expo_overflow;
        wb.carry = div_wb.carry;
        wb.safe = div_wb.safe;
        wb.hidden = div_wb.hidden;
        wb.clz = div_wb.clz;
        wb.right_shift = div_wb.right_shift;
        wb.right_shift_amt = div_wb.right_shift_amt;
        wb.subnormal = div_wb.subnormal;
        wb.ignore_max_expo = div_wb.ignore_max_expo;
        //Collapse sticky - this saves a wide 2:1 mux
        wb.grs[GRS_WIDTH-1-:3] = div_wb.grs[GRS_WIDTH-1-:3]; //Preserve MSB sticky because there can be a left shift of 1
        wb.grs[GRS_WIDTH-4] = |div_wb.grs[GRS_WIDTH-4:0];
        wb.grs[GRS_WIDTH-5:0] = '0;

        //A finished square root overrides every output
        if (sqrt_wb.done) begin
            wb.id = sqrt_wb.id;
            wb.done = 1;
            wb.rd = sqrt_wb.rd;
            wb.rm = sqrt_wb.rm;
            wb.d2s = sqrt_wb.d2s;
            wb.fflags = sqrt_wb.fflags;
            wb.expo_overflow = sqrt_wb.expo_overflow;
            wb.carry = sqrt_wb.carry;
            wb.safe = sqrt_wb.safe;
            wb.hidden = sqrt_wb.hidden;
            wb.clz = sqrt_wb.clz;
            wb.right_shift = sqrt_wb.right_shift;
            wb.right_shift_amt = sqrt_wb.right_shift_amt;
            wb.subnormal = sqrt_wb.subnormal;
            wb.ignore_max_expo = sqrt_wb.ignore_max_expo;
            //Collapse sticky - only the top two bits are kept, everything below is reduced into one bit
            wb.grs[GRS_WIDTH-1-:2] = sqrt_wb.grs[GRS_WIDTH-1-:2];
            wb.grs[GRS_WIDTH-3] = |sqrt_wb.grs[GRS_WIDTH-3:0];
            wb.grs[GRS_WIDTH-4:0] = '0;
        end
    end

endmodule
|
101
core/execution_units/fp_unit/fp_madd_wrapper.sv
Normal file
101
core/execution_units/fp_unit/fp_madd_wrapper.sv
Normal file
|
@ -0,0 +1,101 @@
|
|||
/*
|
||||
* Copyright © 2019-2023 Yuhui Gao, Chris Keilbart, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Yuhui Gao <yuhuig@sfu.ca>
|
||||
* Chris Keilbart <ckeilbar@sfu.ca>
|
||||
*/
|
||||
|
||||
module fp_madd_wrapper

    import cva5_config::*;
    import fpu_types::*;
    import cva5_types::*;

    #(
        parameter cpu_config_t CONFIG = EXAMPLE_CONFIG
    )
    (
        input logic clk,
        input logic rst,
        input fp_madd_inputs_t args,
        unit_issue_interface.unit issue,
        fp_intermediate_wb_interface.unit madd_wb,
        fp_intermediate_wb_interface.unit mul_wb
    );

    ////////////////////////////////////////////////////
    //Overview
    //Combines a multiplier core (fp_mul) and an adder core (fp_add) behind one issue port.
    // - args.add selects the adder for the incoming request, otherwise the multiplier is used
    // - The multiplier writes plain multiplications back on mul_wb
    // - For FMA, the multiplier produces adder operands which are registered here and then
    //   issued to the adder; the adder writes back on madd_wb
    //NOTE: signal names are deliberately kept stable, both instances use .* port connection

    //Internal issue ports, one per core
    unit_issue_interface mul_issue();
    unit_issue_interface add_issue();

    //Multiplier -> adder handoff for FMA
    logic fma_valid;
    id_t fma_id;
    fp_add_inputs_t fma_mul_outputs;

    //Registered copy of the handoff
    logic fma_valid_r;
    id_t fma_id_r;
    fp_add_inputs_t fma_mul_outputs_r;

    //The handoff register can be (re)loaded when it is empty or when the adder takes its contents
    logic fma_advance;

    //Operands actually presented to the adder
    fp_add_inputs_t add_inputs;

    /////////////////////////////////////////////
    //Multiplication unit
    //Receives every request that is not a pure addition
    assign mul_issue.id = issue.id;
    assign mul_issue.new_request = issue.new_request & ~args.add;

    fp_mul #(.CONFIG(CONFIG)) mul_core (
        .mul_args(args.mul_args),
        .fma(args.fma),
        .fma_args(args.fma_args),
        .issue(mul_issue),
        .wb(mul_wb),
        .add_ready(fma_advance),
        .add_valid(fma_valid),
        .add_id(fma_id),
        .add_args(fma_mul_outputs),
        .*);

    /////////////////////////////////////////////
    //FMA handoff register
    //It would probably be possible to use the multiplier outputs directly without registering
    //if some of the exponent logic in the multiplier was pushed to an earlier cycle
    assign fma_advance = add_issue.ready | ~fma_valid_r;

    //Valid bit is the only state that needs a reset
    always_ff @(posedge clk) begin
        if (rst)
            fma_valid_r <= 0;
        else if (fma_advance)
            fma_valid_r <= fma_valid;
    end

    //Payload
    always_ff @(posedge clk) begin
        if (fma_advance) begin
            fma_mul_outputs_r <= fma_mul_outputs;
            fma_id_r <= fma_id;
        end
    end

    /////////////////////////////////////////////
    //Addition unit
    //Fed either by a pending FMA (registered multiplier outputs) or by an add instruction;
    //a pending FMA always wins
    assign add_inputs = fma_valid_r ? fma_mul_outputs_r : args.add_args;
    assign add_issue.id = fma_valid_r ? fma_id_r : issue.id;
    assign add_issue.new_request = (args.add & issue.new_request) | fma_valid_r;

    fp_add add_core (
        .args(add_inputs),
        .issue(add_issue),
        .wb(madd_wb),
        .*);

    /////////////////////////////////////////////
    //Ready to the issue stage
    //Additions additionally require that no FMA is waiting for the adder
    assign issue.ready = (args.add & ~fma_valid_r & add_issue.ready) | (~args.add & mul_issue.ready);

endmodule
|
298
core/execution_units/fp_unit/fp_mul.sv
Normal file
298
core/execution_units/fp_unit/fp_mul.sv
Normal file
|
@ -0,0 +1,298 @@
|
|||
/*
|
||||
* Copyright © 2019-2023 Yuhui Gao, Chris Keilbart, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Yuhui Gao <yuhuig@sfu.ca>
|
||||
* Chris Keilbart <ckeilbar@sfu.ca>
|
||||
*/
|
||||
|
||||
module fp_mul

    import cva5_config::*;
    import cva5_types::*;
    import fpu_types::*;

    #(
        parameter cpu_config_t CONFIG = EXAMPLE_CONFIG
    )
    (
        input logic clk,
        input logic rst,
        input fp_mul_inputs_t mul_args,
        input logic fma,
        input fp_fma_inputs_t fma_args,
        unit_issue_interface.unit issue,
        fp_intermediate_wb_interface.unit wb,
        input logic add_ready,
        output logic add_valid,
        output id_t add_id,
        output fp_add_inputs_t add_args
    );

    ////////////////////////////////////////////////////
    //Overview
    //Three-stage multiplier: index [0] of the arrays below is the issue cycle,
    //[1] is the first register stage and [2] is the output register stage.
    //A finished operation is presented either on wb (plain multiply) or on the
    //add_* ports (FMA, to be consumed by the adder), never on both.

    localparam HALF_GRS_WIDTH = GRS_WIDTH/2;

    ////////////////////////////////////////////////////
    //Flow control
    logic advance_to_mul2;
    logic advance_to_final;
    logic mul2_valid; //An operation occupies the first register stage
    logic mul2_fma; //...and it is an FMA

    //The output stage may be reloaded when it is empty or when its contents are being consumed
    assign advance_to_final = (add_valid & add_ready) | (wb.done & wb.ack) | (~add_valid & ~wb.done);
    assign advance_to_mul2 = advance_to_final | ~mul2_valid;
    assign issue.ready = advance_to_mul2;

    /////////////////////////////////////////////
    //Cycle 1
    //Special case detection and unpacking

    //Per-stage sideband
    logic nv[2:0];
    logic inf[2:0];
    logic qnan[2:0];
    logic true_zero[2:0];
    logic subnormal_zero[2:0];
    id_t id[2:0];
    rm_t rm[2:0];
    logic d2s[2:0];
    logic sign_xor[2:0];
    expo_d_t rs1_expo[1:0];
    expo_d_t rs2_expo[1:0];
    fp_shift_amt_t rs2_prenormalize_shift_amt[1:0];
    fp_fma_inputs_t fma_info[2:0];

    //Short aliases for the operand classification
    logic rs1_zero;
    logic rs1_inf;
    logic rs1_snan;
    logic rs1_qnan;
    logic rs2_zero;
    logic rs2_inf;
    logic rs2_snan;
    logic rs2_qnan;

    assign rs1_zero = mul_args.rs1_special_case.zero;
    assign rs1_inf = mul_args.rs1_special_case.inf;
    assign rs1_snan = mul_args.rs1_special_case.snan;
    assign rs1_qnan = mul_args.rs1_special_case.qnan;
    assign rs2_zero = mul_args.rs2_special_case.zero;
    assign rs2_inf = mul_args.rs2_special_case.inf;
    assign rs2_snan = mul_args.rs2_special_case.snan;
    assign rs2_qnan = mul_args.rs2_special_case.qnan;

    //Invalid: 0*inf, inf*0, or any signalling NaN operand
    assign nv[0] = (rs1_zero & rs2_inf) | (rs1_inf & rs2_zero) | rs1_snan | rs2_snan;
    //NaN result: invalid operation or any NaN operand
    assign qnan[0] = nv[0] | rs1_snan | rs1_qnan | rs2_snan | rs2_qnan;
    //Infinite result: an infinite operand whose partner is not zero, and no NaN involved
    assign inf[0] = ~qnan[0] & ((rs1_inf & ~rs2_zero) | (rs2_inf & ~rs1_zero));
    //Exact zero result
    assign true_zero[0] = ~qnan[0] & (rs1_zero | rs2_zero);
    //The exponent logic can only handle one subnormal argument.
    //Two subnormal arguments produce a zero mantissa with the sticky bit set
    assign subnormal_zero[0] = ~(mul_args.rs1_hidden | mul_args.rs2_hidden | true_zero[0]);

    //Unpacking
    assign id[0] = issue.id;
    assign rm[0] = mul_args.rm;
    assign d2s[0] = mul_args.single;
    assign fma_info[0] = fma_args;
    assign sign_xor[0] = mul_args.rs1.d.sign ^ mul_args.rs2.d.sign;
    assign rs1_expo[0] = mul_args.rs1.d.expo;
    //rs2 exponent is incremented by one when its hidden bit is clear
    assign rs2_expo[0] = mul_args.rs2.d.expo + {{(EXPO_WIDTH-1){1'b0}}, ~mul_args.rs2_hidden};
    assign rs2_prenormalize_shift_amt[0] = mul_args.rs2_prenormalize_shift_amt;

    ////////////////////////////////////////////////////
    //Multiplication itself
    //Pipelined over 2 cycles: operands are registered first, the product one stage later
    logic[FRAC_WIDTH:0] mul_in1;
    logic[FRAC_WIDTH:0] mul_in2;
    logic[2*FRAC_WIDTH+2-1:0] intermediate_frac;

    //Stage 1 valid
    always_ff @(posedge clk) begin
        if (rst)
            mul2_valid <= 0;
        else if (advance_to_mul2)
            mul2_valid <= issue.new_request;
    end

    //Stage 1 payload
    always_ff @(posedge clk) begin
        if (advance_to_mul2) begin
            mul2_fma <= fma;

            //Multiplier operands, hidden bit position is always driven to 1 here
            mul_in1 <= {1'b1, mul_args.rs1.d.frac};
            mul_in2 <= {1'b1, mul_args.rs2.d.frac};

            id[1] <= id[0];
            rm[1] <= rm[0];
            d2s[1] <= d2s[0];
            fma_info[1] <= fma_info[0];
            sign_xor[1] <= sign_xor[0];
            rs1_expo[1] <= rs1_expo[0];
            rs2_expo[1] <= rs2_expo[0];
            rs2_prenormalize_shift_amt[1] <= rs2_prenormalize_shift_amt[0];

            nv[1] <= nv[0];
            qnan[1] <= qnan[0];
            inf[1] <= inf[0];
            true_zero[1] <= true_zero[0];
            subnormal_zero[1] <= subnormal_zero[0];
        end
    end

    /////////////////////////////////////////////
    //Cycle 2
    //Second half of the multiplication
    //Exponent logic depends on the presence of subnormal numbers
    logic[EXPO_WIDTH+1:0] signed_expo;
    logic[EXPO_WIDTH:0] neg_signed_expo;
    logic[EXPO_WIDTH:0] intermediate_expo;
    logic intermediate_expo_is_zero;

    //Negative intermediate expo -> subnormal result
    //To normalize a subnormal result, the exponent is set to abs(intermediate expo), and the frac
    //is right shifted by the same amount. Normalization handles driving the expo_norm to 0
    assign signed_expo = {1'b0, rs1_expo[1]} + ({1'b0, rs2_expo[1]} - {1'b0, rs2_prenormalize_shift_amt[1]}) - {2'b0, {(EXPO_WIDTH-1){1'b1}}};
    assign neg_signed_expo = -signed_expo[EXPO_WIDTH:0];
    assign intermediate_expo = signed_expo[EXPO_WIDTH+1] ? neg_signed_expo : signed_expo[EXPO_WIDTH:0];
    assign intermediate_expo_is_zero = ~|signed_expo;

    //Output stage registers
    logic result_expo_overflow;
    expo_d_t result_expo;
    logic[EXPO_WIDTH+1:0] result_expo_diff;
    logic result_expo_is_negative;
    logic result_expo_is_zero;
    logic output_special;

    //Output stage valid: exactly one of wb.done / add_valid is raised per operation
    always_ff @(posedge clk) begin
        if (rst) begin
            add_valid <= 0;
            wb.done <= 0;
        end
        else if (advance_to_final) begin
            add_valid <= mul2_valid & mul2_fma;
            wb.done <= mul2_valid & ~mul2_fma;
        end
    end

    //Output stage payload
    always_ff @(posedge clk) begin
        if (advance_to_final) begin
            intermediate_frac <= mul_in1 * mul_in2;

            id[2] <= id[1];
            rm[2] <= rm[1];
            d2s[2] <= d2s[1];
            fma_info[2] <= fma_info[1];
            sign_xor[2] <= sign_xor[1];

            nv[2] <= nv[1];
            qnan[2] <= qnan[1];
            inf[2] <= inf[1];
            true_zero[2] <= true_zero[1];
            subnormal_zero[2] <= subnormal_zero[1];
            output_special <= inf[1] | qnan[1] | true_zero[1] | subnormal_zero[1];

            result_expo <= intermediate_expo[EXPO_WIDTH-1:0];
            result_expo_overflow <= intermediate_expo[EXPO_WIDTH];
            result_expo_is_negative <= signed_expo[EXPO_WIDTH+1];
            result_expo_is_zero <= intermediate_expo_is_zero;
            result_expo_diff <= signed_expo;
        end
    end

    /////////////////////////////////////////////
    //Output
    //Finalize multiplication outputs
    //Create FMA arguments
    logic result_safe;
    logic result_hidden;
    frac_d_t result_frac;
    logic[HALF_GRS_WIDTH-1:0] result_grs;
    logic result_is_subnormal;

    //Top of the product: safe bit, hidden bit, then the fraction
    assign result_safe = intermediate_frac[2*FRAC_WIDTH+1];
    assign result_hidden = intermediate_frac[2*FRAC_WIDTH];
    assign result_frac = intermediate_frac[2*FRAC_WIDTH-1-:FRAC_WIDTH];
    //There is no reduction for the full grs, but this accommodates optional intermediate rounding
    assign result_grs = {intermediate_frac[FRAC_WIDTH-1-:HALF_GRS_WIDTH-1], |intermediate_frac[FRAC_WIDTH-HALF_GRS_WIDTH:0]};

    assign result_is_subnormal = (result_expo_is_zero & ~result_safe) | result_expo_is_negative;

    //Special case handling
    //Only consumed when output_special is set; infinity takes precedence over NaN, everything else is a signed zero
    fp_t special_result;

    always_comb begin
        if (~inf[2] & qnan[2])
            special_result.raw = CANONICAL_NAN;
        else begin
            special_result.d.sign = sign_xor[2];
            special_result.d.frac = '0;
            if (inf[2])
                special_result.d.expo = '1;
            else
                special_result.d.expo = '0;
        end
    end

    //Writeback
    assign wb.id = id[2];
    assign wb.rm = rm[2];
    assign wb.d2s = d2s[2];

    assign wb.fflags.nv = nv[2];
    assign wb.fflags.dz = 0;
    assign wb.fflags.of = 0;
    assign wb.fflags.uf = 0;
    assign wb.fflags.nx = 0; //Will be set by normalization

    assign wb.carry = 0;
    assign wb.safe = result_safe;
    assign wb.hidden = output_special ? (qnan[2] | inf[2]) : result_hidden;
    assign wb.clz = '0;
    assign wb.ignore_max_expo = output_special;

    assign wb.expo_overflow = ~output_special & result_expo_overflow;
    assign wb.subnormal = ~output_special & result_is_subnormal;
    assign wb.right_shift = ~output_special & (result_is_subnormal | result_safe);
    //If the result is subnormal, right shift frac by 1 extra position
    assign wb.right_shift_amt = result_is_subnormal ? result_expo+1 : 1;

    //Guard/round/sticky bits
    always_comb begin
        wb.grs = '0;
        if (subnormal_zero[2])
            wb.grs[0] = 1'b1; //Result is some nonzero number - set sticky
        else if (~output_special)
            wb.grs[GRS_WIDTH-1-:HALF_GRS_WIDTH] = result_grs;
    end

    //Result value
    always_comb begin
        if (~output_special) begin
            wb.rd.d.sign = sign_xor[2];
            wb.rd.d.expo = result_expo;
            wb.rd.d.frac = result_frac;
        end
        else
            wb.rd = special_result;
    end

    //FMA args
    //The product becomes rs1 of the addition, rs3 of the FMA becomes rs2
    assign add_id = id[2];
    assign add_args.rm = rm[2];
    assign add_args.single = d2s[2];
    assign add_args.add = fma_info[2].add_sign;
    assign add_args.fp_add_grs = wb.grs;

    assign add_args.rs1.d.sign = wb.rd.d.sign ^ fma_info[2].mul_sign;
    assign add_args.rs1.d.expo = result_expo_is_negative ? '0 : wb.rd.d.expo;
    assign add_args.rs1.d.frac = wb.rd.d.frac;
    assign add_args.rs1_expo_overflow = wb.expo_overflow;
    assign add_args.rs1_hidden = wb.hidden;
    assign add_args.rs1_safe = wb.safe & ~subnormal_zero[2];
    assign add_args.rs1_special_case.zero = true_zero[2] | subnormal_zero[2];
    assign add_args.rs1_special_case.inf = inf[2];
    assign add_args.rs1_special_case.qnan = qnan[2];
    assign add_args.rs1_special_case.snan = nv[2];

    assign add_args.rs2 = fma_info[2].rs3;
    assign add_args.rs2_hidden = fma_info[2].rs3_hidden;
    assign add_args.rs2_safe = 0;
    assign add_args.rs2_special_case = fma_info[2].rs3_special_case;

    //Compare exponents for swapping
    //A negative difference requests a swap, the magnitude is passed on as expo_diff
    logic rs3_add;
    logic[EXPO_WIDTH+1:0] expo_diff;
    logic[EXPO_WIDTH:0] expo_diff_negate;
    logic[EXPO_WIDTH+1:0] expo_diff_rs1;

    assign rs3_add = ~fma_info[2].rs3_hidden;
    assign expo_diff_rs1 = result_expo_is_negative & ~output_special ? result_expo_diff : {1'b0, wb.expo_overflow, wb.rd.d.expo};
    assign expo_diff = expo_diff_rs1 - ({2'b0, fma_info[2].rs3.d.expo} + {{(EXPO_WIDTH){1'b0}}, 1'b0, rs3_add});
    assign expo_diff_negate = -expo_diff[EXPO_WIDTH:0];
    assign add_args.swap = expo_diff[EXPO_WIDTH+1];
    assign add_args.expo_diff = expo_diff[EXPO_WIDTH+1] ? expo_diff_negate : expo_diff[EXPO_WIDTH:0];

endmodule
|
410
core/execution_units/fp_unit/fp_normalize_rounding_top.sv
Normal file
410
core/execution_units/fp_unit/fp_normalize_rounding_top.sv
Normal file
|
@ -0,0 +1,410 @@
|
|||
/*
|
||||
* Copyright © 2019-2023 Yuhui Gao, Chris Keilbart, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Yuhui Gao <yuhuig@sfu.ca>
|
||||
* Chris Keilbart <ckeilbar@sfu.ca>
|
||||
*/
|
||||
|
||||
module fp_normalize_rounding_top

    import cva5_config::*;
    import fpu_types::*;
    import cva5_types::*;

    #(
        parameter int unsigned NUM_WB_UNITS = 4
    )(
        input logic clk,
        input logic rst,
        fp_intermediate_wb_interface.wb intermediate_wb[NUM_WB_UNITS-1:0], //Priority order highest to lowest
        unit_writeback_interface.unit wb,
        output fflags_t fflags
    );

    ////////////////////////////////////////////////////
    //Overview
    //Shared back end for the floating point units:
    // 1. Selects one pending intermediate result (lowest index wins)
    // 2. Normalize stage: determines shift direction/amount and the adjusted exponents
    // 3. Shift stage: performs the shift and extracts the rounding bits
    // 4. Round stage: applies the roundup, detects overflow and produces rd and fflags
    //Each stage holds a valid bit and advances when empty or when the next stage advances

    localparam SHIFT_WIDTH = 3+FRAC_WIDTH+GRS_WIDTH;

    //Bit reversal, lets a single right shifter also perform left shifts
    function logic[SHIFT_WIDTH-1:0] reverse(input logic[SHIFT_WIDTH-1:0] in);
        for (int i = 0; i < SHIFT_WIDTH; i++)
            reverse[i] = in[SHIFT_WIDTH-1-i];
    endfunction

    //Contents of the normalize stage: the selected unit's intermediate result
    typedef struct packed {
        id_t id;
        logic valid;
        fp_t data;
        logic expo_overflow;
        fflags_t fflags;
        rm_t rm;
        logic d2s;
        logic carry;
        logic safe;
        logic hidden;
        grs_t grs;
        fp_shift_amt_t clz;
        logic subnormal;
        logic right_shift;
        fp_shift_amt_t right_shift_amt;
        logic ignore_max_expo;
    } fp_normalize_packet_t;

    //Contents of the shift stage: shifter operand plus the precomputed exponents
    typedef struct packed {
        logic valid;
        id_t id;
        fflags_t fflags;
        rm_t rm;
        logic d2s;
        logic sign_norm;
        expo_d_t expo_norm;
        logic expo_overflow_norm;
        logic right_shift;
        fp_shift_amt_t shift_amt;
        logic sp_overflow;
        logic[EXPO_WIDTH_F-1:0] sp_expo;
        logic[SHIFT_WIDTH-1:0] shifter_in;
    } fp_shift_packet_t;

    //Contents of the round stage: normalized value plus the bits deciding the roundup
    typedef struct packed {
        id_t id;
        logic valid;
        fp_t data;
        logic expo_overflow;
        logic hidden;
        rm_t rm;
        fflags_t fflags;
        logic d2s;
        logic round_lsb;
        logic[2:0] round_grs;
        logic[1:0] tiny_rs;
    } fp_round_packet_t;

    ////////////////////////////////////////////////////
    //Implementation
    //First chooses a writeback request
    //Then normalizes through shifting and rounds
    logic advance_norm;
    logic advance_shift;
    logic advance_round;
    fp_normalize_packet_t normalize_packet;
    fp_shift_packet_t shift_packet;
    fp_round_packet_t round_packet;

    //Back-pressure chain, from the final stage towards the units
    assign advance_round = ~round_packet.valid | wb.ack;
    assign advance_shift = ~shift_packet.valid | advance_round;
    assign advance_norm = ~normalize_packet.valid | advance_shift;

    ////////////////////////////////////////////////////
    //Writeback
    //Chooses a writeback request in descending priority order
    //First unpacks interface signals so they can be dynamically indexed
    logic[$clog2(NUM_WB_UNITS)-1:0] unit_sel;
    //TODO: false circular dependency because misc_wb2fp uses ack as ready
    //unit_done[2:0] -> unit_ack[3] -> wb2fp.ack -> wb2fp.ready -> issue_to[4] -> wb2fp.new_request -> unit_done[3]
    /* verilator lint_off UNOPTFLAT */
    logic[NUM_WB_UNITS-1:0] unit_ack;
    /* verilator lint_on UNOPTFLAT */
    logic[NUM_WB_UNITS-1:0] unit_done;
    id_t[NUM_WB_UNITS-1:0] unit_instruction_id;
    fp_t[NUM_WB_UNITS-1:0] unit_rd;
    logic[NUM_WB_UNITS-1:0] unit_expo_overflow;
    fflags_t[NUM_WB_UNITS-1:0] unit_fflags;
    rm_t[NUM_WB_UNITS-1:0] unit_rm;
    logic[NUM_WB_UNITS-1:0] unit_d2s;
    logic[NUM_WB_UNITS-1:0] unit_carry;
    logic[NUM_WB_UNITS-1:0] unit_safe;
    logic[NUM_WB_UNITS-1:0] unit_hidden;
    grs_t[NUM_WB_UNITS-1:0] unit_grs;
    fp_shift_amt_t[NUM_WB_UNITS-1:0] unit_clz;
    logic[NUM_WB_UNITS-1:0] unit_subnormal;
    logic[NUM_WB_UNITS-1:0] unit_right_shift;
    fp_shift_amt_t[NUM_WB_UNITS-1:0] unit_right_shift_amt;
    logic[NUM_WB_UNITS-1:0] unit_ignore_max_expo;

    generate for (genvar i = 0; i < NUM_WB_UNITS; i++) begin : gen_unpack
        //Handshake
        assign intermediate_wb[i].ack = unit_ack[i];
        assign unit_done[i] = intermediate_wb[i].done;
        //Payload
        assign unit_instruction_id[i] = intermediate_wb[i].id;
        assign unit_rd[i] = intermediate_wb[i].rd;
        assign unit_expo_overflow[i] = intermediate_wb[i].expo_overflow;
        assign unit_fflags[i] = intermediate_wb[i].fflags;
        assign unit_rm[i] = intermediate_wb[i].rm;
        assign unit_d2s[i] = intermediate_wb[i].d2s;
        assign unit_carry[i] = intermediate_wb[i].carry;
        assign unit_safe[i] = intermediate_wb[i].safe;
        assign unit_hidden[i] = intermediate_wb[i].hidden;
        assign unit_grs[i] = intermediate_wb[i].grs;
        assign unit_clz[i] = intermediate_wb[i].clz;
        assign unit_subnormal[i] = intermediate_wb[i].subnormal;
        assign unit_right_shift[i] = intermediate_wb[i].right_shift;
        assign unit_right_shift_amt[i] = intermediate_wb[i].right_shift_amt;
        assign unit_ignore_max_expo[i] = intermediate_wb[i].ignore_max_expo;
    end endgenerate

    //Unit selection
    //The lowest index with done set is chosen. The selected unit (and only it) sees
    //advance_norm as its ack, regardless of whether it is actually done
    always_comb begin
        unit_sel = $bits(unit_sel)'(NUM_WB_UNITS-1); //Must default to lowest priority because any other units override
        for (int i = NUM_WB_UNITS-2; i >= 0; i--) begin
            if (unit_done[i])
                unit_sel = $bits(unit_sel)'(i);
        end

        unit_ack = '0;
        unit_ack[unit_sel] = advance_norm;
    end

    //Normalize stage valid
    always_ff @(posedge clk) begin
        if (rst)
            normalize_packet.valid <= 0;
        else if (advance_norm)
            normalize_packet.valid <= |unit_done;
    end

    //Normalize stage payload
    always_ff @(posedge clk) begin
        if (advance_norm) begin
            normalize_packet.id <= unit_instruction_id[unit_sel];
            normalize_packet.rm <= unit_rm[unit_sel];
            normalize_packet.d2s <= unit_d2s[unit_sel];
            normalize_packet.fflags <= unit_fflags[unit_sel];
            normalize_packet.data <= unit_rd[unit_sel];
            normalize_packet.expo_overflow <= unit_expo_overflow[unit_sel];
            normalize_packet.carry <= unit_carry[unit_sel];
            normalize_packet.safe <= unit_safe[unit_sel];
            normalize_packet.hidden <= unit_hidden[unit_sel];
            normalize_packet.grs <= unit_grs[unit_sel];
            normalize_packet.clz <= unit_clz[unit_sel];
            normalize_packet.subnormal <= unit_subnormal[unit_sel];
            normalize_packet.right_shift <= unit_right_shift[unit_sel];
            normalize_packet.right_shift_amt <= unit_right_shift_amt[unit_sel];
            normalize_packet.ignore_max_expo <= unit_ignore_max_expo[unit_sel];
        end
    end

    ////////////////////////////////////////////////////
    //Normalization
    //Determine the shift amount and direction according to the exponent
    //Potentially flip the mantissa
    logic right_shift;
    logic dp_overflow;
    logic sp_overflow;
    fp_shift_amt_t shift_amt;
    expo_d_t dp_expo;
    expo_s_t sp_expo;
    logic[SHIFT_WIDTH-1:0] in_left;
    logic[SHIFT_WIDTH-1:0] in_right;

    fp_prenormalize normalize_inst(
        .single(normalize_packet.d2s),
        .right_shift_in(normalize_packet.right_shift),
        .overflow_in(normalize_packet.expo_overflow),
        .subnormal(normalize_packet.subnormal),
        .expo_in(normalize_packet.data.d.expo),
        .ignore_max_expo(normalize_packet.ignore_max_expo),
        .left_shift_amt(normalize_packet.clz),
        .right_shift_amt(normalize_packet.right_shift_amt),

        .right_shift_out(right_shift),
        .dp_overflow_out(dp_overflow),
        .sp_overflow_out(sp_overflow),
        .shift_amt_out(shift_amt),
        .dp_expo_out(dp_expo),
        .sp_expo_out(sp_expo)
    );

    //Shifter input
    //Left shifts use the bit-reversed operand so that only a right shifter is needed
    assign in_right = {normalize_packet.carry, normalize_packet.safe, normalize_packet.hidden, normalize_packet.data.d.frac, normalize_packet.grs};
    assign in_left = reverse(in_right);

    //Shift stage valid
    always_ff @(posedge clk) begin
        if (rst)
            shift_packet.valid <= 0;
        else if (advance_shift)
            shift_packet.valid <= normalize_packet.valid;
    end

    //Shift stage payload
    always_ff @(posedge clk) begin
        if (advance_shift) begin
            //Carried over unchanged
            shift_packet.id <= normalize_packet.id;
            shift_packet.rm <= normalize_packet.rm;
            shift_packet.d2s <= normalize_packet.d2s;
            shift_packet.fflags <= normalize_packet.fflags;
            shift_packet.sign_norm <= normalize_packet.data.d.sign;
            //Results of prenormalization
            shift_packet.right_shift <= right_shift;
            shift_packet.shift_amt <= shift_amt;
            shift_packet.shifter_in <= right_shift ? in_right : in_left;
            shift_packet.expo_norm <= dp_expo;
            shift_packet.expo_overflow_norm <= dp_overflow;
            shift_packet.sp_expo <= sp_expo;
            shift_packet.sp_overflow <= sp_overflow;
        end
    end

    ////////////////////////////////////////////////////
    //Shifting and Roundup
    //Extremely wide right shifter, output is flipped for left shifts
    //Extracts the bits used for determining rounding
    logic[SHIFT_WIDTH-1:0] shift_intermediate;
    logic[SHIFT_WIDTH-1:0] shift_final;
    grs_t grs_norm;
    frac_d_t frac_norm;
    logic round_lsb;
    logic[2:0] round_grs;
    logic[1:0] tiny_rs;

    assign shift_intermediate = shift_packet.shifter_in >> shift_packet.shift_amt;
    assign shift_final = shift_packet.right_shift ? shift_intermediate : reverse(shift_intermediate);
    assign frac_norm = shift_final[GRS_WIDTH+FRAC_WIDTH-1:GRS_WIDTH];
    assign grs_norm = shift_final[GRS_WIDTH-1:0];

    //Right shifts may lose sticky bits - keep track
    logic sticky;
    logic set_sticky;
    fp_sticky_tracking #(.INPUT_WIDTH(SHIFT_WIDTH), .SHIFT_WIDTH(EXPO_WIDTH)) right_sticky (
        .shifter_input(shift_packet.shifter_in),
        .shift_amount(shift_packet.shift_amt),
        .sticky_bit(sticky)
    );
    assign set_sticky = shift_packet.right_shift & sticky;

    //GRS extraction for rounding
    //RISC-V specifies that tininess must be detected after rounding, as opposed to before. They only differ on underflow for +-2^-EMIN.
    //IEEE 754 states that we must therefore determine tininess as if the exponent range was unbounded (but not the fraction)
    //Therefore, we must undo the right shift of 1 to fit the exponent range when determining the roundup
    //tiny_rs holds the round/sticky pair one position below the regular one for that purpose
    always_comb begin
        if (shift_packet.d2s) begin
            //Reduced precision: rounding position lies inside the fraction
            round_lsb = frac_norm[FRAC_WIDTH-FRAC_WIDTH_F];
            round_grs[2:1] = frac_norm[FRAC_WIDTH-FRAC_WIDTH_F-1-:2];
            round_grs[0] = |frac_norm[FRAC_WIDTH-FRAC_WIDTH_F-3:0] | |grs_norm | set_sticky;
            tiny_rs[1] = frac_norm[FRAC_WIDTH-FRAC_WIDTH_F-3];
            tiny_rs[0] = |frac_norm[FRAC_WIDTH-FRAC_WIDTH_F-4:0] | |grs_norm | set_sticky;
        end
        else begin
            //Full precision: rounding position is the boundary between fraction and grs
            round_lsb = frac_norm[0];
            round_grs[2:1] = grs_norm[GRS_WIDTH-1-:2];
            round_grs[0] = |grs_norm[GRS_WIDTH-3:0] | set_sticky;
            tiny_rs[1] = grs_norm[GRS_WIDTH-3];
            tiny_rs[0] = |grs_norm[GRS_WIDTH-4:0] | set_sticky;
        end
    end

    //Round stage valid
    always_ff @ (posedge clk) begin
        if (rst)
            round_packet.valid <= 0;
        else if (advance_round)
            round_packet.valid <= shift_packet.valid;
    end

    //Round stage payload
    always_ff @ (posedge clk) begin
        if (advance_round) begin
            round_packet.id <= shift_packet.id;
            round_packet.rm <= shift_packet.rm;
            round_packet.d2s <= shift_packet.d2s;
            round_packet.data.d.sign <= shift_packet.sign_norm;
            round_packet.hidden <= shift_final[GRS_WIDTH+FRAC_WIDTH];
            round_packet.round_lsb <= round_lsb;
            round_packet.round_grs <= round_grs;
            round_packet.tiny_rs <= tiny_rs;

            //Flags pass through, inexact is additionally raised by any nonzero rounding bit
            round_packet.fflags.nv <= shift_packet.fflags.nv;
            round_packet.fflags.dz <= shift_packet.fflags.dz;
            round_packet.fflags.of <= shift_packet.fflags.of;
            round_packet.fflags.uf <= shift_packet.fflags.uf;
            round_packet.fflags.nx <= shift_packet.fflags.nx | |round_grs;

            if (shift_packet.d2s) begin
                //Unused upper exponent bits and lower fraction bits are filled with ones
                round_packet.expo_overflow <= shift_packet.sp_overflow;
                round_packet.data.d.expo <= {{(EXPO_WIDTH-EXPO_WIDTH_F){1'b1}}, shift_packet.sp_expo}; //Allow the roundup to propagate to overflow
                round_packet.data.d.frac <= {frac_norm[FRAC_WIDTH-1-:FRAC_WIDTH_F], {(FRAC_WIDTH-FRAC_WIDTH_F){1'b1}}};
            end
            else begin
                round_packet.expo_overflow <= shift_packet.expo_overflow_norm;
                round_packet.data.d.expo <= shift_packet.expo_norm;
                round_packet.data.d.frac <= frac_norm;
            end
        end
    end

    ////////////////////////////////////////////////////
    //Rounding
    //Perform the rounding by adding based on the saved bits from the previous cycle
    //Also detects overflow
    logic frac_overflow;
    frac_d_t frac_out;
    expo_d_t expo_out;
    logic overflow_exp;
    logic rounding_overflow;
    fp_t rd;
    logic roundup;
    logic roundup_tiny;
    fp_t result_if_overflow;

    //Roundup decision for the actual result
    fp_roundup real_round (
        .sign(round_packet.data.d.sign),
        .rm(round_packet.rm),
        .grs(round_packet.round_grs),
        .lsb(round_packet.round_lsb),
        .roundup(roundup),
        .result_if_overflow(result_if_overflow)
    );

    //Roundup decision one bit position lower, used only for tininess (underflow) detection
    fp_roundup tininess_round (
        .sign(round_packet.data.d.sign),
        .rm(round_packet.rm),
        .grs({round_packet.round_grs[1], round_packet.tiny_rs}),
        .lsb(round_packet.round_grs[2]),
        .roundup(roundup_tiny),
        .result_if_overflow()
    );

    //Increment the fraction, a carry out increments the exponent
    assign {frac_overflow, frac_out} = round_packet.data.d.frac + (FRAC_WIDTH)'(roundup);
    assign expo_out = round_packet.data.d.expo + EXPO_WIDTH'(frac_overflow);

    //Compute exponent overflow due to rounding in parallel with roundup addition
    assign overflow_exp = round_packet.expo_overflow | (frac_overflow & &round_packet.data.d.expo[EXPO_WIDTH-1:1]);

    //Output
    assign wb.id = round_packet.id;
    assign wb.done = round_packet.valid;
    assign wb.rd = rd.raw;

    //Result formatting
    //Reduced precision results are placed in the s view of fp_t with the box field set to all ones
    always_comb begin
        if (round_packet.d2s) begin
            rd.s.box = '1;
            if (overflow_exp) begin
                //Convert dp overflow value to sp
                rd.s.sign = result_if_overflow.d.sign;
                rd.s.expo = result_if_overflow.d.expo[EXPO_WIDTH_F-1:0];
                rd.s.frac = result_if_overflow.d.frac[FRAC_WIDTH_F-1:0];
            end
            else begin
                rd.s.sign = round_packet.data.d.sign;
                rd.s.expo = expo_out[EXPO_WIDTH_F-1:0];
                rd.s.frac = frac_out[FRAC_WIDTH-1-:FRAC_WIDTH_F];
            end
        end
        else if (overflow_exp)
            rd = result_if_overflow;
        else begin
            rd.d.sign = round_packet.data.d.sign;
            rd.d.expo = expo_out;
            rd.d.frac = frac_out;
        end
    end

    //Exception flags
    //An exponent overflow is only reported when the operation was not already invalid
    assign rounding_overflow = ~round_packet.fflags.nv & overflow_exp;

    assign fflags.nv = round_packet.fflags.nv;
    assign fflags.dz = round_packet.fflags.dz;
    assign fflags.of = round_packet.fflags.of | rounding_overflow;
    //Underflow only occurs if inexact
    assign fflags.uf = round_packet.fflags.uf | (~round_packet.fflags.nv & round_packet.fflags.nx & ~round_packet.hidden & ~(frac_overflow & round_packet.round_grs[2] & roundup_tiny));
    //Overflow is inexact
    assign fflags.nx = round_packet.fflags.nx | rounding_overflow;

endmodule
|
109
core/execution_units/fp_unit/fp_prenormalize.sv
Normal file
109
core/execution_units/fp_unit/fp_prenormalize.sv
Normal file
|
@ -0,0 +1,109 @@
|
|||
/*
|
||||
* Copyright © 2019-2023 Yuhui Gao, Chris Keilbart, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Yuhui Gao <yuhuig@sfu.ca>
|
||||
* Chris Keilbart <ckeilbar@sfu.ca>
|
||||
*/
|
||||
|
||||
module fp_prenormalize

    import cva5_config::*;
    import fpu_types::*;

    (
        input logic single,
        input logic right_shift_in,
        input logic overflow_in,
        input logic subnormal,
        input expo_d_t expo_in,
        input logic ignore_max_expo,
        input fp_shift_amt_t left_shift_amt,
        input fp_shift_amt_t right_shift_amt,

        output logic right_shift_out,
        output logic dp_overflow_out,
        output logic sp_overflow_out,
        output fp_shift_amt_t shift_amt_out,
        output expo_d_t dp_expo_out,
        output expo_s_t sp_expo_out
    );

    ////////////////////////////////////////////////////
    //Double precision path
    //A left shift lowers the exponent, but never below zero: when it would, the shift is capped instead
    //A right shift raises the exponent, unless the input is flagged subnormal (exponent forced to zero)
    logic[EXPO_WIDTH:0] wide_expo; //Incoming exponent with the incoming overflow bit on top
    logic[EXPO_WIDTH:0] wide_expo_gated; //Forced to zero when subnormal
    logic[EXPO_WIDTH:0] expo_minus_left;
    logic left_shift_too_far; //Borrow out of the subtraction
    logic[EXPO_WIDTH:0] expo_after_left;
    logic[EXPO_WIDTH:0] expo_after_right;
    logic dp_overflow_raw;
    fp_shift_amt_t left_shift_capped;

    always_comb begin
        wide_expo = {overflow_in, expo_in};

        //Left shift candidate
        wide_expo_gated = wide_expo & {(EXPO_WIDTH+1){~subnormal}};
        {left_shift_too_far, expo_minus_left} = wide_expo_gated - (EXPO_WIDTH+1)'(left_shift_amt);
        if (left_shift_too_far) begin
            //Shifting by the full amount would make the exponent negative; shift by the exponent instead
            left_shift_capped = expo_in;
            expo_after_left = '0;
        end
        else begin
            left_shift_capped = left_shift_amt;
            expo_after_left = expo_minus_left;
        end

        //Right shift candidate
        if (subnormal)
            expo_after_right = '0;
        else
            expo_after_right = wide_expo + (EXPO_WIDTH+1)'(right_shift_amt);

        //Pick the candidate matching the requested direction
        {dp_overflow_raw, dp_expo_out} = right_shift_in ? expo_after_right : expo_after_left;
        //An all ones exponent also counts as overflow unless the caller asks to ignore it
        dp_overflow_out = dp_overflow_raw | (~ignore_max_expo & &dp_expo_out);
    end

    ////////////////////////////////////////////////////
    //Single precision path
    //Exponents that are normal for doubles can fall in the single subnormal range,
    //so a requested left shift can end up as a right shift
    logic[EXPO_WIDTH-1:0] signed_shift; //Positive = right shift, negative = left shift
    logic[EXPO_WIDTH-1:0] signed_shift_sp; //After the subnormal range adjustment
    logic[EXPO_WIDTH-1:0] shift_magnitude;
    logic[EXPO_WIDTH-1:0] shifted_expo;
    logic in_sp_subnormal_range;
    logic below_sp_subnormal_range;
    logic shift_is_left;

    always_comb begin
        signed_shift = right_shift_in ? right_shift_amt : -left_shift_amt;
        shifted_expo = expo_in + signed_shift;

        //An all ones exponent is NaN/infinity rather than an overflow
        sp_overflow_out = overflow_in | (shifted_expo > BIAS+BIAS_F & ~&shifted_expo);

        in_sp_subnormal_range = shifted_expo <= BIAS-BIAS_F && shifted_expo > BIAS-BIAS_F-FRAC_WIDTH_F-3;
        below_sp_subnormal_range = shifted_expo <= BIAS-BIAS_F-FRAC_WIDTH_F-3;

        //Exponent stays zero unless the value maps onto the regular single range
        sp_expo_out = '0;
        signed_shift_sp = signed_shift;
        if (in_sp_subnormal_range)
            signed_shift_sp = signed_shift + ((BIAS-BIAS_F+1) - shifted_expo);
        else if (below_sp_subnormal_range) //Capped so the sticky bit is not lost entirely
            signed_shift_sp = signed_shift + (FRAC_WIDTH_F+3);
        else
            sp_expo_out = {shifted_expo[EXPO_WIDTH-1], shifted_expo[EXPO_WIDTH_F-2:0]};

        //Convert the signed amount into direction + magnitude
        shift_is_left = signed_shift_sp[EXPO_WIDTH-1];
        shift_magnitude = signed_shift_sp;
        if (shift_is_left)
            shift_magnitude = -signed_shift_sp;

        //Output selection
        if (single) begin
            right_shift_out = ~shift_is_left;
            shift_amt_out = shift_magnitude;
        end
        else begin
            right_shift_out = right_shift_in;
            shift_amt_out = right_shift_in ? right_shift_amt : left_shift_capped;
        end
    end

endmodule
|
446
core/execution_units/fp_unit/fp_preprocessing.sv
Normal file
446
core/execution_units/fp_unit/fp_preprocessing.sv
Normal file
|
@ -0,0 +1,446 @@
|
|||
/*
|
||||
* Copyright © 2019-2023 Yuhui Gao, Chris Keilbart, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Yuhui Gao <yuhuig@sfu.ca>
|
||||
* Chris Keilbart <ckeilbar@sfu.ca>
|
||||
*/
|
||||
|
||||
module fp_preprocessing

    import cva5_config::*;
    import cva5_types::*;
    import fpu_types::*;

    #(
        parameter cpu_config_t CONFIG = EXAMPLE_CONFIG,
        parameter FP_NUM_UNITS = 5
    )
    (
        input logic clk,
        input logic rst,
        unit_issue_interface.decode unit_issue[FP_NUM_UNITS-1:0],

        //Unit Inputs
        input fp_preprocessing_packet_t pkt,

        output logic ready,
        output fp_madd_inputs_t madd_args,
        output fp_div_inputs_t div_args,
        output fp_sqrt_inputs_t sqrt_args,
        output fp_wb2fp_misc_inputs_t wb2fp_args,
        output fp_wb2int_misc_inputs_t wb2int_args
    );

    /////////////////////////////////////////////
    //Pipeline control
    //Cycle 0: speculative combinational preprocessing, captured in registers when a request is accepted
    //Cycle 1: remaining preprocessing; the instruction is issued to its target unit
    id_t id_r;
    rm_t rm_r;
    logic single;
    logic single_r;
    logic[FP_NUM_UNITS-1:0] target_unit;
    logic[FP_NUM_UNITS-1:0] issue_to;
    logic[FP_NUM_UNITS-1:0] unit_ready;
    logic accept_request;
    logic stage2_valid;
    logic stage2_advance;

    //Flatten the interface array into vectors
    generate for (genvar i = 0; i < FP_NUM_UNITS; i++) begin : gen_interface_unpack
        assign unit_ready[i] = unit_issue[i].ready;
        assign unit_issue[i].new_request = issue_to[i];
        assign unit_issue[i].id = id_r;
    end endgenerate

    always_comb begin
        //The held instruction leaves when a unit it targets is ready
        stage2_advance = stage2_valid & |(unit_ready & target_unit);
        issue_to = target_unit & {FP_NUM_UNITS{stage2_advance}};
        //A new request fits when stage 2 is empty or is being drained this cycle
        ready = stage2_advance | ~stage2_valid;
        accept_request = pkt.valid & ready;
        single = pkt.is_single;
    end

    always_ff @(posedge clk) begin
        if (rst) begin
            stage2_valid <= 0;
            target_unit <= '0;
        end
        else if (accept_request) begin
            stage2_valid <= 1;
            target_unit <= pkt.unit;
        end
        else if (stage2_advance)
            stage2_valid <= 0;
    end

    /////////////////////////////////////////////
    //Cycle 0: operand conditioning
    //Single to double conversion, normalization and special case detection
    fp_t rs1, rs1_r;
    fp_t rs2, rs2_r;
    fp_t rs3, rs3_r;
    special_case_t[2:0] special_case, special_case_r;
    logic[2:0] hidden, hidden_r;
    logic[0:0] hidden_single;
    logic[1:0] hidden_double;
    fp_t[2:0] rs_converted;
    logic rs1_boxed, rs2_boxed;
    fp_shift_amt_t rs1_norm_shift, rs1_norm_shift_r;
    fp_shift_amt_t rs2_norm_shift, rs2_norm_shift_r;
    frac_d_t rs1_norm_frac, rs1_norm_frac_r;
    frac_d_t rs2_norm_frac, rs2_norm_frac_r;

    assign rs1 = pkt.rs1;
    assign rs2 = pkt.rs2;
    assign rs3 = pkt.rs3;

    fp_rs_preprocess #(.CONFIG(CONFIG)) rs1_pre (
        .in(rs1),
        .single(single),
        .double(rs_converted[0]),
        .special(special_case[0]),
        .is_boxed(rs1_boxed),
        .hidden(hidden[0]),
        .hidden_double(hidden_double[0]),
        .hidden_single(hidden_single[0]),
        .prenormalize_shift(rs1_norm_shift),
        .prenormalize_frac(rs1_norm_frac)
    );

    fp_rs_preprocess #(.CONFIG(CONFIG)) rs2_pre (
        .in(rs2),
        .single(single),
        .double(rs_converted[1]),
        .special(special_case[1]),
        .is_boxed(rs2_boxed),
        .hidden(hidden[1]),
        .hidden_double(hidden_double[1]),
        .hidden_single(),
        .prenormalize_shift(rs2_norm_shift),
        .prenormalize_frac(rs2_norm_frac)
    );

    //Only the converted value, special case and hidden bit are used for rs3
    fp_rs_preprocess #(.CONFIG(CONFIG)) rs3_pre (
        .in(rs3),
        .single(single),
        .double(rs_converted[2]),
        .special(special_case[2]),
        .is_boxed(),
        .hidden(hidden[2]),
        .hidden_double(),
        .hidden_single(),
        .prenormalize_shift(),
        .prenormalize_frac()
    );

    /////////////////////////////////////////////
    //Cycle 0: swap decision
    //swap is set when rs1 compares smaller than rs2 (exponent first, then mantissa)
    logic[EXPO_WIDTH:0] expo_diff;
    logic swap, swap_r;
    logic rs1_smaller_mantissa;
    expo_d_t rs1_expo_padded;
    expo_d_t rs2_expo_padded;
    logic[FRAC_WIDTH_F-1:0] rs1_mant;
    logic[FRAC_WIDTH_F-1:0] rs2_mant;

    always_comb begin
        //Single exponents are zero padded to the double width
        //Unboxed singles are given an all ones exponent because minmax assumes NaNs are the larger operand
        rs1_expo_padded = '0;
        rs2_expo_padded = '0;
        if (rs1_boxed)
            rs1_expo_padded[EXPO_WIDTH_F-1:0] = rs1.s.expo;
        else
            rs1_expo_padded[EXPO_WIDTH_F-1:0] = '1;
        if (rs2_boxed)
            rs2_expo_padded[EXPO_WIDTH_F-1:0] = rs2.s.expo;
        else
            rs2_expo_padded[EXPO_WIDTH_F-1:0] = '1;

        //For the mantissa it is sufficient that inf < snan/qnan
        rs1_mant = {~rs1_boxed | rs1.s.frac[FRAC_WIDTH_F-1], rs1.s.frac[FRAC_WIDTH_F-2:0]};
        rs2_mant = {~rs2_boxed | rs2.s.frac[FRAC_WIDTH_F-1], rs2.s.frac[FRAC_WIDTH_F-2:0]};

        if (single) begin
            expo_diff = rs1_expo_padded - rs2_expo_padded;
            rs1_smaller_mantissa = rs1_mant < rs2_mant;
        end
        else begin
            expo_diff = rs1.d.expo - rs2.d.expo;
            rs1_smaller_mantissa = rs1.d.frac < rs2.d.frac;
        end

        if (expo_diff[EXPO_WIDTH]) //Negative difference: rs1 has the smaller exponent
            swap = 1;
        else if (|expo_diff[EXPO_WIDTH-1:0]) //Positive difference
            swap = 0;
        else //Equal exponents: the mantissa decides
            swap = rs1_smaller_mantissa;
    end

    /////////////////////////////////////////////
    //Cycle 0: FADD exponent difference
    //Precomputed for doubles so the hidden bits need not be considered in the next cycle
    logic[EXPO_WIDTH:0] double_expo_diff;
    logic[EXPO_WIDTH:0] double_expo_diff_r;

    assign double_expo_diff = (rs1.d.expo + EXPO_WIDTH'(~hidden_double[0])) - (rs2.d.expo + EXPO_WIDTH'(~hidden_double[1]));

    /////////////////////////////////////////////
    //Cycle 0: I2F sign and magnitude
    logic[31:0] int_rs_abs;
    logic[31:0] int_rs_abs_r;
    logic[31:0] int_rs1_r;
    logic i2f_sign;
    logic i2f_sign_r;

    assign i2f_sign = pkt.conv_signed & pkt.int_rs1[31];
    assign int_rs_abs = i2f_sign ? -pkt.int_rs1 : pkt.int_rs1;

    /////////////////////////////////////////////
    //Cycle 0: F2I exponent unbiasing
    //The borrow of the subtraction indicates an exponent below the bias
    expo_d_t expo_amt;
    expo_d_t bias_amt;
    expo_d_t rs1_expo_unbiased;
    expo_d_t rs1_expo_unbiased_r;
    logic int_less_than_1;
    logic int_less_than_1_r;

    always_comb begin
        if (single) begin
            expo_amt = {{(EXPO_WIDTH-EXPO_WIDTH_F){1'b0}}, rs1.s.expo};
            bias_amt = BIAS_F;
        end
        else begin
            expo_amt = rs1.d.expo;
            bias_amt = BIAS;
        end
        {int_less_than_1, rs1_expo_unbiased} = expo_amt - bias_amt;
    end

    /////////////////////////////////////////////
    //Cycle 0 -> cycle 1 registers
    //Everything is captured when a request is accepted; none of these are reset
    logic is_fma_r;
    logic is_fadd_r;
    logic add_r;
    logic neg_mul_r;
    logic rs1_boxed_r;
    logic rs2_boxed_r;
    logic is_i2f_r;
    logic is_minmax_r;
    logic is_sign_inj_r;
    logic is_sign_inj_single_r;
    logic is_mv_i2f_r;
    logic is_d2s_r;
    logic f2i_is_signed_r;
    logic is_class_r;
    logic is_fcmp_r;
    logic is_f2i_r;
    logic rs1_hidden_single_r;

    always_ff @(posedge clk) begin
        if (accept_request) begin
            //Common
            id_r <= pkt.id;
            rm_r <= pkt.rm;
            single_r <= single;

            //Operands; singles are stored in their converted double form
            rs1_r <= single ? rs_converted[0] : rs1;
            rs2_r <= single ? rs_converted[1] : rs2;
            rs3_r <= single ? rs_converted[2] : rs3;
            special_case_r <= special_case;
            hidden_r <= hidden;
            rs1_norm_shift_r <= rs1_norm_shift;
            rs2_norm_shift_r <= rs2_norm_shift;
            rs1_norm_frac_r <= rs1_norm_frac;
            rs2_norm_frac_r <= rs2_norm_frac;
            rs1_boxed_r <= rs1_boxed;
            rs2_boxed_r <= rs2_boxed;
            rs1_hidden_single_r <= hidden_single[0];
            swap_r <= swap;

            //FMA/FADD
            is_fma_r <= pkt.is_fma;
            is_fadd_r <= pkt.is_fadd;
            add_r <= pkt.add;
            neg_mul_r <= pkt.neg_mul;
            double_expo_diff_r <= double_expo_diff;

            //WB2FP
            int_rs1_r <= pkt.int_rs1;
            int_rs_abs_r <= int_rs_abs;
            i2f_sign_r <= i2f_sign;
            is_i2f_r <= pkt.is_i2f;
            is_minmax_r <= pkt.is_minmax;
            is_sign_inj_r <= pkt.is_sign_inj;
            is_sign_inj_single_r <= pkt.is_sign_inj_single;
            is_mv_i2f_r <= pkt.is_mv_i2f;
            is_d2s_r <= pkt.is_d2s;

            //WB2INT
            f2i_is_signed_r <= pkt.conv_signed;
            is_class_r <= pkt.is_class;
            is_fcmp_r <= pkt.is_fcmp;
            is_f2i_r <= pkt.is_f2i;
            int_less_than_1_r <= int_less_than_1;
            rs1_expo_unbiased_r <= rs1_expo_unbiased;
        end
    end

    /////////////////////////////////////////////
    //Cycle 1: normalized operands and swap
    //After the swap rs1_swapped holds the larger argument
    fp_t rs1_norm;
    fp_t rs2_norm;
    fp_t rs1_swapped;
    fp_t rs2_swapped;
    fp_shift_amt_t rs2_swapped_shift;
    logic rs1_swapped_hidden;
    logic rs2_swapped_hidden;

    always_comb begin
        //Registered sign and exponent combined with the prenormalized mantissa
        rs1_norm.d.sign = rs1_r.d.sign;
        rs1_norm.d.expo = rs1_r.d.expo;
        rs1_norm.d.frac = rs1_norm_frac_r;

        rs2_norm.d.sign = rs2_r.d.sign;
        rs2_norm.d.expo = rs2_r.d.expo;
        rs2_norm.d.frac = rs2_norm_frac_r;
    end

    always_comb begin
        //Special cases are not swapped: only multiplication uses them here and the order does not matter there
        rs1_swapped = rs1_norm;
        rs2_swapped = rs2_norm;
        rs1_swapped_hidden = hidden_r[0];
        rs2_swapped_hidden = hidden_r[1];
        rs2_swapped_shift = rs2_norm_shift_r;

        if (swap_r) begin
            rs1_swapped = rs2_norm;
            rs2_swapped = rs1_norm;
            rs1_swapped_hidden = hidden_r[1];
            rs2_swapped_hidden = hidden_r[0];
            rs2_swapped_shift = rs1_norm_shift_r;
        end
    end

    /////////////////////////////////////////////
    //Cycle 1: FMA unit arguments (FMA, FMUL and FADD)
    logic[EXPO_WIDTH:0] expo_diff_unswapped;
    logic[EXPO_WIDTH:0] expo_diff_issued;

    always_comb begin
        //Singles were converted in cycle 0, so their difference is taken from the registered exponents
        if (single_r)
            expo_diff_unswapped = rs1_r.d.expo - rs2_r.d.expo;
        else
            expo_diff_unswapped = double_expo_diff_r;

        //Negated when the operands are swapped
        expo_diff_issued = expo_diff_unswapped;
        if (swap_r)
            expo_diff_issued = -expo_diff_unswapped;
    end

    always_comb begin
        //FMA
        madd_args.fma = is_fma_r;
        madd_args.fma_args.mul_sign = neg_mul_r;
        madd_args.fma_args.add_sign = add_r;
        madd_args.fma_args.rs3 = rs3_r;
        madd_args.fma_args.rs3_hidden = hidden_r[2];
        madd_args.fma_args.rs3_special_case = special_case_r[2];

        //FMUL
        madd_args.mul_args.rs1 = rs1_swapped;
        madd_args.mul_args.rs2 = rs2_swapped;
        madd_args.mul_args.rs1_hidden = rs1_swapped_hidden;
        madd_args.mul_args.rs2_hidden = rs2_swapped_hidden;
        madd_args.mul_args.rs1_special_case = special_case_r[0];
        madd_args.mul_args.rs2_special_case = special_case_r[1];
        madd_args.mul_args.rs2_prenormalize_shift_amt = rs2_swapped_shift;
        madd_args.mul_args.rm = rm_r;
        madd_args.mul_args.single = single_r;

        //FADD
        madd_args.add = is_fadd_r;
        madd_args.add_args.rs1 = rs1_r;
        madd_args.add_args.rs2 = rs2_r;
        madd_args.add_args.rs1_hidden = hidden_r[0];
        madd_args.add_args.rs2_hidden = hidden_r[1];
        madd_args.add_args.rs1_safe = 0;
        madd_args.add_args.rs2_safe = 0;
        madd_args.add_args.rs1_special_case = special_case_r[0];
        madd_args.add_args.rs2_special_case = special_case_r[1];
        madd_args.add_args.rs1_expo_overflow = 0;
        madd_args.add_args.expo_diff = expo_diff_issued;
        madd_args.add_args.add = add_r;
        madd_args.add_args.swap = swap_r;
        madd_args.add_args.fp_add_grs = '0;
        madd_args.add_args.rm = rm_r;
        madd_args.add_args.single = single_r;
    end

    /////////////////////////////////////////////
    //Cycle 1: FDIV arguments
    always_comb begin
        div_args.rs1 = rs1_norm;
        div_args.rs2 = rs2_norm;
        div_args.rs1_hidden = hidden_r[0];
        div_args.rs2_hidden = hidden_r[1];
        div_args.rs1_special_case = special_case_r[0];
        div_args.rs2_special_case = special_case_r[1];
        div_args.rs1_prenormalize_shift_amt = rs1_norm_shift_r;
        div_args.rs2_prenormalize_shift_amt = rs2_norm_shift_r;
        div_args.rm = rm_r;
        div_args.single = single_r;
    end

    /////////////////////////////////////////////
    //Cycle 1: FSQRT arguments
    always_comb begin
        sqrt_args.rs1 = rs1_norm;
        sqrt_args.rs1_hidden = hidden_r[0];
        sqrt_args.special_case = special_case_r[0];
        sqrt_args.rs1_prenormalize_shift_amt = rs1_norm_shift_r;
        sqrt_args.rm = rm_r;
        sqrt_args.single = single_r;
    end

    /////////////////////////////////////////////
    //Cycle 1: WB2FP arguments (I2F, min/max, sign injection, move, D2S)
    always_comb begin
        //Operation select
        wb2fp_args.i2f = is_i2f_r;
        wb2fp_args.fminmax = is_minmax_r;
        wb2fp_args.fsgnj = is_sign_inj_r;
        wb2fp_args.fsgnj_single = is_sign_inj_single_r;
        wb2fp_args.fmv = is_mv_i2f_r;
        wb2fp_args.d2s = is_d2s_r;

        //Operands
        wb2fp_args.rs1 = rs1_r;
        wb2fp_args.rs2 = rs2_r;
        wb2fp_args.rs1_hidden = hidden_r[0];
        wb2fp_args.rs1_special_case = special_case_r[0];
        wb2fp_args.rs2_special_case = special_case_r[1];
        wb2fp_args.rs1_boxed = rs1_boxed_r;
        wb2fp_args.rs2_boxed = rs2_boxed_r;
        wb2fp_args.swap = swap_r;
        wb2fp_args.int_rs = int_rs1_r;
        wb2fp_args.int_rs_abs = int_rs_abs_r;
        wb2fp_args.i2f_sign = i2f_sign_r;
        wb2fp_args.single = single_r;
        wb2fp_args.rm = rm_r;
    end

    /////////////////////////////////////////////
    //Cycle 1: WB2INT arguments (F2I, compare, classify)
    always_comb begin
        //Operation select
        wb2int_args.fclass = is_class_r;
        wb2int_args.fcmp = is_fcmp_r;
        wb2int_args.f2i = is_f2i_r;

        //Operands
        wb2int_args.rs1 = rs1_r;
        wb2int_args.rs2 = rs2_r;
        wb2int_args.rs1_hidden = hidden_r[0];
        //For singles this is the hidden bit of the original single precision encoding
        wb2int_args.rs1_original_hidden_bit = single_r ? rs1_hidden_single_r : hidden_r[0];
        wb2int_args.rs1_special_case = special_case_r[0];
        wb2int_args.rs2_special_case = special_case_r[1];
        wb2int_args.rs1_expo_unbiased = rs1_expo_unbiased_r;
        wb2int_args.int_less_than_1 = int_less_than_1_r;
        wb2int_args.is_signed = f2i_is_signed_r;
        wb2int_args.swap = swap_r;
        wb2int_args.rm = rm_r;
    end

endmodule
|
71
core/execution_units/fp_unit/fp_roundup.sv
Normal file
71
core/execution_units/fp_unit/fp_roundup.sv
Normal file
|
@ -0,0 +1,71 @@
|
|||
/*
|
||||
* Copyright © 2019-2023 Yuhui Gao, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Yuhui Gao <yuhuig@sfu.ca>
|
||||
*/
|
||||
|
||||
module fp_roundup

    import cva5_config::*;
    import fpu_types::*;

    (
        input logic sign,
        input rm_t rm,
        input logic[2:0] grs,
        input logic lsb,
        output logic roundup,
        output fp_t result_if_overflow
    );

    //Selects the overflow result: 1 -> all ones mantissa with the exponent LSB cleared, 0 -> all ones exponent with a zero mantissa
    logic saturate;
    logic inexact;

    assign inexact = |grs;

    always_comb begin
        unique case (rm)
            3'b001: begin //Round to zero: never increments, overflow saturates
                saturate = 1;
                roundup = 0;
            end
            3'b010: begin //Round to negative inf: increments negative inexact values only
                saturate = ~sign;
                roundup = sign & inexact;
            end
            3'b011: begin //Round to positive inf: increments positive inexact values only
                saturate = sign;
                roundup = ~sign & inexact;
            end
            3'b100: begin //Nearest, ties away: the guard bit alone decides
                saturate = 0;
                roundup = grs[2];
            end
            default: begin //Nearest, ties to even: a tie only increments when the LSB is set
                saturate = 0;
                roundup = grs[2] & (lsb | grs[1] | grs[0]);
            end
        endcase

        result_if_overflow.d.sign = sign;
        result_if_overflow.d.expo = {{(EXPO_WIDTH-1){1'b1}}, ~saturate};
        result_if_overflow.d.frac = {FRAC_WIDTH{saturate}};
    end

endmodule
|
173
core/execution_units/fp_unit/fp_rs_preprocess.sv
Normal file
173
core/execution_units/fp_unit/fp_rs_preprocess.sv
Normal file
|
@ -0,0 +1,173 @@
|
|||
/*
|
||||
* Copyright © 2023 Chris Keilbart, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Chris Keilbart <ckeilbar@sfu.ca>
|
||||
*/
|
||||
|
||||
module fp_rs_preprocess

    import cva5_config::*;
    import fpu_types::*;

    #(
        parameter cpu_config_t CONFIG = EXAMPLE_CONFIG
    )
    (
        input fp_t in, //Packed form
        input logic single,
        output fp_t double, //Only valid if input was single

        //Special cases
        output special_case_t special,
        output logic is_boxed,
        output logic hidden,
        output logic hidden_double,
        output logic hidden_single,

        //Pre normalization
        output fp_shift_amt_t prenormalize_shift,
        output frac_d_t prenormalize_frac
    );

    ////////////////////////////////////////////////////
    //Special case detection
    //The input is classified under both the single and the double interpretation,
    //and the result matching the operand type is reported
    logic inf_s, snan_s, qnan_s, zero_s;
    logic inf_d, snan_d, qnan_d, zero_d;

    fp_special_case_detection #(.FRAC_W(FRAC_WIDTH_F), .EXPO_W(EXPO_WIDTH_F), .SUBNORMAL(1)) input_case_s (
        .expo(in.s.expo),
        .frac(in.s.frac),
        .is_inf(inf_s),
        .is_SNaN(snan_s),
        .is_QNaN(qnan_s),
        .is_zero(zero_s),
        .hidden(hidden_single)
    );
    fp_special_case_detection #(.FRAC_W(FRAC_WIDTH), .EXPO_W(EXPO_WIDTH), .SUBNORMAL(1)) input_case_d (
        .expo(in.d.expo),
        .frac(in.d.frac),
        .is_inf(inf_d),
        .is_SNaN(snan_d),
        .is_QNaN(qnan_d),
        .is_zero(zero_d),
        .hidden(hidden_double)
    );

    //A single is only valid when its box bits are all ones
    assign is_boxed = &in.s.box;

    always_comb begin
        if (single) begin
            //An improperly boxed single is reported as a quiet NaN and nothing else
            special.inf = inf_s & is_boxed;
            special.snan = snan_s & is_boxed;
            special.qnan = qnan_s | ~is_boxed;
            special.zero = zero_s & is_boxed;
            hidden = ~zero_s; //TODO: singles sharing subnormal range with doubles
        end
        else begin
            special.inf = inf_d;
            special.snan = snan_d;
            special.qnan = qnan_d;
            special.zero = zero_d;
            hidden = hidden_double;
        end
    end

    ////////////////////////////////////////////////////
    //Normalization
    //Shifts the mantissa so the leading one lands in the implicit position
    //(many execution units require this for subnormal numbers)
    //The leading zero count and the shift happen in the same cycle
    logic[EXPO_WIDTH_F-1:0] exponent_add;
    logic[FRAC_WIDTH-1:0] shift_arr;
    logic clz_hidden;
    logic[FRAC_WIDTH+1:0] clz_arr;
    logic[$clog2(FRAC_WIDTH+2)-1:0] clz_count;

    always_comb begin
        if (single) begin
            //Single mantissas are left aligned in the double width array
            shift_arr = {in.s.frac, {(FRAC_WIDTH-FRAC_WIDTH_F){1'b0}}};
            clz_hidden = hidden_single;
        end
        else begin
            shift_arr = in.d.frac;
            clz_hidden = hidden_double;
        end

        //The trailing one guarantees the count is always accurate
        clz_arr = {clz_hidden, shift_arr, 1'b1};
    end

    clz #(.WIDTH(FRAC_WIDTH+2)) frac_clz (
        .clz_input(clz_arr),
        .clz(clz_count),
        .zero()
    );

    always_comb begin
        prenormalize_frac = shift_arr << clz_count;

        //The shift amount is only reported for doubles
        prenormalize_shift = '0;
        if (~single)
            prenormalize_shift[$clog2(FRAC_WIDTH)-1:0] = clz_count[$clog2(FRAC_WIDTH)-1:0];

        //Zero extended count used to rescale the exponent of subnormal singles
        exponent_add = '0;
        exponent_add[$clog2(FRAC_WIDTH+2)-1:0] = clz_count;
    end

    ////////////////////////////////////////////////////
    //Single to Double
    //Rebiases the exponent, accounting for the normalization shift, and uses the normalized mantissa
    //Input case -> Output
    //Not NaN boxed -> CNaN
    //sNaN -> CNaN
    //qNaN -> CNaN
    //+-0 -> +-0
    //+-infty -> +-infty
    //subnormal -> not subnormal (this depends on relative widths)
    //regular -> regular
    logic to_cnan;
    expo_d_t add_amt;
    expo_d_t bias_amt;
    expo_d_t expo_out;

    assign to_cnan = snan_s | qnan_s | ~is_boxed;

    always_comb begin
        //Regular inputs add their exponent, subnormal inputs subtract the normalization shift
        add_amt = hidden_single ? {{(EXPO_WIDTH-EXPO_WIDTH_F){1'b0}}, in.s.expo} : -{{(EXPO_WIDTH-EXPO_WIDTH_F){1'b0}}, exponent_add};

        //Subnormal inputs use a bias difference with the LSB set
        bias_amt = BIAS - BIAS_F;
        if (~hidden_single)
            bias_amt[0] = 1;

        expo_out = bias_amt + add_amt;
    end

    always_comb begin
        //Sign: cleared when canonicalizing
        double.d.sign = to_cnan ? 1'b0 : in.s.sign;

        //Exponent
        if (to_cnan | inf_s)
            double.d.expo = '1;
        else if (zero_s)
            double.d.expo = '0;
        else
            double.d.expo = expo_out;

        //Mantissa: NaNs get canonicalized from s->d
        if (to_cnan)
            double.d.frac = {1'b1, {(FRAC_WIDTH-1){1'b0}}};
        else
            double.d.frac = prenormalize_frac;
    end

endmodule
|
51
core/execution_units/fp_unit/fp_special_case_detection.sv
Normal file
51
core/execution_units/fp_unit/fp_special_case_detection.sv
Normal file
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Copyright © 2019-2023 Yuhui Gao, Chris Keilbart, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Yuhui Gao <yuhuig@sfu.ca>
|
||||
* Chris Keilbart <ckeilbar@sfu.ca>
|
||||
*/
|
||||
|
||||
module fp_special_case_detection

    #(
        parameter FRAC_W = 52,
        parameter EXPO_W = 11,
        parameter SUBNORMAL = 1
    )(
        input logic[EXPO_W-1:0] expo,
        input logic[FRAC_W-1:0] frac,
        output logic is_inf,
        output logic is_SNaN,
        output logic is_QNaN,
        output logic is_zero,
        output logic hidden
    );

    //Classifies an exponent/mantissa pair
    logic max_expo; //Exponent is all ones: infinity or NaN
    logic quiet_bit; //Mantissa MSB, separates quiet from signalling NaNs
    logic payload_set; //Any mantissa bit below the MSB is set

    always_comb begin
        max_expo = &expo;
        quiet_bit = frac[FRAC_W-1];
        payload_set = |frac[FRAC_W-2:0];

        //The hidden bit is set for any nonzero exponent
        hidden = |expo;

        is_QNaN = max_expo & quiet_bit; //Leading 1
        is_SNaN = max_expo & ~quiet_bit & payload_set; //Leading 0 but not fully 0
        is_inf = max_expo & ~quiet_bit & ~payload_set; //Fully 0

        if (SUBNORMAL)
            is_zero = ~hidden & ~quiet_bit & ~payload_set;
        else //Without subnormal support every zero exponent is flushed to zero
            is_zero = ~hidden;
    end

endmodule
|
193
core/execution_units/fp_unit/fp_sqrt.sv
Normal file
193
core/execution_units/fp_unit/fp_sqrt.sv
Normal file
|
@ -0,0 +1,193 @@
|
|||
/*
|
||||
* Copyright © 2019-2023 Yuhui Gao, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Yuhui Gao <yuhuig@sfu.ca>
|
||||
* Chris Keilbart <ckeilbar@sfu.ca>
|
||||
*/
|
||||
|
||||
module fp_sqrt

    import cva5_config::*;
    import fpu_types::*;

    (
        input logic clk,
        input logic rst,
        input fp_sqrt_inputs_t args,
        unit_issue_interface.unit issue,
        fp_intermediate_wb_interface.unit wb
    );

    //Hidden + GRS + 1 (because without the +1 it gave the wrong sticky bit in certain cases)
    unsigned_sqrt_interface #(.DATA_WIDTH(FRAC_WIDTH+5)) sqrt();

    ////////////////////////////////////////////////////
    //Implementation
    //Iterative square root core, bypassed on special cases
    //One operation is held at a time; a new one is accepted when idle or in the cycle the current result is acknowledged
    logic busy;
    assign issue.ready = ~busy | wb.ack;

    always_ff @(posedge clk) begin
        if (rst)
            busy <= 0;
        else if (issue.new_request) //A new request wins over a simultaneous ack
            busy <= 1;
        else if (wb.ack)
            busy <= 0;
    end

    ////////////////////////////////////////////////////
    //Special cases
    //Handle edge cases like negative numbers, infinity, NaN, zero, and powers of 2
    //Don't require mantissa calculation and bypass the core
    logic nv, nv_r;
    logic inf; //Default if not qnan_r or zero_r
    logic qnan, qnan_r;
    logic zero, zero_r;
    logic early_exit;
    logic result_sign;
    fp_t special_result;
    expo_d_t result_expo;

    //Invalid for signalling NaNs and for negative operands that are neither a quiet NaN nor zero
    assign nv = (args.rs1.d.sign & ~args.special_case.qnan & ~args.special_case.zero) | args.special_case.snan;
    assign qnan = args.special_case.qnan | nv;
    assign zero = args.special_case.zero;
    assign inf = args.special_case.inf & ~args.rs1.d.sign;

    always_ff @(posedge clk) begin
        //early_exit stays set from the cycle after issue until the result is acknowledged
        //issue.ready allows a request in the same cycle as an ack, so the new request must take priority;
        //otherwise a special case operand issued back-to-back would lose its early_exit and,
        //because the core is not started for special cases, wb.done would never be raised
        if (rst)
            early_exit <= 0;
        else if (issue.new_request)
            early_exit <= inf | zero | qnan;
        else if (wb.ack)
            early_exit <= 0;

        if (issue.new_request) begin
            result_sign <= args.rs1.d.sign;
            nv_r <= nv;
            qnan_r <= qnan;
            zero_r <= zero;
        end
    end

    always_comb begin
        if (qnan_r)
            special_result.raw = CANONICAL_NAN;
        else if (zero_r) begin //Zero keeps the sign of the operand
            special_result.d.sign = result_sign;
            special_result.d.expo = '0;
            special_result.d.frac = '0;
        end
        else begin //Inf
            special_result.d.sign = 0;
            special_result.d.expo = '1;
            special_result.d.frac = '0;
        end
    end

    ////////////////////////////////////////////////////
    //Exponent logic
    //Normalized for subnormal inputs
    //Halved for positive exponents and doubled for negative exponents
    logic[EXPO_WIDTH:0] norm_expo;
    logic[EXPO_WIDTH:0] norm_expo_r;
    logic[EXPO_WIDTH:0] unbiased_expo;
    assign norm_expo = args.rs1.d.expo + {{(EXPO_WIDTH-1){1'b0}}, ~args.rs1_hidden} - args.rs1_prenormalize_shift_amt;

    //An extra 1 is subtracted when the normalized exponent is even; the radicand alignment below uses the same bit
    assign unbiased_expo = norm_expo_r - {{(EXPO_WIDTH-1){1'b0}}, ~norm_expo_r[0]} - BIAS;
    //Right shift by 1 halves both positive and negative numbers
    assign result_expo = unbiased_expo[EXPO_WIDTH:1] + BIAS;

    always_ff @(posedge clk) begin
        if (issue.new_request)
            norm_expo_r <= norm_expo;
    end

    ////////////////////////////////////////////////////
    //Mantissa square root core
    //Designed to be swappable
    //Operates on normalized values shifted for alignment
    logic result_hidden;
    logic[3:0] result_grs;
    frac_d_t result_frac;
    assign sqrt.radicand = norm_expo[0] ? {2'b01, args.rs1.d.frac, 3'b0} : {1'b1, args.rs1.d.frac, 4'b0};
    assign sqrt.start = issue.new_request & ~(inf | zero | qnan); //The core is not started for special cases
    assign {result_hidden, result_frac, result_grs} = sqrt.result;

    fp_sqrt_core sqrt_core (
        .sqrt(sqrt),
    .*);

    ////////////////////////////////////////////////////
    //Output management
    //Either return the early exit values from the cycle after issue, or the regular values once the square root finishes
    //sqrt.done is registered into sqrt_hold so that wb.done stays raised until the ack
    logic sqrt_hold;
    assign wb.done = sqrt.done | sqrt_hold | early_exit;

    always_ff @(posedge clk) begin
        if (rst)
            sqrt_hold <= 0;
        else
            sqrt_hold <= ~wb.ack & (sqrt.done | sqrt_hold);
    end

    always_ff @(posedge clk) begin
        if (issue.new_request) begin
            wb.id <= issue.id;
            wb.rm <= args.rm;
            wb.d2s <= args.single;
        end
    end

    //The result is selected with early_exit, which is held for as long as wb.done is raised for a special case
    //Selecting on a one cycle delayed copy of new_request would swap in the core outputs if the ack arrived later
    always_comb begin
        wb.grs = '0;
        if (early_exit)
            wb.rd = special_result;
        else begin
            wb.rd.d.sign = 0;
            wb.rd.d.expo = result_expo;
            wb.rd.d.frac = result_frac;
            //Extra result bits followed by the remainder
            wb.grs[GRS_WIDTH-1-:4] = result_grs;
            wb.grs[GRS_WIDTH-5-:FRAC_WIDTH+5] = sqrt.remainder;
        end
    end

    assign wb.hidden = early_exit ? ~zero_r : result_hidden;

    assign wb.expo_overflow = 0;
    assign wb.fflags.nv = nv_r;
    assign wb.fflags.dz = 0;
    assign wb.fflags.of = 0;
    assign wb.fflags.uf = 0;
    assign wb.fflags.nx = 0; //Set in writeback
    assign wb.carry = 0;
    assign wb.safe = 0;
    assign wb.subnormal = 0;
    assign wb.clz = '0;
    assign wb.right_shift = 0;
    assign wb.right_shift_amt = 'x;
    assign wb.ignore_max_expo = 1;

endmodule
|
109
core/execution_units/fp_unit/fp_sqrt_core.sv
Normal file
109
core/execution_units/fp_unit/fp_sqrt_core.sv
Normal file
|
@ -0,0 +1,109 @@
|
|||
/*
|
||||
* Copyright © 2019-2023 Yuhui Gao, Chris Keilbart, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Yuhui Gao <yuhuig@sfu.ca>
|
||||
* Chris Keilbart <ckeilbar@sfu.ca>
|
||||
*/
|
||||
|
||||
module fp_sqrt_core

    (
        input logic clk,
        input logic rst,
        unsigned_sqrt_interface.sqrt sqrt
    );

    //The counter must be able to hold the value DATA_WIDTH itself
    //$clog2(DATA_WIDTH) is one bit short when DATA_WIDTH is a power of 2, which would make the
    //comparison constant truncate to 0 and assert counter_full while idle
    //For every other DATA_WIDTH the width is unchanged
    typedef logic[$clog2(sqrt.DATA_WIDTH+1)-1:0] counter_t;
    typedef logic[sqrt.DATA_WIDTH-1:0] frac_t;

    ////////////////////////////////////////////////////
    //Radix 2 square root
    //Fixed latency generating one result bit per cycle

    //Control logic
    //The counter is 0 when idle, starts counting on sqrt.start, and wraps back to 0 after reaching DATA_WIDTH
    //sqrt.done is raised in the cycle after the counter reaches DATA_WIDTH
    logic counter_full;
    counter_t counter;
    assign counter_full = counter == counter_t'(sqrt.DATA_WIDTH);

    always_ff @(posedge clk) begin
        if (rst) begin
            counter <= '0;
            sqrt.done <= 0;
        end
        else begin
            sqrt.done <= counter_full;
            if (counter_full)
                counter <= '0;
            else if (sqrt.start | |counter)
                counter <= counter + 1;
        end
    end

    ////////////////////////////////////////////////////
    //Subtraction
    //Trial subtraction of {partial result, 01} from the working value; overflow holds the borrow
    frac_t rad;
    frac_t current_subtractend;
    frac_t next_subtractend;
    frac_t subtractor;
    frac_t subtraction;
    logic overflow;

    assign subtractor = {sqrt.result[sqrt.DATA_WIDTH-3:0], 2'b01};
    assign {overflow, subtraction} = current_subtractend - subtractor;

    ////////////////////////////////////////////////////
    //Next Working subtractend Determination
    //On a borrow the working value is kept, otherwise the difference is used
    //In both cases the next two radicand bits are shifted in
    always_comb begin
        if (overflow)
            next_subtractend = {current_subtractend[sqrt.DATA_WIDTH-3:0], rad[sqrt.DATA_WIDTH-1-:2]};
        else
            next_subtractend = {subtraction[sqrt.DATA_WIDTH-3:0], rad[sqrt.DATA_WIDTH-1-:2]};
    end

    always_ff @(posedge clk) begin
        if (sqrt.start) //First working subtractend extracts the upper 2 bits of the radicand
            current_subtractend <= {{(sqrt.DATA_WIDTH-2){1'b0}}, sqrt.radicand[sqrt.DATA_WIDTH-1-:2]};
        else
            current_subtractend <= next_subtractend;
    end

    ////////////////////////////////////////////////////
    //Update remaining radicand digits
    always_ff @(posedge clk) begin
        if (sqrt.start) //The upper two bits are pushed to the working subtractend register
            rad <= {sqrt.radicand[sqrt.DATA_WIDTH-3:0], 2'b00};
        else
            rad <= rad << 2;
    end

    ////////////////////////////////////////////////////
    //Quotient Determination
    //Result and remainder are only updated while the counter is running, so they hold their final values afterwards
    always_ff @(posedge clk) begin
        if (sqrt.start) begin
            sqrt.result <= '0;
            sqrt.remainder <= '0;
        end
        else if (|counter) begin
            //Shift in new quotient bit
            sqrt.result <= {sqrt.result[sqrt.DATA_WIDTH-2:0], ~overflow};
            sqrt.remainder <= next_subtractend;
        end
    end

endmodule
|
70
core/execution_units/fp_unit/fp_sticky_tracking.sv
Normal file
70
core/execution_units/fp_unit/fp_sticky_tracking.sv
Normal file
|
@ -0,0 +1,70 @@
|
|||
/*
|
||||
* Copyright © 2019-2023 Yuhui Gao, Chris Keilbart, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Yuhui Gao <yuhuig@sfu.ca>
|
||||
* Chris Keilbart <ckeilbar@sfu.ca>
|
||||
*/
|
||||
|
||||
module fp_sticky_tracking

    #(
        parameter INPUT_WIDTH = 24,
        parameter SHIFT_WIDTH = 11
    )(
        input logic[INPUT_WIDTH-1:0] shifter_input,
        input logic[SHIFT_WIDTH-1:0] shift_amount,
        output logic sticky_bit
    );

    //This unit returns a single bit which indicates whether a 1 got right shifted out of the input

    //ORs together the bits of a 4 bit group from index 0 up to and including index sel
    //The whole group is ORed when fully_shifted is set
    function logic shift_reduce(input logic[3:0] a, input logic[1:0] sel, input logic fully_shifted);
        if (fully_shifted)
            shift_reduce = |a;
        else begin
            case (sel)
                2'd0 : shift_reduce = a[0];
                2'd1 : shift_reduce = |a[1:0];
                2'd2 : shift_reduce = |a[2:0];
                default : shift_reduce = |a;
            endcase
        end
    endfunction

    localparam PADDED_WIDTH = 2**SHIFT_WIDTH;
    localparam NUM_TIERS = (SHIFT_WIDTH+1)/2; //log4 - each level reduces width by a factor of 4
    logic[PADDED_WIDTH-1:0] tier[NUM_TIERS];

    ////////////////////////////////////////////////////
    //Implementation
    //Tier 0 is the zero padded input; every following tier folds groups of 4 bits of the previous tier into 1 bit
    //Each tier consumes the next 2 bits of the shift amount as its group select
    int groups_in_tier; //Number of valid bits in the tier being generated
    int groups_fully_shifted; //Groups below this index are entirely shifted out
    always_comb begin
        tier = '{default: '0};
        //Pad with 0s to ensure that shift amounts larger than INPUT_WIDTH generate the correct sticky
        tier[0] = {{(PADDED_WIDTH-INPUT_WIDTH){1'b0}}, shifter_input};

        groups_in_tier = PADDED_WIDTH/4;
        for (int level = 1; level < NUM_TIERS; level++) begin
            groups_fully_shifted = 32'(shift_amount) >> 2*level;
            for (int grp = 0; grp < groups_in_tier; grp++) begin
                tier[level][grp] = shift_reduce(
                    tier[level-1][grp*4 +: 4],
                    shift_amount[(level-1)*2 +: 2],
                    grp < groups_fully_shifted
                );
            end
            groups_in_tier = groups_in_tier/4;
        end

        //The last stage is selected by the upper two bits of the shift amount
        sticky_bit = shift_reduce(tier[NUM_TIERS-1][3:0], shift_amount[$clog2(PADDED_WIDTH)-1 -: 2], 1'b0);
    end

endmodule
|
229
core/execution_units/fp_unit/fp_wb2fp_misc.sv
Normal file
229
core/execution_units/fp_unit/fp_wb2fp_misc.sv
Normal file
|
@ -0,0 +1,229 @@
|
|||
/*
|
||||
* Copyright © 2019-2023 Yuhui Gao, Chris Keilbart, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Yuhui Gao <yuhuig@sfu.ca>
|
||||
* Chris Keilbart <ckeilbar@sfu.ca>
|
||||
*/
|
||||
|
||||
module fp_wb2fp_misc

    import fpu_types::*;
    import cva5_config::*;

    (
        input fp_wb2fp_misc_inputs_t args,
        unit_issue_interface.unit issue,
        fp_intermediate_wb_interface.unit wb
    );

    ////////////////////////////////////////////////////
    //Implementation
    //Sign injections, min/max, s2d, d2s, i2f, and moves
    //Fully combinational: the result is presented in the cycle of the request on a shared writeback port
    assign wb.done = issue.new_request;
    assign wb.id = issue.id;
    assign wb.rm = args.rm; //Only used for i2f
    assign issue.ready = wb.ack; //ACK functions as READY here

    //NaN detection shared by FD2S and FMIN/FMAX
    logic rs1_nan;
    logic rs2_nan;
    assign rs1_nan = args.rs1_special_case.qnan | args.rs1_special_case.snan;
    assign rs2_nan = args.rs2_special_case.qnan | args.rs2_special_case.snan;

    ////////////////////////////////////////////////////
    //FMV
    //Transfers bits unchanged from an INT register to an FP register, boxing them
    //In reduced precision, transfers the lower bits
    fp_t fmv_rd;
    assign fmv_rd.s.box = '1;
    assign {fmv_rd.s.sign, fmv_rd.s.expo, fmv_rd.s.frac} = args.int_rs[FLEN_F-1:0];

    ////////////////////////////////////////////////////
    //FS2D
    //The actual conversion is done in preprocessing
    //Can only raise on SNAN
    fp_t s2d_rd;
    assign s2d_rd = args.rs1;

    ////////////////////////////////////////////////////
    //FD2S
    //The actual conversion is done in postprocessing
    //Canonicalizes NaNs and can also raise on SNAN
    fp_t d2s_rd;
    assign d2s_rd.raw = rs1_nan ? CANONICAL_NAN : args.rs1.raw;

    ////////////////////////////////////////////////////
    //FSGN
    //Modifies the sign of the first operand
    //Does NOT canonicalize NaNs and does not raise any flags
    logic sgn_sign;
    logic rs1_sign;
    logic rs2_sign;
    fp_t sgn_rd;

    //For single precision injections the sign of an unboxed operand reads as 0
    assign rs1_sign = args.fsgnj_single ? args.rs1_boxed & args.rs1.s.sign : args.rs1.d.sign;
    assign rs2_sign = args.fsgnj_single ? args.rs2_boxed & args.rs2.s.sign : args.rs2.d.sign;

    always_comb begin
        //Sign selection
        sgn_sign = rs2_sign; //J
        if (args.rm[1]) //JX
            sgn_sign = rs1_sign ^ rs2_sign;
        else if (args.rm[0]) //JN
            sgn_sign = ~rs2_sign;

        //Result assembly
        if (~args.fsgnj_single) begin
            sgn_rd.d.sign = sgn_sign;
            sgn_rd.d.expo = args.rs1.d.expo;
            sgn_rd.d.frac = args.rs1.d.frac;
        end
        else begin
            sgn_rd.s.box = '1;
            sgn_rd.s.sign = sgn_sign;
            //If rs1 is unboxed it is treated as the canonical NaN
            if (args.rs1_boxed)
                sgn_rd.raw[FLEN_F-2:0] = args.rs1.raw[FLEN_F-2:0];
            else
                sgn_rd.raw[FLEN_F-2:0] = {{EXPO_WIDTH_F{1'b1}}, 1'b1, {FRAC_WIDTH_F-1{1'b0}}};
        end
    end

    ////////////////////////////////////////////////////
    //FMIN/FMAX
    //Returns the larger/smaller argument
    //Canonicalizes NaNs and can raise invalid
    fp_t fminmax_rd;
    logic fminmax_hidden;
    logic fminmax_is_max;
    logic fminmax_signs_differ;
    logic fminmax_pick_rs1;

    //args.rm[0] = MAX, args.swap means rs2 > rs1
    assign fminmax_is_max = args.rm[0];
    assign fminmax_signs_differ = args.rs1.d.sign ^ args.rs2.d.sign;

    always_comb begin
        if (rs1_nan)
            fminmax_pick_rs1 = 0;
        else if (rs2_nan)
            fminmax_pick_rs1 = 1;
        else if (fminmax_signs_differ) //The negative operand is the MIN, the positive one is the MAX
            fminmax_pick_rs1 = ~(fminmax_is_max ^ args.rs2.d.sign);
        else //Equal signs: decided by the swap flag, inverted for negative operands
            fminmax_pick_rs1 = fminmax_is_max ^ args.swap ^ args.rs1.d.sign;
    end

    always_comb begin
        if (rs1_nan & rs2_nan) begin
            fminmax_rd = CANONICAL_NAN;
            fminmax_hidden = 1;
        end
        else if (fminmax_pick_rs1) begin
            fminmax_rd = args.rs1;
            fminmax_hidden = ~args.rs1_special_case.zero;
        end
        else begin
            fminmax_rd = args.rs2;
            fminmax_hidden = ~args.rs2_special_case.zero;
        end
    end

    ////////////////////////////////////////////////////
    //I2F
    //Converts an integer to a FP number
    //The actual shifting is done in postprocessing
    fp_t i2f_rd;
    grs_t i2f_grs;
    logic[4:0] int_clz;
    fp_shift_amt_t i2f_clz;
    logic int_rs1_zero;

    clz #(.WIDTH(32)) clz_inst (
        .clz_input(args.int_rs_abs),
        .clz(int_clz),
        .zero(int_rs1_zero)
    );

    assign i2f_rd.d.sign = args.i2f_sign;
    always_comb begin
        //A zero integer yields a zero exponent and no shift
        i2f_clz = '0;
        i2f_rd.d.expo = '0;
        if (~int_rs1_zero) begin
            i2f_clz[5:0] = int_clz + 1;
            i2f_rd.d.expo = BIAS+32;
        end
    end

    //When the mantissa shrinks sufficiently, the integer can no longer fit in the mantissa so it spills into the grs bits
    generate if (FRAC_WIDTH >= 32) begin : gen_int_fits
        always_comb begin
            i2f_grs = '0;
            i2f_rd.d.frac = '0;
            i2f_rd.d.frac[FRAC_WIDTH-1-:32] = args.int_rs_abs;
        end
    end else begin : gen_int_in_grs
        always_comb begin
            i2f_rd.d.frac[FRAC_WIDTH-1:0] = args.int_rs_abs[31-:FRAC_WIDTH];
            i2f_grs = '0;
            i2f_grs[GRS_WIDTH-1-:32-FRAC_WIDTH] = args.int_rs_abs[31-FRAC_WIDTH:0];
        end
    end endgenerate

    ////////////////////////////////////////////////////
    //Multiplex outputs of different units
    //Priority: fmv, d2s, fsgnj, fminmax, i2f, and s2d when no other operation is flagged
    always_comb begin
        //Defaults shared by all operations
        wb.fflags = '0;
        wb.hidden = 0;
        wb.grs = '0;
        wb.clz = '0;
        wb.d2s = 0;
        wb.expo_overflow = 0;
        wb.carry = 0;
        wb.safe = 0;
        wb.subnormal = 0;
        wb.right_shift = 0;
        wb.right_shift_amt = 'x;
        wb.ignore_max_expo = 1;

        if (args.fmv)
            wb.rd = fmv_rd;
        else if (args.d2s) begin
            wb.rd = d2s_rd;
            wb.d2s = 1;
            wb.hidden = args.rs1_hidden;
            wb.fflags.nv = args.rs1_special_case.snan;
        end
        else if (args.fsgnj) begin
            wb.rd = sgn_rd;
            wb.hidden = 1;
        end
        else if (args.fminmax) begin
            wb.rd = fminmax_rd;
            wb.d2s = args.single;
            wb.hidden = fminmax_hidden;
            wb.fflags.nv = args.rs1_special_case.snan | args.rs2_special_case.snan;
        end
        else if (args.i2f) begin
            wb.rd = i2f_rd;
            wb.d2s = args.single;
            wb.grs = i2f_grs;
            wb.clz = i2f_clz;
        end
        else begin
            wb.rd = s2d_rd;
            wb.hidden = args.rs1_hidden;
            wb.fflags.nv = args.rs1_special_case.snan;
        end
    end

endmodule
|
281
core/execution_units/fp_unit/fp_wb2int_misc.sv
Normal file
281
core/execution_units/fp_unit/fp_wb2int_misc.sv
Normal file
|
@ -0,0 +1,281 @@
|
|||
/*
|
||||
* Copyright © 2019-2023 Yuhui Gao, Chris Keilbart, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Yuhui Gao <yuhuig@sfu.ca>
|
||||
* Chris Keilbart <ckeilbar@sfu.ca>
|
||||
*/
|
||||
|
||||
module fp_wb2int_misc
|
||||
|
||||
import cva5_config::*;
|
||||
import cva5_types::*;
|
||||
import fpu_types::*;
|
||||
|
||||
(
|
||||
input logic clk,
|
||||
input logic rst,
|
||||
input fp_wb2int_misc_inputs_t args,
|
||||
unit_issue_interface.unit issue,
|
||||
unit_writeback_interface.unit wb,
|
||||
output fflags_t fflags
|
||||
);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
//Comparisons, classifications, conversions to integer, and moves sharing a single writeback port
|
||||
//Implemented as a 2 cycle pipeline (though only the conversion needs the second cycle)
|
||||
logic advance;
|
||||
assign advance = wb.ack | ~wb.done;
|
||||
assign issue.ready = advance;
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (rst)
|
||||
wb.done <= 0;
|
||||
else begin
|
||||
if (issue.new_request)
|
||||
wb.done <= 1;
|
||||
else if (wb.ack)
|
||||
wb.done <= 0;
|
||||
end
|
||||
|
||||
if (issue.new_request)
|
||||
wb.id <= issue.id;
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//FMV
|
||||
//Transfers bits unchanged from an FP register to an INT register
|
||||
//This instruction is meant to transfer single precision numbers, so in reduced precision only the single precision bits are used
|
||||
logic[31:0] fmv_rd;
|
||||
always_comb begin
|
||||
fmv_rd = '0;
|
||||
fmv_rd[FLEN_F-1:0] = args.rs1.raw[FLEN_F-1:0];
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//FCLASS
|
||||
//Outputs a number indicating the type of the operand
|
||||
//Encoded one hot
|
||||
logic[31:0] fclass_rd;
|
||||
always_comb begin
|
||||
fclass_rd = '0;
|
||||
fclass_rd[0] = args.rs1.d.sign & args.rs1_special_case.inf;
|
||||
fclass_rd[1] = args.rs1.d.sign & args.rs1_original_hidden_bit & ~|args.rs1_special_case;
|
||||
fclass_rd[2] = args.rs1.d.sign & ~args.rs1_original_hidden_bit & ~args.rs1_special_case.zero;
|
||||
fclass_rd[3] = args.rs1.d.sign & args.rs1_special_case.zero;
|
||||
fclass_rd[4] = ~args.rs1.d.sign & args.rs1_special_case.zero;
|
||||
fclass_rd[5] = ~args.rs1.d.sign & ~args.rs1_original_hidden_bit & ~args.rs1_special_case.zero;
|
||||
fclass_rd[6] = ~args.rs1.d.sign & args.rs1_original_hidden_bit & ~|args.rs1_special_case;
|
||||
fclass_rd[7] = ~args.rs1.d.sign & args.rs1_special_case.inf;
|
||||
fclass_rd[8] = args.rs1_special_case.snan;
|
||||
fclass_rd[9] = args.rs1_special_case.qnan;
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//FCMP
|
||||
//Implements equal, less than, and less than or equal
|
||||
//For these instructions, +-0 are identical and flags can be raised for NaN operands
|
||||
logic[31:0] fcmp_rd;
|
||||
logic invalid_cmp;
|
||||
logic unordered;
|
||||
logic sign_eq;
|
||||
logic expo_eq;
|
||||
logic frac_eq;
|
||||
logic feq;
|
||||
logic flt;
|
||||
logic fle;
|
||||
|
||||
//FLT/FLE are signalling (raise on NaN)
|
||||
assign invalid_cmp = (~args.rm[1] & (args.rs1_special_case.qnan | args.rs2_special_case.qnan)) | args.rs1_special_case.snan | args.rs2_special_case.snan;
|
||||
assign unordered = args.rs1_special_case.qnan | args.rs1_special_case.snan | args.rs2_special_case.qnan | args.rs2_special_case.snan;
|
||||
|
||||
assign sign_eq = args.rs1.d.sign == args.rs2.d.sign;
|
||||
assign expo_eq = args.rs1.d.expo == args.rs2.d.expo;
|
||||
assign frac_eq = args.rs1.d.frac == args.rs2.d.frac;
|
||||
|
||||
assign feq = (args.rs1_special_case.zero & args.rs2_special_case.zero) | (sign_eq & expo_eq & frac_eq);
|
||||
assign flt = sign_eq ? (args.swap ^ args.rs1.d.sign) & ~(sign_eq & expo_eq & frac_eq) : args.rs1.d.sign & ~(args.rs1_special_case.zero & args.rs2_special_case.zero);
|
||||
assign fle = flt | feq;
|
||||
|
||||
always_comb begin
|
||||
fcmp_rd = '0;
|
||||
if (args.rm[1])
|
||||
fcmp_rd[0] = feq & ~unordered;
|
||||
else if (args.rm[0])
|
||||
fcmp_rd[0] = flt & ~unordered;
|
||||
else
|
||||
fcmp_rd[0] = fle & ~unordered;
|
||||
end
|
||||
|
||||
|
||||
//Choose between the three single cycle operations
|
||||
logic[31:0] single_rd;
|
||||
logic single_valid;
|
||||
logic single_invalid;
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue.new_request) begin
|
||||
single_valid <= ~args.f2i;
|
||||
single_invalid <= args.fcmp & invalid_cmp;
|
||||
if (args.fcmp)
|
||||
single_rd <= fcmp_rd;
|
||||
else if (args.fclass)
|
||||
single_rd <= fclass_rd;
|
||||
else
|
||||
single_rd <= fmv_rd;
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//F2I
|
||||
//First cycle detects edge cases and shifts args
|
||||
//Second cycle rounds
|
||||
logic[2:0] grs;
|
||||
logic[FRAC_WIDTH:0] rs1_frac;
|
||||
logic[FRAC_WIDTH:0] f2i_frac;
|
||||
logic[31:0] f2i_int;
|
||||
logic[32+FRAC_WIDTH-1:0] shift_in;
|
||||
logic[32+FRAC_WIDTH-1:0] f2i_int_dot_frac;
|
||||
logic rs1_expo_unbiased_greater_than_31;
|
||||
logic rs1_expo_unbiased_greater_than_30;
|
||||
logic subtract;
|
||||
logic roundup;
|
||||
|
||||
//Cycle 1 - calculate roundup and detect special and edge cases
|
||||
assign rs1_expo_unbiased_greater_than_31 = args.rs1.d.expo > (BIAS+31);
|
||||
assign rs1_expo_unbiased_greater_than_30 = args.rs1.d.expo > (BIAS+30);
|
||||
assign rs1_frac = {args.rs1_hidden, args.rs1.d.frac};
|
||||
assign shift_in = {{31{1'b0}}, rs1_frac};
|
||||
|
||||
//Left shift according to exponent
|
||||
assign f2i_int_dot_frac = shift_in << args.rs1_expo_unbiased;
|
||||
always_comb begin
|
||||
if (args.int_less_than_1) begin
|
||||
f2i_int = '0;
|
||||
f2i_frac = rs1_frac;
|
||||
end else
|
||||
{f2i_int, f2i_frac} = {f2i_int_dot_frac, 1'b0};
|
||||
end
|
||||
|
||||
//Calculate rounding bits and -roundup or +roundup
|
||||
logic sticky;
|
||||
assign sticky = |f2i_frac[FRAC_WIDTH-2:0];
|
||||
always_comb begin
|
||||
if (args.int_less_than_1) begin
|
||||
if (args.rs1.d.expo == expo_d_t'(BIAS-1))
|
||||
grs = {f2i_frac[FRAC_WIDTH-:2], sticky};
|
||||
else if (args.rs1.d.expo == expo_d_t'(BIAS-2))
|
||||
grs = {1'b0, f2i_frac[FRAC_WIDTH], f2i_frac[FRAC_WIDTH-1] | sticky};
|
||||
else
|
||||
grs = {2'b0, (|f2i_frac[FRAC_WIDTH-:2] | sticky)};
|
||||
end else
|
||||
grs = {f2i_frac[FRAC_WIDTH-:2], sticky};
|
||||
end
|
||||
|
||||
fp_roundup f2i_int_roundup (
|
||||
.sign(args.rs1.d.sign),
|
||||
.rm(args.rm),
|
||||
.grs(grs),
|
||||
.lsb(f2i_int[0]),
|
||||
.roundup(roundup),
|
||||
.result_if_overflow()
|
||||
);
|
||||
|
||||
assign subtract = args.rs1.d.sign & args.is_signed;
|
||||
|
||||
//Special case handling - this is sometimes the critical path in the FPU
|
||||
//This special case detection can be done in the second cycle, which may make that a new critical path
|
||||
//However, calculating the roundup takes approximately the same amount of time as these special cases
|
||||
logic inexact;
|
||||
logic all_frac;
|
||||
logic greater_than_largest_unsigned_int;
|
||||
logic smaller_than_smallest_unsigned_int;
|
||||
logic greater_than_largest_signed_int;
|
||||
logic smaller_than_smallest_signed_int;
|
||||
logic special;
|
||||
assign inexact = |grs;
|
||||
assign all_frac = &f2i_int[30:0];
|
||||
|
||||
assign greater_than_largest_unsigned_int = ~args.is_signed & (~args.rs1.d.sign | args.rs1_special_case.snan | args.rs1_special_case.qnan) & ((f2i_int[31] & all_frac & roundup) | rs1_expo_unbiased_greater_than_31);
|
||||
assign smaller_than_smallest_unsigned_int = ~args.is_signed & args.rs1.d.sign & ~args.rs1_special_case.zero & ~(args.int_less_than_1 & ~roundup);
|
||||
assign greater_than_largest_signed_int = args.is_signed & ((args.rs1_special_case.snan | args.rs1_special_case.qnan | ~args.rs1.d.sign) & ((~f2i_int[31] & all_frac & roundup) | rs1_expo_unbiased_greater_than_30));
|
||||
assign smaller_than_smallest_signed_int = args.is_signed & args.rs1.d.sign & ((f2i_int[31] & (|f2i_int[30:0] | roundup)) | rs1_expo_unbiased_greater_than_31);
|
||||
assign special = (~args.is_signed & (greater_than_largest_unsigned_int | smaller_than_smallest_unsigned_int)) | (args.is_signed & (greater_than_largest_signed_int | smaller_than_smallest_signed_int));
|
||||
|
||||
|
||||
//Cycle 2 - do the rounding and override special cases
|
||||
//Input negative -> -roundup - f2i_int
|
||||
//Input positive -> roundup + f2i_int
|
||||
logic r_greater_than_largest_unsigned_int;
|
||||
logic r_greater_than_largest_signed_int;
|
||||
logic r_smaller_than_smallest_signed_int;
|
||||
logic r_inexact;
|
||||
logic r_special;
|
||||
logic r_subtract;
|
||||
logic r_roundup;
|
||||
logic[31:0] r_f2i_int;
|
||||
logic[31:0] in1;
|
||||
logic[31:0] in2;
|
||||
logic[31:0] f2i_int_rounded;
|
||||
logic[31:0] special_case_result;
|
||||
logic carry_in;
|
||||
assign in1 = r_subtract ? -(32'(r_roundup)) : 32'(r_roundup);
|
||||
assign in2 = r_f2i_int ^ {32{r_subtract}};
|
||||
assign {f2i_int_rounded, carry_in} = {in1, 1'b1} + {in2, r_subtract};
|
||||
|
||||
always_comb begin
|
||||
if (r_greater_than_largest_unsigned_int)
|
||||
special_case_result = 32'hffffffff; //2^32 - 1;
|
||||
else if (r_greater_than_largest_signed_int)
|
||||
special_case_result = 32'h7fffffff; //2^31 - 1;
|
||||
else if (r_smaller_than_smallest_signed_int)
|
||||
special_case_result = 32'h80000000; //-2^31;
|
||||
else
|
||||
special_case_result = 0;
|
||||
end
|
||||
|
||||
//F2I pipeline
|
||||
always_ff @ (posedge clk) begin
|
||||
if (issue.new_request) begin
|
||||
r_greater_than_largest_unsigned_int <= greater_than_largest_unsigned_int;
|
||||
r_greater_than_largest_signed_int <= greater_than_largest_signed_int;
|
||||
r_smaller_than_smallest_signed_int <= smaller_than_smallest_signed_int;
|
||||
r_inexact <= inexact;
|
||||
r_special <= special;
|
||||
r_f2i_int <= f2i_int;
|
||||
r_subtract <= subtract;
|
||||
r_roundup <= roundup;
|
||||
end
|
||||
end
|
||||
|
||||
//Multiplex the outputs from f2i and the single cycle units
|
||||
always_comb begin
    //Default: result and flags of the two-cycle f2i path
    wb.rd = r_special ? special_case_result : f2i_int_rounded;
    fflags = '0;
    fflags.nv = r_special;
    fflags.nx = r_inexact & ~r_special;

    //A valid single cycle result takes over the writeback data; it can only raise nv
    if (single_valid) begin
        wb.rd = single_rd;
        fflags.nv = single_invalid;
        fflags.nx = 1'b0;
    end
end
|
||||
|
||||
endmodule
|
277
core/execution_units/fp_unit/fpu_top.sv
Normal file
277
core/execution_units/fp_unit/fpu_top.sv
Normal file
|
@ -0,0 +1,277 @@
|
|||
/*
|
||||
* Copyright © 2019-2023 Yuhui Gao, Chris Keilbart, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Yuhui Gao <yuhuig@sfu.ca>
|
||||
* Chris Keilbart <ckeilbar@sfu.ca>
|
||||
*/
|
||||
|
||||
module fpu_top
|
||||
|
||||
import cva5_config::*;
|
||||
import cva5_types::*;
|
||||
import fpu_types::*;
|
||||
import opcodes::*;
|
||||
|
||||
#(
|
||||
parameter cpu_config_t CONFIG = EXAMPLE_CONFIG
|
||||
)
|
||||
(
|
||||
input logic clk,
|
||||
input logic rst,
|
||||
|
||||
input decode_packet_t decode_stage,
|
||||
output logic unit_needed,
|
||||
output logic[REGFILE_READ_PORTS-1:0] uses_rs,
|
||||
output logic[2:0] fp_uses_rs,
|
||||
output logic uses_rd,
|
||||
output logic fp_uses_rd,
|
||||
|
||||
input logic issue_stage_ready,
|
||||
input logic[2:0] dyn_rm,
|
||||
input logic[31:0] int_rf[REGFILE_READ_PORTS],
|
||||
input logic[FLEN-1:0] fp_rf[3],
|
||||
|
||||
unit_issue_interface.unit issue,
|
||||
unit_writeback_interface.unit int_wb,
|
||||
unit_writeback_interface.unit fp_wb,
|
||||
output fflags_t fflags
|
||||
);
|
||||
|
||||
fp_madd_inputs_t madd_inputs;
|
||||
fp_div_inputs_t div_inputs;
|
||||
fp_sqrt_inputs_t sqrt_inputs;
|
||||
fp_wb2fp_misc_inputs_t wb2fp_inputs;
|
||||
fp_wb2int_misc_inputs_t wb2int_inputs;
|
||||
fflags_t int_fflags;
|
||||
fflags_t fp_fflags;
|
||||
unit_issue_interface intermediate_issue[4:0](); //FMA, FDIV, FSQRT, WB2FP, WB2INT
|
||||
fp_intermediate_wb_interface intermediate_unit_wb[3:0](); //FMADD, FMUL, FDIV/FSQRT, WB2FP
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
//This unit instantiates the internal FPU components and connects them
|
||||
//It is also responsible for instruction decoding
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Decode
|
||||
always_comb begin
|
||||
uses_rs = '0;
|
||||
uses_rs[RS1] = decode_stage.instruction inside {
|
||||
SP_FCVT_S_W, SP_FCVT_S_WU, SP_FMV_W_X,
|
||||
DP_FCVT_D_W, DP_FCVT_D_WU
|
||||
};
|
||||
|
||||
fp_uses_rs = '0;
|
||||
fp_uses_rs[RS1] = decode_stage.instruction inside {
|
||||
SP_FMADD, SP_FMSUB, SP_FNMSUB, SP_FNMADD, SP_FADD, SP_FSUB, SP_FMUL,
|
||||
SP_FDIV, SP_FSQRT, SP_FSGNJ, SP_FSGNJN, SP_FSGNJX, SP_FMIN, SP_FMAX,
|
||||
SP_FCVT_W_S, SP_FCVT_WU_S, SP_FMV_X_W, SP_FEQ, SP_FLT, SP_FLE, SP_FCLASS,
|
||||
DP_FMADD, DP_FMSUB, DP_FNMSUB, DP_FNMADD, DP_FADD, DP_FSUB, DP_FMUL,
|
||||
DP_FDIV, DP_FSQRT, DP_FSGNJ, DP_FSGNJN, DP_FSGNJX, DP_FMIN, DP_FMAX,
|
||||
DP_FCVT_S_D, DP_FCVT_D_S, DP_FEQ, DP_FLT, DP_FLE, DP_FCLASS, DP_FCVT_W_D, DP_FCVT_WU_D
|
||||
};
|
||||
fp_uses_rs[RS2] = decode_stage.instruction inside {
|
||||
SP_FMADD, SP_FMSUB, SP_FNMSUB, SP_FNMADD, SP_FADD, SP_FSUB, SP_FMUL,
|
||||
SP_FDIV, SP_FSQRT, SP_FSGNJ, SP_FSGNJN, SP_FSGNJX, SP_FMIN, SP_FMAX,
|
||||
SP_FEQ, SP_FLT, SP_FLE,
|
||||
DP_FMADD, DP_FMSUB, DP_FNMSUB, DP_FNMADD, DP_FADD, DP_FSUB, DP_FMUL,
|
||||
DP_FDIV, DP_FSQRT, DP_FSGNJ, DP_FSGNJN, DP_FSGNJX, DP_FMIN, DP_FMAX,
|
||||
DP_FEQ, DP_FLT, DP_FLE
|
||||
};
|
||||
fp_uses_rs[RS3] = decode_stage.instruction inside {
|
||||
SP_FMADD, SP_FMSUB, SP_FNMSUB, SP_FNMADD,
|
||||
DP_FMADD, DP_FMSUB, DP_FNMSUB, DP_FNMADD
|
||||
};
|
||||
|
||||
uses_rd = decode_stage.instruction inside {
|
||||
SP_FCVT_W_S, SP_FCVT_WU_S, SP_FMV_X_W, SP_FEQ, SP_FLT, SP_FLE, SP_FCLASS,
|
||||
DP_FEQ, DP_FLT, DP_FLE, DP_FCLASS, DP_FCVT_W_D, DP_FCVT_WU_D
|
||||
};
|
||||
fp_uses_rd = decode_stage.instruction inside {
|
||||
SP_FMADD, SP_FMSUB, SP_FNMSUB, SP_FNMADD, SP_FADD, SP_FSUB, SP_FMUL,
|
||||
SP_FDIV, SP_FSQRT, SP_FSGNJ, SP_FSGNJN, SP_FSGNJX, SP_FMIN, SP_FMAX,
|
||||
SP_FCVT_S_W, SP_FCVT_S_WU, SP_FMV_W_X,
|
||||
DP_FMADD, DP_FMSUB, DP_FNMSUB, DP_FNMADD, DP_FADD, DP_FSUB, DP_FMUL,
|
||||
DP_FDIV, DP_FSQRT, DP_FSGNJ, DP_FSGNJN, DP_FSGNJX, DP_FMIN, DP_FMAX,
|
||||
DP_FCVT_S_D, DP_FCVT_D_S, DP_FCVT_D_W, DP_FCVT_D_WU
|
||||
};
|
||||
|
||||
unit_needed = decode_stage.instruction inside {
|
||||
SP_FMADD, SP_FMSUB, SP_FNMSUB, SP_FNMADD, SP_FADD, SP_FSUB, SP_FMUL,
|
||||
DP_FMADD, DP_FMSUB, DP_FNMSUB, DP_FNMADD, DP_FADD, DP_FSUB, DP_FMUL,
|
||||
SP_FDIV, SP_FSQRT,
|
||||
DP_FDIV, DP_FSQRT,
|
||||
SP_FSGNJ, SP_FSGNJN, SP_FSGNJX, SP_FMIN, SP_FMAX, SP_FCVT_S_W, SP_FCVT_S_WU, SP_FMV_W_X,
|
||||
DP_FSGNJ, DP_FSGNJN, DP_FSGNJX, DP_FMIN, DP_FMAX, DP_FCVT_S_D, DP_FCVT_D_S, DP_FCVT_D_W, DP_FCVT_D_WU,
|
||||
SP_FCVT_W_S, SP_FCVT_WU_S, SP_FMV_X_W, SP_FEQ, SP_FLT, SP_FLE, SP_FCLASS,
|
||||
DP_FEQ, DP_FLT, DP_FLE, DP_FCLASS, DP_FCVT_W_D, DP_FCVT_WU_D
|
||||
};
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Shared preprocessing
|
||||
logic is_single;
|
||||
//Instruction families
|
||||
logic is_fma;
|
||||
logic is_fmul;
|
||||
logic is_fadd;
|
||||
logic is_div;
|
||||
logic is_sqrt;
|
||||
logic is_i2f;
|
||||
logic is_mv_i2f;
|
||||
logic is_s2d;
|
||||
logic is_d2s;
|
||||
logic is_minmax;
|
||||
logic is_sign_inj;
|
||||
logic is_f2i;
|
||||
logic is_mv_f2i;
|
||||
logic is_fcmp;
|
||||
logic is_class;
|
||||
//Used to distinguish between instructions in a family
|
||||
logic add;
|
||||
logic neg_mul;
|
||||
logic conv_signed;
|
||||
logic is_sign_inj_single;
|
||||
rm_t rm_r;
|
||||
|
||||
fp_preprocessing_packet_t pkt;
|
||||
assign pkt.valid = issue.new_request;
|
||||
assign pkt.unit[0] = is_fma | is_fmul | is_fadd;
|
||||
assign pkt.unit[1] = is_div;
|
||||
assign pkt.unit[2] = is_sqrt;
|
||||
assign pkt.unit[3] = is_i2f | is_mv_i2f | is_minmax | is_sign_inj | is_s2d | is_d2s;
|
||||
assign pkt.unit[4] = is_f2i | is_mv_f2i | is_fcmp | is_class;
|
||||
assign pkt.rs1 = fp_rf[RS1];
|
||||
assign pkt.rs2 = fp_rf[RS2];
|
||||
assign pkt.rs3 = fp_rf[RS3];
|
||||
assign pkt.int_rs1 = int_rf[RS1];
|
||||
assign pkt.id = issue.id;
|
||||
assign pkt.is_single = is_single;
|
||||
assign pkt.is_fma = is_fma;
|
||||
assign pkt.is_fadd = is_fadd;
|
||||
assign pkt.is_i2f = is_i2f;
|
||||
assign pkt.is_d2s = is_d2s;
|
||||
assign pkt.is_minmax = is_minmax;
|
||||
assign pkt.is_sign_inj = is_sign_inj;
|
||||
assign pkt.is_sign_inj_single = is_sign_inj_single;
|
||||
assign pkt.is_f2i = is_f2i;
|
||||
assign pkt.is_mv_i2f = is_mv_i2f;
|
||||
assign pkt.is_fcmp = is_fcmp;
|
||||
assign pkt.is_class = is_class;
|
||||
assign pkt.add = add;
|
||||
assign pkt.neg_mul = neg_mul;
|
||||
assign pkt.conv_signed = conv_signed;
|
||||
assign pkt.rm = &rm_r ? dyn_rm : rm_r;
|
||||
|
||||
//Registered partial decode of the instruction currently in the decode stage.
//Updated only while issue_stage_ready is high. The family bits compare instruction[31:27]
//against wildcard patterns ('?' bits are don't-care) and are qualified by instruction[4];
//is_fma is simply the inverse of instruction[4].
always_ff @(posedge clk) begin
    if (issue_stage_ready) begin
        //Only the instructions that convert their arguments from s->d
        is_single <= decode_stage.instruction inside {
            SP_FMADD, SP_FMSUB, SP_FNMSUB, SP_FNMADD, SP_FADD, SP_FSUB, SP_FMUL,
            SP_FDIV, SP_FSQRT, SP_FMIN, SP_FMAX, SP_FCVT_S_W, SP_FCVT_S_WU, DP_FCVT_D_S,
            SP_FCVT_W_S, SP_FCVT_WU_S, SP_FEQ, SP_FLT, SP_FLE, SP_FCLASS
        };

        //Multiply/add families (pkt.unit[0])
        is_fma <= ~decode_stage.instruction[4];
        is_fmul <= decode_stage.instruction[4] & (decode_stage.instruction[31:27] ==? 5'b0??10);
        is_fadd <= decode_stage.instruction[4] & (decode_stage.instruction[31:27] ==? 5'b0000?);

        //Divide and square root (pkt.unit[1] and pkt.unit[2])
        is_div <= decode_stage.instruction[4] & (decode_stage.instruction[31:27] ==? 5'b00?11);
        is_sqrt <= decode_stage.instruction[4] & (decode_stage.instruction[31:27] ==? 5'b01?1?);

        //Families writing back to the FP register file (pkt.unit[3])
        is_i2f <= decode_stage.instruction[4] & (decode_stage.instruction[31:27] ==? 5'b1?01?);
        is_mv_i2f <= decode_stage.instruction[4] & (decode_stage.instruction[31:27] ==? 5'b1?11?);
        is_minmax <= decode_stage.instruction[4] & (decode_stage.instruction[31:27] ==? 5'b0?1?1);
        is_sign_inj <= decode_stage.instruction[4] & (decode_stage.instruction[31:27] ==? 5'b0?1?0);
        //s2d and d2s share one pattern and are told apart by instruction[20]
        is_s2d <= decode_stage.instruction[4] & (decode_stage.instruction[31:27] ==? 5'b01?0?) & ~decode_stage.instruction[20];
        is_d2s <= decode_stage.instruction[4] & (decode_stage.instruction[31:27] ==? 5'b01?0?) & decode_stage.instruction[20];

        //Families writing back to the integer register file (pkt.unit[4])
        is_f2i <= decode_stage.instruction[4] & (decode_stage.instruction[31:27] ==? 5'b1?00?);
        is_fcmp <= decode_stage.instruction[4] & (decode_stage.instruction[31:27] ==? 5'b10???);
        //mv_f2i and class share one pattern and are told apart by instruction[12]
        is_mv_f2i <= decode_stage.instruction[4] & (decode_stage.instruction[31:27] ==? 5'b1110?) & ~decode_stage.instruction[12];
        is_class <= decode_stage.instruction[4] & (decode_stage.instruction[31:27] ==? 5'b1110?) & decode_stage.instruction[12];

        //Qualifiers used to distinguish between instructions in a family
        is_sign_inj_single <= ~decode_stage.instruction[25];
        //Double duty for both FADD and FMA
        add <= decode_stage.instruction[4] ? ~decode_stage.instruction[27] : decode_stage.instruction[3:2] inside {2'b00, 2'b10};
        neg_mul <= decode_stage.instruction[3];
        conv_signed <= ~decode_stage.instruction[20];
        rm_r <= decode_stage.instruction[14:12];
    end
end
|
||||
|
||||
fp_preprocessing #(.CONFIG(CONFIG), .FP_NUM_UNITS(5)) fp_preprocessing_inst (
|
||||
.unit_issue(intermediate_issue),
|
||||
.pkt(pkt),
|
||||
.ready(issue.ready),
|
||||
.madd_args(madd_inputs),
|
||||
.div_args(div_inputs),
|
||||
.sqrt_args(sqrt_inputs),
|
||||
.wb2fp_args(wb2fp_inputs),
|
||||
.wb2int_args(wb2int_inputs),
|
||||
.*);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Execution Units
|
||||
fp_madd_wrapper #(.CONFIG(CONFIG)) fp_madd_inst (
|
||||
.args(madd_inputs),
|
||||
.issue(intermediate_issue[0]),
|
||||
.madd_wb(intermediate_unit_wb[1]),
|
||||
.mul_wb(intermediate_unit_wb[2]),
|
||||
.*);
|
||||
|
||||
fp_div_sqrt_wrapper div_sqrt_inst (
|
||||
.div_inputs(div_inputs),
|
||||
.sqrt_inputs(sqrt_inputs),
|
||||
.div_issue(intermediate_issue[1]),
|
||||
.sqrt_issue(intermediate_issue[2]),
|
||||
.wb(intermediate_unit_wb[0]),
|
||||
.*);
|
||||
|
||||
fp_wb2fp_misc wb2fp_misc_inst (
|
||||
.args(wb2fp_inputs),
|
||||
.issue(intermediate_issue[3]),
|
||||
.wb(intermediate_unit_wb[3])
|
||||
);
|
||||
|
||||
fp_wb2int_misc wb2int_misc_inst (
|
||||
.args(wb2int_inputs),
|
||||
.issue(intermediate_issue[4]),
|
||||
.wb(int_wb),
|
||||
.fflags(int_fflags),
|
||||
.*);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Normalization and rounding
|
||||
fp_normalize_rounding_top #(.NUM_WB_UNITS(4)) norm_round_inst (
|
||||
.intermediate_wb(intermediate_unit_wb),
|
||||
.wb(fp_wb),
|
||||
.fflags(fp_fflags),
|
||||
.*);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Updating flags
|
||||
//Combine both wb2int and wb2fp in one because they can writeback simultaneously
|
||||
logic fp_accepted;
|
||||
logic int_accepted;
|
||||
assign fp_accepted = fp_wb.done & fp_wb.ack;
|
||||
assign int_accepted = int_wb.done & int_wb.ack;
|
||||
|
||||
always_comb begin
    //Start with no flags raised and OR in the flags of every writeback accepted this cycle;
    //when both are accepted the result is the union of the two flag sets
    fflags = '0;
    if (fp_accepted)
        fflags = fflags | fp_fflags;
    if (int_accepted)
        fflags = fflags | int_fflags;
end
|
||||
|
||||
endmodule
|
|
@ -26,6 +26,7 @@ module gc_unit
|
|||
import riscv_types::*;
|
||||
import cva5_types::*;
|
||||
import csr_types::*;
|
||||
import opcodes::*;
|
||||
|
||||
# (
|
||||
parameter cpu_config_t CONFIG = EXAMPLE_CONFIG
|
||||
|
@ -36,8 +37,17 @@ module gc_unit
|
|||
input logic rst,
|
||||
|
||||
//Decode
|
||||
input decode_packet_t decode_stage,
|
||||
output logic unit_needed,
|
||||
output logic [REGFILE_READ_PORTS-1:0] uses_rs,
|
||||
output logic uses_rd,
|
||||
|
||||
input issue_packet_t issue_stage,
|
||||
input logic issue_stage_ready,
|
||||
input logic [31:0] constant_alu,
|
||||
input logic [31:0] rf [REGFILE_READ_PORTS],
|
||||
|
||||
unit_issue_interface.unit issue,
|
||||
input gc_inputs_t gc_inputs,
|
||||
|
||||
//Branch mispredict
|
||||
input logic branch_flush,
|
||||
|
@ -54,8 +64,6 @@ module gc_unit
|
|||
input logic [31:0] epc,
|
||||
|
||||
//Retire
|
||||
input retire_packet_t retire,
|
||||
input id_t retire_ids [RETIRE_PORTS],
|
||||
input id_t retire_ids_next [RETIRE_PORTS],
|
||||
input logic [$clog2(NUM_EXCEPTION_SOURCES)-1:0] current_exception_unit,
|
||||
|
||||
|
@ -110,6 +118,7 @@ module gc_unit
|
|||
|
||||
//LS exceptions (misaligned, TLB and MMU) (issue stage)
|
||||
//fetch flush, take exception. If execute or later exception occurs first, exception is overridden
|
||||
common_instruction_t instruction;//rs1_addr, rs2_addr, fn3, fn7, rd_addr, upper/lower opcode
|
||||
|
||||
typedef enum {RST_STATE, PRE_CLEAR_STATE, INIT_CLEAR_STATE, IDLE_STATE, TLB_CLEAR_STATE, POST_ISSUE_DRAIN, PRE_ISSUE_FLUSH, POST_ISSUE_DISCARD} gc_state;
|
||||
gc_state state;
|
||||
|
@ -118,7 +127,6 @@ module gc_unit
|
|||
logic init_clear_done;
|
||||
logic tlb_clear_done;
|
||||
|
||||
gc_inputs_t gc_inputs_r;
|
||||
logic post_issue_idle;
|
||||
logic ifence_in_progress;
|
||||
logic ret_in_progress;
|
||||
|
@ -135,8 +143,52 @@ module gc_unit
|
|||
logic gc_pc_override;
|
||||
logic [31:0] gc_pc;
|
||||
|
||||
typedef struct packed{
|
||||
logic [31:0] pc_p4;
|
||||
logic is_ifence;
|
||||
logic is_mret;
|
||||
logic is_sret;
|
||||
} gc_inputs_t;
|
||||
|
||||
gc_inputs_t gc_inputs;
|
||||
gc_inputs_t gc_inputs_r;
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Decode
|
||||
logic is_ifence;
|
||||
logic is_mret;
|
||||
logic is_sret;
|
||||
|
||||
assign instruction = decode_stage.instruction;
|
||||
|
||||
assign unit_needed =
|
||||
(CONFIG.INCLUDE_M_MODE & decode_stage.instruction inside {MRET}) |
|
||||
(CONFIG.INCLUDE_S_MODE & decode_stage.instruction inside {SRET, SFENCE_VMA}) |
|
||||
(CONFIG.INCLUDE_IFENCE & decode_stage.instruction inside {FENCE_I});
|
||||
always_comb begin
|
||||
uses_rs = '0;
|
||||
uses_rs[RS1] = CONFIG.INCLUDE_S_MODE & decode_stage.instruction inside {SFENCE_VMA};
|
||||
uses_rd = 0;
|
||||
end
|
||||
|
||||
//Registered decode of the instruction classes handled by this unit.
//The flags are captured while issue_stage_ready is asserted and hold their value otherwise.
//Each flag is gated by its CONFIG enable, so a disabled feature never sets its flag.
//Non-blocking assignments are used so that other clocked processes sampling these
//flags on the same edge see the previous value (no simulation race).
always_ff @(posedge clk) begin
    if (issue_stage_ready) begin
        is_ifence <= (instruction.upper_opcode == FENCE_T) & CONFIG.INCLUDE_IFENCE;
        is_mret <= (instruction.upper_opcode == SYSTEM_T) & (decode_stage.instruction[31:20] == MRET_imm) & CONFIG.INCLUDE_M_MODE;
        is_sret <= (instruction.upper_opcode == SYSTEM_T) & (decode_stage.instruction[31:20] == SRET_imm) & CONFIG.INCLUDE_S_MODE;
    end
end
|
||||
|
||||
assign gc_inputs.pc_p4 = constant_alu;
|
||||
assign gc_inputs.is_ifence = is_ifence;
|
||||
assign gc_inputs.is_mret = is_mret;
|
||||
assign gc_inputs.is_sret = is_sret;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Issue
|
||||
|
||||
//Input registering
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue.new_request)
|
|
@ -22,13 +22,12 @@
|
|||
|
||||
module addr_hash
|
||||
|
||||
import cva5_config::*;
|
||||
import riscv_types::*;
|
||||
import cva5_types::*;
|
||||
|
||||
#(
|
||||
parameter logic USE_BIT_3 = 1
|
||||
)
|
||||
(
|
||||
input logic clk,
|
||||
input logic rst,
|
||||
input logic [31:0] addr,
|
||||
output addr_hash_t addr_hash
|
||||
);
|
||||
|
@ -36,9 +35,9 @@ module addr_hash
|
|||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
//Xor addr in groups of 4-bits, truncating to the virtual/physical address invariant bits (11:0)
|
||||
//lower two bits are not used due to complications in determining overlap between byte
|
||||
//halfword and word operations.
|
||||
assign addr_hash[0] = addr[2] ^ addr[6] ^ addr[10];
|
||||
//lower two bits (and third in double) are not used due to complications in determining
|
||||
//overlap between byte doubleword, halfword and word operations.
|
||||
assign addr_hash[0] = (USE_BIT_3 & addr[2]) ^ addr[6] ^ addr[10];
|
||||
assign addr_hash[1] = addr[3] ^ addr[7] ^ addr[11];
|
||||
assign addr_hash[2] = addr[4] ^ addr[8];
|
||||
assign addr_hash[3] = addr[5] ^ addr[9];
|
327
core/execution_units/load_store_unit/dcache.sv
Normal file
327
core/execution_units/load_store_unit/dcache.sv
Normal file
|
@ -0,0 +1,327 @@
|
|||
/*
|
||||
* Copyright © 2022 Eric Matthews
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Eric Matthews <ematthew@sfu.ca>
|
||||
*/
|
||||
|
||||
module dcache
|
||||
|
||||
import cva5_config::*;
|
||||
import riscv_types::*;
|
||||
import cva5_types::*;
|
||||
|
||||
# (
|
||||
parameter cpu_config_t CONFIG = EXAMPLE_CONFIG
|
||||
)
|
||||
(
|
||||
input logic clk,
|
||||
input logic rst,
|
||||
input logic dcache_on,
|
||||
l1_arbiter_request_interface.master l1_request,
|
||||
l1_arbiter_return_interface.master l1_response,
|
||||
input logic sc_complete,
|
||||
input logic sc_success,
|
||||
input logic clear_reservation,
|
||||
input amo_details_t amo,
|
||||
input logic uncacheable_load,
|
||||
input logic uncacheable_store,
|
||||
input logic is_load,
|
||||
input logic load_request,
|
||||
input logic store_request,
|
||||
output logic load_ready,
|
||||
output logic store_ready,
|
||||
input data_access_shared_inputs_t ls_load,
|
||||
input data_access_shared_inputs_t ls_store,
|
||||
memory_sub_unit_interface.responder ls
|
||||
);
|
||||
|
||||
localparam derived_cache_config_t SCONFIG = get_derived_cache_params(CONFIG, CONFIG.DCACHE, CONFIG.DCACHE_ADDR);
|
||||
localparam LOG2_WAYS = (CONFIG.DCACHE.WAYS == 1) ? 1 : $clog2(CONFIG.DCACHE.WAYS);
|
||||
|
||||
localparam bit [SCONFIG.SUB_LINE_ADDR_W-1:0] END_OF_LINE_COUNT = SCONFIG.SUB_LINE_ADDR_W'(CONFIG.DCACHE.LINE_W-1);
|
||||
|
||||
cache_functions_interface # (.LINE_W(SCONFIG.LINE_ADDR_W), .SUB_LINE_W(SCONFIG.SUB_LINE_ADDR_W)) addr_utils ();
|
||||
|
||||
typedef struct packed{
|
||||
logic [31:0] addr;
|
||||
logic uncacheable;
|
||||
} load_stage2_t;
|
||||
load_stage2_t stage2_load;
|
||||
|
||||
typedef struct packed{
|
||||
logic [31:0] addr;
|
||||
logic [3:0] be;
|
||||
logic [31:0] data;
|
||||
logic cache_op;
|
||||
logic uncacheable;
|
||||
} store_stage2_t;
|
||||
store_stage2_t stage2_store;
|
||||
|
||||
logic [CONFIG.DCACHE.WAYS-1:0] load_tag_hit_way;
|
||||
logic [CONFIG.DCACHE.WAYS-1:0] store_tag_hit_way;
|
||||
|
||||
logic [CONFIG.DCACHE.WAYS-1:0] replacement_way;
|
||||
logic [CONFIG.DCACHE.WAYS-1:0] replacement_way_r;
|
||||
|
||||
logic load_tag_check;
|
||||
logic load_hit;
|
||||
logic store_hit;
|
||||
logic [LOG2_WAYS-1:0] tag_hit_index;
|
||||
logic [LOG2_WAYS-1:0] replacement_index;
|
||||
logic [LOG2_WAYS-1:0] replacement_index_r;
|
||||
logic [LOG2_WAYS-1:0] load_sel;
|
||||
|
||||
logic is_target_word;
|
||||
logic [SCONFIG.SUB_LINE_ADDR_W-1:0] word_count;
|
||||
logic miss_data_valid;
|
||||
logic line_complete;
|
||||
|
||||
logic arb_load_sel;
|
||||
logic load_l1_arb_ack;
|
||||
logic store_l1_arb_ack;
|
||||
|
||||
logic [31:0] ram_load_data [CONFIG.DCACHE.WAYS-1:0];
|
||||
|
||||
typedef enum {
|
||||
LOAD_IDLE = 0,
|
||||
LOAD_HIT_CHECK = 1,
|
||||
LOAD_L1_REQUEST = 2,
|
||||
LOAD_FILL = 3
|
||||
} load_path_enum_t;
|
||||
logic [3:0] load_state, load_state_next;
|
||||
|
||||
typedef enum {
|
||||
STORE_IDLE = 0,
|
||||
STORE_L1_REQUEST = 1
|
||||
} store_path_enum_t;
|
||||
logic [1:0] store_state, store_state_next;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Load Path
|
||||
always_ff @ (posedge clk) begin
    //One-hot state register: reset leaves only the LOAD_IDLE bit set,
    //otherwise the combinationally computed next state is loaded every cycle
    if (rst)
        load_state <= (4'b1 << LOAD_IDLE);
    else
        load_state <= load_state_next;
end
|
||||
|
||||
always_comb begin
|
||||
load_state_next[LOAD_IDLE] = (load_state[LOAD_IDLE] & ~load_request) | ((load_hit & ~load_request) | line_complete);
|
||||
load_state_next[LOAD_HIT_CHECK] = load_request;
|
||||
load_state_next[LOAD_L1_REQUEST] = (load_state[LOAD_L1_REQUEST] & ~load_l1_arb_ack) | (load_state[LOAD_HIT_CHECK] & ~load_hit);
|
||||
load_state_next[LOAD_FILL] = (load_state[LOAD_FILL] & ~line_complete) | (load_state[LOAD_L1_REQUEST] & load_l1_arb_ack);
|
||||
end
|
||||
|
||||
assign load_ready = (load_state[LOAD_IDLE] | load_hit) & (store_state[STORE_IDLE] | store_l1_arb_ack);
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
if (load_request) begin
|
||||
stage2_load.addr <= ls_load.addr;
|
||||
stage2_load.uncacheable <= uncacheable_load;
|
||||
end
|
||||
end
|
||||
|
||||
assign load_tag_check = load_request & dcache_on & ~uncacheable_load;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Load Miss
|
||||
always_ff @ (posedge clk) begin
|
||||
if (load_request)
|
||||
word_count <= 0;
|
||||
else
|
||||
word_count <= word_count + SCONFIG.SUB_LINE_ADDR_W'(l1_response.data_valid);
|
||||
end
|
||||
assign is_target_word = (stage2_load.addr[2 +: SCONFIG.SUB_LINE_ADDR_W] == word_count) | stage2_load.uncacheable;
|
||||
|
||||
assign line_complete = l1_response.data_valid & ((word_count == END_OF_LINE_COUNT) | stage2_load.uncacheable);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Store Path
|
||||
always_ff @ (posedge clk) begin
    //One-hot state register: reset leaves only the STORE_IDLE bit set,
    //otherwise the combinationally computed next state is loaded every cycle
    if (rst)
        store_state <= (2'b1 << STORE_IDLE);
    else
        store_state <= store_state_next;
end
|
||||
|
||||
always_comb begin
|
||||
store_state_next[STORE_IDLE] = (store_state[STORE_IDLE] & (~store_request | (store_request & ls_store.cache_op))) | (store_l1_arb_ack & ~store_request);
|
||||
store_state_next[STORE_L1_REQUEST] = (store_state[STORE_L1_REQUEST] & ~store_l1_arb_ack) | (store_request & ~ls_store.cache_op);
|
||||
end
|
||||
assign store_ready = (store_state[STORE_IDLE] | store_l1_arb_ack) & (load_state[LOAD_IDLE] | load_hit);
|
||||
|
||||
assign ls.ready = is_load ? load_ready : store_ready;
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
if (store_request) begin
|
||||
stage2_store.addr <= ls_store.addr;
|
||||
stage2_store.uncacheable <= uncacheable_store;
|
||||
stage2_store.be <= ls_store.be;
|
||||
stage2_store.data <= ls_store.data_in;
|
||||
stage2_store.cache_op <= ls_store.cache_op;
|
||||
end
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//L1 Arbiter Interface
|
||||
//Priority to oldest request
|
||||
fifo_interface #(.DATA_TYPE(logic)) request_order();
|
||||
|
||||
assign request_order.data_in = load_request;
|
||||
assign request_order.push = load_request | (store_request & ~ls_store.cache_op);
|
||||
assign request_order.potential_push = request_order.push;
|
||||
|
||||
assign request_order.pop = l1_request.ack | load_hit;
|
||||
|
||||
cva5_fifo #(.DATA_TYPE(logic), .FIFO_DEPTH(2))
|
||||
request_order_fifo (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.fifo (request_order)
|
||||
);
|
||||
|
||||
assign arb_load_sel = request_order.data_out;
|
||||
|
||||
assign l1_request.addr = arb_load_sel ? stage2_load.addr : stage2_store.addr;//Memory interface aligns request to burst size (done there to support AMO line-read word-write)
|
||||
assign l1_request.data = stage2_store.data;
|
||||
assign l1_request.rnw = arb_load_sel;
|
||||
assign l1_request.be = stage2_store.be;
|
||||
assign l1_request.size = (arb_load_sel & ~stage2_load.uncacheable) ? 5'(CONFIG.DCACHE.LINE_W-1) : 0;//LR and AMO ops are included in load
|
||||
assign l1_request.is_amo = 0;
|
||||
assign l1_request.amo = 0;
|
||||
|
||||
assign l1_request.request = load_state[LOAD_L1_REQUEST] | store_state[STORE_L1_REQUEST];
|
||||
|
||||
assign load_l1_arb_ack = l1_request.ack & arb_load_sel;
|
||||
assign store_l1_arb_ack = l1_request.ack & ~arb_load_sel;
|
||||
////////////////////////////////////////////////////
|
||||
//Replacement policy (free-running one-hot cycler, i.e. pseudo-random)
|
||||
cycler #(CONFIG.DCACHE.WAYS) replacement_policy (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.en (1'b1),
|
||||
.one_hot (replacement_way)
|
||||
);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Tag banks
|
||||
dcache_tag_banks #(.CONFIG(CONFIG), .SCONFIG(SCONFIG))
|
||||
tag_banks (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.load_addr (ls_load.addr),
|
||||
.load_req (load_tag_check),
|
||||
.miss_addr (stage2_load.addr),
|
||||
.miss_req (load_l1_arb_ack),
|
||||
.miss_way (replacement_way),
|
||||
.inv_addr ({l1_response.inv_addr, 2'b0}),
|
||||
.extern_inv (l1_response.inv_valid),
|
||||
.extern_inv_complete (l1_response.inv_ack),
|
||||
.store_addr (ls_store.addr),
|
||||
.store_addr_r (stage2_store.addr),
|
||||
.store_req (store_request),
|
||||
.cache_op_req (ls_store.cache_op),
|
||||
.load_tag_hit (load_hit),
|
||||
.load_tag_hit_way (load_tag_hit_way),
|
||||
.store_tag_hit (store_hit),
|
||||
.store_tag_hit_way (store_tag_hit_way)
|
||||
);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Data Bank(s)
|
||||
logic [SCONFIG.LINE_ADDR_W+SCONFIG.SUB_LINE_ADDR_W-1:0] data_read_addr;
|
||||
assign data_read_addr = load_state[LOAD_FILL] ? {addr_utils.getTagLineAddr(stage2_load.addr), word_count} : addr_utils.getDataLineAddr(ls_load.addr);
|
||||
|
||||
generate for (genvar i=0; i < CONFIG.DCACHE.WAYS; i++) begin : data_bank_gen
|
||||
byte_en_bram #(CONFIG.DCACHE.LINES*CONFIG.DCACHE.LINE_W) data_bank (
|
||||
.clk(clk),
|
||||
.addr_a(data_read_addr),
|
||||
.addr_b(addr_utils.getDataLineAddr(stage2_store.addr)),
|
||||
.en_a(load_tag_check | (replacement_way_r[i] & l1_response.data_valid)),
|
||||
.en_b(store_tag_hit_way[i]),
|
||||
.be_a({4{(replacement_way_r[i] & l1_response.data_valid)}}),
|
||||
.be_b(stage2_store.be),
|
||||
.data_in_a(l1_response.data),
|
||||
.data_in_b(stage2_store.data),
|
||||
.data_out_a(ram_load_data[i]),
|
||||
.data_out_b()
|
||||
);
|
||||
end endgenerate
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Output
|
||||
//One-hot tag hit / update logic to binary int
|
||||
one_hot_to_integer #(CONFIG.DCACHE.WAYS)
|
||||
hit_way_conv (
|
||||
.one_hot (load_tag_hit_way),
|
||||
.int_out (tag_hit_index)
|
||||
);
|
||||
one_hot_to_integer #(CONFIG.DCACHE.WAYS)
|
||||
replacment_way_conv (
|
||||
.one_hot (replacement_way),
|
||||
.int_out (replacement_index)
|
||||
);
|
||||
always_ff @ (posedge clk) begin
|
||||
if (load_l1_arb_ack) begin
|
||||
replacement_way_r <= replacement_way;
|
||||
replacement_index_r <= replacement_index;
|
||||
end
|
||||
end
|
||||
|
||||
always_ff @ (posedge clk) miss_data_valid <= l1_response.data_valid & is_target_word;
|
||||
|
||||
logic collision;
|
||||
logic [31:0] saved_data;
|
||||
logic [3:0] saved_be;
|
||||
|
||||
assign collision = store_state[STORE_L1_REQUEST] & (stage2_store.addr[31:2] == ls_load.addr[31:2]);
|
||||
always_ff @ (posedge clk) begin
|
||||
if (load_request) begin
|
||||
saved_data <= stage2_store.data;
|
||||
saved_be <= {4{collision}} & stage2_store.be;
|
||||
end
|
||||
end
|
||||
|
||||
assign load_sel = load_state[LOAD_HIT_CHECK] ? tag_hit_index : replacement_index_r;
|
||||
always_comb for (int i = 0; i < 4; i++)
|
||||
ls.data_out[8*i+:8] = saved_be[i] ? saved_data[8*i+:8] : ram_load_data[load_sel][8*i+:8];
|
||||
assign ls.data_valid = load_hit | miss_data_valid;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//End of Implementation
|
||||
////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Assertions
|
||||
dcache_request_when_not_ready_assertion:
|
||||
assert property (@(posedge clk) disable iff (rst) load_request |-> load_ready)
|
||||
else $error("dcache received request when not ready");
|
||||
|
||||
dache_suprious_l1_ack_assertion:
|
||||
assert property (@(posedge clk) disable iff (rst) l1_request.ack |-> (load_state[LOAD_L1_REQUEST] | store_state[STORE_L1_REQUEST]))
|
||||
else $error("dcache received ack without a request");
|
||||
|
||||
endmodule
|
114
core/execution_units/load_store_unit/dcache_tag_banks.sv
Normal file
114
core/execution_units/load_store_unit/dcache_tag_banks.sv
Normal file
|
@ -0,0 +1,114 @@
|
|||
/*
|
||||
* Copyright © 2022 Eric Matthews
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Eric Matthews <ematthew@sfu.ca>
|
||||
*/
|
||||
|
||||
//Tag storage and hit detection for the data cache.
//
//One dual-port tag memory is instantiated per way. Each entry holds a valid bit
//and a TAG_W-bit tag (dtag_entry_t).
//  - Memory port A: store lookups, miss fills (one way, selected by miss_way),
//    external invalidations and cache-op invalidations (all ways).
//  - Memory port B: load lookups (read only).
//Address priority on port A is: miss fill > external invalidation > store.
//
//Hit results are produced from the memory read data together with a one-cycle
//delayed copy of the request strobe (load_req_r / store_req_r).
//NOTE(review): this presumes dual_port_bram returns read data the cycle after
//the enable - confirm against dual_port_bram.
module dcache_tag_banks

    import cva5_config::*;
    import cva5_types::*;

    # (
        parameter cpu_config_t CONFIG = EXAMPLE_CONFIG,
        parameter derived_cache_config_t SCONFIG = '{LINE_ADDR_W : 9, SUB_LINE_ADDR_W : 2, TAG_W : 15}
    )

    (
        input logic clk,
        input logic rst, //Not used inside this module

        //Load lookup (read on memory port B)
        input logic[31:0] load_addr,
        input logic load_req,
        //Miss fill (write on memory port A, only to the way selected by miss_way)
        //miss_addr also supplies the tag compared against the load read data
        input logic[31:0] miss_addr,
        input logic miss_req,
        input logic[CONFIG.DCACHE.WAYS-1:0] miss_way,
        //External invalidation (write on memory port A, all ways)
        input logic[31:0] inv_addr,
        input logic extern_inv,
        output logic extern_inv_complete,

        //Store lookup / cache-op invalidation (memory port A)
        input logic[31:0] store_addr,   //Indexes the tag memory for the lookup
        input logic[31:0] store_addr_r, //Supplies the tag compared against the read data
        input logic store_req,
        input logic cache_op_req,

        //Hit results: overall hit and per-way match vector
        output logic load_tag_hit,
        output logic store_tag_hit,
        output logic[CONFIG.DCACHE.WAYS-1:0] load_tag_hit_way,
        output logic[CONFIG.DCACHE.WAYS-1:0] store_tag_hit_way
    );

    typedef struct packed {
        logic valid;
        logic [SCONFIG.TAG_W-1:0] tag;
    } dtag_entry_t;

    cache_functions_interface # (.TAG_W(SCONFIG.TAG_W), .LINE_W(SCONFIG.LINE_ADDR_W), .SUB_LINE_W(SCONFIG.SUB_LINE_ADDR_W)) addr_utils ();

    dtag_entry_t tag_line_a [CONFIG.DCACHE.WAYS-1:0]; //Port A read data (store lookups)
    dtag_entry_t tag_line_b [CONFIG.DCACHE.WAYS-1:0]; //Port B read data (load lookups)

    dtag_entry_t new_tagline;

    logic [SCONFIG.LINE_ADDR_W-1:0] porta_addr;
    logic [SCONFIG.LINE_ADDR_W-1:0] portb_addr;

    logic external_inv;
    logic inv_write;
    logic load_req_r;
    logic store_req_r;
    ////////////////////////////////////////////////////
    //Implementation

    //Request strobes delayed by one cycle to qualify the memory read data.
    //A cache-op store never reports a store hit.
    always_ff @ (posedge clk) load_req_r <= load_req;
    always_ff @ (posedge clk) store_req_r <= store_req & ~cache_op_req;

    //External invalidations are ignored unless enabled in the configuration
    assign external_inv = extern_inv & CONFIG.DCACHE.USE_EXTERNAL_INVALIDATIONS;

    //Port A address priority: miss fill, then external invalidation, then store
    assign porta_addr = miss_req ? addr_utils.getTagLineAddr(miss_addr) : external_inv ? addr_utils.getTagLineAddr(inv_addr) : addr_utils.getTagLineAddr(store_addr);
    assign portb_addr = addr_utils.getTagLineAddr(load_addr);

    //An external invalidation only takes effect when it is not pre-empted by a miss fill
    assign extern_inv_complete = external_inv & ~miss_req;

    //Entry written through port A: a valid entry for a miss fill,
    //an invalid entry (valid == 0) for any invalidation
    assign new_tagline = '{valid: miss_req, tag: addr_utils.getTag(miss_addr)};

    //Invalidating writes target every way. They must not be performed while a miss
    //fill owns port A: in that cycle porta_addr and new_tagline belong to the fill,
    //so an ungated write would store the (valid) fill tag into all ways of the set.
    //NOTE(review): a cache-op store is presumably never issued together with
    //miss_req; if it can be, it is dropped here rather than retried - confirm
    //against the dcache control logic.
    assign inv_write = ~miss_req & (external_inv | (store_req & cache_op_req));

    ////////////////////////////////////////////////////
    //Memory instantiation and hit detection
    generate for (genvar i = 0; i < CONFIG.DCACHE.WAYS; i++) begin : tag_bank_gen
        dual_port_bram #(.WIDTH($bits(dtag_entry_t)), .LINES(CONFIG.DCACHE.LINES)) dtag_bank (
            .clk (clk),
            .en_a (store_req | (miss_req & miss_way[i]) | external_inv),
            .wen_a ((miss_req & miss_way[i]) | inv_write),
            .addr_a (porta_addr),
            .data_in_a (new_tagline),
            .data_out_a (tag_line_a[i]),
            .en_b (load_req),
            .wen_b ('0),
            .addr_b (portb_addr),
            .data_in_b ('0),
            .data_out_b(tag_line_b[i])
        );
        //A way hits when the delayed request strobe is set, the entry is valid
        //and the stored tag equals the tag of the compared address
        assign store_tag_hit_way[i] = ({store_req_r, 1'b1, addr_utils.getTag(store_addr_r)} == {1'b1, tag_line_a[i]});
        //NOTE(review): the load compare uses miss_addr, not a delayed copy of load_addr;
        //presumably the caller drives miss_addr with the address of the load looked up
        //in the previous cycle - confirm
        assign load_tag_hit_way[i] = ({load_req_r, 1'b1, addr_utils.getTag(miss_addr)} == {1'b1, tag_line_b[i]});
    end endgenerate

    assign load_tag_hit = |load_tag_hit_way;
    assign store_tag_hit = |store_tag_hit_way;

endmodule
|
270
core/execution_units/load_store_unit/load_store_queue.sv
Normal file
270
core/execution_units/load_store_unit/load_store_queue.sv
Normal file
|
@ -0,0 +1,270 @@
|
|||
/*
|
||||
* Copyright © 2020 Eric Matthews, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Eric Matthews <ematthew@sfu.ca>
|
||||
*/
|
||||
|
||||
//ID-based input buffer for the Load/Store Unit.
//
//Incoming requests are split into a load FIFO and a store queue. Each load
//records whether it potentially conflicts with a queued store (address hash
//match reported by the store queue) together with the store queue index that
//was current when the load was pushed; such a load is held back until that
//index is the oldest entry of the store queue.
//When the FPU is included, double precision accesses are sequenced as 32-bit
//operations by overriding bit 2 of the outgoing address.
module load_store_queue

    import cva5_config::*;
    import riscv_types::*;
    import cva5_types::*;
    import fpu_types::*;

    # (
        parameter cpu_config_t CONFIG = EXAMPLE_CONFIG
    )

    (
        input logic clk,
        input logic rst,
        input gc_outputs_t gc,

        load_store_queue_interface.queue lsq,
        input logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] store_forward_wb_group,
        input logic [1:0] fp_store_forward_wb_group,
        //Writeback snooping
        input wb_packet_t wb_packet [CONFIG.NUM_WB_GROUPS],
        input fp_wb_packet_t fp_wb_packet [2],

        //Retire release
        input retire_packet_t store_retire
    );

    localparam LOG2_SQ_DEPTH = $clog2(CONFIG.SQ_DEPTH);
    //Number of FP data bits carried by each half of a double access (at most 32)
    localparam DOUBLE_MIN_WIDTH = (FLEN < 32) ? FLEN : 32;

    //Contents of one load queue entry
    typedef struct packed {
        logic [31:0] addr;
        logic [2:0] fn3;
        logic fp;
        logic double;
        id_t id;
        logic store_collision;
        logic [LOG2_SQ_DEPTH-1:0] sq_index;
    } lq_entry_t;

    //Store queue status and collision detection
    logic [LOG2_SQ_DEPTH-1:0] sq_index;
    logic [LOG2_SQ_DEPTH-1:0] sq_oldest;
    addr_hash_t addr_hash;
    logic potential_store_conflict;

    //Load side of the output stage
    lq_entry_t lq_new_entry;
    logic load_pop;
    logic load_addr_bit_3;
    logic [2:0] load_fn3;
    fp_ls_op_t load_type;
    logic load_held_for_store;

    //Store side of the output stage
    logic store_pop;
    logic store_addr_bit_3;
    logic [31:0] store_data;

    fifo_interface #(.DATA_TYPE(lq_entry_t)) lq();
    store_queue_interface sq();
    ////////////////////////////////////////////////////
    //Implementation

    //New requests are accepted only while the store queue has room.
    //Accepting further loads with a full store queue would need additional
    //logic for the case of a collision while the store queue is full.
    assign lsq.full = sq.full;

    //Hash of the incoming address, used for load-store collision checks
    addr_hash #(.USE_BIT_3(~CONFIG.INCLUDE_UNIT.FPU))
    lsq_addr_hash (
        .addr (lsq.data_in.addr),
        .addr_hash (addr_hash)
    );

    ////////////////////////////////////////////////////
    //Load Queue
    cva5_fifo #(.DATA_TYPE(lq_entry_t), .FIFO_DEPTH(MAX_IDS))
    load_queue_fifo (
        .clk(clk),
        .rst(rst),
        .fifo(lq)
    );

    //Entry captured for an incoming load
    always_comb begin
        lq_new_entry.addr = lsq.data_in.addr;
        lq_new_entry.fn3 = lsq.data_in.fn3;
        lq_new_entry.fp = lsq.data_in.fp;
        lq_new_entry.double = lsq.data_in.double;
        lq_new_entry.id = lsq.data_in.id;
        lq_new_entry.store_collision = potential_store_conflict;
        lq_new_entry.sq_index = sq_index;
    end

    assign lq.data_in = lq_new_entry;
    assign lq.potential_push = lsq.potential_push;
    assign lq.push = lsq.push & lsq.data_in.load;
    assign lq.pop = load_pop;

    ////////////////////////////////////////////////////
    //Store Queue
    //Stores and cache operations share the store queue
    assign sq.data_in = lsq.data_in;
    assign sq.push = lsq.push & (lsq.data_in.store | lsq.data_in.cache_op);
    assign sq.pop = store_pop;

    store_queue # (.CONFIG(CONFIG)) sq_block (
        .clk (clk),
        .rst (rst | gc.sq_flush),
        .sq (sq),
        .store_forward_wb_group (store_forward_wb_group),
        .fp_store_forward_wb_group (fp_store_forward_wb_group),
        .addr_hash (addr_hash),
        .potential_store_conflict (potential_store_conflict),
        .sq_index (sq_index),
        .sq_oldest (sq_oldest),
        .wb_packet (wb_packet),
        .fp_wb_packet (fp_wb_packet),
        .store_retire (store_retire)
    );

    ////////////////////////////////////////////////////
    //Output sequencing
    //Both a load and a store candidate are presented; loads are intended to
    //take priority, with a store chosen only when no load is ready.

    generate
    if (CONFIG.INCLUDE_UNIT.FPU) begin : gen_fpu_split
        //FP loads are always issued as word accesses, for either FLEN range
        assign load_fn3 = lq.data_out.fp ? LS_W_fn3 : lq.data_out.fn3;

        if (FLEN > 32) begin : gen_load_split
            //A double precision load takes two cycles, upper word first
            logic load_p2;      //Set while the second pass of a double load is pending
            logic load_fp_hold; //First pass of a double load: keep the entry in the queue

            assign load_fp_hold = ~load_p2 & lq.data_out.double;
            assign load_pop = lsq.load_pop & ~load_fp_hold;
            assign load_addr_bit_3 = load_fp_hold | lq.data_out.addr[2];

            always_comb begin
                //Default: first pass of a double load
                load_type = DOUBLE_HOLD;
                if (~lq.data_out.fp)
                    load_type = INT_DONE;
                else if (~lq.data_out.double)
                    load_type = SINGLE_DONE;
                else if (load_p2)
                    load_type = DOUBLE_DONE;
            end

            always_ff @(posedge clk) begin
                if (rst)
                    load_p2 <= 0;
                else if (lsq.load_pop)
                    load_p2 <= load_fp_hold;
            end
        end else begin : gen_no_load_split
            //Every load completes in one cycle; a double only fetches its upper word
            assign load_pop = lsq.load_pop;
            assign load_addr_bit_3 = lq.data_out.addr[2] | lq.data_out.double;

            always_comb begin
                //Default: integer load
                load_type = INT_DONE;
                if (lq.data_out.double)
                    load_type = DOUBLE_DONE;
                else if (lq.data_out.fp)
                    load_type = SINGLE_DONE;
            end
        end

        ////////////////////////////////////////////////////
        //Stores
        //Selects between integer, single precision and double precision store data.
        //A double precision store always takes two cycles, lower 32 bits first,
        //even when FLEN <= 32: software may reload the value as integers, so a
        //reduced precision number has to be laid out as if it were full size.
        logic store_p2;      //Set while the second pass of a double store is pending
        logic store_fp_hold; //First pass of a double store: keep the entry in the queue

        assign store_fp_hold = ~store_p2 & sq.data_out.double;
        assign store_pop = lsq.store_pop & ~store_fp_hold;
        assign store_addr_bit_3 = sq.data_out.double ? store_p2 : sq.data_out.addr[2];

        always_ff @(posedge clk) begin
            if (rst)
                store_p2 <= 0;
            else if (lsq.store_pop)
                store_p2 <= store_fp_hold;
        end

        always_comb begin
            store_data = '0;
            if (sq.data_out.fp & ~sq.data_out.double) begin
                //Single precision: value occupies the upper bits of the word
                store_data[31-:FLEN_F] = sq.data_out.fp_data[FLEN_F-1:0];
            end
            else if (store_fp_hold) begin
                //Double, first pass: lower bits (may simply be zero)
                store_data = 32'(sq.data_out.fp_data[DOUBLE_MIN_WIDTH-1:0]) << 64-FLEN;
            end
            else if (store_p2) begin
                //Double, second pass: upper bits
                store_data[31-:DOUBLE_MIN_WIDTH] = sq.data_out.fp_data[FLEN-1-:DOUBLE_MIN_WIDTH];
            end
            else begin
                //Integer store
                store_data = sq.data_out.data;
            end
        end
    end else begin : gen_no_fpu
        //Integer-only configuration: requests pass through unmodified
        assign load_pop = lsq.load_pop;
        assign store_pop = lsq.store_pop;

        assign load_addr_bit_3 = lq.data_out.addr[2];
        assign store_addr_bit_3 = sq.data_out.addr[2];

        assign load_fn3 = lq.data_out.fn3;
        assign load_type = INT_DONE;
        assign store_data = sq.data_out.data;
    end
    endgenerate

    //A load flagged with a store collision waits until the store queue index it
    //recorded is the oldest entry in the store queue
    assign load_held_for_store = (lq.data_out.store_collision & (lq.data_out.sq_index != sq_oldest));

    assign lsq.load_valid = lq.valid & ~load_held_for_store;
    assign lsq.store_valid = sq.valid;

    //Load request presented to the sub-units (store-only fields are don't-care)
    assign lsq.load_data_out = '{
        load : 1,
        store : 0,
        cache_op : 0,
        addr : {lq.data_out.addr[31:3], load_addr_bit_3, lq.data_out.addr[1:0]},
        fn3 : load_fn3,
        id : lq.data_out.id,
        fp_op : load_type,
        be : 'x,
        data_in : 'x
    };

    //Store request presented to the sub-units (load-only fields are don't-care)
    assign lsq.store_data_out = '{
        load : 0,
        store : 1,
        cache_op : sq.data_out.cache_op,
        addr : {sq.data_out.addr[31:3], store_addr_bit_3, sq.data_out.addr[1:0]},
        be : sq.data_out.be,
        data_in : store_data,
        fn3 : 'x,
        id : 'x,
        fp_op : fp_ls_op_t'('x)
    };

    //Status
    assign lsq.sq_empty = sq.empty;
    assign lsq.no_released_stores_pending = sq.no_released_stores_pending;
    assign lsq.empty = ~lq.valid & sq.empty;

    ////////////////////////////////////////////////////
    //End of Implementation
    ////////////////////////////////////////////////////

    ////////////////////////////////////////////////////
    //Assertions

endmodule
|
|
@ -25,6 +25,8 @@ module load_store_unit
|
|||
import cva5_config::*;
|
||||
import riscv_types::*;
|
||||
import cva5_types::*;
|
||||
import fpu_types::*;
|
||||
import opcodes::*;
|
||||
|
||||
# (
|
||||
parameter cpu_config_t CONFIG = EXAMPLE_CONFIG
|
||||
|
@ -35,7 +37,26 @@ module load_store_unit
|
|||
input logic rst,
|
||||
input gc_outputs_t gc,
|
||||
|
||||
input load_store_inputs_t ls_inputs,
|
||||
input decode_packet_t decode_stage,
|
||||
output logic unit_needed,
|
||||
output logic [REGFILE_READ_PORTS-1:0] uses_rs,
|
||||
output logic [2:0] fp_uses_rs,
|
||||
output logic uses_rd,
|
||||
output logic fp_uses_rd,
|
||||
output logic decode_is_store,
|
||||
|
||||
input issue_packet_t issue_stage,
|
||||
input logic issue_stage_ready,
|
||||
input logic instruction_issued_with_rd,
|
||||
input logic fp_instruction_issued_with_rd,
|
||||
input logic rs2_inuse,
|
||||
input logic fp_rs2_inuse,
|
||||
input rs_addr_t issue_rs_addr [REGFILE_READ_PORTS],
|
||||
input logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] issue_rd_wb_group,
|
||||
input logic fp_issue_rd_wb_group,
|
||||
input logic [31:0] rf [REGFILE_READ_PORTS],
|
||||
input logic[FLEN-1:0] fp_rf[3],
|
||||
|
||||
unit_issue_interface.unit issue,
|
||||
|
||||
input logic dcache_on,
|
||||
|
@ -55,17 +76,16 @@ module load_store_unit
|
|||
local_memory_interface.master data_bram,
|
||||
|
||||
//Writeback-Store Interface
|
||||
input wb_packet_t wb_snoop,
|
||||
input wb_packet_t wb_packet [CONFIG.NUM_WB_GROUPS],
|
||||
input fp_wb_packet_t fp_wb_packet [2],
|
||||
|
||||
//Retire release
|
||||
input id_t retire_ids [RETIRE_PORTS],
|
||||
input logic retire_port_valid [RETIRE_PORTS],
|
||||
input retire_packet_t store_retire,
|
||||
|
||||
exception_interface.unit exception,
|
||||
output load_store_status_t load_store_status,
|
||||
unit_writeback_interface.unit wb,
|
||||
|
||||
output logic tr_load_conflict_delay
|
||||
unit_writeback_interface.unit fp_wb
|
||||
);
|
||||
|
||||
localparam NUM_SUB_UNITS = int'(CONFIG.INCLUDE_DLOCAL_MEM) + int'(CONFIG.INCLUDE_PERIPHERAL_BUS) + int'(CONFIG.INCLUDE_DCACHE);
|
||||
|
@ -76,7 +96,7 @@ module load_store_unit
|
|||
localparam DCACHE_ID = int'(CONFIG.INCLUDE_DLOCAL_MEM) + int'(CONFIG.INCLUDE_PERIPHERAL_BUS);
|
||||
|
||||
//Should be equal to pipeline depth of longest load/store subunit
|
||||
localparam ATTRIBUTES_DEPTH = 2;//CONFIG.INCLUDE_DCACHE ? 2 : 1;
|
||||
localparam ATTRIBUTES_DEPTH = 1;
|
||||
|
||||
//Subunit signals
|
||||
addr_utils_interface #(CONFIG.DLOCAL_MEM_ADDR.L, CONFIG.DLOCAL_MEM_ADDR.H) dlocal_mem_addr_utils ();
|
||||
|
@ -93,15 +113,19 @@ module load_store_unit
|
|||
logic [NUM_SUB_UNITS-1:0] unit_ready;
|
||||
logic [NUM_SUB_UNITS-1:0] unit_data_valid;
|
||||
logic [NUM_SUB_UNITS-1:0] last_unit;
|
||||
logic [NUM_SUB_UNITS-1:0] current_unit;
|
||||
|
||||
logic units_ready;
|
||||
logic sub_unit_ready;
|
||||
logic [NUM_SUB_UNITS_W-1:0] subunit_id;
|
||||
|
||||
logic unit_switch;
|
||||
logic unit_switch_in_progress;
|
||||
logic unit_switch_hold;
|
||||
|
||||
logic sel_load;
|
||||
logic sub_unit_issue;
|
||||
logic sub_unit_load_issue;
|
||||
logic sub_unit_store_issue;
|
||||
|
||||
logic load_complete;
|
||||
|
||||
logic [31:0] virtual_address;
|
||||
|
@ -110,46 +134,127 @@ module load_store_unit
|
|||
logic [31:0] aligned_load_data;
|
||||
logic [31:0] final_load_data;
|
||||
|
||||
|
||||
logic unaligned_addr;
|
||||
logic load_exception_complete;
|
||||
logic exception_is_fp;
|
||||
logic fence_hold;
|
||||
|
||||
typedef struct packed{
|
||||
logic is_halfword;
|
||||
logic is_signed;
|
||||
logic [1:0] byte_addr;
|
||||
logic [1:0] sign_sel;
|
||||
logic [1:0] final_mux_sel;
|
||||
id_t id;
|
||||
logic [NUM_SUB_UNITS_W-1:0] subunit_id;
|
||||
fp_ls_op_t fp_op;
|
||||
} load_attributes_t;
|
||||
load_attributes_t mem_attr, wb_attr;
|
||||
load_attributes_t wb_attr;
|
||||
|
||||
common_instruction_t instruction;//rs1_addr, rs2_addr, fn3, fn7, rd_addr, upper/lower opcode
|
||||
|
||||
logic [3:0] be;
|
||||
//FIFOs
|
||||
fifo_interface #(.DATA_WIDTH($bits(load_attributes_t))) load_attributes();
|
||||
fifo_interface #(.DATA_TYPE(load_attributes_t)) load_attributes();
|
||||
|
||||
load_store_queue_interface lsq();
|
||||
logic tr_possible_load_conflict_delay;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Decode
|
||||
assign instruction = decode_stage.instruction;
|
||||
|
||||
assign unit_needed = instruction inside {LB, LH, LW, LBU, LHU, SB, SH, SW, FENCE} |
|
||||
(CONFIG.INCLUDE_CBO & instruction inside {CBO_INVAL, CBO_CLEAN, CBO_FLUSH}) |
|
||||
(CONFIG.INCLUDE_UNIT.FPU & instruction inside {SP_FLW, SP_FSW, DP_FLD, DP_FSD});
|
||||
always_comb begin
|
||||
uses_rs = '0;
|
||||
uses_rs[RS1] = instruction inside {LB, LH, LW, LBU, LHU, SB, SH, SW} |
|
||||
(CONFIG.INCLUDE_CBO & instruction inside {CBO_INVAL, CBO_CLEAN, CBO_FLUSH}) |
|
||||
(CONFIG.INCLUDE_UNIT.FPU & instruction inside {SP_FLW, SP_FSW, DP_FLD, DP_FSD});
|
||||
uses_rs[RS2] = CONFIG.INCLUDE_FORWARDING_TO_STORES ? 0 : instruction inside {SB, SH, SW};
|
||||
uses_rd = instruction inside {LB, LH, LW, LBU, LHU};
|
||||
fp_uses_rs = '0;
|
||||
fp_uses_rs[RS2] = ~CONFIG.INCLUDE_FORWARDING_TO_STORES & CONFIG.INCLUDE_UNIT.FPU & instruction inside {SP_FSW, DP_FSD};
|
||||
fp_uses_rd = CONFIG.INCLUDE_UNIT.FPU & instruction inside {SP_FLW, DP_FLD};
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//LS specific decode support
|
||||
typedef struct packed{
|
||||
logic is_load;
|
||||
logic is_store;
|
||||
logic is_fence;
|
||||
logic is_cbo;
|
||||
logic is_fpu;
|
||||
logic is_double;
|
||||
logic [11:0] offset;
|
||||
} ls_attr_t;
|
||||
ls_attr_t decode_attr;
|
||||
ls_attr_t issue_attr;
|
||||
|
||||
logic [11:0] load_offset;
|
||||
logic [11:0] store_offset;
|
||||
assign load_offset = instruction[31:20];
|
||||
assign store_offset = {instruction[31:25], instruction[11:7]};
|
||||
|
||||
|
||||
assign decode_attr = '{
|
||||
is_load : instruction inside {LB, LH, LW, LBU, LHU} | CONFIG.INCLUDE_UNIT.FPU & instruction inside {SP_FLW, DP_FLD},
|
||||
is_store : instruction inside {SB, SH, SW} | CONFIG.INCLUDE_UNIT.FPU & instruction inside {SP_FSW, DP_FSD},
|
||||
is_fence : instruction inside {FENCE},
|
||||
is_cbo : CONFIG.INCLUDE_CBO & instruction inside {CBO_INVAL, CBO_CLEAN, CBO_FLUSH},
|
||||
is_fpu : CONFIG.INCLUDE_UNIT.FPU & instruction inside {SP_FLW, SP_FSW, DP_FLD, DP_FSD},
|
||||
is_double : CONFIG.INCLUDE_UNIT.FPU & instruction inside {DP_FLD, DP_FSD},
|
||||
offset : instruction[5] ? store_offset : ((CONFIG.INCLUDE_CBO & instruction[2]) ? '0 : load_offset)
|
||||
};
|
||||
assign decode_is_store = decode_attr.is_store | decode_attr.is_cbo;
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue_stage_ready)
|
||||
issue_attr <= decode_attr;
|
||||
end
|
||||
|
||||
typedef struct packed{
|
||||
id_t id;
|
||||
logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] wb_group;
|
||||
logic fp_wb_group;
|
||||
} rd_attributes_t;
|
||||
rd_attributes_t rd_attributes;
|
||||
|
||||
//Store FP instructions in 32-64
|
||||
lutram_1w_1r #(.DATA_TYPE(rd_attributes_t), .DEPTH(64))
|
||||
rd_to_id_table (
|
||||
.clk(clk),
|
||||
.waddr({fp_instruction_issued_with_rd, issue_stage.rd_addr}),
|
||||
.raddr({issue_attr.is_fpu, issue_rs_addr[RS2]}),
|
||||
.ram_write(instruction_issued_with_rd | fp_instruction_issued_with_rd),
|
||||
.new_ram_data('{
|
||||
id : issue_stage.id,
|
||||
wb_group : issue_rd_wb_group,
|
||||
fp_wb_group : fp_issue_rd_wb_group
|
||||
}),
|
||||
.ram_data_out(rd_attributes)
|
||||
);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Alignment Exception
|
||||
generate if (CONFIG.INCLUDE_M_MODE) begin : gen_ls_exceptions
|
||||
logic new_exception;
|
||||
always_comb begin
|
||||
case(ls_inputs.fn3)
|
||||
LS_H_fn3, L_HU_fn3 : unaligned_addr = virtual_address[0];
|
||||
LS_W_fn3 : unaligned_addr = |virtual_address[1:0];
|
||||
default : unaligned_addr = 0;
|
||||
endcase
|
||||
if (issue_stage.fn3 == LS_H_fn3 | issue_stage.fn3 == L_HU_fn3)
|
||||
unaligned_addr = virtual_address[0];
|
||||
else if (issue_stage.fn3 == LS_W_fn3)
|
||||
unaligned_addr = |virtual_address[1:0];
|
||||
//Double-precision operations raise if not aligned on 8 byte boundary even though they are decomposed into 4 byte operations
|
||||
//This is because the operation might straddle two memory regions
|
||||
else if (CONFIG.INCLUDE_UNIT.FPU & issue_stage.fn3 == LS_D_fn3)
|
||||
unaligned_addr = |virtual_address[2:0];
|
||||
else
|
||||
unaligned_addr = 0;
|
||||
end
|
||||
|
||||
assign new_exception = unaligned_addr & issue.new_request & ~ls_inputs.fence;
|
||||
assign new_exception = unaligned_addr & issue.new_request & ~issue_attr.is_fence;
|
||||
always_ff @(posedge clk) begin
|
||||
if (rst)
|
||||
exception.valid <= 0;
|
||||
|
@ -157,9 +262,16 @@ module load_store_unit
|
|||
exception.valid <= (exception.valid & ~exception.ack) | new_exception;
|
||||
end
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (rst)
|
||||
exception_is_fp <= 0;
|
||||
else if (new_exception)
|
||||
exception_is_fp <= CONFIG.INCLUDE_UNIT.FPU & issue_attr.is_fpu;
|
||||
end
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (new_exception & ~exception.valid) begin
|
||||
exception.code <= ls_inputs.store ? STORE_AMO_ADDR_MISSALIGNED : LOAD_ADDR_MISSALIGNED;
|
||||
exception.code <= issue_attr.is_store ? STORE_AMO_ADDR_MISSALIGNED : LOAD_ADDR_MISSALIGNED;
|
||||
exception.tval <= virtual_address;
|
||||
exception.id <= issue.id;
|
||||
end
|
||||
|
@ -178,17 +290,17 @@ module load_store_unit
|
|||
assign load_store_status = '{
|
||||
sq_empty : lsq.sq_empty,
|
||||
no_released_stores_pending : lsq.no_released_stores_pending,
|
||||
idle : lsq.empty & (~load_attributes.valid) & units_ready
|
||||
idle : lsq.empty & (~load_attributes.valid) & (&unit_ready)
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//TLB interface
|
||||
assign virtual_address = ls_inputs.rs1 + 32'(signed'(ls_inputs.offset));
|
||||
assign virtual_address = rf[RS1] + 32'(signed'(issue_attr.offset));
|
||||
|
||||
assign tlb.virtual_address = virtual_address;
|
||||
assign tlb.new_request = tlb_on & issue.new_request;
|
||||
assign tlb.execute = 0;
|
||||
assign tlb.rnw = ls_inputs.load & ~ls_inputs.store;
|
||||
assign tlb.rnw = issue_attr.is_load & ~issue_attr.is_store;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Byte enable generation
|
||||
|
@ -198,7 +310,7 @@ module load_store_unit
|
|||
// SB: specific byte
|
||||
always_comb begin
|
||||
be = 0;
|
||||
case(ls_inputs.fn3[1:0])
|
||||
case(issue_stage.fn3[1:0])
|
||||
LS_B_fn3[1:0] : be[virtual_address[1:0]] = 1;
|
||||
LS_H_fn3[1:0] : begin
|
||||
be[virtual_address[1:0]] = 1;
|
||||
|
@ -212,44 +324,46 @@ module load_store_unit
|
|||
//Load Store Queue
|
||||
assign lsq.data_in = '{
|
||||
addr : tlb_on ? tlb.physical_address : virtual_address,
|
||||
fn3 : ls_inputs.fn3,
|
||||
fn3 : issue_stage.fn3,
|
||||
be : be,
|
||||
data : ls_inputs.rs2,
|
||||
load : ls_inputs.load,
|
||||
store : ls_inputs.store,
|
||||
data : rf[RS2],
|
||||
load : issue_attr.is_load,
|
||||
store : issue_attr.is_store,
|
||||
cache_op : issue_attr.is_cbo,
|
||||
id : issue.id,
|
||||
forwarded_store : ls_inputs.forwarded_store,
|
||||
id_needed : ls_inputs.store_forward_id
|
||||
id_needed : rd_attributes.id,
|
||||
fp : issue_attr.is_fpu,
|
||||
double : issue_attr.is_double,
|
||||
fp_data : fp_rf[RS2]
|
||||
};
|
||||
|
||||
assign lsq.potential_push = issue.possible_issue;
|
||||
assign lsq.push = issue.new_request & ~unaligned_addr & (~tlb_on | tlb.done) & ~ls_inputs.fence;
|
||||
assign lsq.push = issue.new_request & ~unaligned_addr & (~tlb_on | tlb.done) & ~issue_attr.is_fence;
|
||||
|
||||
load_store_queue # (.CONFIG(CONFIG)) lsq_block (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.gc (gc),
|
||||
.lsq (lsq),
|
||||
.wb_snoop (wb_snoop),
|
||||
.retire_ids (retire_ids),
|
||||
.retire_port_valid (retire_port_valid),
|
||||
.tr_possible_load_conflict_delay (tr_possible_load_conflict_delay)
|
||||
.store_forward_wb_group (rs2_inuse ? rd_attributes.wb_group : '0),
|
||||
.fp_store_forward_wb_group ({fp_rs2_inuse & rd_attributes.fp_wb_group, fp_rs2_inuse & ~rd_attributes.fp_wb_group}),
|
||||
.wb_packet (wb_packet),
|
||||
.fp_wb_packet (fp_wb_packet),
|
||||
.store_retire (store_retire)
|
||||
);
|
||||
assign shared_inputs = lsq.data_out;
|
||||
assign lsq.pop = sub_unit_issue;
|
||||
|
||||
assign shared_inputs = sel_load ? lsq.load_data_out : lsq.store_data_out;
|
||||
assign lsq.load_pop = sub_unit_load_issue;
|
||||
assign lsq.store_pop = sub_unit_store_issue;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Unit tracking
|
||||
assign current_unit = sub_unit_address_match;
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
if (load_attributes.push)
|
||||
last_unit <= sub_unit_address_match;
|
||||
end
|
||||
|
||||
//When switching units, ensure no outstanding loads so that there can be no timing collisions with results
|
||||
assign unit_switch = (current_unit != last_unit) & load_attributes.valid;
|
||||
assign unit_switch = lsq.load_valid & (sub_unit_address_match != last_unit) & load_attributes.valid;
|
||||
always_ff @ (posedge clk) begin
|
||||
unit_switch_in_progress <= (unit_switch_in_progress | unit_switch) & ~load_attributes.valid;
|
||||
end
|
||||
|
@ -257,23 +371,27 @@ module load_store_unit
|
|||
|
||||
////////////////////////////////////////////////////
|
||||
//Primary Control Signals
|
||||
assign units_ready = &unit_ready & (~unit_switch_hold);
|
||||
assign sel_load = lsq.load_valid;
|
||||
|
||||
assign sub_unit_ready = unit_ready[subunit_id] & (~unit_switch_hold);
|
||||
assign load_complete = |unit_data_valid;
|
||||
|
||||
assign issue.ready = (~tlb_on | tlb.ready) & (~lsq.full) & (~fence_hold) & (~exception.valid);
|
||||
assign sub_unit_issue = lsq.valid & units_ready;
|
||||
|
||||
assign sub_unit_load_issue = sel_load & lsq.load_valid & sub_unit_ready & sub_unit_address_match[subunit_id];
|
||||
assign sub_unit_store_issue = (lsq.store_valid & ~sel_load) & sub_unit_ready & sub_unit_address_match[subunit_id];
|
||||
assign sub_unit_issue = sub_unit_load_issue | sub_unit_store_issue;
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
fence_hold <= 0;
|
||||
else
|
||||
fence_hold <= (fence_hold & ~load_store_status.idle) | (issue.new_request & ls_inputs.fence);
|
||||
fence_hold <= (fence_hold & ~load_store_status.idle) | (issue.new_request & issue_attr.is_fence);
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Load attributes FIFO
|
||||
logic [1:0] final_mux_sel;
|
||||
logic [NUM_SUB_UNITS_W-1:0] subunit_id;
|
||||
|
||||
one_hot_to_integer #(NUM_SUB_UNITS)
|
||||
sub_unit_select (
|
||||
|
@ -282,27 +400,26 @@ module load_store_unit
|
|||
);
|
||||
|
||||
always_comb begin
|
||||
case(shared_inputs.fn3)
|
||||
case(lsq.load_data_out.fn3)
|
||||
LS_B_fn3, L_BU_fn3 : final_mux_sel = 0;
|
||||
LS_H_fn3, L_HU_fn3 : final_mux_sel = 1;
|
||||
default : final_mux_sel = 2; //LS_W_fn3
|
||||
endcase
|
||||
end
|
||||
|
||||
assign mem_attr = '{
|
||||
is_halfword : shared_inputs.fn3[0],
|
||||
is_signed : ~|shared_inputs.fn3[2:1],
|
||||
byte_addr : shared_inputs.addr[1:0],
|
||||
assign load_attributes.data_in = '{
|
||||
is_signed : ~|lsq.load_data_out.fn3[2:1],
|
||||
byte_addr : lsq.load_data_out.addr[1:0],
|
||||
sign_sel : lsq.load_data_out.addr[1:0] | {1'b0, lsq.load_data_out.fn3[0]},//halfword
|
||||
final_mux_sel : final_mux_sel,
|
||||
id : shared_inputs.id,
|
||||
subunit_id : subunit_id
|
||||
id : lsq.load_data_out.id,
|
||||
subunit_id : subunit_id,
|
||||
fp_op : lsq.load_data_out.fp_op
|
||||
};
|
||||
|
||||
assign load_attributes.data_in = mem_attr;
|
||||
assign load_attributes.push = sub_unit_issue & shared_inputs.load;
|
||||
assign load_attributes.push = sub_unit_load_issue;
|
||||
assign load_attributes.potential_push = load_attributes.push;
|
||||
|
||||
cva5_fifo #(.DATA_WIDTH($bits(load_attributes_t)), .FIFO_DEPTH(ATTRIBUTES_DEPTH))
|
||||
cva5_fifo #(.DATA_TYPE(load_attributes_t), .FIFO_DEPTH(ATTRIBUTES_DEPTH))
|
||||
attributes_fifo (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
|
@ -367,9 +484,20 @@ module load_store_unit
|
|||
endgenerate
|
||||
|
||||
generate if (CONFIG.INCLUDE_DCACHE) begin : gen_ls_dcache
|
||||
logic uncacheable;
|
||||
logic load_ready;
|
||||
logic store_ready;
|
||||
logic uncacheable_load;
|
||||
logic uncacheable_store;
|
||||
logic dcache_load_request;
|
||||
logic dcache_store_request;
|
||||
|
||||
assign sub_unit_address_match[DCACHE_ID] = dcache_addr_utils.address_range_check(shared_inputs.addr);
|
||||
assign uncacheable = uncacheable_utils.address_range_check(shared_inputs.addr);
|
||||
|
||||
assign uncacheable_load = CONFIG.DCACHE.USE_NON_CACHEABLE & uncacheable_utils.address_range_check(shared_inputs.addr);
|
||||
assign uncacheable_store = CONFIG.DCACHE.USE_NON_CACHEABLE & uncacheable_utils.address_range_check(shared_inputs.addr);
|
||||
|
||||
assign dcache_load_request = sub_unit_load_issue & sub_unit_address_match[DCACHE_ID];
|
||||
assign dcache_store_request = sub_unit_store_issue & sub_unit_address_match[DCACHE_ID];
|
||||
|
||||
dcache # (.CONFIG(CONFIG))
|
||||
data_cache (
|
||||
|
@ -381,8 +509,16 @@ module load_store_unit
|
|||
.sc_complete (sc_complete),
|
||||
.sc_success (sc_success),
|
||||
.clear_reservation (clear_reservation),
|
||||
.amo (ls_inputs.amo),
|
||||
.uncacheable (uncacheable),
|
||||
.amo (),
|
||||
.uncacheable_load (uncacheable_load),
|
||||
.uncacheable_store (uncacheable_store),
|
||||
.is_load (sel_load),
|
||||
.load_ready (load_ready),
|
||||
.store_ready (store_ready),
|
||||
.load_request (dcache_load_request),
|
||||
.store_request (dcache_store_request),
|
||||
.ls_load (lsq.load_data_out),
|
||||
.ls_store (lsq.store_data_out),
|
||||
.ls (sub_unit[DCACHE_ID])
|
||||
);
|
||||
end
|
||||
|
@ -391,7 +527,6 @@ module load_store_unit
|
|||
////////////////////////////////////////////////////
|
||||
//Output Muxing
|
||||
logic sign_bit_data [4];
|
||||
logic [1:0] sign_bit_sel;
|
||||
logic sign_bit;
|
||||
|
||||
assign unit_muxed_load_data = unit_data_array[wb_attr.subunit_id];
|
||||
|
@ -402,8 +537,7 @@ module load_store_unit
|
|||
assign aligned_load_data[7:0] = unit_muxed_load_data[wb_attr.byte_addr*8 +: 8];
|
||||
|
||||
assign sign_bit_data = '{unit_muxed_load_data[7], unit_muxed_load_data[15], unit_muxed_load_data[23], unit_muxed_load_data[31]};
|
||||
assign sign_bit_sel = wb_attr.byte_addr | {1'b0, wb_attr.is_halfword};
|
||||
assign sign_bit = wb_attr.is_signed & sign_bit_data[sign_bit_sel];
|
||||
assign sign_bit = wb_attr.is_signed & sign_bit_data[wb_attr.sign_sel];
|
||||
|
||||
//Sign extending
|
||||
always_comb begin
|
||||
|
@ -414,12 +548,47 @@ module load_store_unit
|
|||
endcase
|
||||
end
|
||||
|
||||
//FP buffering first load result
|
||||
logic[FLEN-1:0] fp_result;
|
||||
generate if (CONFIG.INCLUDE_UNIT.FPU && FLEN > 32) begin : gen_fp_load_buffering
|
||||
logic[31:0] saved_msb;
|
||||
always_ff @(posedge clk) begin
|
||||
if (rst)
|
||||
saved_msb <= '1;
|
||||
else begin
|
||||
if (load_complete & wb_attr.fp_op == DOUBLE_HOLD)
|
||||
saved_msb <= unit_muxed_load_data;
|
||||
else if (load_complete) //Boxing
|
||||
saved_msb <= '1;
|
||||
end
|
||||
end
|
||||
always_comb begin
|
||||
fp_result = '1;
|
||||
fp_result[FLEN-1-:32] = saved_msb;
|
||||
if (wb_attr.fp_op == SINGLE_DONE)
|
||||
fp_result[FLEN_F-1:0] = unit_muxed_load_data[31-:FLEN_F];
|
||||
else
|
||||
fp_result[FLEN-33:0] = unit_muxed_load_data[31-:FLEN-32];
|
||||
end
|
||||
end else if (CONFIG.INCLUDE_UNIT.FPU) begin : gen_fpu_no_buffering
|
||||
//No buffering ever required - all results are final
|
||||
assign fp_result = wb_attr.fp_op == SINGLE_DONE ? {{(FLEN-FLEN_F){1'b1}}, unit_muxed_load_data[31-:FLEN_F]} : unit_muxed_load_data[31-:FLEN];
|
||||
end
|
||||
else begin : gen_no_fpu
|
||||
assign fp_result = 'x;
|
||||
end endgenerate
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Output bank
|
||||
assign wb.rd = final_load_data;
|
||||
assign wb.done = load_complete | load_exception_complete;
|
||||
assign wb.done = (load_complete & (~CONFIG.INCLUDE_UNIT.FPU | wb_attr.fp_op == INT_DONE)) | (load_exception_complete & ~exception_is_fp);
|
||||
//TODO: exceptions seemingly clobber load data if it appears on the same cycle
|
||||
assign wb.id = load_exception_complete ? exception.id : wb_attr.id;
|
||||
|
||||
assign fp_wb.rd = fp_result;
|
||||
assign fp_wb.done = (load_complete & (wb_attr.fp_op == SINGLE_DONE | wb_attr.fp_op == DOUBLE_DONE)) | (load_exception_complete & exception_is_fp);
|
||||
assign fp_wb.id = load_exception_complete ? exception.id : wb_attr.id;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//End of Implementation
|
||||
////////////////////////////////////////////////////
|
||||
|
@ -430,17 +599,5 @@ module load_store_unit
|
|||
assert property (@(posedge clk) disable iff (rst) load_complete |-> (load_attributes.valid && unit_data_valid[wb_attr.subunit_id]))
|
||||
else $error("Spurious load complete detected!");
|
||||
|
||||
// `ifdef ENABLE_SIMULATION_ASSERTIONS
|
||||
// invalid_ls_address_assertion:
|
||||
// assert property (@(posedge clk) disable iff (rst) (sub_unit_issue & ~ls_inputs.fence) |-> |sub_unit_address_match)
|
||||
// else $error("invalid L/S address");
|
||||
// `endif
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Trace Interface
|
||||
generate if (ENABLE_TRACE_INTERFACE) begin : gen_ls_trace
|
||||
assign tr_load_conflict_delay = tr_possible_load_conflict_delay & units_ready;
|
||||
end
|
||||
endgenerate
|
||||
|
||||
endmodule
|
332
core/execution_units/load_store_unit/store_queue.sv
Normal file
332
core/execution_units/load_store_unit/store_queue.sv
Normal file
|
@ -0,0 +1,332 @@
|
|||
/*
|
||||
* Copyright © 2020 Eric Matthews, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Eric Matthews <ematthew@sfu.ca>
|
||||
*/
|
||||
|
||||
module store_queue
|
||||
|
||||
import cva5_config::*;
|
||||
import riscv_types::*;
|
||||
import cva5_types::*;
|
||||
|
||||
# (
|
||||
parameter cpu_config_t CONFIG = EXAMPLE_CONFIG
|
||||
)
|
||||
(
|
||||
input logic clk,
|
||||
input logic rst,
|
||||
|
||||
store_queue_interface.queue sq,
|
||||
input logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] store_forward_wb_group,
|
||||
input logic [1:0] fp_store_forward_wb_group,
|
||||
|
||||
//Address hash (shared by loads and stores)
|
||||
input addr_hash_t addr_hash,
|
||||
//hash check on adding a load to the queue
|
||||
output logic [$clog2(CONFIG.SQ_DEPTH)-1:0] sq_index,
|
||||
output logic [$clog2(CONFIG.SQ_DEPTH)-1:0] sq_oldest,
|
||||
output logic potential_store_conflict,
|
||||
|
||||
//Writeback snooping
|
||||
input wb_packet_t wb_packet [CONFIG.NUM_WB_GROUPS],
|
||||
input fp_wb_packet_t fp_wb_packet [2],
|
||||
|
||||
//Retire
|
||||
input retire_packet_t store_retire
|
||||
);
|
||||
|
||||
localparam FINAL_TABLE_WIDTH = CONFIG.INCLUDE_UNIT.FPU && FLEN > 32 ? FLEN : 32;
|
||||
localparam LOG2_SQ_DEPTH = $clog2(CONFIG.SQ_DEPTH);
|
||||
localparam NUM_OF_FORWARDING_PORTS = CONFIG.NUM_WB_GROUPS - 1;
|
||||
typedef logic [LOG2_SQ_DEPTH-1:0] sq_index_t;
|
||||
|
||||
typedef struct packed {
|
||||
id_t id_needed;
|
||||
logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] wb_group;
|
||||
logic [1:0] fp_wb_group;
|
||||
logic fp;
|
||||
sq_index_t sq_index;
|
||||
} retire_table_t;
|
||||
retire_table_t retire_table_out;
|
||||
|
||||
wb_packet_t wb_snoop [CONFIG.NUM_WB_GROUPS];
|
||||
fp_wb_packet_t fp_wb_snoop [2];
|
||||
|
||||
//Register-based memory blocks
|
||||
logic [CONFIG.SQ_DEPTH-1:0] valid;
|
||||
logic [CONFIG.SQ_DEPTH-1:0] valid_next;
|
||||
addr_hash_t [CONFIG.SQ_DEPTH-1:0] hashes;
|
||||
|
||||
//LUTRAM-based memory blocks
|
||||
sq_entry_t output_entry;
|
||||
sq_entry_t output_entry_r;
|
||||
logic [1:0] retire_alignment;
|
||||
|
||||
sq_index_t sq_index_next;
|
||||
sq_index_t sq_oldest_next;
|
||||
logic [LOG2_SQ_DEPTH:0] released_count;
|
||||
|
||||
logic [CONFIG.SQ_DEPTH-1:0] new_request_one_hot;
|
||||
logic [CONFIG.SQ_DEPTH-1:0] issued_one_hot;
|
||||
|
||||
logic [31:0] data_pre_alignment;
|
||||
logic [31:0] marshalled_data;
|
||||
logic [FLEN-1:0] fp_marshalled_data;
|
||||
logic [FINAL_TABLE_WIDTH-1:0] sq_data_in;
|
||||
logic [FINAL_TABLE_WIDTH-1:0] sq_data_out;
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
|
||||
//Store Queue indices
|
||||
assign sq_index_next = sq_index + LOG2_SQ_DEPTH'(sq.push);
|
||||
assign sq_oldest_next = sq_oldest + LOG2_SQ_DEPTH'(sq.pop);
|
||||
|
||||
//Queue pointer registers.
//sq_index is used as the write address of store_attr and sq_oldest as the read
//address of sq_data_lutram. Both are cleared on rst and otherwise load their
//*_next values, which add sq.push / sq.pop to the current pointer.
always_ff @ (posedge clk) begin
|
||||
if (rst) begin
|
||||
sq_index <= 0;
|
||||
sq_oldest <= 0;
|
||||
end else begin
|
||||
sq_index <= sq_index_next;
|
||||
sq_oldest <= sq_oldest_next;
|
||||
end
|
||||
end
|
||||
|
||||
assign new_request_one_hot = CONFIG.SQ_DEPTH'(sq.push) << sq_index;
|
||||
assign issued_one_hot = CONFIG.SQ_DEPTH'(sq.pop) << sq_oldest;
|
||||
|
||||
assign valid_next = (valid | new_request_one_hot) & ~issued_one_hot;
|
||||
//Per-entry occupancy bitmap and registered full flag.
//valid_next sets the bit of the entry being pushed and clears the bit of the
//entry being popped. sq.full is registered from the AND-reduction of valid_next,
//so it reflects the occupancy after this cycle's push/pop has been applied.
always_ff @ (posedge clk) begin
|
||||
if (rst) begin
|
||||
valid <= '0;
|
||||
sq.full <= 0;
|
||||
end else begin
|
||||
valid <= valid_next;
|
||||
sq.full <= &valid_next;
|
||||
end
|
||||
end
|
||||
assign sq.empty = ~|valid;
|
||||
|
||||
//SQ attributes and issue data
|
||||
//Per-entry store attributes, written at sq_index when a store is pushed.
//The data and fp_data fields are written as zero here; sq.data_out takes its
//data fields from sq_data_out (sq_data_lutram) rather than from this memory.
//The read address is sq_oldest_next and the result is registered below, so
//output_entry_r is updated on the same clock edge as sq_oldest.
//NOTE(review): this alignment assumes lutram_1w_1r has a combinational read - confirm.
lutram_1w_1r #(.DATA_TYPE(sq_entry_t), .DEPTH(CONFIG.SQ_DEPTH))
|
||||
store_attr (
|
||||
.clk(clk),
|
||||
.waddr(sq_index),
|
||||
.raddr(sq_oldest_next),
|
||||
.ram_write(sq.push),
|
||||
.new_ram_data('{
|
||||
addr : sq.data_in.addr,
|
||||
be : sq.data_in.be,
|
||||
cache_op : sq.data_in.cache_op,
|
||||
data : '0,
|
||||
fp : sq.data_in.fp,
|
||||
double : sq.data_in.double,
|
||||
fp_data : '0
|
||||
}),
|
||||
.ram_data_out(output_entry)
|
||||
);
|
||||
//Output register for the attribute read (no reset)
always_ff @ (posedge clk) begin
|
||||
output_entry_r <= output_entry;
|
||||
end
|
||||
|
||||
lutram_1w_1r #(.DATA_TYPE(logic[1:0]), .DEPTH(MAX_IDS))
|
||||
store_alignment (
|
||||
.clk(clk),
|
||||
.waddr(sq.data_in.id),
|
||||
.raddr(store_retire.id),
|
||||
.ram_write(sq.push),
|
||||
.new_ram_data(sq.data_in.addr[1:0]),
|
||||
.ram_data_out(retire_alignment)
|
||||
);
|
||||
//Compare store addr-hashes against new load addr-hash
|
||||
//OR-reduction over all queue entries: a conflict is flagged when an entry is
//valid, is not being popped this cycle (issued_one_hot), and its stored hash
//equals the incoming addr_hash. Only the hashes are compared here.
always_comb begin
|
||||
potential_store_conflict = 0;
|
||||
for (int i = 0; i < CONFIG.SQ_DEPTH; i++)
|
||||
potential_store_conflict |= {(valid[i] & ~issued_one_hot[i]), addr_hash} == {1'b1, hashes[i]};
|
||||
end
|
||||
////////////////////////////////////////////////////
|
||||
//Register-based storage
|
||||
//Address hashes
|
||||
//Capture addr_hash into the entry selected by new_request_one_hot (the entry
//at sq_index when sq.push is asserted). There is no reset: the conflict check
//qualifies each stored hash with valid[i].
always_ff @ (posedge clk) begin
|
||||
for (int i = 0; i < CONFIG.SQ_DEPTH; i++) begin
|
||||
if (new_request_one_hot[i])
|
||||
hashes[i] <= addr_hash;
|
||||
end
|
||||
end
|
||||
////////////////////////////////////////////////////
|
||||
//Release Handling
|
||||
//Counter of stores that have retired but not yet been popped.
//Incremented by store_retire.valid and decremented by sq.pop (both may occur
//in the same cycle). The counter is LOG2_SQ_DEPTH+1 bits wide.
//A non-zero count drives sq.valid; a zero count drives sq.no_released_stores_pending.
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
released_count <= 0;
|
||||
else
|
||||
released_count <= released_count + (LOG2_SQ_DEPTH + 1)'(store_retire.valid) - (LOG2_SQ_DEPTH + 1)'(sq.pop);
|
||||
end
|
||||
|
||||
assign sq.no_released_stores_pending = ~|released_count;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Forwarding and Store Data
|
||||
//Forwarding is only needed from multi-cycle writeback ports
|
||||
//Currently this is the LS port [1] and the MUL/DIV/CSR port [2]
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
wb_snoop <= wb_packet;
|
||||
fp_wb_snoop <= fp_wb_packet;
|
||||
end
|
||||
|
||||
lutram_1w_1r #(.DATA_TYPE(retire_table_t), .DEPTH(MAX_IDS))
|
||||
store_retire_table_lutram (
|
||||
.clk(clk),
|
||||
.waddr(sq.data_in.id),
|
||||
.raddr(store_retire.id),
|
||||
.ram_write(sq.push),
|
||||
.new_ram_data('{
|
||||
id_needed : sq.data_in.id_needed,
|
||||
wb_group : store_forward_wb_group,
|
||||
fp_wb_group : fp_store_forward_wb_group,
|
||||
fp : sq.data_in.fp,
|
||||
sq_index : sq_index
|
||||
}),
|
||||
.ram_data_out(retire_table_out)
|
||||
);
|
||||
|
||||
logic [31:0] wb_data [NUM_OF_FORWARDING_PORTS+1];
|
||||
logic [FLEN-1:0] fp_wb_data [3];
|
||||
|
||||
//Data issued with the store can be stored by store-id
|
||||
lutram_1w_1r #(.DATA_TYPE(logic[31:0]), .DEPTH(MAX_IDS))
|
||||
non_forwarded_port (
|
||||
.clk(clk),
|
||||
.waddr(sq.data_in.id),
|
||||
.raddr(store_retire.id),
|
||||
.ram_write(sq.push),
|
||||
.new_ram_data(sq.data_in.data),
|
||||
.ram_data_out(wb_data[0])
|
||||
);
|
||||
|
||||
//Data from wb ports is stored by ID and then accessed by store-id to store-id-needed translation
|
||||
//One ID-indexed memory per writeback group other than group 0 (index i+1).
//Each memory records the data of writebacks seen on the registered snoop bus
//(wb_snoop), addressed by the ID of the writing instruction. It is read with
//id_needed from the retire table, so wb_data[i+1] holds the value produced by
//the instruction the retiring store depends on, if it wrote back on that group.
generate
|
||||
for (genvar i = 0; i < NUM_OF_FORWARDING_PORTS; i++) begin : lutrams
|
||||
lutram_1w_1r #(.DATA_TYPE(logic[31:0]), .DEPTH(MAX_IDS))
|
||||
writeback_port (
|
||||
.clk(clk),
|
||||
.waddr(wb_snoop[i+1].id),
|
||||
.raddr(retire_table_out.id_needed),
|
||||
.ram_write(wb_snoop[i+1].valid),
|
||||
.new_ram_data(wb_snoop[i+1].data),
|
||||
.ram_data_out(wb_data[i+1])
|
||||
);
|
||||
end
|
||||
endgenerate
|
||||
|
||||
//Floating-point store data sources, mirroring the integer ones:
//  fp_wb_data[0]   - FP data supplied with the store at push, indexed by store ID
//  fp_wb_data[1:2] - data snooped from the two FP writeback ports (fp_wb_snoop),
//                    indexed by writer ID and read with id_needed
generate
|
||||
if (CONFIG.INCLUDE_UNIT.FPU) begin : gen_fp_issue_data_storage
|
||||
//FP data issued with the store and data from the FP writeback ports is saved
|
||||
lutram_1w_1r #(.DATA_TYPE(logic[FLEN-1:0]), .DEPTH(MAX_IDS))
|
||||
fp_non_forwarded_port (
|
||||
.clk(clk),
|
||||
.waddr(sq.data_in.id),
|
||||
.raddr(store_retire.id),
|
||||
.ram_write(sq.push),
|
||||
.new_ram_data(sq.data_in.fp_data),
|
||||
.ram_data_out(fp_wb_data[0])
|
||||
);
|
||||
end
|
||||
//NOTE(review): this loop is outside the INCLUDE_UNIT.FPU conditional above, so
//the two FP writeback memories are instantiated even when the FPU is excluded
//(fp_wb_data[0] is then left undriven). Confirm whether that is intentional.
for (genvar i = 0; i < 2; i++) begin : gen_fp_wb_data_storage
|
||||
lutram_1w_1r #(.DATA_TYPE(logic[FLEN-1:0]), .DEPTH(MAX_IDS))
|
||||
writeback_port (
|
||||
.clk(clk),
|
||||
.waddr(fp_wb_snoop[i].id),
|
||||
.raddr(retire_table_out.id_needed),
|
||||
.ram_write(fp_wb_snoop[i].valid),
|
||||
.new_ram_data(fp_wb_snoop[i].data),
|
||||
.ram_data_out(fp_wb_data[i+1])
|
||||
);
|
||||
end
|
||||
endgenerate
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Data Marshalling
|
||||
assign fp_marshalled_data = fp_wb_data[retire_table_out.fp_wb_group];
|
||||
assign data_pre_alignment = wb_data[retire_table_out.wb_group];
|
||||
//Replicates the low-order source bytes into the byte lanes selected by
//retire_alignment (the store's addr[1:0], saved at push and read back with
//store_retire.id):
//  lane 0: always source byte 0
//  lane 1: source byte 0 when alignment is 01, otherwise source byte 1
//  lane 2: source byte 0 when alignment is 10, otherwise source byte 2
//  lane 3: source byte 1 when alignment is 10, source byte 0 when 11,
//          otherwise source byte 3
//NOTE(review): presumably the byte enables (be) choose which lanes are actually
//written downstream - confirm against the consumer of sq.data_out.
always_comb begin
|
||||
//Input: ABCD
|
||||
//Assuming aligned requests,
|
||||
//Possible byte selections: (A/C/D, B/D, C/D, D)
|
||||
marshalled_data[7:0] = data_pre_alignment[7:0];
|
||||
marshalled_data[15:8] = (retire_alignment[1:0] == 2'b01) ? data_pre_alignment[7:0] : data_pre_alignment[15:8];
|
||||
marshalled_data[23:16] = (retire_alignment[1:0] == 2'b10) ? data_pre_alignment[7:0] : data_pre_alignment[23:16];
|
||||
case(retire_alignment[1:0])
|
||||
2'b10 : marshalled_data[31:24] = data_pre_alignment[15:8];
|
||||
2'b11 : marshalled_data[31:24] = data_pre_alignment[7:0];
|
||||
default : marshalled_data[31:24] = data_pre_alignment[31:24];
|
||||
endcase
|
||||
end
|
||||
|
||||
|
||||
//Final storage table for the store queue (includes FP data)
|
||||
//SQ-index addressed
|
||||
//Builds the word written into sq_data_lutram (FINAL_TABLE_WIDTH bits wide).
//The fp flag from the retire table chooses between FP and integer store data
//wherever both sources cover the same bits:
//  FLEN > 32 : bits above 31 always come from the FP data, low 32 bits are selected
//  FLEN == 32: the whole word is selected
//  FLEN < 32 : bits [31:FLEN] always come from the integer data, low FLEN bits are selected
//  no FPU    : integer data only
generate
|
||||
if (CONFIG.INCLUDE_UNIT.FPU && FLEN > 32) begin : gen_upper_always_fp
|
||||
assign sq_data_in[FLEN-1:32] = fp_marshalled_data[FLEN-1:32];
|
||||
assign sq_data_in[31:0] = retire_table_out.fp ? fp_marshalled_data[31:0] : marshalled_data[31:0];
|
||||
end else if (CONFIG.INCLUDE_UNIT.FPU && FLEN == 32) begin : gen_no_upper
|
||||
assign sq_data_in = retire_table_out.fp ? fp_marshalled_data : marshalled_data;
|
||||
end else if (CONFIG.INCLUDE_UNIT.FPU && FLEN < 32) begin : gen_upper_always_int
|
||||
assign sq_data_in[31:FLEN] = marshalled_data[31:FLEN];
|
||||
assign sq_data_in[FLEN-1:0] = retire_table_out.fp ? fp_marshalled_data[FLEN-1:0] : marshalled_data[FLEN-1:0];
|
||||
end else begin : gen_no_fpu
|
||||
assign sq_data_in = marshalled_data;
|
||||
end
|
||||
endgenerate
|
||||
|
||||
lutram_1w_1r #(.DATA_TYPE(logic[FINAL_TABLE_WIDTH-1:0]), .DEPTH(CONFIG.SQ_DEPTH))
|
||||
sq_data_lutram (
|
||||
.clk(clk),
|
||||
.waddr(retire_table_out.sq_index),
|
||||
.raddr(sq_oldest),
|
||||
.ram_write(store_retire.valid),
|
||||
.new_ram_data(sq_data_in),
|
||||
.ram_data_out(sq_data_out)
|
||||
);
|
||||
|
||||
assign sq.valid = |released_count;
|
||||
assign sq.data_out = '{
|
||||
addr : output_entry_r.addr,
|
||||
be : output_entry_r.be,
|
||||
cache_op : output_entry_r.cache_op,
|
||||
data : sq_data_out[31:0],
|
||||
fp : output_entry_r.fp,
|
||||
double : output_entry_r.double,
|
||||
fp_data : FLEN'(sq_data_out[(CONFIG.INCLUDE_UNIT.FPU ? FLEN : 32)-1:0])
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//End of Implementation
|
||||
////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Assertions
|
||||
//A push is only legal when the queue is not full, or when a pop occurs in the same cycle
sq_overflow_assertion:
|
||||
assert property (@(posedge clk) disable iff (rst) sq.push |-> (~sq.full | sq.pop)) else $error("sq overflow");
|
||||
//A pop is only legal while sq.valid is set (at least one released store pending)
//NOTE(review): label says "fifo" while the message and the sibling label say "sq" - confirm before renaming
fifo_underflow_assertion:
|
||||
assert property (@(posedge clk) disable iff (rst) sq.pop |-> sq.valid) else $error("sq underflow");
|
||||
|
||||
|
||||
endmodule
|
|
@ -25,22 +25,32 @@ module mul_unit
|
|||
import cva5_config::*;
|
||||
import riscv_types::*;
|
||||
import cva5_types::*;
|
||||
import opcodes::*;
|
||||
|
||||
(
|
||||
input logic clk,
|
||||
input logic rst,
|
||||
|
||||
input mul_inputs_t mul_inputs,
|
||||
input decode_packet_t decode_stage,
|
||||
output logic unit_needed,
|
||||
output logic [REGFILE_READ_PORTS-1:0] uses_rs,
|
||||
output logic uses_rd,
|
||||
|
||||
input issue_packet_t issue_stage,
|
||||
input logic issue_stage_ready,
|
||||
input logic [31:0] rf [REGFILE_READ_PORTS],
|
||||
|
||||
unit_issue_interface.unit issue,
|
||||
unit_writeback_interface.unit wb
|
||||
);
|
||||
|
||||
common_instruction_t instruction;//rs1_addr, rs2_addr, fn3, fn7, rd_addr, upper/lower opcode
|
||||
|
||||
logic signed [63:0] result;
|
||||
logic mulh [2];
|
||||
logic valid [2];
|
||||
id_t id [2];
|
||||
|
||||
logic rs1_is_signed, rs2_is_signed;
|
||||
logic rs1_is_signed, rs2_is_signed, is_mulhx;
|
||||
logic signed [32:0] rs1_ext, rs2_ext;
|
||||
logic signed [32:0] rs1_r, rs2_r;
|
||||
|
||||
|
@ -48,11 +58,29 @@ module mul_unit
|
|||
logic stage2_advance;
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
assign rs1_is_signed = mul_inputs.op[1:0] inside {MULH_fn3[1:0], MULHSU_fn3[1:0]};//MUL doesn't matter
|
||||
assign rs2_is_signed = mul_inputs.op[1:0] inside {MUL_fn3[1:0], MULH_fn3[1:0]};//MUL doesn't matter
|
||||
|
||||
assign rs1_ext = signed'({mul_inputs.rs1[31] & rs1_is_signed, mul_inputs.rs1});
|
||||
assign rs2_ext = signed'({mul_inputs.rs2[31] & rs2_is_signed, mul_inputs.rs2});
|
||||
////////////////////////////////////////////////////
|
||||
//Decode
|
||||
assign unit_needed = decode_stage.instruction inside {MUL, MULH, MULHSU, MULHU};
|
||||
always_comb begin
|
||||
uses_rs = '0;
|
||||
uses_rs[RS1] = unit_needed;
|
||||
uses_rs[RS2] = unit_needed;
|
||||
uses_rd = unit_needed;
|
||||
end
|
||||
|
||||
assign instruction = decode_stage.instruction;
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue_stage_ready) begin
|
||||
rs1_is_signed <= instruction.fn3[1:0] inside {MULH_fn3[1:0], MULHSU_fn3[1:0]};
|
||||
rs2_is_signed <= instruction.fn3[1:0] inside {MULH_fn3[1:0]};
|
||||
is_mulhx <= instruction.fn3[1:0] inside {MULH_fn3[1:0], MULHSU_fn3[1:0], MULHU_fn3[1:0]};
|
||||
end
|
||||
end
|
||||
////////////////////////////////////////////////////
|
||||
//Issue
|
||||
assign rs1_ext = signed'({rs1_is_signed & rf[RS1][31], rf[RS1]});
|
||||
assign rs2_ext = signed'({rs2_is_signed & rf[RS2][31], rf[RS2]});
|
||||
|
||||
//Pipeline advancement control signals
|
||||
assign issue.ready = stage1_advance;
|
||||
|
@ -73,7 +101,7 @@ module mul_unit
|
|||
//Attribute Pipeline
|
||||
always_ff @ (posedge clk) begin
|
||||
if (stage1_advance) begin
|
||||
mulh[0] <= (mul_inputs.op[1:0] != MUL_fn3[1:0]);
|
||||
mulh[0] <= is_mulhx;
|
||||
id[0] <= issue.id;
|
||||
end
|
||||
if (stage2_advance) begin
|
|
@ -64,10 +64,7 @@ module branch_predictor
|
|||
|
||||
localparam BRANCH_ADDR_W = $clog2(CONFIG.BP.ENTRIES);
|
||||
localparam BTAG_W = get_memory_width() - BRANCH_ADDR_W - 2;
|
||||
|
||||
function logic[BTAG_W-1:0] get_tag (input logic[31:0] pc);
|
||||
return pc[BRANCH_ADDR_W+2 +: BTAG_W];
|
||||
endfunction
|
||||
cache_functions_interface #(.TAG_W(BTAG_W), .LINE_W(BRANCH_ADDR_W), .SUB_LINE_W(0)) addr_utils();
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
|
@ -86,8 +83,6 @@ module branch_predictor
|
|||
logic branch_prediction_used;
|
||||
logic [CONFIG.BP.WAYS-1:0] branch_predictor_update_way;
|
||||
} branch_metadata_t;
|
||||
(* ramstyle = "MLAB, no_rw_check" *) logic [$bits(branch_metadata_t)-1:0] branch_metadata_table [MAX_IDS];
|
||||
branch_metadata_t branch_metadata_if;
|
||||
branch_metadata_t branch_metadata_ex;
|
||||
|
||||
logic branch_predictor_direction_changed;
|
||||
|
@ -101,43 +96,52 @@ module branch_predictor
|
|||
logic [$clog2(CONFIG.BP.WAYS > 1 ? CONFIG.BP.WAYS : 2)-1:0] hit_way;
|
||||
logic tag_match;
|
||||
logic use_predicted_pc;
|
||||
|
||||
addr_utils_interface #(CONFIG.IBUS_ADDR.L, CONFIG.IBUS_ADDR.H) ibus_addr_utils ();
|
||||
|
||||
/////////////////////////////////////////
|
||||
|
||||
genvar i;
|
||||
generate if (CONFIG.INCLUDE_BRANCH_PREDICTOR)
|
||||
for (i=0; i<CONFIG.BP.WAYS; i++) begin : gen_branch_tag_banks
|
||||
branch_predictor_ram #(.C_DATA_WIDTH($bits(branch_table_entry_t)), .C_DEPTH(CONFIG.BP.ENTRIES))
|
||||
dual_port_bram #(.WIDTH($bits(branch_table_entry_t)), .LINES(CONFIG.BP.ENTRIES))
|
||||
tag_bank (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.write_addr (br_results.pc[2 +: BRANCH_ADDR_W]),
|
||||
.write_en (tag_update_way[i]),
|
||||
.write_data (ex_entry),
|
||||
.read_addr (bp.next_pc[2 +: BRANCH_ADDR_W]),
|
||||
.read_en (bp.new_mem_request),
|
||||
.read_data (if_entry[i]));
|
||||
.clk (clk),
|
||||
.en_a (tag_update_way[i]),
|
||||
.wen_a (tag_update_way[i]),
|
||||
.addr_a (addr_utils.getHashedLineAddr(br_results.pc, i)),
|
||||
.data_in_a (ex_entry),
|
||||
.data_out_a (),
|
||||
.en_b (bp.new_mem_request),
|
||||
.wen_b (0),
|
||||
.addr_b (addr_utils.getHashedLineAddr(bp.next_pc, i)),
|
||||
.data_in_b ('0),
|
||||
.data_out_b (if_entry[i]));
|
||||
end
|
||||
endgenerate
|
||||
|
||||
generate if (CONFIG.INCLUDE_BRANCH_PREDICTOR)
|
||||
for (i=0; i<CONFIG.BP.WAYS; i++) begin : gen_branch_table_banks
|
||||
branch_predictor_ram #(.C_DATA_WIDTH(32), .C_DEPTH(CONFIG.BP.ENTRIES))
|
||||
dual_port_bram #(.WIDTH(32), .LINES(CONFIG.BP.ENTRIES))
|
||||
addr_table (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.write_addr(br_results.pc[2 +: BRANCH_ADDR_W]),
|
||||
.write_en(target_update_way[i]),
|
||||
.write_data(br_results.target_pc),
|
||||
.read_addr(bp.next_pc[2 +: BRANCH_ADDR_W]),
|
||||
.read_en(bp.new_mem_request),
|
||||
.read_data(predicted_pc[i])
|
||||
.clk (clk),
|
||||
.en_a (target_update_way[i]),
|
||||
.wen_a (target_update_way[i]),
|
||||
.addr_a (addr_utils.getHashedLineAddr(br_results.pc, i)),
|
||||
.data_in_a (br_results.target_pc),
|
||||
.data_out_a (),
|
||||
.en_b (bp.new_mem_request),
|
||||
.wen_b (0),
|
||||
.addr_b (addr_utils.getHashedLineAddr(bp.next_pc, i)),
|
||||
.data_in_b ('0),
|
||||
.data_out_b (predicted_pc[i])
|
||||
);
|
||||
end
|
||||
endgenerate
|
||||
|
||||
generate if (CONFIG.INCLUDE_BRANCH_PREDICTOR)
|
||||
for (i=0; i<CONFIG.BP.WAYS; i++) begin : gen_branch_hit_detection
|
||||
assign tag_matches[i] = ({if_entry[i].valid, if_entry[i].tag} == {1'b1, get_tag(bp.if_pc)});
|
||||
assign tag_matches[i] = ({if_entry[i].valid, if_entry[i].tag} == {1'b1, addr_utils.getTag(bp.if_pc)});
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
@ -172,20 +176,25 @@ module branch_predictor
|
|||
.en (1'b1),
|
||||
.one_hot (replacement_way)
|
||||
);
|
||||
assign branch_metadata_if.branch_predictor_metadata = if_entry[hit_way].metadata;
|
||||
assign branch_metadata_if.branch_prediction_used = use_predicted_pc;
|
||||
assign branch_metadata_if.branch_predictor_update_way = tag_match ? tag_matches : replacement_way;
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
if (bp.pc_id_assigned)
|
||||
branch_metadata_table[bp.pc_id] <= branch_metadata_if;
|
||||
end
|
||||
assign branch_metadata_ex = branch_metadata_table[br_results.id];
|
||||
lutram_1w_1r #(.DATA_TYPE(branch_metadata_t), .DEPTH(MAX_IDS))
|
||||
branch_metadata_table (
|
||||
.clk(clk),
|
||||
.waddr(bp.pc_id),
|
||||
.raddr(br_results.id),
|
||||
.ram_write(bp.pc_id_assigned),
|
||||
.new_ram_data('{
|
||||
branch_predictor_metadata : if_entry[hit_way].metadata,
|
||||
branch_prediction_used : use_predicted_pc,
|
||||
branch_predictor_update_way : tag_match ? tag_matches : replacement_way
|
||||
}),
|
||||
.ram_data_out(branch_metadata_ex)
|
||||
);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Execution stage update
|
||||
assign ex_entry.valid = 1;
|
||||
assign ex_entry.tag = get_tag(br_results.pc);
|
||||
assign ex_entry.tag = addr_utils.getTag(br_results.pc);
|
||||
assign ex_entry.is_branch = br_results.is_branch;
|
||||
assign ex_entry.is_return = br_results.is_return;
|
||||
assign ex_entry.is_call = br_results.is_call;
|
|
@ -36,7 +36,6 @@ module fetch
|
|||
|
||||
input logic branch_flush,
|
||||
input gc_outputs_t gc,
|
||||
input logic tlb_on,
|
||||
input logic exception,
|
||||
|
||||
//ID Support
|
||||
|
@ -60,10 +59,7 @@ module fetch
|
|||
wishbone_interface.master iwishbone,
|
||||
input logic icache_on,
|
||||
l1_arbiter_request_interface.master l1_request,
|
||||
l1_arbiter_return_interface.master l1_response,
|
||||
|
||||
//Trace Interface
|
||||
output logic tr_early_branch_correction
|
||||
l1_arbiter_return_interface.master l1_response
|
||||
);
|
||||
|
||||
localparam NUM_SUB_UNITS = int'(CONFIG.INCLUDE_ILOCAL_MEM) + int'(CONFIG.INCLUDE_ICACHE) + int'(CONFIG.INCLUDE_IBUS);
|
||||
|
@ -97,7 +93,6 @@ module fetch
|
|||
logic mmu_fault;
|
||||
logic [NUM_SUB_UNITS_W-1:0] subunit_id;
|
||||
} fetch_attributes_t;
|
||||
fetch_attributes_t fetch_attr_next;
|
||||
fetch_attributes_t fetch_attr;
|
||||
|
||||
logic [MAX_OUTSTANDING_REQUESTS_W:0] inflight_count;
|
||||
|
@ -113,15 +108,13 @@ module fetch
|
|||
logic [31:0] pc;
|
||||
|
||||
logic flush_or_rst;
|
||||
fifo_interface #(.DATA_WIDTH($bits(fetch_attributes_t))) fetch_attr_fifo();
|
||||
fifo_interface #(.DATA_TYPE(fetch_attributes_t)) fetch_attr_fifo();
|
||||
|
||||
logic update_pc;
|
||||
logic new_mem_request;
|
||||
logic exception_pending;
|
||||
logic internal_fetch_complete;
|
||||
|
||||
logic [31:0] translated_address;
|
||||
|
||||
genvar i;
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
|
@ -164,50 +157,55 @@ module fetch
|
|||
assign bp.pc_id = pc_id;
|
||||
assign bp.pc_id_assigned = pc_id_assigned;
|
||||
|
||||
assign ras.pop = bp.use_prediction & bp.is_return & ~branch_flush & ~gc.pc_override & new_mem_request & (~early_branch_flush);
|
||||
assign ras.push = bp.use_prediction & bp.is_call & ~branch_flush & ~gc.pc_override & new_mem_request & (~early_branch_flush);
|
||||
////////////////////////////////////////////////////
|
||||
//RAS support
|
||||
logic ras_update_permitted;
|
||||
assign ras_update_permitted = bp.use_prediction & new_mem_request & ~(branch_flush | gc.pc_override | early_branch_flush);
|
||||
|
||||
assign ras.pop = bp.is_return & ras_update_permitted;
|
||||
assign ras.push = bp.is_call & ras_update_permitted;
|
||||
assign ras.branch_fetched = bp.is_branch & ras_update_permitted;
|
||||
assign ras.new_addr = pc_plus_4;
|
||||
assign ras.branch_fetched = bp.use_prediction & bp.is_branch & new_mem_request & (~early_branch_flush); //flush not needed as FIFO resets inside of RAS
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//TLB
|
||||
assign tlb.virtual_address = pc;
|
||||
assign tlb.execute = 1;
|
||||
assign tlb.rnw = 0;
|
||||
assign tlb.new_request = tlb.ready & (CONFIG.INCLUDE_S_MODE & tlb_on);
|
||||
assign translated_address = (CONFIG.INCLUDE_S_MODE & tlb_on) ? tlb.physical_address : pc;
|
||||
assign tlb.new_request = tlb.ready;
|
||||
|
||||
//////////////////////////////////////////////
|
||||
//Issue Control Signals
|
||||
assign flush_or_rst = (rst | gc.fetch_flush | early_branch_flush);
|
||||
|
||||
assign new_mem_request = (~tlb_on | tlb.done) & pc_id_available & ~fetch_attr_fifo.full & units_ready & (~gc.fetch_hold) & (~exception_pending);
|
||||
assign new_mem_request = tlb.done & pc_id_available & ~fetch_attr_fifo.full & units_ready & (~gc.fetch_hold) & (~exception_pending);
|
||||
assign pc_id_assigned = new_mem_request | tlb.is_fault;
|
||||
|
||||
//////////////////////////////////////////////
|
||||
//Subunit Tracking
|
||||
assign fetch_attr_fifo.push = pc_id_assigned;
|
||||
assign fetch_attr_fifo.potential_push = pc_id_assigned;
|
||||
assign fetch_attr_fifo.pop = internal_fetch_complete;
|
||||
logic [NUM_SUB_UNITS_W-1:0] subunit_id;
|
||||
one_hot_to_integer #(NUM_SUB_UNITS)
|
||||
hit_way_conv (
|
||||
.one_hot (sub_unit_address_match),
|
||||
.int_out (fetch_attr_next.subunit_id)
|
||||
.int_out (subunit_id)
|
||||
);
|
||||
assign fetch_attr_next.is_predicted_branch_or_jump = bp.use_prediction;
|
||||
assign fetch_attr_next.is_branch = bp.use_prediction & bp.is_branch;
|
||||
assign fetch_attr_next.address_valid = address_valid;
|
||||
assign fetch_attr_next.mmu_fault = tlb.is_fault;
|
||||
assign fetch_attr_fifo.data_in = '{
|
||||
is_predicted_branch_or_jump : bp.use_prediction,
|
||||
is_branch : (bp.use_prediction & bp.is_branch),
|
||||
address_valid : address_valid,
|
||||
mmu_fault : tlb.is_fault,
|
||||
subunit_id : subunit_id
|
||||
};
|
||||
assign fetch_attr_fifo.push = pc_id_assigned;
|
||||
assign fetch_attr_fifo.potential_push = pc_id_assigned;
|
||||
assign fetch_attr_fifo.pop = internal_fetch_complete;
|
||||
|
||||
assign fetch_attr_fifo.data_in = fetch_attr_next;
|
||||
|
||||
cva5_fifo #(.DATA_WIDTH($bits(fetch_attributes_t)), .FIFO_DEPTH(MAX_OUTSTANDING_REQUESTS))
|
||||
cva5_fifo #(.DATA_TYPE(fetch_attributes_t), .FIFO_DEPTH(MAX_OUTSTANDING_REQUESTS))
|
||||
attributes_fifo (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.fifo (fetch_attr_fifo)
|
||||
);
|
||||
|
||||
assign fetch_attr = fetch_attr_fifo.data_out;
|
||||
|
||||
assign inflight_count_next = inflight_count + MAX_OUTSTANDING_REQUESTS_W'(fetch_attr_fifo.push) - MAX_OUTSTANDING_REQUESTS_W'(fetch_attr_fifo.pop);
|
||||
|
@ -234,7 +232,7 @@ module fetch
|
|||
//In either case, data_valid must NOT be asserted.
|
||||
generate for (i=0; i < NUM_SUB_UNITS; i++) begin : gen_fetch_sources
|
||||
assign sub_unit[i].new_request = fetch_attr_fifo.push & sub_unit_address_match[i];
|
||||
assign sub_unit[i].addr = translated_address;
|
||||
assign sub_unit[i].addr = tlb.physical_address;
|
||||
assign sub_unit[i].re = 1;
|
||||
assign sub_unit[i].we = 0;
|
||||
assign sub_unit[i].be = '0;
|
||||
|
@ -247,7 +245,7 @@ module fetch
|
|||
endgenerate
|
||||
|
||||
generate if (CONFIG.INCLUDE_ILOCAL_MEM) begin : gen_fetch_local_mem
|
||||
assign sub_unit_address_match[LOCAL_MEM_ID] = ilocal_mem_addr_utils.address_range_check(translated_address);
|
||||
assign sub_unit_address_match[LOCAL_MEM_ID] = ilocal_mem_addr_utils.address_range_check(tlb.physical_address);
|
||||
local_mem_sub_unit i_local_mem (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
|
@ -258,7 +256,7 @@ module fetch
|
|||
endgenerate
|
||||
|
||||
generate if (CONFIG.INCLUDE_IBUS) begin : gen_fetch_ibus
|
||||
assign sub_unit_address_match[BUS_ID] = ibus_addr_utils.address_range_check(translated_address);
|
||||
assign sub_unit_address_match[BUS_ID] = ibus_addr_utils.address_range_check(tlb.physical_address);
|
||||
wishbone_master iwishbone_bus (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
|
@ -269,7 +267,7 @@ module fetch
|
|||
endgenerate
|
||||
|
||||
generate if (CONFIG.INCLUDE_ICACHE) begin : gen_fetch_icache
|
||||
assign sub_unit_address_match[ICACHE_ID] = icache_addr_utils.address_range_check(translated_address);
|
||||
assign sub_unit_address_match[ICACHE_ID] = icache_addr_utils.address_range_check(tlb.physical_address);
|
||||
icache #(.CONFIG(CONFIG))
|
||||
i_cache (
|
||||
.clk (clk),
|
||||
|
@ -308,8 +306,6 @@ module fetch
|
|||
assign is_branch_or_jump = fetch_instruction[6:2] inside {JAL_T, JALR_T, BRANCH_T};
|
||||
assign early_branch_flush = (valid_fetch_result & (|unit_data_valid)) & fetch_attr.is_predicted_branch_or_jump & (~is_branch_or_jump);
|
||||
assign early_branch_flush_ras_adjust = (valid_fetch_result & (|unit_data_valid)) & fetch_attr.is_branch & (~is_branch_or_jump);
|
||||
if (ENABLE_TRACE_INTERFACE)
|
||||
assign tr_early_branch_correction = early_branch_flush;
|
||||
end endgenerate
|
||||
////////////////////////////////////////////////////
|
||||
//End of Implementation
|
|
@ -42,6 +42,9 @@ module icache
|
|||
);
|
||||
|
||||
localparam derived_cache_config_t SCONFIG = get_derived_cache_params(CONFIG, CONFIG.ICACHE, CONFIG.ICACHE_ADDR);
|
||||
localparam bit [SCONFIG.SUB_LINE_ADDR_W-1:0] END_OF_LINE_COUNT = SCONFIG.SUB_LINE_ADDR_W'(CONFIG.ICACHE.LINE_W-1);
|
||||
|
||||
cache_functions_interface #(.TAG_W(SCONFIG.TAG_W), .LINE_W(SCONFIG.LINE_ADDR_W), .SUB_LINE_W(SCONFIG.SUB_LINE_ADDR_W)) addr_utils();
|
||||
|
||||
logic tag_hit;
|
||||
logic [CONFIG.ICACHE.WAYS-1:0] tag_hit_way;
|
||||
|
@ -51,20 +54,24 @@ module icache
|
|||
logic [CONFIG.ICACHE.WAYS-1:0] tag_update_way;
|
||||
|
||||
logic [SCONFIG.SUB_LINE_ADDR_W-1:0] word_count;
|
||||
logic [SCONFIG.SUB_LINE_ADDR_W-1:0] target_word;
|
||||
logic is_target_word;
|
||||
|
||||
logic line_complete;
|
||||
|
||||
logic [31:0] data_out [CONFIG.ICACHE.WAYS-1:0];
|
||||
logic [31:0] miss_data;
|
||||
|
||||
logic miss_in_progress;
|
||||
logic linefill_in_progress;
|
||||
logic request_in_progress;
|
||||
|
||||
logic miss_data_valid;
|
||||
logic second_cycle;
|
||||
logic [31:0] second_cycle_addr;
|
||||
|
||||
logic idle;
|
||||
logic memory_complete;
|
||||
fifo_interface #(.DATA_TYPE(logic[31:0])) input_fifo();
|
||||
|
||||
logic new_request;
|
||||
logic [31:0] new_request_addr;
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
|
||||
|
@ -72,18 +79,44 @@ module icache
|
|||
//On the second cycle of a request hit/miss determination is performed
|
||||
//On a miss, the memory request starts on the third cycle
|
||||
|
||||
assign new_request = (fetch_sub.new_request | input_fifo.valid) & ((~request_in_progress | tag_hit) & ~linefill_in_progress);
|
||||
|
||||
assign input_fifo.push = fetch_sub.new_request & (~new_request | input_fifo.valid);
|
||||
assign input_fifo.potential_push = input_fifo.push;
|
||||
assign input_fifo.pop = new_request & input_fifo.valid;
|
||||
assign input_fifo.data_in = fetch_sub.addr;
|
||||
|
||||
assign new_request_addr = input_fifo.valid ? input_fifo.data_out : fetch_sub.addr;
|
||||
|
||||
cva5_fifo #(.DATA_TYPE(logic[31:0]), .FIFO_DEPTH(2))
|
||||
cache_input_fifo (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.fifo (input_fifo)
|
||||
);
|
||||
////////////////////////////////////////////////////
|
||||
//Ready determination
|
||||
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
request_in_progress <= 0;
|
||||
else
|
||||
request_in_progress <= (request_in_progress & ~fetch_sub.data_valid) | new_request;
|
||||
end
|
||||
|
||||
assign fetch_sub.ready = ~input_fifo.full;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//General Control Logic
|
||||
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
second_cycle <= 0;
|
||||
else
|
||||
second_cycle <= fetch_sub.new_request;
|
||||
second_cycle <= new_request;
|
||||
end
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (fetch_sub.new_request)
|
||||
second_cycle_addr <= fetch_sub.addr;
|
||||
if (new_request)
|
||||
second_cycle_addr <= new_request_addr;
|
||||
end
|
||||
|
||||
//As request can be aborted on any cycle, only update tags if memory request is in progress
|
||||
|
@ -96,13 +129,13 @@ module icache
|
|||
|
||||
//Replacement policy is pseudo random
|
||||
cycler #(CONFIG.ICACHE.WAYS) replacement_policy (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.en (1'b1),
|
||||
.one_hot (replacement_way)
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.en (1'b1),
|
||||
.one_hot (replacement_way)
|
||||
);
|
||||
always_ff @ (posedge clk) begin
|
||||
if (second_cycle)
|
||||
if (second_cycle & ~linefill_in_progress)
|
||||
tag_update_way <= replacement_way;
|
||||
end
|
||||
|
||||
|
@ -132,102 +165,84 @@ module icache
|
|||
//Miss state tracking
|
||||
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
miss_in_progress <= 0;
|
||||
linefill_in_progress <= 0;
|
||||
else
|
||||
miss_in_progress <= l1_request.ack | (miss_in_progress & ~line_complete);
|
||||
linefill_in_progress <= (linefill_in_progress & ~line_complete) | l1_request.ack;
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Tag banks
|
||||
itag_banks #(.CONFIG(CONFIG), .SCONFIG(SCONFIG))
|
||||
icache_tag_banks (
|
||||
.clk(clk),
|
||||
.rst(rst), //clears the read_hit_allowed flag
|
||||
.stage1_addr(fetch_sub.addr),
|
||||
.stage2_addr(second_cycle_addr),
|
||||
.update_way(tag_update_way),
|
||||
.update(tag_update),
|
||||
.stage1_adv(fetch_sub.new_request & icache_on),
|
||||
.tag_hit(tag_hit),
|
||||
.tag_hit_way(tag_hit_way)
|
||||
.clk(clk),
|
||||
.rst(rst), //clears the read_hit_allowed flag
|
||||
.stage1_line_addr(addr_utils.getTagLineAddr(new_request_addr)),
|
||||
.stage2_line_addr(addr_utils.getTagLineAddr(second_cycle_addr)),
|
||||
.stage2_tag(addr_utils.getTag(second_cycle_addr)),
|
||||
.update_way(tag_update_way),
|
||||
.update(tag_update),
|
||||
.stage1_adv(new_request & icache_on),
|
||||
.tag_hit(tag_hit),
|
||||
.tag_hit_way(tag_hit_way)
|
||||
);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Data Banks
|
||||
genvar i;
|
||||
generate for (i=0; i < CONFIG.ICACHE.WAYS; i++) begin : idata_bank_gen
|
||||
byte_en_BRAM #(CONFIG.ICACHE.LINES*CONFIG.ICACHE.LINE_W) idata_bank (
|
||||
dual_port_bram #(.WIDTH(32), .LINES(CONFIG.ICACHE.LINES*CONFIG.ICACHE.LINE_W)) idata_bank (
|
||||
.clk(clk),
|
||||
.addr_a(fetch_sub.addr[2 +: SCONFIG.LINE_ADDR_W+SCONFIG.SUB_LINE_ADDR_W]),
|
||||
.addr_b({second_cycle_addr[(2+SCONFIG.SUB_LINE_ADDR_W) +: SCONFIG.LINE_ADDR_W], word_count}),
|
||||
.en_a(fetch_sub.new_request),
|
||||
.en_b(tag_update_way[i] & l1_response.data_valid),
|
||||
.be_a('0),
|
||||
.be_b('1),
|
||||
.en_a(new_request),
|
||||
.wen_a(0),
|
||||
.addr_a(addr_utils.getDataLineAddr(new_request_addr)),
|
||||
.data_in_a('0),
|
||||
.data_in_b(l1_response.data),
|
||||
.data_out_a(data_out[i]),
|
||||
.en_b(1),
|
||||
.wen_b(tag_update_way[i] & l1_response.data_valid),
|
||||
.addr_b(addr_utils.getDataLineAddr({second_cycle_addr[31:SCONFIG.SUB_LINE_ADDR_W+2], word_count, 2'b0})),
|
||||
.data_in_b(l1_response.data),
|
||||
.data_out_b()
|
||||
);
|
||||
end endgenerate
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Miss data path
|
||||
assign target_word = second_cycle_addr[2 +: SCONFIG.SUB_LINE_ADDR_W];
|
||||
assign is_target_word = (target_word == word_count);
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
word_count <= 0;
|
||||
else if (l1_response.data_valid)
|
||||
word_count <= word_count + 1;
|
||||
end
|
||||
|
||||
assign is_target_word = (second_cycle_addr[2 +: SCONFIG.SUB_LINE_ADDR_W] == word_count);
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
if (l1_response.data_valid & is_target_word)
|
||||
miss_data <= l1_response.data;
|
||||
else
|
||||
miss_data <= 0;
|
||||
word_count <= word_count + SCONFIG.SUB_LINE_ADDR_W'(l1_response.data_valid);
|
||||
end
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
miss_data_valid <= 0;
|
||||
else
|
||||
miss_data_valid <= miss_in_progress & l1_response.data_valid & is_target_word;
|
||||
end
|
||||
|
||||
assign line_complete = (l1_response.data_valid && (word_count == SCONFIG.SUB_LINE_ADDR_W'(CONFIG.ICACHE.LINE_W-1)));
|
||||
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
memory_complete <= 0;
|
||||
else
|
||||
memory_complete <= line_complete;
|
||||
end
|
||||
assign miss_data_valid = request_in_progress & l1_response.data_valid & is_target_word;
|
||||
assign line_complete = l1_response.data_valid & (word_count == END_OF_LINE_COUNT);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Output muxing
|
||||
localparam OMUX_W = CONFIG.ICACHE.WAYS+1;
|
||||
logic [OMUX_W-1:0] priority_vector;
|
||||
logic [$clog2(OMUX_W)-1:0] output_sel;
|
||||
logic [31:0] output_array [OMUX_W];
|
||||
always_comb begin
|
||||
fetch_sub.data_out = miss_data;//zero if not a miss
|
||||
priority_vector[0] = miss_data_valid;
|
||||
output_array[0] = l1_response.data;
|
||||
for (int i = 0; i < CONFIG.ICACHE.WAYS; i++) begin
|
||||
fetch_sub.data_out = fetch_sub.data_out | (data_out[i] & {32{tag_hit_way[i]}});
|
||||
priority_vector[i+1] = tag_hit_way[i];
|
||||
output_array[i+1] = data_out[i];
|
||||
end
|
||||
end
|
||||
|
||||
priority_encoder #(.WIDTH(OMUX_W))
|
||||
arb_encoder
|
||||
(
|
||||
.priority_vector (priority_vector),
|
||||
.encoded_result (output_sel)
|
||||
);
|
||||
assign fetch_sub.data_out = output_array[output_sel];
|
||||
assign fetch_sub.data_valid = miss_data_valid | tag_hit;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Ready determination
|
||||
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
idle <= 1;
|
||||
else if (fetch_sub.new_request)
|
||||
idle <= 0;
|
||||
else if (memory_complete | tag_hit) //read miss OR write through complete
|
||||
idle <= 1;
|
||||
end
|
||||
|
||||
assign fetch_sub.ready = tag_hit | memory_complete | idle;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//End of Implementation
|
||||
////////////////////////////////////////////////////
|
||||
|
@ -239,7 +254,7 @@ module icache
|
|||
else $error("Spurious icache ack received from arbiter!");
|
||||
|
||||
icache_l1_arb_data_valid_assertion:
|
||||
assert property (@(posedge clk) disable iff (rst) l1_response.data_valid |-> miss_in_progress)
|
||||
assert property (@(posedge clk) disable iff (rst) l1_response.data_valid |-> linefill_in_progress)
|
||||
else $error("Spurious icache data received from arbiter!");
|
||||
|
||||
endmodule
|
|
@ -34,8 +34,9 @@ module itag_banks
|
|||
input logic clk,
|
||||
input logic rst,
|
||||
|
||||
input logic[31:0] stage1_addr,
|
||||
input logic[31:0] stage2_addr,
|
||||
input logic[SCONFIG.LINE_ADDR_W-1:0] stage1_line_addr,
|
||||
input logic[SCONFIG.LINE_ADDR_W-1:0] stage2_line_addr,
|
||||
input logic[SCONFIG.TAG_W-1:0] stage2_tag,
|
||||
|
||||
input logic[CONFIG.ICACHE.WAYS-1:0] update_way,
|
||||
input logic update,
|
||||
|
@ -46,22 +47,11 @@ module itag_banks
|
|||
output logic[CONFIG.ICACHE.WAYS-1:0] tag_hit_way
|
||||
);
|
||||
|
||||
//Valid + tag
|
||||
typedef logic [SCONFIG.TAG_W : 0] itag_entry_t;
|
||||
|
||||
function logic[SCONFIG.TAG_W-1:0] getTag(logic[31:0] addr);
|
||||
return addr[2+SCONFIG.SUB_LINE_ADDR_W+SCONFIG.LINE_ADDR_W +: SCONFIG.TAG_W];
|
||||
endfunction
|
||||
|
||||
function logic[SCONFIG.LINE_ADDR_W-1:0] getLineAddr(logic[31:0] addr);
|
||||
return addr[SCONFIG.LINE_ADDR_W + SCONFIG.SUB_LINE_ADDR_W + 1 : SCONFIG.SUB_LINE_ADDR_W + 2];
|
||||
endfunction
|
||||
|
||||
logic hit_allowed;
|
||||
itag_entry_t tag_line[CONFIG.ICACHE.WAYS-1:0];
|
||||
|
||||
itag_entry_t stage2_tag;
|
||||
assign stage2_tag = {1'b1, getTag(stage2_addr)};
|
||||
|
||||
logic hit_allowed;
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
|
@ -73,23 +63,23 @@ module itag_banks
|
|||
genvar i;
|
||||
generate
|
||||
for (i=0; i < CONFIG.ICACHE.WAYS; i++) begin : tag_bank_gen
|
||||
|
||||
tag_bank #(SCONFIG.TAG_W+1, CONFIG.ICACHE.LINES) itag_bank (.*,
|
||||
.en_a(stage1_adv), .wen_a('0),
|
||||
.addr_a(getLineAddr(stage1_addr)),
|
||||
.data_in_a('0), .data_out_a(tag_line[i]),
|
||||
|
||||
.en_b(update), .wen_b(update_way[i]),
|
||||
.addr_b(getLineAddr(stage2_addr)),
|
||||
.data_in_b(stage2_tag), .data_out_b()
|
||||
dual_port_bram #(.WIDTH(SCONFIG.TAG_W+1), .LINES(CONFIG.ICACHE.LINES)) itag_bank (.*,
|
||||
.clk(clk),
|
||||
.en_a(stage1_adv),
|
||||
.wen_a('0),
|
||||
.addr_a(stage1_line_addr),
|
||||
.data_in_a('0),
|
||||
.data_out_a(tag_line[i]),
|
||||
.en_b(update),
|
||||
.wen_b(update_way[i]),
|
||||
.addr_b(stage2_line_addr),
|
||||
.data_in_b({1'b1, stage2_tag}),
|
||||
.data_out_b()
|
||||
);
|
||||
|
||||
assign tag_hit_way[i] = ({hit_allowed,stage2_tag} == {1'b1,tag_line[i]});
|
||||
|
||||
assign tag_hit_way[i] = ({hit_allowed, 1'b1, stage2_tag} == {1'b1, tag_line[i]});
|
||||
end
|
||||
endgenerate
|
||||
|
||||
assign tag_hit = |tag_hit_way;
|
||||
|
||||
|
||||
endmodule
|
|
@ -38,33 +38,36 @@ module ras
|
|||
ras_interface.self ras
|
||||
);
|
||||
|
||||
(* ramstyle = "MLAB, no_rw_check" *) logic[31:0] lut_ram [CONFIG.BP.RAS_ENTRIES];
|
||||
|
||||
localparam RAS_DEPTH_W = $clog2(CONFIG.BP.RAS_ENTRIES);
|
||||
logic [RAS_DEPTH_W-1:0] read_index;
|
||||
logic [RAS_DEPTH_W-1:0] new_index;
|
||||
fifo_interface #(.DATA_WIDTH(RAS_DEPTH_W)) ri_fifo();
|
||||
///////////////////////////////////////////////////////
|
||||
//For simulation purposes
|
||||
initial lut_ram = '{default: 0};
|
||||
fifo_interface #(.DATA_TYPE(logic[RAS_DEPTH_W-1:0])) ri_fifo();
|
||||
///////////////////////////////////////////////////////
|
||||
assign ras.addr = lut_ram[read_index];
|
||||
|
||||
//On a speculative branch, save the current stack pointer
|
||||
//Restored if branch is mispredicted (gc_fetch_flush)
|
||||
cva5_fifo #(.DATA_WIDTH(RAS_DEPTH_W), .FIFO_DEPTH(MAX_IDS))
|
||||
read_index_fifo (.clk, .rst(rst | gc.fetch_flush | early_branch_flush_ras_adjust), .fifo(ri_fifo));
|
||||
cva5_fifo #(.DATA_TYPE(logic[RAS_DEPTH_W-1:0]), .FIFO_DEPTH(MAX_IDS))
|
||||
read_index_fifo (
|
||||
.clk,
|
||||
.rst(rst | gc.fetch_flush | early_branch_flush_ras_adjust),
|
||||
.fifo(ri_fifo)
|
||||
);
|
||||
|
||||
assign ri_fifo.data_in = read_index;
|
||||
assign ri_fifo.push = ras.branch_fetched;
|
||||
assign ri_fifo.potential_push = ras.branch_fetched;
|
||||
assign ri_fifo.pop = ras.branch_retired & ri_fifo.valid; //Prevent popping from fifo if reset due to early_branch_flush_ras_adjust
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
if (ras.push)
|
||||
lut_ram[new_index] <= ras.new_addr;
|
||||
end
|
||||
|
||||
lutram_1w_1r #(.DATA_TYPE(logic[31:0]), .DEPTH(CONFIG.BP.RAS_ENTRIES))
|
||||
ras_stack (
|
||||
.clk(clk),
|
||||
.waddr(new_index),
|
||||
.raddr(read_index),
|
||||
.ram_write(ras.push),
|
||||
.new_ram_data(ras.new_addr),
|
||||
.ram_data_out(ras.addr)
|
||||
);
|
||||
|
||||
//Rolls over when full, most recent calls will be correct, but calls greater than depth
|
||||
//will be lost.
|
||||
logic [RAS_DEPTH_W-1:0] new_index_base;
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2019 Eric Matthews, Lesley Shannon
|
||||
* Copyright © 2023 Chris Keilbart, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
@ -17,31 +17,29 @@
|
|||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Eric Matthews <ematthew@sfu.ca>
|
||||
* Chris Keilbart <ckeilbar@sfu.ca>
|
||||
*/
|
||||
|
||||
module shift_counter
|
||||
module fp_writeback
|
||||
|
||||
import cva5_config::*;
|
||||
import cva5_types::*;
|
||||
|
||||
#(parameter DEPTH = 16)
|
||||
(
|
||||
input logic clk,
|
||||
input logic rst,
|
||||
input logic start,
|
||||
output logic done
|
||||
//Unit writeback
|
||||
unit_writeback_interface.wb unit_wb[2],
|
||||
//WB output
|
||||
output fp_wb_packet_t wb_packet[2]
|
||||
);
|
||||
|
||||
logic [DEPTH-1:0] counter;
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
//Because there are two writeback ports for the FP register file, no arbitration is needed
|
||||
assign wb_packet[0].id = unit_wb[0].id;
|
||||
assign wb_packet[0].valid = unit_wb[0].done;
|
||||
assign wb_packet[0].data = unit_wb[0].rd;
|
||||
assign unit_wb[0].ack = unit_wb[0].done;
|
||||
|
||||
//TLB_CLEAR state shift reg
|
||||
always_ff @ (posedge clk) begin
|
||||
counter[0] <= start;
|
||||
counter[DEPTH-1:1] <= counter[DEPTH-2:0];
|
||||
end
|
||||
assign done = counter[DEPTH-1];
|
||||
assign wb_packet[1].id = unit_wb[1].id;
|
||||
assign wb_packet[1].valid = unit_wb[1].done;
|
||||
assign wb_packet[1].data = unit_wb[1].rd;
|
||||
assign unit_wb[1].ack = unit_wb[1].done;
|
||||
|
||||
endmodule
|
|
@ -51,25 +51,34 @@ module instruction_metadata_and_id_management
|
|||
output decode_packet_t decode,
|
||||
input logic decode_advance,
|
||||
input logic decode_uses_rd,
|
||||
input logic fp_decode_uses_rd,
|
||||
input rs_addr_t decode_rd_addr,
|
||||
input exception_sources_t decode_exception_unit,
|
||||
input logic decode_is_store,
|
||||
//renamer
|
||||
input phys_addr_t decode_phys_rd_addr,
|
||||
input phys_addr_t fp_decode_phys_rd_addr,
|
||||
|
||||
//Issue stage
|
||||
input issue_packet_t issue,
|
||||
input logic instruction_issued,
|
||||
input logic instruction_issued_with_rd,
|
||||
input logic fp_instruction_issued_with_rd,
|
||||
|
||||
//WB
|
||||
input wb_packet_t wb_packet [CONFIG.NUM_WB_GROUPS],
|
||||
output commit_packet_t commit_packet [CONFIG.NUM_WB_GROUPS],
|
||||
input fp_wb_packet_t fp_wb_packet [2],
|
||||
output phys_addr_t wb_phys_addr [CONFIG.NUM_WB_GROUPS],
|
||||
output phys_addr_t fp_wb_phys_addr [2],
|
||||
|
||||
//Retirer
|
||||
output retire_packet_t retire,
|
||||
output retire_packet_t wb_retire,
|
||||
output retire_packet_t fp_wb_retire,
|
||||
output retire_packet_t store_retire,
|
||||
output id_t retire_ids [RETIRE_PORTS],
|
||||
output id_t retire_ids_next [RETIRE_PORTS],
|
||||
output logic retire_port_valid [RETIRE_PORTS],
|
||||
output logic [LOG2_RETIRE_PORTS : 0] retire_count,
|
||||
|
||||
//CSR
|
||||
output logic [LOG2_MAX_IDS:0] post_issue_count,
|
||||
|
@ -78,16 +87,19 @@ module instruction_metadata_and_id_management
|
|||
output logic [$clog2(NUM_EXCEPTION_SOURCES)-1:0] current_exception_unit
|
||||
);
|
||||
//////////////////////////////////////////
|
||||
(* ramstyle = "MLAB, no_rw_check" *) logic [31:0] pc_table [MAX_IDS];
|
||||
(* ramstyle = "MLAB, no_rw_check" *) logic [31:0] instruction_table [MAX_IDS];
|
||||
(* ramstyle = "MLAB, no_rw_check" *) logic [0:0] valid_fetch_addr_table [MAX_IDS];
|
||||
localparam NUM_WB_GROUPS = CONFIG.NUM_WB_GROUPS + 32'(CONFIG.INCLUDE_UNIT.FPU) + 32'(CONFIG.INCLUDE_UNIT.FPU);
|
||||
logic [31:0] decode_pc;
|
||||
logic [31:0] decode_instruction;
|
||||
fetch_metadata_t decode_fetch_metadata;
|
||||
|
||||
(* ramstyle = "MLAB, no_rw_check" *) phys_addr_t phys_addr_table [MAX_IDS];
|
||||
(* ramstyle = "MLAB, no_rw_check" *) logic [0:0] uses_rd_table [MAX_IDS];
|
||||
|
||||
(* ramstyle = "MLAB, no_rw_check" *) logic [$bits(fetch_metadata_t)-1:0] fetch_metadata_table [MAX_IDS];
|
||||
|
||||
(* ramstyle = "MLAB, no_rw_check" *) logic [$bits(exception_sources_t)-1:0] exception_unit_table [MAX_IDS];
|
||||
typedef enum logic[1:0] {
|
||||
NONE = 2'b00,
|
||||
RD = 2'b01,
|
||||
STORE = 2'b10,
|
||||
FP_RD = 2'b11
|
||||
} instruction_type_t;
|
||||
instruction_type_t decode_type;
|
||||
instruction_type_t retire_type [RETIRE_PORTS];
|
||||
|
||||
id_t decode_id;
|
||||
id_t oldest_pre_issue_id;
|
||||
|
@ -99,60 +111,128 @@ module instruction_metadata_and_id_management
|
|||
logic [LOG2_MAX_IDS:0] post_issue_count_next;
|
||||
logic [LOG2_MAX_IDS:0] inflight_count;
|
||||
|
||||
retire_packet_t retire_next;
|
||||
logic retire_port_valid_next [RETIRE_PORTS];
|
||||
retire_packet_t wb_retire_next;
|
||||
retire_packet_t fp_wb_retire_next;
|
||||
retire_packet_t store_retire_next;
|
||||
|
||||
genvar i;
|
||||
logic retire_port_valid_next [RETIRE_PORTS];
|
||||
logic [LOG2_RETIRE_PORTS : 0] retire_count_next;
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Instruction Metadata
|
||||
//PC table
|
||||
//Number of read ports = 1 or 2 (decode stage + exception logic (if enabled))
|
||||
always_ff @ (posedge clk) begin
|
||||
if (pc_id_assigned)
|
||||
pc_table[pc_id] <= if_pc;
|
||||
end
|
||||
//PC table(s)
|
||||
lutram_1w_1r #(.DATA_TYPE(logic[31:0]), .DEPTH(MAX_IDS))
|
||||
pc_table (
|
||||
.clk(clk),
|
||||
.waddr(pc_id),
|
||||
.raddr(decode_id),
|
||||
.ram_write(pc_id_assigned),
|
||||
.new_ram_data(if_pc),
|
||||
.ram_data_out(decode_pc)
|
||||
);
|
||||
|
||||
generate if (CONFIG.INCLUDE_M_MODE) begin : gen_pc_id_exception_support
|
||||
lutram_1w_1r #(.DATA_TYPE(logic[31:0]), .DEPTH(MAX_IDS))
|
||||
pc_table_exception (
|
||||
.clk(clk),
|
||||
.waddr(pc_id),
|
||||
.raddr(retire_ids_next[0]),
|
||||
.ram_write(pc_id_assigned),
|
||||
.new_ram_data(if_pc),
|
||||
.ram_data_out(oldest_pc)
|
||||
);
|
||||
end endgenerate
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Instruction table
|
||||
//Number of read ports = 1 (decode stage)
|
||||
always_ff @ (posedge clk) begin
|
||||
if (fetch_complete)
|
||||
instruction_table[fetch_id] <= fetch_instruction;
|
||||
end
|
||||
lutram_1w_1r #(.DATA_TYPE(logic[31:0]), .DEPTH(MAX_IDS))
|
||||
instruction_table (
|
||||
.clk(clk),
|
||||
.waddr(fetch_id),
|
||||
.raddr(decode_id),
|
||||
.ram_write(fetch_complete),
|
||||
.new_ram_data(fetch_instruction),
|
||||
.ram_data_out(decode_instruction)
|
||||
);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Valid fetched address table
|
||||
//Number of read ports = 1 (decode stage)
|
||||
always_ff @ (posedge clk) begin
|
||||
if (fetch_complete)
|
||||
fetch_metadata_table[fetch_id] <= fetch_metadata;
|
||||
end
|
||||
lutram_1w_1r #(.DATA_TYPE(fetch_metadata_t), .DEPTH(MAX_IDS))
|
||||
fetch_metadata_table (
|
||||
.clk(clk),
|
||||
.waddr(fetch_id),
|
||||
.raddr(decode_id),
|
||||
.ram_write(fetch_complete),
|
||||
.new_ram_data(fetch_metadata),
|
||||
.ram_data_out(decode_fetch_metadata)
|
||||
);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Phys rd table
|
||||
//Number of read ports = (NUM_WB_GROUPS - 1) (ALU WB group uses issue_phys_rd_addr)
|
||||
always_ff @ (posedge clk) begin
|
||||
if (decode_advance)
|
||||
phys_addr_table[decode_id] <= decode_phys_rd_addr;
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Uses rd table
|
||||
//Retire Instruction Type Table
|
||||
//Number of read ports = RETIRE_PORTS
|
||||
always_ff @ (posedge clk) begin
|
||||
if (decode_advance)
|
||||
uses_rd_table[decode_id] <= decode_uses_rd & |decode_rd_addr;
|
||||
always_comb begin
|
||||
if (decode_uses_rd & |decode_rd_addr)
|
||||
decode_type = RD;
|
||||
else if (decode_is_store)
|
||||
decode_type = STORE;
|
||||
else if (fp_decode_uses_rd)
|
||||
decode_type = FP_RD;
|
||||
else
|
||||
decode_type = NONE;
|
||||
end
|
||||
lutram_1w_mr #(.DATA_TYPE(logic[1:0]), .DEPTH(MAX_IDS), .NUM_READ_PORTS(RETIRE_PORTS))
|
||||
retire_instruction_type_table (
|
||||
.clk(clk),
|
||||
.waddr(decode_id),
|
||||
.raddr(retire_ids_next),
|
||||
.ram_write(decode_advance),
|
||||
.new_ram_data(decode_type),
|
||||
.ram_data_out(retire_type)
|
||||
);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//id_to_phys_rd_table
|
||||
//Number of read ports = WB_GROUPS
|
||||
id_t wb_ids [NUM_WB_GROUPS];
|
||||
phys_addr_t wb_phys_addrs [NUM_WB_GROUPS];
|
||||
always_comb begin
|
||||
wb_ids[NUM_WB_GROUPS-2] = fp_wb_packet[0].id;
|
||||
wb_ids[NUM_WB_GROUPS-1] = fp_wb_packet[1].id;
|
||||
fp_wb_phys_addr[0] = wb_phys_addrs[NUM_WB_GROUPS-2];
|
||||
fp_wb_phys_addr[1] = wb_phys_addrs[NUM_WB_GROUPS-1];
|
||||
|
||||
for (int i = 0; i < CONFIG.NUM_WB_GROUPS; i++) begin
|
||||
//This will overwrite the FP packets if the configuration does not include it
|
||||
wb_ids[i] = wb_packet[i].id;
|
||||
wb_phys_addr[i] = wb_phys_addrs[i];
|
||||
end
|
||||
end
|
||||
|
||||
lutram_1w_mr #(.DATA_TYPE(phys_addr_t), .DEPTH(MAX_IDS), .NUM_READ_PORTS(NUM_WB_GROUPS))
|
||||
id_to_phys_rd_table (
|
||||
.clk(clk),
|
||||
.waddr(decode_id),
|
||||
.raddr(wb_ids),
|
||||
.ram_write(decode_advance),
|
||||
.new_ram_data(fp_decode_uses_rd ? fp_decode_phys_rd_addr : decode_phys_rd_addr),
|
||||
.ram_data_out(wb_phys_addrs)
|
||||
);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Exception unit table
|
||||
always_ff @ (posedge clk) begin
|
||||
if (decode_advance)
|
||||
exception_unit_table[decode_id] <= decode_exception_unit;
|
||||
end
|
||||
generate if (CONFIG.INCLUDE_M_MODE) begin : gen_id_exception_support
|
||||
lutram_1w_1r #(.DATA_TYPE(logic[$bits(exception_sources_t)-1:0]), .DEPTH(MAX_IDS))
|
||||
exception_unit_table (
|
||||
.clk(clk),
|
||||
.waddr(decode_id),
|
||||
.raddr(retire_ids_next[0]),
|
||||
.ram_write(decode_advance),
|
||||
.new_ram_data(decode_exception_unit),
|
||||
.ram_data_out(current_exception_unit)
|
||||
);
|
||||
end endgenerate
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//ID Management
|
||||
|
@ -182,19 +262,18 @@ module instruction_metadata_and_id_management
|
|||
end
|
||||
//Retire IDs
|
||||
//Each retire port lags behind the previous one by one index (eg. [3, 2, 1, 0])
|
||||
generate for (i = 0; i < RETIRE_PORTS; i++) begin :gen_retire_ids
|
||||
generate for (genvar i = 0; i < RETIRE_PORTS; i++) begin :gen_retire_ids
|
||||
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
retire_ids_next[i] <= LOG2_MAX_IDS'(i);
|
||||
else
|
||||
retire_ids_next[i] <= retire_ids_next[i] + LOG2_MAX_IDS'(retire_next.count);
|
||||
retire_ids_next[i] <= retire_ids_next[i] + LOG2_MAX_IDS'(retire_count_next);
|
||||
end
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
if (~gc.retire_hold)
|
||||
retire_ids[i] <= retire_ids_next[i];
|
||||
end
|
||||
|
||||
end endgenerate
|
||||
|
||||
//Represented as a negative value so that the MSB indicates that the decode stage is valid
|
||||
|
@ -216,7 +295,7 @@ module instruction_metadata_and_id_management
|
|||
pre_issue_count <= pre_issue_count_next;
|
||||
end
|
||||
|
||||
assign post_issue_count_next = post_issue_count + ID_COUNTER_W'(instruction_issued) - ID_COUNTER_W'(retire_next.count);
|
||||
assign post_issue_count_next = post_issue_count + ID_COUNTER_W'(instruction_issued) - ID_COUNTER_W'(retire_count_next);
|
||||
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
post_issue_count <= 0;
|
||||
|
@ -238,19 +317,36 @@ module instruction_metadata_and_id_management
|
|||
//Non-writeback instructions not included as current instruction set
|
||||
//complete in their first cycle of the execute stage, or do not cause an
|
||||
//exception after that point
|
||||
|
||||
logic id_waiting_toggle [NUM_WB_GROUPS];
|
||||
id_t id_waiting_toggle_addr [NUM_WB_GROUPS];
|
||||
always_comb begin
|
||||
id_waiting_toggle[0] = (instruction_issued_with_rd & issue.is_multicycle) | fp_instruction_issued_with_rd;
|
||||
id_waiting_toggle_addr[0] = issue.id;
|
||||
|
||||
id_waiting_toggle[NUM_WB_GROUPS-2] = fp_wb_packet[0].valid;
|
||||
id_waiting_toggle_addr[NUM_WB_GROUPS-2] = fp_wb_packet[0].id;
|
||||
id_waiting_toggle[NUM_WB_GROUPS-1] = fp_wb_packet[1].valid;
|
||||
id_waiting_toggle_addr[NUM_WB_GROUPS-1] = fp_wb_packet[1].id;
|
||||
|
||||
//This will overwrite the FP packets if the configuration does not include it
|
||||
for (int i = 1; i < CONFIG.NUM_WB_GROUPS; i++) begin
|
||||
id_waiting_toggle[i] = wb_packet[i].valid;
|
||||
id_waiting_toggle_addr[i] = wb_packet[i].id;
|
||||
end
|
||||
end
|
||||
|
||||
toggle_memory_set # (
|
||||
.DEPTH (MAX_IDS),
|
||||
.NUM_WRITE_PORTS (2),
|
||||
.NUM_READ_PORTS (RETIRE_PORTS),
|
||||
.WRITE_INDEX_FOR_RESET (0),
|
||||
.READ_INDEX_FOR_RESET (0)
|
||||
.NUM_WRITE_PORTS (NUM_WB_GROUPS),
|
||||
.NUM_READ_PORTS (RETIRE_PORTS)
|
||||
) id_waiting_for_writeback_toggle_mem_set
|
||||
(
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.init_clear (gc.init_clear),
|
||||
.toggle ('{(instruction_issued_with_rd & issue.is_multicycle), wb_packet[1].valid}),
|
||||
.toggle_addr ('{issue.id, wb_packet[1].id}),
|
||||
.toggle (id_waiting_toggle),
|
||||
.toggle_addr (id_waiting_toggle_addr),
|
||||
.read_addr (retire_ids_next),
|
||||
.in_use (id_waiting_for_writeback)
|
||||
);
|
||||
|
@ -260,14 +356,9 @@ module instruction_metadata_and_id_management
|
|||
logic contiguous_retire;
|
||||
logic id_is_post_issue [RETIRE_PORTS];
|
||||
logic id_ready_to_retire [RETIRE_PORTS];
|
||||
logic [LOG2_RETIRE_PORTS-1:0] phys_id_sel;
|
||||
logic [RETIRE_PORTS-1:0] retire_id_uses_rd;
|
||||
logic [RETIRE_PORTS-1:0] retire_id_waiting_for_writeback;
|
||||
|
||||
generate for (i = 0; i < RETIRE_PORTS; i++) begin : gen_retire_writeback
|
||||
assign retire_id_uses_rd[i] = uses_rd_table[retire_ids_next[i]];
|
||||
assign retire_id_waiting_for_writeback[i] = id_waiting_for_writeback[i];
|
||||
end endgenerate
|
||||
logic [LOG2_RETIRE_PORTS-1:0] retire_with_rd_sel;
|
||||
logic [LOG2_RETIRE_PORTS-1:0] retire_with_fp_rd_sel;
|
||||
logic [LOG2_RETIRE_PORTS-1:0] retire_with_store_sel;
|
||||
|
||||
//Supports retiring up to RETIRE_PORTS instructions. The retired block of instructions must be
|
||||
//contiguous and must start with the first retire port. Additionally, only one register file writing
|
||||
|
@ -275,40 +366,64 @@ module instruction_metadata_and_id_management
|
|||
//If an exception is pending, only retire a single instruction per cycle. As such, the pending
|
||||
//exception will have to become the oldest instruction retire_ids[0] before it can retire.
|
||||
logic retire_with_rd_found;
|
||||
logic retire_with_fp_rd_found;
|
||||
logic retire_with_store_found;
|
||||
always_comb begin
|
||||
contiguous_retire = ~gc.retire_hold;
|
||||
retire_with_rd_found = 0;
|
||||
retire_with_fp_rd_found = 0;
|
||||
retire_with_store_found = 0;
|
||||
|
||||
retire_with_rd_sel = 0;
|
||||
retire_with_fp_rd_sel = 0;
|
||||
retire_with_store_sel = 0;
|
||||
for (int i = 0; i < RETIRE_PORTS; i++) begin
|
||||
id_is_post_issue[i] = post_issue_count > ID_COUNTER_W'(i);
|
||||
|
||||
id_ready_to_retire[i] = (id_is_post_issue[i] & contiguous_retire & ~id_waiting_for_writeback[i]);
|
||||
retire_port_valid_next[i] = id_ready_to_retire[i] & ~(retire_id_uses_rd[i] & retire_with_rd_found);
|
||||
retire_port_valid_next[i] = id_ready_to_retire[i] & ~((retire_type[i] == RD & retire_with_rd_found) | (retire_type[i] == STORE & retire_with_store_found) | (retire_type[i] == FP_RD & retire_with_fp_rd_found));
|
||||
|
||||
retire_with_rd_found |= retire_port_valid_next[i] & retire_id_uses_rd[i];
|
||||
retire_with_rd_found |= retire_port_valid_next[i] & retire_type[i] == RD;
|
||||
retire_with_fp_rd_found |= retire_port_valid_next[i] & retire_type[i] == FP_RD;
|
||||
retire_with_store_found |= retire_port_valid_next[i] & retire_type[i] == STORE;
|
||||
contiguous_retire &= retire_port_valid_next[i] & ~gc.exception_pending;
|
||||
|
||||
if (retire_port_valid_next[i] & retire_type[i] == RD)
|
||||
retire_with_rd_sel = LOG2_RETIRE_PORTS'(i);
|
||||
if (retire_port_valid_next[i] & retire_type[i] == FP_RD)
|
||||
retire_with_fp_rd_sel = LOG2_RETIRE_PORTS'(i);
|
||||
if (retire_port_valid_next[i] & retire_type[i] == STORE)
|
||||
retire_with_store_sel = LOG2_RETIRE_PORTS'(i);
|
||||
end
|
||||
end
|
||||
|
||||
//retire_next packet
|
||||
priority_encoder #(.WIDTH(RETIRE_PORTS))
|
||||
phys_id_sel_encoder (
|
||||
.priority_vector (retire_id_uses_rd),
|
||||
.encoded_result (phys_id_sel)
|
||||
);
|
||||
assign retire_next.phys_id = retire_ids_next[phys_id_sel];
|
||||
assign retire_next.valid = retire_with_rd_found;
|
||||
//retire_next packets
|
||||
assign wb_retire_next = '{
|
||||
id : retire_ids_next[retire_with_rd_sel],
|
||||
valid : retire_with_rd_found
|
||||
};
|
||||
assign fp_wb_retire_next = '{
|
||||
id : retire_ids_next[retire_with_fp_rd_sel],
|
||||
valid : retire_with_fp_rd_found
|
||||
};
|
||||
assign store_retire_next = '{
|
||||
id : retire_ids_next[retire_with_store_sel],
|
||||
valid : retire_with_store_found
|
||||
};
|
||||
|
||||
always_comb begin
|
||||
retire_next.count = 0;
|
||||
retire_count_next = 0;
|
||||
for (int i = 0; i < RETIRE_PORTS; i++) begin
|
||||
retire_next.count += retire_port_valid_next[i];
|
||||
retire_count_next += retire_port_valid_next[i];
|
||||
end
|
||||
end
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
retire.valid <= retire_next.valid;
|
||||
retire.phys_id <= retire_next.phys_id;
|
||||
retire.count <= gc.writeback_supress ? '0 : retire_next.count;
|
||||
wb_retire <= wb_retire_next;
|
||||
fp_wb_retire <= fp_wb_retire_next;
|
||||
store_retire <= store_retire_next;
|
||||
|
||||
retire_count <= gc.writeback_supress ? '0 : retire_count_next;
|
||||
for (int i = 0; i < RETIRE_PORTS; i++)
|
||||
retire_port_valid[i] <= retire_port_valid_next[i] & ~gc.writeback_supress;
|
||||
end
|
||||
|
@ -318,31 +433,14 @@ module instruction_metadata_and_id_management
|
|||
assign pc_id_available = ~inflight_count[LOG2_MAX_IDS];
|
||||
|
||||
//Decode
|
||||
assign decode.id = decode_id;
|
||||
assign decode.valid = fetched_count_neg[LOG2_MAX_IDS];
|
||||
assign decode.pc = pc_table[decode_id];
|
||||
assign decode.instruction = instruction_table[decode_id];
|
||||
assign decode.fetch_metadata = CONFIG.INCLUDE_M_MODE ? fetch_metadata_table[decode_id] : '{ok : 1, error_code : INST_ACCESS_FAULT};
|
||||
|
||||
//Writeback/Commit support
|
||||
phys_addr_t commit_phys_addr [CONFIG.NUM_WB_GROUPS];
|
||||
assign commit_phys_addr[0] = issue.phys_rd_addr;
|
||||
generate for (i = 1; i < CONFIG.NUM_WB_GROUPS; i++) begin : gen_commit_phys_addr
|
||||
assign commit_phys_addr[i] = phys_addr_table[wb_packet[i].id];
|
||||
end endgenerate
|
||||
|
||||
generate for (i = 0; i < CONFIG.NUM_WB_GROUPS; i++) begin : gen_commit_packet
|
||||
assign commit_packet[i].id = wb_packet[i].id;
|
||||
assign commit_packet[i].phys_addr = commit_phys_addr[i];
|
||||
assign commit_packet[i].valid = wb_packet[i].valid & |commit_phys_addr[i];
|
||||
assign commit_packet[i].data = wb_packet[i].data;
|
||||
end endgenerate
|
||||
|
||||
//Exception Support
|
||||
generate if (CONFIG.INCLUDE_M_MODE) begin : gen_id_exception_support
|
||||
assign oldest_pc = pc_table[retire_ids_next[0]];
|
||||
assign current_exception_unit = exception_unit_table[retire_ids_next[0]];
|
||||
end endgenerate
|
||||
localparam fetch_metadata_t ADDR_OK = '{ok : 1, error_code : INST_ADDR_MISSALIGNED};
|
||||
assign decode = '{
|
||||
id : decode_id,
|
||||
valid : fetched_count_neg[LOG2_MAX_IDS],
|
||||
pc : decode_pc,
|
||||
instruction : decode_instruction,
|
||||
fetch_metadata : CONFIG.INCLUDE_M_MODE ? decode_fetch_metadata : ADDR_OK
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//End of Implementation
|
||||
|
|
|
@ -50,7 +50,7 @@ module l1_arbiter
|
|||
logic [L1_CONNECTIONS-1:0] acks;
|
||||
logic [((L1_CONNECTIONS == 1) ? 0 : ($clog2(L1_CONNECTIONS)-1)) : 0] arb_sel;
|
||||
|
||||
logic push_ready;
|
||||
logic fifos_full;
|
||||
logic request_exists;
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
|
@ -69,14 +69,14 @@ module l1_arbiter
|
|||
assign sc_success = CONFIG.INCLUDE_AMO & l2.con_result;
|
||||
|
||||
//Arbiter can pop address FIFO at a different rate than the data FIFO, so check that both have space.
|
||||
assign push_ready = ~(l2.request_full | l2.data_full);
|
||||
assign fifos_full = l2.request_full | l2.data_full;
|
||||
assign request_exists = |requests;
|
||||
|
||||
assign l2.request_push = push_ready & request_exists;
|
||||
assign l2.request_push = request_exists & ~fifos_full;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Dcache Specific
|
||||
assign l2.wr_data_push = CONFIG.INCLUDE_DCACHE & (push_ready & l1_request[L1_DCACHE_ID].request & ~l1_request[L1_DCACHE_ID].rnw); //Assumes data cache has highest priority
|
||||
assign l2.wr_data_push = l2.request_push & ~l2.rnw;
|
||||
assign l2.wr_data = l1_request[L1_DCACHE_ID].data;
|
||||
assign l2.wr_data_be = l1_request[L1_DCACHE_ID].be;
|
||||
|
||||
|
@ -87,29 +87,38 @@ module l1_arbiter
|
|||
////////////////////////////////////////////////////
|
||||
//Interface mapping
|
||||
generate for (genvar i = 0; i < L1_CONNECTIONS; i++) begin : gen_l2_requests
|
||||
always_comb begin
|
||||
l2_requests[i].addr = l1_request[i].addr[31:2];
|
||||
l2_requests[i].rnw = l1_request[i].rnw;
|
||||
l2_requests[i].is_amo = l1_request[i].is_amo;
|
||||
l2_requests[i].amo_type_or_burst_size = l1_request[i].size;
|
||||
l2_requests[i].sub_id = L2_SUB_ID_W'(i);
|
||||
end
|
||||
assign l2_requests[i] = '{
|
||||
addr : l1_request[i].addr[31:2],
|
||||
rnw : l1_request[i].rnw,
|
||||
is_amo : l1_request[i].is_amo,
|
||||
amo_type_or_burst_size : l1_request[i].size,
|
||||
sub_id : L2_SUB_ID_W'(i)
|
||||
};
|
||||
end endgenerate
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Arbitration
|
||||
priority_encoder
|
||||
#(.WIDTH(L1_CONNECTIONS))
|
||||
arb_encoder
|
||||
(
|
||||
.priority_vector (requests),
|
||||
.encoded_result (arb_sel)
|
||||
);
|
||||
logic [$clog2(L1_CONNECTIONS)-1:0] state;
|
||||
logic [$clog2(L1_CONNECTIONS)-1:0] muxes [L1_CONNECTIONS-1:0];
|
||||
|
||||
always_comb begin
|
||||
acks = '0;
|
||||
acks[arb_sel] = l2.request_push;
|
||||
always_ff @(posedge clk) begin
|
||||
if (rst)
|
||||
state <= 0;
|
||||
else if (l2.request_push)
|
||||
state <= arb_sel;
|
||||
end
|
||||
always_comb begin
|
||||
for (int i = 0; i < L1_CONNECTIONS; i++) begin
|
||||
muxes[i] = $clog2(L1_CONNECTIONS)'(i);
|
||||
for (int j = 0; j < L1_CONNECTIONS; j++) begin
|
||||
if (requests[(i + j) % L1_CONNECTIONS])
|
||||
muxes[i] = $clog2(L1_CONNECTIONS)'((i + j) % L1_CONNECTIONS);
|
||||
end
|
||||
end
|
||||
end
|
||||
assign arb_sel = muxes[state];
|
||||
|
||||
assign acks = L1_CONNECTIONS'(l2.request_push) << arb_sel;
|
||||
|
||||
assign l2.addr = l2_requests[arb_sel].addr;
|
||||
assign l2.rnw = l2_requests[arb_sel].rnw;
|
||||
|
@ -119,7 +128,7 @@ module l1_arbiter
|
|||
|
||||
generate for (genvar i = 0; i < L1_CONNECTIONS; i++) begin : gen_l1_responses
|
||||
assign l1_response[i].data = l2.rd_data;
|
||||
assign l1_response[i].data_valid = l2.rd_data_valid && (l2.rd_sub_id == i);
|
||||
assign l1_response[i].data_valid = l2.rd_data_valid & (l2.rd_sub_id == i);
|
||||
end endgenerate
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,159 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2020 Eric Matthews, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Eric Matthews <ematthew@sfu.ca>
|
||||
*/
|
||||
|
||||
//Load/store queue: buffers incoming load/store requests in a load FIFO and a separate
//store queue, and selects one of them each cycle to present on lsq.data_out.
//Loads are pushed into load_queue_fifo, stores into sq_block; both share a hash of the
//request address (addr_hash) that sq_block uses for load-store conflict detection.
module load_store_queue //ID-based input buffer for Load/Store Unit
|
||||
|
||||
import cva5_config::*;
|
||||
import riscv_types::*;
|
||||
import cva5_types::*;
|
||||
|
||||
# (
|
||||
parameter cpu_config_t CONFIG = EXAMPLE_CONFIG
|
||||
)
|
||||
(
|
||||
input logic clk,
|
||||
input logic rst,
|
||||
input gc_outputs_t gc,
|
||||
|
||||
load_store_queue_interface.queue lsq,
|
||||
//Writeback snooping
|
||||
input wb_packet_t wb_snoop,
|
||||
|
||||
//Retire release
|
||||
input id_t retire_ids [RETIRE_PORTS],
|
||||
input logic retire_port_valid [RETIRE_PORTS],
|
||||
|
||||
output logic tr_possible_load_conflict_delay
|
||||
);
|
||||
|
||||
//Entry stored in the load FIFO; potential_store_conflicts has one bit per store queue slot
|
||||
typedef struct packed {
|
||||
logic [31:0] addr;
|
||||
logic [2:0] fn3;
|
||||
id_t id;
|
||||
logic [CONFIG.SQ_DEPTH-1:0] potential_store_conflicts;
|
||||
} lq_entry_t;
|
||||
|
||||
addr_hash_t addr_hash;
|
||||
logic [CONFIG.SQ_DEPTH-1:0] potential_store_conflicts;
|
||||
//NOTE(review): sq_entry is declared but not referenced anywhere in this module
sq_entry_t sq_entry;
|
||||
logic store_conflict;
|
||||
logic load_selected;
|
||||
|
||||
lq_entry_t lq_data_in;
|
||||
lq_entry_t lq_data_out;
|
||||
|
||||
fifo_interface #(.DATA_WIDTH($bits(lq_entry_t))) lq();
|
||||
store_queue_interface sq();
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
|
||||
|
||||
//Can accept requests so long as store queue is not needed or is not full
|
||||
//(the load FIFO's own full status is not consulted here)
assign lsq.full = lsq.data_in.store & sq.full;
|
||||
|
||||
|
||||
//Address hash for load-store collision checking
|
||||
addr_hash lsq_addr_hash (
|
||||
.clk (clk),
|
||||
.rst (rst | gc.sq_flush),
|
||||
.addr (lsq.data_in.addr),
|
||||
.addr_hash (addr_hash)
|
||||
);
|
||||
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Load Queue
|
||||
//Depth is MAX_IDS; unlike the store queue, its reset is not combined with gc.sq_flush
cva5_fifo #(.DATA_WIDTH($bits(lq_entry_t)), .FIFO_DEPTH(MAX_IDS))
|
||||
load_queue_fifo (
|
||||
.clk(clk),
|
||||
.rst(rst),
|
||||
.fifo(lq)
|
||||
);
|
||||
|
||||
|
||||
//FIFO control signals
|
||||
//Push only load requests; pop only when the output stage consumed a load
assign lq.push = lsq.push & lsq.data_in.load;
|
||||
assign lq.potential_push = lsq.potential_push;
|
||||
assign lq.pop = lsq.pop & load_selected;
|
||||
|
||||
|
||||
//FIFO data ports
|
||||
//The store-conflict vector produced by sq_block for the incoming address is stored with the load
assign lq_data_in = '{
|
||||
addr : lsq.data_in.addr,
|
||||
fn3 : lsq.data_in.fn3,
|
||||
id : lsq.data_in.id,
|
||||
potential_store_conflicts : potential_store_conflicts
|
||||
};
|
||||
assign lq.data_in = lq_data_in;
|
||||
assign lq_data_out = lq.data_out;
|
||||
////////////////////////////////////////////////////
|
||||
//Store Queue
|
||||
//Push only store requests; pop whenever the output is consumed and no load was selected
assign sq.push = lsq.push & lsq.data_in.store;
|
||||
assign sq.pop = lsq.pop & ~load_selected;
|
||||
assign sq.data_in = lsq.data_in;
|
||||
|
||||
|
||||
store_queue # (.CONFIG(CONFIG)) sq_block (
|
||||
.clk (clk),
|
||||
.rst (rst | gc.sq_flush),
|
||||
.lq_push (lq.push),
|
||||
.lq_pop (lq.pop),
|
||||
.sq (sq),
|
||||
.addr_hash (addr_hash),
|
||||
.potential_store_conflicts (potential_store_conflicts),
|
||||
.prev_store_conflicts (lq_data_out.potential_store_conflicts),
|
||||
.store_conflict (store_conflict),
|
||||
.wb_snoop (wb_snoop),
|
||||
.retire_ids (retire_ids),
|
||||
.retire_port_valid (retire_port_valid)
|
||||
);
|
||||
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Output
|
||||
//Priority is for loads over stores.
|
||||
//A store is selected only when no load is selected, i.e. the load queue has no valid entry or the head load has a store conflict.
|
||||
//NOTE(review): the "store queue is full and a store is ready" override is commented out on the line below, so it is not applied.
assign load_selected = lq.valid & ~store_conflict;// & ~(sq_full & sq.valid);
|
||||
|
||||
|
||||
assign lsq.valid = load_selected | sq.valid;
|
||||
//be is forced to zero for loads; data_in always comes from the store queue and
//id always comes from the load queue, regardless of which request is selected
assign lsq.data_out = '{
|
||||
addr : load_selected ? lq_data_out.addr : sq.data_out.addr,
|
||||
load : load_selected,
|
||||
store : ~load_selected,
|
||||
be : load_selected ? '0 : sq.data_out.be,
|
||||
fn3 : load_selected ? lq_data_out.fn3 : sq.data_out.fn3,
|
||||
data_in : sq.data_out.data,
|
||||
id : lq_data_out.id
|
||||
};
|
||||
|
||||
|
||||
assign lsq.sq_empty = sq.empty;
|
||||
assign lsq.no_released_stores_pending = sq.no_released_stores_pending;
|
||||
assign lsq.empty = ~lq.valid & sq.empty;
|
||||
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//End of Implementation
|
||||
////////////////////////////////////////////////////
|
||||
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Assertions
|
||||
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Trace Interface
|
||||
//Only driven when ENABLE_TRACE_INTERFACE is set; no driver is generated otherwise
generate if (ENABLE_TRACE_INTERFACE) begin : gen_lsq_trace
|
||||
assign tr_possible_load_conflict_delay = lq.valid & (store_conflict | (sq.full & sq.valid));
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
||||
endmodule
|
|
@ -1,69 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2017-2019 Eric Matthews, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Eric Matthews <ematthew@sfu.ca>
|
||||
*/
|
||||
|
||||
//Occupancy tracker for a DEPTH-entry structure using a one-hot vector of DEPTH+1 bits.
//Bit i of valid_chain being set means i entries are currently held:
//reset selects bit 0 (empty), a push shifts the hot bit up, a pop shifts it down.
//  push/pop     : occupancy increment/decrement requests
//  empty        : no entries held (bit 0)
//  almost_empty : exactly one entry held (bit 1)
//  almost_full  : exactly DEPTH-1 entries held
//  full         : DEPTH entries held
//  valid        : at least one entry held (inverse of empty)
module one_hot_occupancy
|
||||
#(parameter DEPTH = 4)
|
||||
(
|
||||
input logic clk,
|
||||
input logic rst,
|
||||
input logic push,
|
||||
input logic pop,
|
||||
output logic almost_full,
|
||||
output logic full,
|
||||
output logic empty,
|
||||
output logic almost_empty,
|
||||
output logic valid
|
||||
);
|
||||
|
||||
logic [DEPTH:0] valid_chain;
|
||||
|
||||
//Occupancy Tracking
|
||||
//Simultaneous push and pop (or neither) leaves the occupancy unchanged
always_ff @ (posedge clk) begin
|
||||
if (rst) begin
|
||||
valid_chain[0] <= 1;
|
||||
valid_chain[DEPTH:1] <= 0;
|
||||
end
|
||||
else begin
|
||||
case({push,pop})
|
||||
2'b10 : valid_chain <= {valid_chain[DEPTH-1:0], 1'b0};
|
||||
2'b01 : valid_chain <= {1'b0, valid_chain[DEPTH:1]};
|
||||
default : valid_chain <= valid_chain;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
assign empty = valid_chain[0];
|
||||
assign almost_empty = valid_chain[1];
|
||||
|
||||
assign valid = ~valid_chain[0];
|
||||
assign full = valid_chain[DEPTH];
|
||||
|
||||
assign almost_full = valid_chain[DEPTH-1];
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Assertions
|
||||
//Outside of reset: report a push while full (overflow) or a pop while empty (underflow).
//Note the overflow check fires on any push while full, including a simultaneous push and pop.
always_ff @ (posedge clk) begin
|
||||
assert (!(~rst & valid_chain[DEPTH] & push)) else $error("overflow");
|
||||
assert (!(~rst & valid_chain[0] & pop)) else $error("underflow");
|
||||
end
|
||||
|
||||
endmodule
|
|
@ -1,16 +0,0 @@
|
|||
//Registers, on every rising edge of clk, the OR-reduction of samples masked by PLACER_SEED:
//result becomes 1 when any bit of samples selected by a set bit of PLACER_SEED is 1.
//NOTE(review): going by the module name, changing PLACER_SEED is presumably intended to
//perturb the generated logic so that the FPGA placer yields a different result - confirm with the instantiating design.
module placer_randomizer # (
|
||||
parameter logic [7:0] PLACER_SEED = 8'h2B
|
||||
)
|
||||
(
|
||||
input logic clk,
|
||||
input logic [7:0] samples,
|
||||
output logic result
|
||||
);
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
result <= |(samples & PLACER_SEED);
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
||||
|
|
@ -1,109 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2019 Eric Matthews, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Eric Matthews <ematthew@sfu.ca>
|
||||
*/
|
||||
|
||||
import cva5_config::*;
|
||||
import cva5_types::*;
|
||||
|
||||
//In-use tracker for the 32 architectural registers.
//Each register has one bit in bankA and one bit in bankB; a register is reported as
//in use when the two bits differ (bankA ^ bankB).
//  issued/issued_rd_addr   : writes bankA so that the issued destination becomes in use
//  retired/retired_rd_addr : copies bankA into bankB for that register, clearing in use
//  clr                     : sweeps a write address counter (w_clear) through bankB to clear entries
//  rs1_addr/rs2_addr       : read addresses for the rs1_inuse/rs2_inuse outputs
module reg_inuse (
|
||||
input logic clk,
|
||||
input logic rst,
|
||||
input logic clr,
|
||||
input logic [4:0] rs1_addr,
|
||||
input logic [4:0] rs2_addr,
|
||||
input logic [4:0] issued_rd_addr,
|
||||
input logic [4:0] retired_rd_addr,
|
||||
input logic issued,
|
||||
input logic retired,
|
||||
output logic rs1_inuse,
|
||||
output logic rs2_inuse
|
||||
);
|
||||
////////////////////////////////////////////////////
|
||||
//Memory organized as 2 sets of dual-ported memories
|
||||
logic bankA [32];
|
||||
logic bankB [32];
|
||||
|
||||
logic [4:0] w_clear;
|
||||
logic [4:0] wb_rd_addr_muxed;
|
||||
|
||||
logic wb_collision;
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
|
||||
//////////////////////////////////////////
|
||||
//Initialize both banks to zero for simulation,
|
||||
//will be cleared by GC after reset in hardware
|
||||
//NOTE(review): this comment previously read "Initialize to all inuse (0,1)", but both banks start at 0, so bankA ^ bankB is 0 (nothing in use) - confirm intent
// synthesis translate_off
|
||||
initial bankA = '{default: 0};
|
||||
initial bankB = '{default: 0};
|
||||
// synthesis translate_on
|
||||
|
||||
//After reset, clear is held for at least 32 cycles to reset memory block
|
||||
//While clr is asserted the bankB write address comes from the w_clear counter instead of the retiring register
assign wb_rd_addr_muxed = clr ? w_clear : retired_rd_addr;
|
||||
|
||||
|
||||
//reset is for simulation purposes only, not needed for actual design
|
||||
//5-bit counter that advances by one each cycle clr is high (wraps after 32 addresses)
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
w_clear <= 0;
|
||||
else
|
||||
w_clear <= w_clear + 5'(clr);
|
||||
end
|
||||
|
||||
//Same register is being retired and issued in the same cycle
assign wb_collision = retired && (issued_rd_addr == retired_rd_addr);
|
||||
|
||||
//On issue, make bankA differ from the value bankB will hold after this cycle:
//normally the inverse of the current bankB bit; on a collision bankB is simultaneously
//loaded with the current bankA bit, so the inverse of that bankA bit is used instead
always_ff @ (posedge clk) begin
|
||||
if (issued)
|
||||
bankA[issued_rd_addr] <= wb_collision ? ~bankA[wb_rd_addr_muxed] : ~bankB[issued_rd_addr];
|
||||
end
|
||||
|
||||
//On retire (or clear), copy bankA into bankB so the two bits match (not in use)
always_ff @ (posedge clk) begin
|
||||
if (retired | clr)
|
||||
bankB[wb_rd_addr_muxed] <= bankA[wb_rd_addr_muxed];
|
||||
end
|
||||
|
||||
assign rs1_inuse = bankA[rs1_addr] ^ bankB[rs1_addr];
|
||||
assign rs2_inuse = bankA[rs2_addr] ^ bankB[rs2_addr];
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//End of Implementation
|
||||
////////////////////////////////////////////////////
|
||||
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Assertions
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Simulation Only
|
||||
//Flattened view of the per-register in-use status, excluded from synthesis
// synthesis translate_off
|
||||
logic sim_inuse [32];
|
||||
always_comb begin
|
||||
foreach (sim_inuse[i])
|
||||
sim_inuse[i] = bankA[i] ^ bankB[i];
|
||||
end
|
||||
// synthesis translate_on
|
||||
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,69 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2017-2020 Eric Matthews, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Eric Matthews <ematthew@sfu.ca>
|
||||
*/
|
||||
|
||||
//Single register file bank: a 64-entry x 32-bit memory with one synchronous write port
//and NUM_READ_PORTS asynchronous (combinational) read ports.
//  write_addr/new_data/commit : write port; the write happens on the rising clk edge when commit is high
//  read_addr/data             : per-port read address and the data read from that address
//rst is used only to disable the assertion; the memory contents are not reset.
module register_bank
|
||||
|
||||
import cva5_config::*;
|
||||
import riscv_types::*;
|
||||
import cva5_types::*;
|
||||
|
||||
#(
|
||||
parameter NUM_READ_PORTS = 2
|
||||
)
|
||||
(
|
||||
input logic clk,
|
||||
input logic rst,
|
||||
|
||||
//Writeback
|
||||
input phys_addr_t write_addr,
|
||||
input logic [31:0] new_data,
|
||||
input logic commit,
|
||||
|
||||
//Issue
|
||||
input phys_addr_t read_addr [NUM_READ_PORTS],
|
||||
output logic [31:0] data [NUM_READ_PORTS]
|
||||
);
|
||||
|
||||
//NOTE(review): the ramstyle attribute looks like an Intel/Quartus memory-inference hint - confirm against the target toolchain
(* ramstyle = "MLAB, no_rw_check" *) logic [31:0] register_file_bank [64];
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Register File
|
||||
//Assign zero to r0 and initialize all registers to zero for simulation
|
||||
initial register_file_bank = '{default: 0};
|
||||
always_ff @ (posedge clk) begin
|
||||
if (commit)
|
||||
register_file_bank[write_addr] <= new_data;
|
||||
end
|
||||
|
||||
//One combinational read of the shared memory per read port
generate for (genvar i = 0; i < NUM_READ_PORTS; i++)
|
||||
assign data[i] = register_file_bank[read_addr[i]];
|
||||
endgenerate
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Assertions
|
||||
//Outside of reset, a commit to address 0 is reported as an error (entry 0 is expected to stay zero)
write_to_zero_reg_assertion:
|
||||
assert property (@(posedge clk) disable iff (rst) !(commit & write_addr == 0))
|
||||
else $error("Write to zero reg occured!");
|
||||
|
||||
endmodule
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2020 Eric Matthews, Lesley Shannon
|
||||
* Copyright © 2020 Eric Matthews, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
@ -26,8 +26,12 @@ module register_file
|
|||
import riscv_types::*;
|
||||
import cva5_types::*;
|
||||
|
||||
# (
|
||||
parameter cpu_config_t CONFIG = EXAMPLE_CONFIG
|
||||
#(
|
||||
parameter NUM_WB_GROUPS = 2,
|
||||
parameter READ_PORTS = 2,
|
||||
parameter PORT_ZERO_ABSENT = 0,
|
||||
parameter USE_ZERO = 0,
|
||||
parameter type WB_PACKET_TYPE = wb_packet_t
|
||||
)
|
||||
|
||||
(
|
||||
|
@ -36,25 +40,34 @@ module register_file
|
|||
input gc_outputs_t gc,
|
||||
|
||||
//decode write interface
|
||||
input phys_addr_t decode_phys_rs_addr [REGFILE_READ_PORTS],
|
||||
input logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] decode_rs_wb_group [REGFILE_READ_PORTS],
|
||||
input phys_addr_t decode_phys_rs_addr [READ_PORTS],
|
||||
input logic [$clog2(NUM_WB_GROUPS)-1:0] decode_rs_wb_group [READ_PORTS],
|
||||
input phys_addr_t decode_phys_rd_addr,
|
||||
input logic decode_advance,
|
||||
input logic decode_uses_rd,
|
||||
input rs_addr_t decode_rd_addr, //Ignored if USE_ZERO
|
||||
|
||||
//Issue interface
|
||||
register_file_issue_interface.register_file rf_issue,
|
||||
|
||||
//Writeback
|
||||
input commit_packet_t commit [CONFIG.NUM_WB_GROUPS]
|
||||
input WB_PACKET_TYPE commit [NUM_WB_GROUPS],
|
||||
input phys_addr_t wb_phys_addr [NUM_WB_GROUPS]
|
||||
);
|
||||
typedef logic [31:0] rs_data_set_t [REGFILE_READ_PORTS];
|
||||
rs_data_set_t rs_data_set [CONFIG.NUM_WB_GROUPS];
|
||||
localparam TOGGLE_PORTS = NUM_WB_GROUPS+1+32'(PORT_ZERO_ABSENT);
|
||||
localparam DATA_WIDTH = $bits(commit[0].data);
|
||||
typedef logic [DATA_WIDTH-1:0] rs_data_t [READ_PORTS];
|
||||
rs_data_t regfile_rs_data [NUM_WB_GROUPS];
|
||||
rs_data_t regfile_rs_data_r;
|
||||
rs_data_t commit_rs_data [NUM_WB_GROUPS];
|
||||
logic bypass [READ_PORTS];
|
||||
|
||||
logic decode_inuse [REGFILE_READ_PORTS];
|
||||
logic decode_inuse_r [REGFILE_READ_PORTS];
|
||||
logic decode_inuse [READ_PORTS];
|
||||
|
||||
genvar i;
|
||||
phys_addr_t inuse_read_addr [READ_PORTS*2];
|
||||
logic inuse [READ_PORTS*2];
|
||||
logic toggle [TOGGLE_PORTS];
|
||||
phys_addr_t toggle_addr [TOGGLE_PORTS];
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
|
||||
|
@ -62,83 +75,106 @@ module register_file
|
|||
//Phys register inuse
|
||||
//toggle ports: decode advance, single-cycle/fetch_flush, multi-cycle commit
|
||||
//read ports: rs-decode, rs-issue
|
||||
always_comb begin
|
||||
for (int i = 0; i < READ_PORTS; i++) begin
|
||||
inuse_read_addr[i] = decode_phys_rs_addr[i];
|
||||
inuse_read_addr[i+READ_PORTS] = rf_issue.phys_rs_addr[i];
|
||||
decode_inuse[i] = inuse[i];
|
||||
rf_issue.inuse[i] = inuse[i+READ_PORTS];
|
||||
end
|
||||
|
||||
toggle[0] = decode_advance & decode_uses_rd & (USE_ZERO | |decode_rd_addr) & ~gc.fetch_flush;
|
||||
toggle_addr[0] = decode_phys_rd_addr;
|
||||
|
||||
toggle[1] = rf_issue.single_cycle_or_flush;
|
||||
toggle_addr[1] = rf_issue.phys_rd_addr;
|
||||
for (int i = 1; i < NUM_WB_GROUPS+PORT_ZERO_ABSENT; i++) begin
|
||||
toggle[i+1] = commit[i-PORT_ZERO_ABSENT].valid & (USE_ZERO | |wb_phys_addr[i-PORT_ZERO_ABSENT]);
|
||||
toggle_addr[i+1] = wb_phys_addr[i-PORT_ZERO_ABSENT];
|
||||
end
|
||||
end
|
||||
toggle_memory_set # (
|
||||
.DEPTH (64),
|
||||
.NUM_WRITE_PORTS (3),
|
||||
.NUM_READ_PORTS (REGFILE_READ_PORTS*2),
|
||||
.WRITE_INDEX_FOR_RESET (0),
|
||||
.READ_INDEX_FOR_RESET (0)
|
||||
.NUM_WRITE_PORTS (TOGGLE_PORTS),
|
||||
.NUM_READ_PORTS (READ_PORTS*2)
|
||||
) id_inuse_toggle_mem_set
|
||||
(
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.init_clear (gc.init_clear),
|
||||
.toggle ('{
|
||||
(decode_advance & decode_uses_rd & |decode_phys_rd_addr & ~gc.fetch_flush),
|
||||
rf_issue.single_cycle_or_flush,
|
||||
commit[1].valid
|
||||
}),
|
||||
.toggle_addr ('{
|
||||
decode_phys_rd_addr,
|
||||
rf_issue.phys_rd_addr,
|
||||
commit[1].phys_addr
|
||||
}),
|
||||
.read_addr ('{
|
||||
decode_phys_rs_addr[RS1],
|
||||
decode_phys_rs_addr[RS2],
|
||||
rf_issue.phys_rs_addr[RS1],
|
||||
rf_issue.phys_rs_addr[RS2]
|
||||
}),
|
||||
.in_use ('{
|
||||
decode_inuse[RS1],
|
||||
decode_inuse[RS2],
|
||||
rf_issue.inuse[RS1],
|
||||
rf_issue.inuse[RS2]
|
||||
})
|
||||
.toggle (toggle),
|
||||
.toggle_addr (toggle_addr),
|
||||
.read_addr (inuse_read_addr),
|
||||
.in_use (inuse)
|
||||
);
|
||||
always_ff @ (posedge clk) begin
|
||||
if (decode_advance)
|
||||
decode_inuse_r <= decode_inuse;
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Register Banks
|
||||
//Implemented in a separate module as there is no universal tool support for inferring
|
||||
//arrays of memory blocks.
|
||||
generate for (i = 0; i < CONFIG.NUM_WB_GROUPS; i++) begin : register_file_gen
|
||||
register_bank #(.NUM_READ_PORTS(REGFILE_READ_PORTS))
|
||||
reg_group (
|
||||
.clk, .rst,
|
||||
.write_addr(commit[i].phys_addr),
|
||||
.new_data(commit[i].data),
|
||||
.commit(commit[i].valid & ~gc.writeback_supress),
|
||||
.read_addr(decode_phys_rs_addr),
|
||||
.data(rs_data_set[i])
|
||||
);
|
||||
//LUTRAM implementation
|
||||
//Read in decode stage, writeback groups muxed and output registered per regfile read port
|
||||
generate for (genvar i = 0; i < NUM_WB_GROUPS; i++) begin : register_file_gen
|
||||
lutram_1w_mr #(.DATA_TYPE(logic[DATA_WIDTH-1:0]), .DEPTH(64), .NUM_READ_PORTS(READ_PORTS))
|
||||
register_file_bank (
|
||||
.clk,
|
||||
.waddr(wb_phys_addr[i]),
|
||||
.raddr(decode_phys_rs_addr),
|
||||
.ram_write(commit[i].valid & ~gc.writeback_supress),
|
||||
.new_ram_data(commit[i].data),
|
||||
.ram_data_out(regfile_rs_data[i])
|
||||
);
|
||||
end endgenerate
|
||||
|
||||
generate for (genvar i = 0; i < READ_PORTS; i++) begin : register_file_ff_gen
|
||||
always_ff @ (posedge clk) begin
|
||||
if (((~|decode_phys_rs_addr[i] & ~USE_ZERO) & decode_advance))
|
||||
regfile_rs_data_r[i] <= '0;
|
||||
else if (decode_advance)
|
||||
regfile_rs_data_r[i] <= regfile_rs_data[decode_rs_wb_group[i]][i];
|
||||
end
|
||||
end endgenerate
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Register File Muxing
|
||||
logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] rs_wb_group [REGFILE_READ_PORTS];
|
||||
logic bypass [REGFILE_READ_PORTS];
|
||||
assign rs_wb_group = decode_advance ? decode_rs_wb_group : rf_issue.rs_wb_group;
|
||||
assign bypass = decode_advance ? decode_inuse : decode_inuse_r;
|
||||
|
||||
//Bypass registers
|
||||
//(per wb group and per read port)
|
||||
always_ff @ (posedge clk) begin
|
||||
for (int i = 0; i < REGFILE_READ_PORTS; i++) begin
|
||||
if (decode_advance | rf_issue.inuse[i])
|
||||
rf_issue.data[i] <= bypass[i] ? commit[rs_wb_group[i]].data : rs_data_set[rs_wb_group[i]][i];
|
||||
end
|
||||
for (int i = 0; i < NUM_WB_GROUPS; i++)
|
||||
for (int j = 0; j < READ_PORTS; j++)
|
||||
if (decode_advance | rf_issue.inuse[j])
|
||||
commit_rs_data[i][j] <= commit[i].data;
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Register File Muxing
|
||||
//Output mux per read port: selects between the per-wb_group bypass registers and the registered register file data
|
||||
localparam MUX_W = $clog2(NUM_WB_GROUPS+1);
|
||||
|
||||
typedef logic [DATA_WIDTH-1:0] issue_data_mux_t [2**MUX_W];
|
||||
issue_data_mux_t issue_data_mux [READ_PORTS];
|
||||
logic [MUX_W-1:0] issue_sel [READ_PORTS];
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
for (int i = 0; i < READ_PORTS; i++)
|
||||
if (decode_advance)
|
||||
issue_sel[i] <= decode_inuse[i] ? (MUX_W)'(decode_rs_wb_group[i]) : (MUX_W)'(2**MUX_W-1);
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
for (int i = 0; i < READ_PORTS; i++) begin
|
||||
issue_data_mux[i] = '{default: 'x};
|
||||
issue_data_mux[i][2**MUX_W-1] = regfile_rs_data_r[i];
|
||||
for (int j = 0; j < NUM_WB_GROUPS; j++)
|
||||
issue_data_mux[i][j] = commit_rs_data[j][i];
|
||||
end
|
||||
end
|
||||
|
||||
always_comb for (int i = 0; i < READ_PORTS; i++)
|
||||
rf_issue.data[i] = issue_data_mux[i][issue_sel[i]];
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//End of Implementation
|
||||
////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Assertions
|
||||
for (genvar i = 0; i < CONFIG.NUM_WB_GROUPS; i++) begin : write_to_rd_zero_assertion
|
||||
assert property (@(posedge clk) disable iff (rst) (commit[i].valid) |-> (commit[i].phys_addr != 0)) else $error("write to register zero");
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -32,7 +32,7 @@ module register_free_list
|
|||
import cva5_types::*;
|
||||
|
||||
#(
|
||||
parameter DATA_WIDTH = 70,
|
||||
parameter type DATA_TYPE = logic,
|
||||
parameter FIFO_DEPTH = 4
|
||||
)
|
||||
(
|
||||
|
@ -45,7 +45,7 @@ module register_free_list
|
|||
localparam LOG2_FIFO_DEPTH = $clog2(FIFO_DEPTH);
|
||||
|
||||
//Force FIFO depth to next power of 2
|
||||
(* ramstyle = "MLAB, no_rw_check" *) logic [DATA_WIDTH-1:0] lut_ram [(2**LOG2_FIFO_DEPTH)];
|
||||
(* ramstyle = "MLAB, no_rw_check" *) logic [$bits(DATA_TYPE)-1:0] lut_ram [(2**LOG2_FIFO_DEPTH)];
|
||||
logic [LOG2_FIFO_DEPTH-1:0] write_index;
|
||||
logic [LOG2_FIFO_DEPTH-1:0] read_index;
|
||||
logic [LOG2_FIFO_DEPTH:0] inflight_count;
|
||||
|
|
105
core/renamer.sv
105
core/renamer.sv
|
@ -27,7 +27,9 @@ module renamer
|
|||
import cva5_types::*;
|
||||
|
||||
# (
|
||||
parameter cpu_config_t CONFIG = EXAMPLE_CONFIG
|
||||
parameter NUM_WB_GROUPS = 2,
|
||||
parameter READ_PORTS = 2,
|
||||
parameter RENAME_ZERO = 0 //If set, will use issue.fp_uses_rd instead of issue.uses_rd (in addition to what the name implies)
|
||||
)
|
||||
|
||||
(
|
||||
|
@ -44,22 +46,21 @@ module renamer
|
|||
input logic instruction_issued_with_rd,
|
||||
|
||||
//Retire response
|
||||
input retire_packet_t retire
|
||||
input retire_packet_t wb_retire
|
||||
);
|
||||
//////////////////////////////////////////
|
||||
typedef struct packed{
|
||||
typedef struct packed {
|
||||
rs_addr_t rd_addr;
|
||||
phys_addr_t spec_phys_addr;
|
||||
phys_addr_t previous_phys_addr;
|
||||
logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] previous_wb_group;
|
||||
logic [$clog2(NUM_WB_GROUPS)-1:0] previous_wb_group;
|
||||
} renamer_metadata_t;
|
||||
renamer_metadata_t inuse_list_input;
|
||||
renamer_metadata_t inuse_list_output;
|
||||
renamer_metadata_t inuse_table_input;
|
||||
renamer_metadata_t inuse_table_output;
|
||||
|
||||
logic [5:0] clear_index;
|
||||
|
||||
fifo_interface #(.DATA_WIDTH($bits(phys_addr_t))) free_list ();
|
||||
fifo_interface #(.DATA_WIDTH($bits(renamer_metadata_t))) inuse_list ();
|
||||
fifo_interface #(.DATA_TYPE(phys_addr_t)) free_list ();
|
||||
|
||||
logic rename_valid;
|
||||
logic rollback;
|
||||
|
@ -69,10 +70,10 @@ module renamer
|
|||
//Zero register is never renamed
|
||||
//If a renamed destination is flushed in the issue stage, state is rolled back
|
||||
//When an instruction reaches the retire stage it either commits or reverts its renaming depending on whether the instruction retires or is discarded
|
||||
assign rename_valid = (~gc.fetch_flush) & decode_advance & decode.uses_rd & |decode.rd_addr;
|
||||
assign rename_valid = (~gc.fetch_flush) & decode_advance & decode.uses_rd & (RENAME_ZERO | |decode.rd_addr);
|
||||
|
||||
//Revert physical address assignment on a flush
|
||||
assign rollback = gc.fetch_flush & issue.stage_valid & issue.uses_rd & |issue.rd_addr;
|
||||
assign rollback = gc.fetch_flush & issue.stage_valid & (RENAME_ZERO ? issue.fp_uses_rd : issue.uses_rd) & (RENAME_ZERO | |issue.rd_addr);
|
||||
|
||||
//counter for indexing through memories for post-reset clearing/initialization
|
||||
lfsr #(.WIDTH(6), .NEEDS_RESET(0))
|
||||
|
@ -84,7 +85,7 @@ module renamer
|
|||
|
||||
////////////////////////////////////////////////////
|
||||
//Free list FIFO
|
||||
register_free_list #(.DATA_WIDTH($bits(phys_addr_t)), .FIFO_DEPTH(32)) free_list_fifo (
|
||||
register_free_list #(.DATA_TYPE(phys_addr_t), .FIFO_DEPTH(32)) free_list_fifo (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.fifo (free_list),
|
||||
|
@ -92,58 +93,56 @@ module renamer
|
|||
);
|
||||
|
||||
//During post reset init, initialize FIFO with free list (registers 32-63)
|
||||
assign free_list.potential_push = (gc.init_clear & ~clear_index[5]) | (retire.valid);
|
||||
assign free_list.potential_push = (gc.init_clear & ~clear_index[5]) | (wb_retire.valid);
|
||||
assign free_list.push = free_list.potential_push;
|
||||
|
||||
assign free_list.data_in = gc.init_clear ? {1'b1, clear_index[4:0]} : (gc.writeback_supress ? inuse_list_output.spec_phys_addr : inuse_list_output.previous_phys_addr);
|
||||
assign free_list.data_in = gc.init_clear ? {1'b1, clear_index[4:0]} : (gc.writeback_supress ? inuse_table_output.spec_phys_addr : inuse_table_output.previous_phys_addr);
|
||||
assign free_list.pop = rename_valid;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Inuse list FIFO
|
||||
cva5_fifo #(.DATA_WIDTH($bits(renamer_metadata_t)), .FIFO_DEPTH(32)) inuse_list_fifo (
|
||||
//Inuse table
|
||||
assign inuse_table_input = '{
|
||||
rd_addr : issue.rd_addr,
|
||||
spec_phys_addr : issue.phys_rd_addr,
|
||||
previous_phys_addr : spec_table_previous_r.phys_addr,
|
||||
previous_wb_group : spec_table_previous_r.wb_group
|
||||
};
|
||||
|
||||
lutram_1w_1r #(.DATA_TYPE(renamer_metadata_t), .DEPTH(MAX_IDS))
|
||||
inuse_table (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.fifo (inuse_list)
|
||||
.waddr (issue.id),
|
||||
.raddr (wb_retire.id),
|
||||
.ram_write (instruction_issued_with_rd),
|
||||
.new_ram_data (inuse_table_input),
|
||||
.ram_data_out (inuse_table_output)
|
||||
);
|
||||
|
||||
assign inuse_list.potential_push = instruction_issued_with_rd & |issue.rd_addr;
|
||||
assign inuse_list.push = inuse_list.potential_push;
|
||||
|
||||
assign inuse_list_input.rd_addr = issue.rd_addr;
|
||||
assign inuse_list_input.spec_phys_addr = issue.phys_rd_addr;
|
||||
assign inuse_list_input.previous_phys_addr = spec_table_previous_r.phys_addr;
|
||||
assign inuse_list_input.previous_wb_group = spec_table_previous_r.wb_group;
|
||||
assign inuse_list.data_in = inuse_list_input;
|
||||
|
||||
assign inuse_list_output = inuse_list.data_out;
|
||||
assign inuse_list.pop = retire.valid;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Speculative rd-to-phys Table
|
||||
//On rollback restore the previous contents
|
||||
//During post reset init, initialize rd_to_phys with in-use list (lower 32 registers)
|
||||
typedef struct packed{
|
||||
typedef struct packed {
|
||||
phys_addr_t phys_addr;
|
||||
logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] wb_group;
|
||||
logic [$clog2(NUM_WB_GROUPS)-1:0] wb_group;
|
||||
} spec_table_t;
|
||||
rs_addr_t spec_table_read_addr [REGFILE_READ_PORTS+1];
|
||||
spec_table_t spec_table_read_data [REGFILE_READ_PORTS+1];
|
||||
rs_addr_t spec_table_read_addr [READ_PORTS+1];
|
||||
spec_table_t spec_table_read_data [READ_PORTS+1];
|
||||
|
||||
spec_table_t spec_table_next;
|
||||
spec_table_t spec_table_next_mux [4];
|
||||
spec_table_t spec_table_previous;
|
||||
spec_table_t spec_table_previous_r;
|
||||
|
||||
logic spec_table_update;
|
||||
rs_addr_t spec_table_write_index;
|
||||
rs_addr_t spec_table_write_index_mux [4];
|
||||
|
||||
assign spec_table_update = rename_valid | rollback | gc.init_clear | (retire.valid & gc.writeback_supress);
|
||||
assign spec_table_update = rename_valid | rollback | gc.init_clear | (wb_retire.valid & gc.writeback_supress);
|
||||
|
||||
logic [1:0] spec_table_sel;
|
||||
|
||||
one_hot_to_integer #(.C_WIDTH(4)) spec_table_sel_one_hot_to_int (
|
||||
.one_hot ({gc.init_clear, rollback, (retire.valid & gc.writeback_supress), 1'b0}),
|
||||
.one_hot ({gc.init_clear, rollback, (wb_retire.valid & gc.writeback_supress), 1'b0}),
|
||||
.int_out (spec_table_sel)
|
||||
);
|
||||
|
||||
|
@ -152,9 +151,9 @@ module renamer
|
|||
assign spec_table_next_mux[0].phys_addr = free_list.data_out;
|
||||
assign spec_table_next_mux[0].wb_group = decode.rd_wb_group;
|
||||
//gc.writeback_supress
|
||||
assign spec_table_write_index_mux[1] = inuse_list_output.rd_addr;
|
||||
assign spec_table_next_mux[1].phys_addr = inuse_list_output.previous_phys_addr;
|
||||
assign spec_table_next_mux[1].wb_group = inuse_list_output.previous_wb_group;
|
||||
assign spec_table_write_index_mux[1] = inuse_table_output.rd_addr;
|
||||
assign spec_table_next_mux[1].phys_addr = inuse_table_output.previous_phys_addr;
|
||||
assign spec_table_next_mux[1].wb_group = inuse_table_output.previous_wb_group;
|
||||
//rollback
|
||||
assign spec_table_write_index_mux[2] = issue.rd_addr;
|
||||
assign spec_table_next_mux[2].phys_addr = spec_table_previous_r.phys_addr;
|
||||
|
@ -168,12 +167,12 @@ module renamer
|
|||
assign spec_table_next = spec_table_next_mux[spec_table_sel];
|
||||
|
||||
assign spec_table_read_addr[0] = spec_table_write_index;
|
||||
assign spec_table_read_addr[1:REGFILE_READ_PORTS] = '{decode.rs_addr[RS1], decode.rs_addr[RS2]};
|
||||
assign spec_table_read_addr[1+:READ_PORTS] = decode.rs_addr;
|
||||
|
||||
lutram_1w_mr #(
|
||||
.WIDTH($bits(spec_table_t)),
|
||||
.DATA_TYPE(spec_table_t),
|
||||
.DEPTH(32),
|
||||
.NUM_READ_PORTS(REGFILE_READ_PORTS+1)
|
||||
.NUM_READ_PORTS(READ_PORTS+1)
|
||||
)
|
||||
spec_table_ram (
|
||||
.clk(clk),
|
||||
|
@ -183,24 +182,20 @@ module renamer
|
|||
.new_ram_data(spec_table_next),
|
||||
.ram_data_out(spec_table_read_data)
|
||||
);
|
||||
assign spec_table_previous = spec_table_read_data[0];
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
if (spec_table_update) begin
|
||||
spec_table_previous_r <= spec_table_previous;
|
||||
end
|
||||
if (spec_table_update)
|
||||
spec_table_previous_r <= spec_table_read_data[0];
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Renamed Outputs
|
||||
spec_table_t [REGFILE_READ_PORTS-1:0] spec_table_decode;
|
||||
generate for (genvar i = 0; i < REGFILE_READ_PORTS; i++) begin : gen_renamed_addrs
|
||||
assign spec_table_decode[i] = spec_table_read_data[i+1];
|
||||
assign decode.phys_rs_addr[i] = spec_table_decode[i].phys_addr;
|
||||
assign decode.rs_wb_group[i] = spec_table_decode[i].wb_group;
|
||||
generate for (genvar i = 0; i < READ_PORTS; i++) begin : gen_renamed_addrs
|
||||
assign decode.phys_rs_addr[i] = spec_table_read_data[i+1].phys_addr;
|
||||
assign decode.rs_wb_group[i] = spec_table_read_data[i+1].wb_group;
|
||||
end endgenerate
|
||||
assign decode.phys_rd_addr = RENAME_ZERO | |decode.rd_addr ? free_list.data_out : '0;
|
||||
|
||||
assign decode.phys_rd_addr = |decode.rd_addr ? free_list.data_out : '0;
|
||||
////////////////////////////////////////////////////
|
||||
//End of Implementation
|
||||
////////////////////////////////////////////////////
|
||||
|
@ -208,10 +203,10 @@ module renamer
|
|||
////////////////////////////////////////////////////
|
||||
//Assertions
|
||||
rename_rd_zero_assertion:
|
||||
assert property (@(posedge clk) disable iff (rst) (decode.rd_addr == 0) |-> (decode.phys_rd_addr == 0)) else $error("rd zero renamed");
|
||||
assert property (@(posedge clk) disable iff (rst || RENAME_ZERO) (decode.rd_addr == 0) |-> (decode.phys_rd_addr == 0)) else $error("rd zero renamed");
|
||||
|
||||
for (genvar i = 0; i < REGFILE_READ_PORTS; i++) begin : rename_rs_zero_assertion
|
||||
assert property (@(posedge clk) disable iff (rst) (decode.rs_addr[i] == 0) |-> (decode.phys_rs_addr[i] == 0)) else $error("rs zero renamed");
|
||||
for (genvar i = 0; i < READ_PORTS; i++) begin : rename_rs_zero_assertion
|
||||
assert property (@(posedge clk) disable iff (rst || RENAME_ZERO) (decode.rs_addr[i] == 0) |-> (decode.phys_rs_addr[i] == 0)) else $error("rs zero renamed");
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,267 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2020 Eric Matthews, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Eric Matthews <ematthew@sfu.ca>
|
||||
*/
|
||||
|
||||
module store_queue
|
||||
|
||||
import cva5_config::*;
|
||||
import riscv_types::*;
|
||||
import cva5_types::*;
|
||||
|
||||
# (
|
||||
parameter cpu_config_t CONFIG = EXAMPLE_CONFIG
|
||||
)
|
||||
(
|
||||
input logic clk,
|
||||
input logic rst,
|
||||
|
||||
input logic lq_push,
|
||||
input logic lq_pop,
|
||||
store_queue_interface.queue sq,
|
||||
|
||||
//Address hash (shared by loads and stores)
|
||||
input addr_hash_t addr_hash,
|
||||
//hash check on adding a load to the queue
|
||||
output logic [CONFIG.SQ_DEPTH-1:0] potential_store_conflicts,
|
||||
//Load issue collision check
|
||||
input logic [CONFIG.SQ_DEPTH-1:0] prev_store_conflicts,
|
||||
output logic store_conflict,
|
||||
|
||||
//Writeback snooping
|
||||
input wb_packet_t wb_snoop,
|
||||
|
||||
//Retire
|
||||
input id_t retire_ids [RETIRE_PORTS],
|
||||
input logic retire_port_valid [RETIRE_PORTS]
|
||||
);
|
||||
|
||||
localparam LOG2_SQ_DEPTH = $clog2(CONFIG.SQ_DEPTH);
|
||||
typedef logic [LOG2_MAX_IDS:0] load_check_count_t;
|
||||
|
||||
|
||||
wb_packet_t wb_snoop_r;
|
||||
|
||||
//Register-based memory blocks
|
||||
logic [CONFIG.SQ_DEPTH-1:0] valid;
|
||||
logic [CONFIG.SQ_DEPTH-1:0] valid_next;
|
||||
addr_hash_t [CONFIG.SQ_DEPTH-1:0] hashes;
|
||||
logic [CONFIG.SQ_DEPTH-1:0] released;
|
||||
id_t [CONFIG.SQ_DEPTH-1:0] id_needed;
|
||||
load_check_count_t [CONFIG.SQ_DEPTH-1:0] load_check_count;
|
||||
logic [31:0] store_data_from_wb [CONFIG.SQ_DEPTH];
|
||||
|
||||
//LUTRAM-based memory blocks
|
||||
sq_entry_t sq_entry_in;
|
||||
(* ramstyle = "MLAB, no_rw_check" *) logic [$bits(sq_entry_t)-1:0] sq_entry [CONFIG.SQ_DEPTH];
|
||||
(* ramstyle = "MLAB, no_rw_check" *) id_t [CONFIG.SQ_DEPTH-1:0] ids;
|
||||
(* ramstyle = "MLAB, no_rw_check" *) logic [LOG2_SQ_DEPTH-1:0] sq_ids [MAX_IDS];
|
||||
|
||||
load_check_count_t [CONFIG.SQ_DEPTH-1:0] load_check_count_next;
|
||||
|
||||
logic [LOG2_SQ_DEPTH-1:0] sq_index;
|
||||
logic [LOG2_SQ_DEPTH-1:0] sq_index_next;
|
||||
logic [LOG2_SQ_DEPTH-1:0] sq_oldest;
|
||||
|
||||
logic [CONFIG.SQ_DEPTH-1:0] new_request_one_hot;
|
||||
logic [CONFIG.SQ_DEPTH-1:0] issued_one_hot;
|
||||
|
||||
|
||||
logic [CONFIG.SQ_DEPTH-1:0] wb_id_match;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
assign sq_index_next = sq_index +LOG2_SQ_DEPTH'(sq.push);
|
||||
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
sq_index <= 0;
|
||||
else
|
||||
sq_index <= sq_index_next;
|
||||
end
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
sq_oldest <= 0;
|
||||
else
|
||||
sq_oldest <= sq_oldest +LOG2_SQ_DEPTH'(sq.pop);
|
||||
end
|
||||
|
||||
assign new_request_one_hot = CONFIG.SQ_DEPTH'(sq.push) << sq_index;
|
||||
assign issued_one_hot = CONFIG.SQ_DEPTH'(sq.pop) << sq_oldest;
|
||||
|
||||
assign valid_next = (valid | new_request_one_hot) & ~issued_one_hot;
|
||||
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
valid <= '0;
|
||||
else
|
||||
valid <= valid_next;
|
||||
end
|
||||
|
||||
assign sq.empty = ~|valid;
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
sq.full <= 0;
|
||||
else
|
||||
sq.full <= valid_next[sq_index_next] | (|load_check_count_next[sq_index_next]);
|
||||
end
|
||||
|
||||
//SQ attributes and issue data
|
||||
assign sq_entry_in = '{
|
||||
addr : sq.data_in.addr,
|
||||
be : sq.data_in.be,
|
||||
fn3 : sq.data_in.fn3,
|
||||
forwarded_store : sq.data_in.forwarded_store,
|
||||
data : sq.data_in.data
|
||||
};
|
||||
always_ff @ (posedge clk) begin
|
||||
if (sq.push)
|
||||
sq_entry[sq_index] <= sq_entry_in;
|
||||
end
|
||||
|
||||
//Hash mem
|
||||
always_ff @ (posedge clk) begin
|
||||
if (sq.push)
|
||||
hashes[sq_index] <= addr_hash;
|
||||
end
|
||||
|
||||
//Keep count of the number of pending loads that might need a store result
|
||||
//Mask out any store completing on this cycle
|
||||
logic [CONFIG.SQ_DEPTH-1:0] new_load_waiting;
|
||||
logic [CONFIG.SQ_DEPTH-1:0] waiting_load_completed;
|
||||
|
||||
always_comb begin
|
||||
for (int i = 0; i < CONFIG.SQ_DEPTH; i++) begin
|
||||
potential_store_conflicts[i] = (valid[i] & ~issued_one_hot[i]) & (addr_hash == hashes[i]);
|
||||
new_load_waiting[i] = potential_store_conflicts[i] & lq_push;
|
||||
waiting_load_completed[i] = prev_store_conflicts[i] & lq_pop;
|
||||
|
||||
load_check_count_next[i] =
|
||||
load_check_count[i]
|
||||
+ LOG2_MAX_IDS'(new_load_waiting[i])
|
||||
- LOG2_MAX_IDS'(waiting_load_completed[i]);
|
||||
end
|
||||
end
|
||||
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
load_check_count <= '0;
|
||||
else
|
||||
load_check_count <= load_check_count_next;
|
||||
end
|
||||
|
||||
//If a potential blocking store has not been issued yet, the load is blocked until the store(s) complete
|
||||
assign store_conflict = |(prev_store_conflicts & valid);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//ID Handling
|
||||
|
||||
//sq_id to global_id mem
|
||||
always_ff @ (posedge clk) begin
|
||||
if (sq.push)
|
||||
ids[sq_index] <= sq.data_in.id;
|
||||
end
|
||||
// global_id to sq_id mem
|
||||
always_ff @ (posedge clk) begin
|
||||
if (sq.push)
|
||||
sq_ids[sq.data_in.id] <= sq_index;
|
||||
end
|
||||
//waiting on ID mem
|
||||
always_ff @ (posedge clk) begin
|
||||
if (sq.push)
|
||||
id_needed[sq_index] <= sq.data_in.id_needed;
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Release Handling
|
||||
logic [CONFIG.SQ_DEPTH-1:0] newly_released;
|
||||
logic [LOG2_SQ_DEPTH-1:0] store_released_index [RETIRE_PORTS];
|
||||
logic store_released [RETIRE_PORTS];
|
||||
always_comb begin
|
||||
newly_released = '0;
|
||||
for (int i = 0; i < RETIRE_PORTS; i++) begin
|
||||
store_released_index[i] = sq_ids[retire_ids[i]];
|
||||
store_released[i] = {1'b1, ids[store_released_index[i]]} == {retire_port_valid[i], retire_ids[i]};
|
||||
newly_released |= CONFIG.SQ_DEPTH'(store_released[i]) << store_released_index[i];
|
||||
end
|
||||
end
|
||||
always_ff @ (posedge clk) begin
|
||||
released <= (released | newly_released) & ~new_request_one_hot;
|
||||
end
|
||||
|
||||
assign sq.no_released_stores_pending = ~|(valid & released);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Forwarded Store Data
|
||||
always_ff @ (posedge clk) begin
|
||||
wb_snoop_r <= wb_snoop;
|
||||
end
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
for (int i = 0; i < CONFIG.SQ_DEPTH; i++) begin
|
||||
if ({1'b0, wb_snoop_r.valid, wb_snoop_r.id} == {released[i], 1'b1, id_needed[i]})
|
||||
store_data_from_wb[i] <= wb_snoop_r.data;
|
||||
end
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Store Transaction Outputs
|
||||
logic [31:0] data_for_alignment;
|
||||
logic [31:0] sq_data;
|
||||
sq_entry_t output_entry;
|
||||
assign output_entry = sq_entry[sq_oldest];
|
||||
|
||||
always_comb begin
|
||||
//Input: ABCD
|
||||
//Assuming aligned requests,
|
||||
//Possible byte selections: (A/C/D, B/D, C/D, D)
|
||||
data_for_alignment = output_entry.forwarded_store ? store_data_from_wb[sq_oldest] : output_entry.data;
|
||||
|
||||
sq_data[7:0] = data_for_alignment[7:0];
|
||||
sq_data[15:8] = (output_entry.addr[1:0] == 2'b01) ? data_for_alignment[7:0] : data_for_alignment[15:8];
|
||||
sq_data[23:16] = (output_entry.addr[1:0] == 2'b10) ? data_for_alignment[7:0] : data_for_alignment[23:16];
|
||||
case(output_entry.addr[1:0])
|
||||
2'b10 : sq_data[31:24] = data_for_alignment[15:8];
|
||||
2'b11 : sq_data[31:24] = data_for_alignment[7:0];
|
||||
default : sq_data[31:24] = data_for_alignment[31:24];
|
||||
endcase
|
||||
end
|
||||
|
||||
assign sq.valid = valid[sq_oldest] & released[sq_oldest];
|
||||
assign sq.data_out = '{
|
||||
addr : output_entry.addr,
|
||||
be : output_entry.be,
|
||||
fn3 : output_entry.fn3,
|
||||
forwarded_store : output_entry.forwarded_store,
|
||||
data : sq_data
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//End of Implementation
|
||||
////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Assertions
|
||||
sq_overflow_assertion:
|
||||
assert property (@(posedge clk) disable iff (rst) sq.push |-> (~sq.full | sq.pop)) else $error("sq overflow");
|
||||
fifo_underflow_assertion:
|
||||
assert property (@(posedge clk) disable iff (rst) sq.pop |-> sq.valid) else $error("sq underflow");
|
||||
|
||||
|
||||
endmodule
|
|
@ -91,7 +91,7 @@ module tlb_lut_ram
|
|||
genvar i;
|
||||
generate
|
||||
for (i=0; i<WAYS; i=i+1) begin : lut_rams
|
||||
lutram_1w_1r #(.WIDTH($bits(tlb_entry_t)), .DEPTH(DEPTH))
|
||||
lutram_1w_1r #(.DATA_TYPE(tlb_entry_t), .DEPTH(DEPTH))
|
||||
write_port (
|
||||
.clk(clk),
|
||||
.waddr(tlb_addr),
|
||||
|
|
|
@ -64,9 +64,9 @@ package csr_types;
|
|||
logic I; //Base
|
||||
logic H;
|
||||
logic G;
|
||||
logic F;
|
||||
logic F; //Single precision
|
||||
logic E;
|
||||
logic D;
|
||||
logic D; //Double precision
|
||||
logic C;
|
||||
logic B;
|
||||
logic A; //Atomic
|
|
@ -33,7 +33,7 @@ package cva5_config;
|
|||
////////////////////////////////////////////////////
|
||||
//CSR Options
|
||||
typedef struct packed {
|
||||
int unsigned COUNTER_W; //CSR counter width (33-64 bits): 48-bits --> 32 days @ 100MHz
|
||||
int unsigned COUNTER_W; //CSR counter width (33-64 bits): 48-bits --> 32 days @ 100MHz
|
||||
bit MCYCLE_WRITEABLE;
|
||||
bit MINSTR_WRITEABLE;
|
||||
bit MTVEC_WRITEABLE;
|
||||
|
@ -99,20 +99,84 @@ package cva5_config;
|
|||
int unsigned DEPTH;
|
||||
} tlb_config_t;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Unit IDs
|
||||
//To add a new unit update:
|
||||
// - MAX_NUM_UNITS
|
||||
// - units_t
|
||||
// - unit_id_enum_t
|
||||
//ensuring that the bit index in units_t matches the enum value in unit_id_enum_t
|
||||
//Additionally, writeback units must be grouped before non-writeback units
|
||||
localparam MAX_NUM_UNITS = 9;
|
||||
typedef struct packed {
|
||||
bit IEC;
|
||||
bit BR;
|
||||
//End of Write-Back Units
|
||||
bit CUSTOM;
|
||||
bit FPU;
|
||||
bit CSR;
|
||||
bit DIV;
|
||||
bit MUL;
|
||||
bit LS;
|
||||
bit ALU;
|
||||
} units_t;
|
||||
|
||||
typedef enum bit [$clog2(MAX_NUM_UNITS)-1:0] {
|
||||
IEC_ID = 8,
|
||||
BR_ID = 7,
|
||||
//End of Write-Back Units (insert new writeback units here)
|
||||
CUSTOM_ID = 6,
|
||||
FPU_ID = 5,
|
||||
CSR_ID = 4,
|
||||
DIV_ID = 3,
|
||||
MUL_ID = 2,
|
||||
LS_ID = 1,
|
||||
ALU_ID = 0
|
||||
} unit_id_enum_t;
|
||||
localparam unit_id_enum_t NON_WRITEBACK_ID = BR_ID;
|
||||
|
||||
//WB Group config
|
||||
// First index is write-back port
|
||||
// Second index is position within the write-back port (Priority selection, with highest priority for index 0)
|
||||
// See EXAMPLE_WB_GROUP_CONFIG below for an example of how to specify the configuration
|
||||
typedef unit_id_enum_t [MAX_NUM_UNITS-1:0][MAX_NUM_UNITS-1:0] wb_group_config_t;
|
||||
|
||||
//Convenience function for determining how many writeback units are in each writeback group
|
||||
function int unsigned get_num_wb_units (input unit_id_enum_t [MAX_NUM_UNITS-1:0] ids);
|
||||
get_num_wb_units = 0;
|
||||
for (int i = 0; i < MAX_NUM_UNITS; i++)
|
||||
if (ids[i] != NON_WRITEBACK_ID)
|
||||
get_num_wb_units++;
|
||||
endfunction
|
||||
|
||||
//Convenience function for turning the enum-based WB grouping into the units_t bit-vector representation
|
||||
//used in decode stage to determine the writeback group for the current instruction
|
||||
function units_t [MAX_NUM_UNITS-1:0] get_wb_units_type_representation(input wb_group_config_t ids);
|
||||
get_wb_units_type_representation = '{default : '0};
|
||||
for (int i = 0; i < MAX_NUM_UNITS; i++)
|
||||
for (int j = 0; j < MAX_NUM_UNITS; j++)
|
||||
if (ids[i][j] != NON_WRITEBACK_ID)
|
||||
get_wb_units_type_representation[i][ids[i][j]] = 1;
|
||||
endfunction
|
||||
|
||||
typedef struct packed {
|
||||
//ISA options
|
||||
bit INCLUDE_M_MODE;
|
||||
bit INCLUDE_S_MODE;
|
||||
bit INCLUDE_U_MODE;
|
||||
bit INCLUDE_MUL;
|
||||
bit INCLUDE_DIV;
|
||||
|
||||
bit INCLUDE_IFENCE; //local mem operations only
|
||||
bit INCLUDE_CSRS;
|
||||
bit INCLUDE_AMO; //cache operations only
|
||||
bit INCLUDE_AMO;
|
||||
bit INCLUDE_CBO; //Data cache invalidation operations
|
||||
|
||||
//Units
|
||||
units_t INCLUDE_UNIT;
|
||||
|
||||
//CSR constants
|
||||
csr_config_t CSRS;
|
||||
//Memory Options
|
||||
int unsigned SQ_DEPTH;//CAM-based reasonable max of 4
|
||||
bit INCLUDE_FORWARDING_TO_STORES;
|
||||
//Caches
|
||||
bit INCLUDE_ICACHE;
|
||||
cache_config_t ICACHE;
|
||||
|
@ -139,6 +203,7 @@ package cva5_config;
|
|||
branch_predictor_config_t BP;
|
||||
//Writeback Options
|
||||
int unsigned NUM_WB_GROUPS;
|
||||
wb_group_config_t WB_GROUP;
|
||||
} cpu_config_t;
|
||||
|
||||
//Function to generate derived cache parameters
|
||||
|
@ -151,17 +216,42 @@ package cva5_config;
|
|||
};
|
||||
endfunction
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Example Config
|
||||
// ALU requires its own WB port
|
||||
// LS unit must be the first unit on its writeback port (LS unit does not use ack signal for timing considerations)
|
||||
// Index in group is the priority order (highest priority for index zero)
|
||||
// For optimal resource usage, there should be no holes in the write-back unit ordering
|
||||
// (i.e. if a unit is often not included, either remove from the WB config or place at the end of a writeback group)
|
||||
localparam wb_group_config_t EXAMPLE_WB_GROUP_CONFIG = '{
|
||||
0 : '{0: ALU_ID, default : NON_WRITEBACK_ID},
|
||||
1 : '{0: LS_ID, default : NON_WRITEBACK_ID},
|
||||
2 : '{0: MUL_ID, 1: DIV_ID, 2: CSR_ID, 3: FPU_ID, 4: CUSTOM_ID, default : NON_WRITEBACK_ID},
|
||||
default : '{default : NON_WRITEBACK_ID}
|
||||
};
|
||||
|
||||
localparam cpu_config_t EXAMPLE_CONFIG = '{
|
||||
//ISA options
|
||||
INCLUDE_M_MODE : 1,
|
||||
INCLUDE_S_MODE : 1,
|
||||
INCLUDE_U_MODE : 1,
|
||||
INCLUDE_MUL : 1,
|
||||
INCLUDE_DIV : 1,
|
||||
INCLUDE_S_MODE : 0,
|
||||
INCLUDE_U_MODE : 0,
|
||||
|
||||
INCLUDE_UNIT : '{
|
||||
ALU : 1,
|
||||
LS : 1,
|
||||
MUL : 1,
|
||||
DIV : 1,
|
||||
CSR : 1,
|
||||
FPU : 1,
|
||||
CUSTOM : 0,
|
||||
BR : 1,
|
||||
IEC : 1
|
||||
},
|
||||
|
||||
INCLUDE_IFENCE : 1,
|
||||
INCLUDE_CSRS : 1,
|
||||
INCLUDE_AMO : 0,
|
||||
INCLUDE_CBO : 0,
|
||||
|
||||
//CSR constants
|
||||
CSRS : '{
|
||||
MACHINE_IMPLEMENTATION_ID : 0,
|
||||
|
@ -170,16 +260,17 @@ package cva5_config;
|
|||
RESET_MTVEC : 32'h80000100,
|
||||
NON_STANDARD_OPTIONS : '{
|
||||
COUNTER_W : 33,
|
||||
MCYCLE_WRITEABLE : 1,
|
||||
MINSTR_WRITEABLE : 1,
|
||||
MCYCLE_WRITEABLE : 0,
|
||||
MINSTR_WRITEABLE : 0,
|
||||
MTVEC_WRITEABLE : 1,
|
||||
INCLUDE_MSCRATCH : 1,
|
||||
INCLUDE_MSCRATCH : 0,
|
||||
INCLUDE_MCAUSE : 1,
|
||||
INCLUDE_MTVAL : 1
|
||||
}
|
||||
},
|
||||
//Memory Options
|
||||
SQ_DEPTH : 4,
|
||||
INCLUDE_FORWARDING_TO_STORES : 1,
|
||||
INCLUDE_ICACHE : 0,
|
||||
ICACHE_ADDR : '{
|
||||
L: 32'h80000000,
|
||||
|
@ -249,29 +340,8 @@ package cva5_config;
|
|||
RAS_ENTRIES : 8
|
||||
},
|
||||
//Writeback Options
|
||||
NUM_WB_GROUPS : 2
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Unit IDs
|
||||
typedef struct packed {
|
||||
int unsigned ALU;
|
||||
int unsigned LS;
|
||||
int unsigned CSR;
|
||||
int unsigned MUL;
|
||||
int unsigned DIV;
|
||||
int unsigned BR;
|
||||
int unsigned IEC;
|
||||
} unit_id_param_t;
|
||||
|
||||
localparam unit_id_param_t EXAMPLE_UNIT_IDS = '{
|
||||
ALU : 0,
|
||||
LS : 1,
|
||||
CSR : 2,
|
||||
MUL : 3,
|
||||
DIV : 4,
|
||||
BR : 5,
|
||||
IEC : 6
|
||||
NUM_WB_GROUPS : 3,
|
||||
WB_GROUP : EXAMPLE_WB_GROUP_CONFIG
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
|
@ -282,17 +352,28 @@ package cva5_config;
|
|||
////////////////////////////////////////////////////
|
||||
//ID limit
|
||||
//MAX_IDS restricted to a power of 2
|
||||
localparam MAX_IDS = 8; //8 sufficient for rv32im configs
|
||||
localparam MAX_IDS = 16; //8 sufficient for rv32imd configs
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Number of commit ports
|
||||
localparam RETIRE_PORTS = 2; //min 1. (Non-powers of two supported) > 1 is recommended to allow stores to commit sooner
|
||||
localparam REGFILE_READ_PORTS = 2; //min 2, for RS1 and RS2. (Non-powers of two supported)
|
||||
typedef enum bit {
|
||||
typedef enum {
|
||||
RS1 = 0,
|
||||
RS2 = 1
|
||||
} rs1_index_t;
|
||||
RS2 = 1,
|
||||
RS3 = 2
|
||||
} rs_index_t;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//FP number widths
|
||||
localparam EXPO_WIDTH = 11; //11 is compliant
|
||||
localparam FRAC_WIDTH = 52; //52 is compliant
|
||||
localparam EXPO_WIDTH_F = 8; //8 is compliant
|
||||
localparam FRAC_WIDTH_F = 23; //23 is compliant
|
||||
localparam GRS_WIDTH = FRAC_WIDTH*2; //Should be FRAC_WIDTH*2 for full compliance
|
||||
//Do not change these values, they are derived from the previous
|
||||
localparam FLEN = 1+EXPO_WIDTH+FRAC_WIDTH; //Double precision (64 bits)
|
||||
localparam FLEN_F = 1+EXPO_WIDTH_F+FRAC_WIDTH_F; //Single precision (32 bits)
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Exceptions
|
||||
|
@ -304,19 +385,13 @@ package cva5_config;
|
|||
PRE_ISSUE_EXCEPTION = 2
|
||||
} exception_sources_t;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Trace Options
|
||||
//Trace interface is necessary for verilator simulation
|
||||
localparam ENABLE_TRACE_INTERFACE = 1;
|
||||
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//L1 Arbiter IDs
|
||||
localparam L1_CONNECTIONS = 4;
|
||||
typedef enum bit [1:0] {
|
||||
L1_DCACHE_ID = 0,
|
||||
L1_DMMU_ID = 1,
|
||||
L1_ICACHE_ID = 2,
|
||||
L1_ICACHE_ID = 1,
|
||||
L1_DMMU_ID = 2,
|
||||
L1_IMMU_ID = 3
|
||||
} l1_id_t;
|
||||
|
|
@ -41,13 +41,6 @@ package cva5_types;
|
|||
ALU_SHIFT = 2'b11
|
||||
} alu_op_t;
|
||||
|
||||
typedef enum logic [1:0] {
|
||||
ALU_LOGIC_XOR = 2'b00,
|
||||
ALU_LOGIC_OR = 2'b01,
|
||||
ALU_LOGIC_AND = 2'b10,
|
||||
ALU_LOGIC_ADD = 2'b11
|
||||
} alu_logic_op_t;
|
||||
|
||||
typedef struct packed{
|
||||
logic valid;
|
||||
exception_code_t code;
|
||||
|
@ -77,8 +70,10 @@ package cva5_types;
|
|||
|
||||
rs_addr_t rd_addr;
|
||||
phys_addr_t phys_rd_addr;
|
||||
phys_addr_t fp_phys_rd_addr;
|
||||
|
||||
logic uses_rd;
|
||||
logic fp_uses_rd;
|
||||
logic is_multicycle;
|
||||
id_t id;
|
||||
exception_sources_t exception_unit;
|
||||
|
@ -86,34 +81,6 @@ package cva5_types;
|
|||
fetch_metadata_t fetch_metadata;
|
||||
} issue_packet_t;
|
||||
|
||||
typedef struct packed{
|
||||
logic [XLEN:0] in1;//contains sign padding bit for slt operation
|
||||
logic [XLEN:0] in2;//contains sign padding bit for slt operation
|
||||
logic [XLEN-1:0] shifter_in;
|
||||
logic [31:0] constant_adder;
|
||||
alu_op_t alu_op;
|
||||
alu_logic_op_t logic_op;
|
||||
logic [4:0] shift_amount;
|
||||
logic subtract;
|
||||
logic arith;//contains sign padding bit for arithmetic shift right operation
|
||||
logic lshift;
|
||||
} alu_inputs_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [XLEN:0] rs1;
|
||||
logic [XLEN:0] rs2;
|
||||
logic [31:0] pc_p4;
|
||||
logic [2:0] fn3;
|
||||
logic [31:0] issue_pc;
|
||||
logic issue_pc_valid;
|
||||
logic jal;
|
||||
logic jalr;
|
||||
logic jal_jalr;
|
||||
logic is_call;
|
||||
logic is_return;
|
||||
logic [20:0] pc_offset;
|
||||
} branch_inputs_t;
|
||||
|
||||
typedef struct packed {
|
||||
id_t id;
|
||||
logic valid;
|
||||
|
@ -138,66 +105,29 @@ package cva5_types;
|
|||
logic [4:0] op;
|
||||
} amo_details_t;
|
||||
|
||||
typedef struct packed{
|
||||
logic [XLEN-1:0] rs1;
|
||||
logic [XLEN-1:0] rs2;
|
||||
logic [11:0] offset;
|
||||
logic [2:0] fn3;
|
||||
logic load;
|
||||
logic store;
|
||||
logic fence;
|
||||
logic forwarded_store;
|
||||
id_t store_forward_id;
|
||||
//amo support
|
||||
amo_details_t amo;
|
||||
} load_store_inputs_t;
|
||||
|
||||
typedef struct packed{
|
||||
logic [XLEN-1:0] rs1;
|
||||
logic [XLEN-1:0] rs2;
|
||||
logic [1:0] op;
|
||||
} mul_inputs_t;
|
||||
|
||||
typedef struct packed{
|
||||
logic [XLEN-1:0] rs1;
|
||||
logic [XLEN-1:0] rs2;
|
||||
logic [1:0] op;
|
||||
logic reuse_result;
|
||||
} div_inputs_t;
|
||||
|
||||
typedef struct packed{
|
||||
csr_addr_t addr;
|
||||
logic[1:0] op;
|
||||
logic reads;
|
||||
logic writes;
|
||||
logic [XLEN-1:0] data;
|
||||
} csr_inputs_t;
|
||||
|
||||
typedef struct packed{
|
||||
logic [31:0] pc_p4;
|
||||
logic is_ifence;
|
||||
logic is_mret;
|
||||
logic is_sret;
|
||||
} gc_inputs_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [31:0] addr;
|
||||
logic load;
|
||||
logic store;
|
||||
logic cache_op;
|
||||
logic [3:0] be;
|
||||
logic [2:0] fn3;
|
||||
logic [31:0] data;
|
||||
id_t id;
|
||||
logic forwarded_store;
|
||||
id_t id_needed;
|
||||
logic fp;
|
||||
logic double;
|
||||
logic [FLEN-1:0] fp_data;
|
||||
} lsq_entry_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [31:0] addr;
|
||||
logic [3:0] be;
|
||||
logic [2:0] fn3;
|
||||
logic forwarded_store;
|
||||
logic cache_op;
|
||||
logic [31:0] data;
|
||||
logic fp;
|
||||
logic double;
|
||||
logic [FLEN-1:0] fp_data;
|
||||
} sq_entry_t;
|
||||
|
||||
typedef struct packed {
|
||||
|
@ -212,27 +142,34 @@ package cva5_types;
|
|||
logic [31:0] data;
|
||||
} wb_packet_t;
|
||||
|
||||
typedef struct packed {
|
||||
id_t id;
|
||||
logic valid;
|
||||
logic[FLEN-1:0] data;
|
||||
} fp_wb_packet_t;
|
||||
|
||||
typedef struct packed{
|
||||
id_t id;
|
||||
logic valid;
|
||||
phys_addr_t phys_addr;
|
||||
logic [31:0] data;
|
||||
} commit_packet_t;
|
||||
|
||||
typedef struct packed{
|
||||
logic valid;
|
||||
id_t phys_id;
|
||||
logic [LOG2_RETIRE_PORTS : 0] count;
|
||||
} retire_packet_t;
|
||||
|
||||
typedef enum logic[1:0] {
|
||||
INT_DONE,
|
||||
SINGLE_DONE,
|
||||
DOUBLE_HOLD,
|
||||
DOUBLE_DONE
|
||||
} fp_ls_op_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [31:0] addr;
|
||||
logic load;
|
||||
logic store;
|
||||
logic cache_op;
|
||||
logic [3:0] be;
|
||||
logic [2:0] fn3;
|
||||
logic [31:0] data_in;
|
||||
id_t id;
|
||||
fp_ls_op_t fp_op;
|
||||
} data_access_shared_inputs_t;
|
||||
|
||||
typedef enum {
|
||||
|
@ -262,46 +199,50 @@ package cva5_types;
|
|||
logic external;
|
||||
} interrupt_t;
|
||||
|
||||
typedef struct packed {
|
||||
//Fetch
|
||||
logic early_branch_correction;
|
||||
typedef enum {
|
||||
FETCH_EARLY_BR_CORRECTION_STAT,
|
||||
FETCH_SUB_UNIT_STALL_STAT,
|
||||
FETCH_ID_STALL_STAT,
|
||||
FETCH_IC_HIT_STAT,
|
||||
FETCH_IC_MISS_STAT,
|
||||
FETCH_IC_ARB_STALL_STAT,
|
||||
|
||||
//Decode
|
||||
logic operand_stall;
|
||||
logic unit_stall;
|
||||
logic no_id_stall;
|
||||
logic no_instruction_stall;
|
||||
logic other_stall;
|
||||
logic instruction_issued_dec;
|
||||
logic branch_operand_stall;
|
||||
logic alu_operand_stall;
|
||||
logic ls_operand_stall;
|
||||
logic div_operand_stall;
|
||||
FETCH_BP_BR_CORRECT_STAT,
|
||||
FETCH_BP_BR_MISPREDICT_STAT,
|
||||
FETCH_BP_RAS_CORRECT_STAT,
|
||||
FETCH_BP_RAS_MISPREDICT_STAT,
|
||||
|
||||
//Instruction mix
|
||||
logic alu_op;
|
||||
logic branch_or_jump_op;
|
||||
logic load_op;
|
||||
logic store_op;
|
||||
logic mul_op;
|
||||
logic div_op;
|
||||
logic misc_op;
|
||||
ISSUE_NO_INSTRUCTION_STAT,
|
||||
ISSUE_NO_ID_STAT,
|
||||
ISSUE_FLUSH_STAT,
|
||||
ISSUE_UNIT_BUSY_STAT,
|
||||
ISSUE_OPERANDS_NOT_READY_STAT,
|
||||
ISSUE_HOLD_STAT,
|
||||
ISSUE_MULTI_SOURCE_STAT,
|
||||
ISSUE_OPERAND_STALL_ON_LOAD_STAT,
|
||||
ISSUE_OPERAND_STALL_ON_MULTIPLY_STAT,
|
||||
ISSUE_OPERAND_STALL_ON_DIVIDE_STAT,
|
||||
ISSUE_OPERAND_STALL_FOR_BRANCH_STAT,
|
||||
ISSUE_STORE_WITH_FORWARDED_DATA_STAT,
|
||||
ISSUE_DIVIDER_RESULT_REUSE_STAT,
|
||||
|
||||
//Branch Unit
|
||||
logic branch_correct;
|
||||
logic branch_misspredict;
|
||||
logic return_correct;
|
||||
logic return_misspredict;
|
||||
LSU_LOAD_BLOCKED_BY_STORE_STAT,
|
||||
LSU_SUB_UNIT_STALL_STAT,
|
||||
LSU_DC_HIT_STAT,
|
||||
LSU_DC_MISS_STAT,
|
||||
LSU_DC_ARB_STALL_STAT
|
||||
} stats_t;
|
||||
|
||||
//Load Store Unit
|
||||
logic load_conflict_delay;
|
||||
|
||||
//Register File
|
||||
logic rs1_forwarding_needed;
|
||||
logic rs2_forwarding_needed;
|
||||
logic rs1_and_rs2_forwarding_needed;
|
||||
|
||||
} cva5_trace_events_t;
|
||||
//Categories used for instruction-mix statistics (per the type name).
//No base type or explicit values are given, so the enumerators take the
//default enum encoding: int, starting at 0 and incrementing in declaration
//order (ALU_STAT = 0 ... MISC_STAT = 7).
typedef enum {
|
||||
ALU_STAT,
|
||||
BR_STAT,
|
||||
MUL_STAT,
|
||||
DIV_STAT,
|
||||
LOAD_STAT,
|
||||
STORE_STAT,
|
||||
FPU_STAT,
|
||||
MISC_STAT
|
||||
} instruction_mix_stats_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [31:0] pc;
|
||||
|
@ -309,10 +250,4 @@ package cva5_types;
|
|||
logic valid;
|
||||
} trace_retire_outputs_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [31:0] instruction_pc_dec;
|
||||
logic [31:0] instruction_data_dec;
|
||||
cva5_trace_events_t events;
|
||||
} trace_outputs_t;
|
||||
|
||||
endpackage
|
236
core/types_and_interfaces/fpu_types.sv
Normal file
236
core/types_and_interfaces/fpu_types.sv
Normal file
|
@ -0,0 +1,236 @@
|
|||
/*
|
||||
* Copyright © 2019-2023 Yuhui Gao, Chris Keilbart, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Yuhui Gao <yuhuig@sfu.ca>
|
||||
* Chris Keilbart <ckeilbar@sfu.ca>
|
||||
*/
|
||||
|
||||
|
||||
package fpu_types;
|
||||
import cva5_config::*;
|
||||
import cva5_types::*;
|
||||
|
||||
typedef logic[GRS_WIDTH-1:0] grs_t;
|
||||
typedef logic[EXPO_WIDTH-1:0] fp_shift_amt_t;
|
||||
|
||||
//Constants
|
||||
localparam BIAS = 2**(EXPO_WIDTH-1) - 1;
|
||||
localparam BIAS_F = 2**(EXPO_WIDTH_F-1)-1;
|
||||
localparam [FLEN-1:0] CANONICAL_NAN = {1'b0, {EXPO_WIDTH{1'b1}}, 1'b1, {(FRAC_WIDTH-1){1'b0}}}; //canonical NaN
|
||||
|
||||
typedef logic[EXPO_WIDTH-1:0] expo_d_t;
|
||||
typedef logic[EXPO_WIDTH_F-1:0] expo_s_t;
|
||||
typedef logic[FRAC_WIDTH-1:0] frac_d_t;
|
||||
|
||||
//Packed union giving three overlapping views of the same FLEN bits:
//  raw : the value as a plain FLEN-bit vector
//  d   : sign / exponent (expo_d_t) / fraction (frac_d_t)
//  s   : a narrower sign / exponent (expo_s_t) / fraction, placed in the low
//        bits below a (FLEN-FLEN_F)-bit "box" field
//All members of a packed union must be the same width, so the config values
//must satisfy 1 + EXPO_WIDTH + FRAC_WIDTH == FLEN (and likewise for the s view).
//NOTE(review): d/s presumably mean double/single precision and "box" the
//NaN-boxing bits of a narrower value - confirm against the users of this type.
typedef union packed {
|
||||
logic[FLEN-1:0] raw;
|
||||
struct packed {
|
||||
logic sign;
|
||||
expo_d_t expo;
|
||||
frac_d_t frac;
|
||||
} d;
|
||||
struct packed {
|
||||
logic[FLEN-FLEN_F-1:0] box;
|
||||
logic sign;
|
||||
expo_s_t expo;
|
||||
logic[FRAC_WIDTH_F-1:0] frac;
|
||||
} s;
|
||||
} fp_t;
|
||||
|
||||
//One flag per special operand class.
//NOTE(review): meanings are taken from the field names (infinity, signalling
//NaN, quiet NaN, zero); the logic that sets them is not in this package.
typedef struct packed {
|
||||
logic inf;
|
||||
logic snan;
|
||||
logic qnan;
|
||||
logic zero;
|
||||
} special_case_t;
|
||||
|
||||
typedef logic[2:0] rm_t;
|
||||
|
||||
//Floating-point exception flags.
//The struct is packed, so nv is the MSB and nx the LSB; this is the same bit
//order as the RISC-V fcsr fflags field (NV, DZ, OF, UF, NX from bit 4 to bit 0).
typedef struct packed {
|
||||
logic nv;
|
||||
logic dz;
|
||||
logic of;
|
||||
logic uf;
|
||||
logic nx;
|
||||
} fflags_t;
|
||||
|
||||
//Bundle describing one floating-point request: rounding mode, a valid bit,
//a 5-bit unit field, three fp_t operands plus a 32-bit integer operand,
//the instruction ID, and a set of per-operation flags.
//NOTE(review): producer and consumer are not visible in this package;
//presumably this is handed to an FPU preprocessing stage - confirm.
typedef struct packed {
|
||||
rm_t rm;
|
||||
logic valid;
|
||||
logic[4:0] unit;
|
||||
fp_t rs1;
|
||||
fp_t rs2;
|
||||
fp_t rs3;
|
||||
logic[31:0] int_rs1;
|
||||
id_t id;
|
||||
logic is_single;
|
||||
logic is_fma;
|
||||
logic is_fadd;
|
||||
logic is_i2f;
|
||||
logic is_d2s;
|
||||
logic is_minmax;
|
||||
logic is_sign_inj;
|
||||
logic is_sign_inj_single;
|
||||
logic is_f2i;
|
||||
logic is_mv_i2f;
|
||||
logic is_fcmp;
|
||||
logic is_class;
|
||||
logic add;
|
||||
logic neg_mul;
|
||||
logic conv_signed;
|
||||
} fp_preprocessing_packet_t;
|
||||
|
||||
//Operand bundle for floating-point addition (per the type name): two fp_t
//operands with per-operand hidden/safe bits and special-case flags, plus
//control fields (add, swap, rounding mode, single).
//expo_diff is EXPO_WIDTH+1 bits, i.e. one bit wider than an exponent field.
//NOTE(review): the exact meaning of "safe", "swap" and fp_add_grs is defined
//by the modules that fill and read this struct, which are not shown here.
typedef struct packed {
|
||||
fp_t rs1;
|
||||
fp_t rs2;
|
||||
logic rs1_hidden;
|
||||
logic rs2_hidden;
|
||||
logic rs1_safe;
|
||||
logic rs2_safe;
|
||||
special_case_t rs1_special_case;
|
||||
special_case_t rs2_special_case;
|
||||
logic rs1_expo_overflow;
|
||||
logic[EXPO_WIDTH:0] expo_diff;
|
||||
logic add;
|
||||
logic swap;
|
||||
grs_t fp_add_grs;
|
||||
rm_t rm;
|
||||
logic single;
|
||||
} fp_add_inputs_t;
|
||||
|
||||
//Operand bundle for floating-point multiplication (per the type name): two
//fp_t operands with hidden bits and special-case flags, rounding mode, a
//single flag, and a shift amount for rs2 only.
//NOTE(review): rs2_prenormalize_shift_amt presumably pre-normalizes a
//subnormal rs2 - confirm against the multiplier implementation.
typedef struct packed {
|
||||
special_case_t rs1_special_case;
|
||||
special_case_t rs2_special_case;
|
||||
logic rs1_hidden;
|
||||
logic rs2_hidden;
|
||||
fp_t rs1;
|
||||
fp_t rs2;
|
||||
rm_t rm;
|
||||
logic single;
|
||||
fp_shift_amt_t rs2_prenormalize_shift_amt;
|
||||
} fp_mul_inputs_t;
|
||||
|
||||
//Extra fields carried for a fused multiply-add (per the type name): two sign
//bits plus the third operand rs3 with its hidden bit and special-case flags.
//rs1/rs2 are not part of this struct.
typedef struct packed {
|
||||
logic mul_sign;
|
||||
logic add_sign;
|
||||
fp_t rs3;
|
||||
logic rs3_hidden;
|
||||
special_case_t rs3_special_case;
|
||||
} fp_fma_inputs_t;
|
||||
|
||||
//Combined argument bundle for add / fma / mul operations.
//The add and fma bits select the operation; all three argument structs are
//always present in the packed layout regardless of which one is selected.
typedef struct packed {
|
||||
logic add;
|
||||
logic fma;
|
||||
//mul is implicit if others unset
|
||||
fp_add_inputs_t add_args;
|
||||
fp_fma_inputs_t fma_args;
|
||||
fp_mul_inputs_t mul_args;
|
||||
} fp_madd_inputs_t;
|
||||
|
||||
//Operand bundle for floating-point division (per the type name): two fp_t
//operands with hidden bits, special-case flags and a shift amount for each
//operand, plus rounding mode and a single flag.
typedef struct packed {
|
||||
fp_t rs1;
|
||||
fp_t rs2;
|
||||
rm_t rm;
|
||||
logic rs1_hidden;
|
||||
logic rs2_hidden;
|
||||
fp_shift_amt_t rs1_prenormalize_shift_amt;
|
||||
fp_shift_amt_t rs2_prenormalize_shift_amt;
|
||||
logic single;
|
||||
special_case_t rs1_special_case;
|
||||
special_case_t rs2_special_case;
|
||||
} fp_div_inputs_t;
|
||||
|
||||
//Digit set for division
|
||||
//3-bit encodings for the quotient digits -3 .. +2.
//The values are assigned explicitly and are not sequential; in the encodings
//below the MSB is set only for the two positive digits.
//NOTE(review): the divider that depends on these exact encodings is not shown
//here - do not renumber without checking it.
typedef enum logic[2:0] {
|
||||
NEG_THREE = 3'b010, //Only reached by subtraction when last quotient digit is -2
|
||||
NEG_TWO = 3'b011,
|
||||
NEG_ONE = 3'b001,
|
||||
ZERO = 3'b000,
|
||||
POS_ONE = 3'b101,
|
||||
POS_TWO = 3'b111
|
||||
} q_t;
|
||||
|
||||
//Operand bundle for floating-point square root (per the type name): a single
//fp_t operand with its hidden bit, special-case flags and shift amount, plus
//rounding mode and a single flag.
typedef struct packed {
|
||||
fp_t rs1;
|
||||
logic rs1_hidden;
|
||||
special_case_t special_case;
|
||||
fp_shift_amt_t rs1_prenormalize_shift_amt;
|
||||
rm_t rm;
|
||||
logic single;
|
||||
} fp_sqrt_inputs_t;
|
||||
|
||||
//Shared argument bundle for a group of miscellaneous operations.
//The first five bits select the operation (s2d when none is set); the
//remaining fields are grouped by the operations that read them, as noted in
//the comments below.
//NOTE(review): "wb2fp" presumably means the result is written back to the
//floating-point register file - confirm.
typedef struct packed {
|
||||
logic i2f;
|
||||
logic fminmax;
|
||||
logic fsgnj;
|
||||
logic fmv;
|
||||
logic d2s;
|
||||
//s2d is implicit if others unset
|
||||
|
||||
//Used by FMV
|
||||
logic[31:0] int_rs;
|
||||
//Used by S2D, D2S
|
||||
logic rs1_hidden;
|
||||
special_case_t rs1_special_case;
|
||||
//Used by S2D, D2S, FSGNJ
|
||||
fp_t rs1;
|
||||
//Used by FSGNJ
|
||||
logic fsgnj_single;
|
||||
logic rs1_boxed;
|
||||
logic rs2_boxed;
|
||||
//Used by FSGNJ, FMINMAX
|
||||
logic swap;
|
||||
fp_t rs2;
|
||||
//Used by FSGNJ, FMINMAX, I2F
|
||||
logic single;
|
||||
rm_t rm;
|
||||
//Used by FMINMAX
|
||||
special_case_t rs2_special_case;
|
||||
//Used by I2F
|
||||
logic[31:0] int_rs_abs;
|
||||
logic i2f_sign;
|
||||
} fp_wb2fp_misc_inputs_t;
|
||||
|
||||
//Shared argument bundle for a second group of miscellaneous operations.
//The first three bits select the operation (fmv when none is set); the
//remaining fields are grouped by the operations that read them, as noted in
//the comments below.
//NOTE(review): "wb2int" presumably means the result is written back to the
//integer register file - confirm.
typedef struct packed {
|
||||
logic fclass;
|
||||
logic fcmp;
|
||||
logic f2i;
|
||||
//fmv is implicit if others unset
|
||||
|
||||
//Used by f2i
|
||||
logic int_less_than_1;
|
||||
expo_d_t rs1_expo_unbiased;
|
||||
//Used by fclass, fcmp, f2i
|
||||
fp_t rs1;
|
||||
//Used by fclass
|
||||
logic rs1_original_hidden_bit;
|
||||
//Used by fclass, fcmp
|
||||
special_case_t rs1_special_case;
|
||||
//Used by fcmp
|
||||
special_case_t rs2_special_case;
|
||||
fp_t rs2;
|
||||
logic swap;
|
||||
//Used by fcmp as fn3 and f2i as rounding
|
||||
rm_t rm;
|
||||
//Used by f2i
|
||||
logic rs1_hidden;
|
||||
logic is_signed;
|
||||
} fp_wb2int_misc_inputs_t;
|
||||
|
||||
endpackage
|
|
@ -64,24 +64,18 @@ interface unit_issue_interface;
|
|||
modport unit (output ready, input possible_issue, new_request, id);
|
||||
endinterface
|
||||
|
||||
interface unit_writeback_interface;
|
||||
import riscv_types::*;
|
||||
interface unit_writeback_interface #(parameter DATA_WIDTH = 32);
|
||||
import cva5_types::*;
|
||||
|
||||
logic ack;
|
||||
//Handshaking
|
||||
logic ack;
|
||||
logic done;
|
||||
|
||||
id_t id;
|
||||
logic done;
|
||||
logic [XLEN-1:0] rd;
|
||||
id_t id;
|
||||
logic [DATA_WIDTH-1:0] rd;
|
||||
|
||||
modport unit (
|
||||
input ack,
|
||||
output id, done, rd
|
||||
);
|
||||
modport wb (
|
||||
output ack,
|
||||
input id, done, rd
|
||||
);
|
||||
modport unit (input ack, output done, id, rd);
|
||||
modport wb (output ack, input done, id, rd);
|
||||
endinterface
|
||||
|
||||
interface ras_interface;
|
||||
|
@ -114,25 +108,11 @@ interface exception_interface;
|
|||
modport econtrol (input valid, code, id, tval, output ack);
|
||||
endinterface
|
||||
|
||||
interface csr_exception_interface;
|
||||
import riscv_types::*;
|
||||
import cva5_types::*;
|
||||
|
||||
logic valid;
|
||||
exception_code_t code;
|
||||
logic [31:0] tval;
|
||||
logic [31:0] exception_pc;
|
||||
logic [31:0] trap_pc;
|
||||
|
||||
modport econtrol (output valid, code, tval, exception_pc, input trap_pc);
|
||||
modport csr (input valid, code, tval, exception_pc, output trap_pc);
|
||||
endinterface
|
||||
|
||||
interface fifo_interface #(parameter DATA_WIDTH = 42);//#(parameter type data_type = logic[31:0]);
|
||||
interface fifo_interface #(parameter type DATA_TYPE = logic);
|
||||
logic push;
|
||||
logic pop;
|
||||
logic [DATA_WIDTH-1:0] data_in;
|
||||
logic [DATA_WIDTH-1:0] data_out;
|
||||
DATA_TYPE data_in;
|
||||
DATA_TYPE data_out;
|
||||
logic valid;
|
||||
logic full;
|
||||
logic potential_push;
|
||||
|
@ -198,12 +178,17 @@ interface load_store_queue_interface;
|
|||
lsq_entry_t data_in;
|
||||
logic potential_push;
|
||||
logic push;
|
||||
logic full;
|
||||
logic load_pop;
|
||||
logic store_pop;
|
||||
|
||||
//LSQ outputs
|
||||
data_access_shared_inputs_t data_out;
|
||||
logic valid;
|
||||
logic pop;
|
||||
data_access_shared_inputs_t load_data_out;
|
||||
data_access_shared_inputs_t store_data_out;
|
||||
|
||||
logic load_valid;
|
||||
logic store_valid;
|
||||
|
||||
logic full;
|
||||
|
||||
//LSQ status
|
||||
logic sq_empty;
|
||||
|
@ -211,12 +196,12 @@ interface load_store_queue_interface;
|
|||
logic no_released_stores_pending;
|
||||
|
||||
modport queue (
|
||||
input data_in, potential_push, push, pop,
|
||||
output full, data_out, valid, sq_empty, empty, no_released_stores_pending
|
||||
input data_in, potential_push, push, load_pop, store_pop,
|
||||
output full, load_data_out, store_data_out, load_valid, store_valid, sq_empty, empty, no_released_stores_pending
|
||||
);
|
||||
modport ls (
|
||||
output data_in, potential_push, push, pop,
|
||||
input full, data_out, valid, sq_empty, empty, no_released_stores_pending
|
||||
output data_in, potential_push, push, load_pop, store_pop,
|
||||
input full, load_data_out, store_data_out, load_valid, store_valid, sq_empty, empty, no_released_stores_pending
|
||||
);
|
||||
endinterface
|
||||
|
||||
|
@ -228,12 +213,11 @@ interface store_queue_interface;
|
|||
//Issue inputs
|
||||
lsq_entry_t data_in;
|
||||
logic push;
|
||||
logic full;
|
||||
logic pop;
|
||||
|
||||
sq_entry_t data_out;
|
||||
|
||||
logic valid;
|
||||
logic pop;
|
||||
logic full;
|
||||
|
||||
//SQ status
|
||||
logic empty;
|
||||
|
@ -249,26 +233,29 @@ interface store_queue_interface;
|
|||
);
|
||||
endinterface
|
||||
|
||||
interface writeback_store_interface;
|
||||
import riscv_types::*;
|
||||
import cva5_types::*;
|
||||
interface cache_functions_interface #(parameter int TAG_W = 8, parameter int LINE_W = 4, parameter int SUB_LINE_W = 2);
|
||||
|
||||
id_t id_needed;
|
||||
logic possibly_waiting;
|
||||
logic waiting;
|
||||
logic ack;
|
||||
function logic [LINE_W-1:0] xor_mask (int WAY);
|
||||
for (int i = 0; i < LINE_W; i++)
|
||||
xor_mask[i] = ((WAY % 2) == 0) ? 1'b1 : 1'b0;
|
||||
endfunction
|
||||
|
||||
logic id_done;
|
||||
logic [31:0] data;
|
||||
function logic [LINE_W-1:0] getHashedLineAddr (logic[31:0] addr, int WAY);
|
||||
getHashedLineAddr = addr[2 + SUB_LINE_W +: LINE_W] ^ (addr[2 + SUB_LINE_W + LINE_W +: LINE_W] & xor_mask(WAY));
|
||||
endfunction
|
||||
|
||||
function logic[TAG_W-1:0] getTag(logic[31:0] addr);
|
||||
getTag = addr[2 + LINE_W + SUB_LINE_W +: TAG_W];
|
||||
endfunction
|
||||
|
||||
function logic [LINE_W-1:0] getTagLineAddr (logic[31:0] addr);
|
||||
getTagLineAddr = addr[2 + SUB_LINE_W +: LINE_W];
|
||||
endfunction
|
||||
|
||||
function logic [LINE_W+SUB_LINE_W-1:0] getDataLineAddr (logic[31:0] addr);
|
||||
getDataLineAddr = addr[2 +: LINE_W + SUB_LINE_W];
|
||||
endfunction
|
||||
|
||||
modport ls (
|
||||
input id_done, data,
|
||||
output id_needed, possibly_waiting ,waiting, ack
|
||||
);
|
||||
modport wb (
|
||||
input id_needed, possibly_waiting, waiting, ack,
|
||||
output id_done, data
|
||||
);
|
||||
endinterface
|
||||
|
||||
interface addr_utils_interface #(parameter bit [31:0] BASE_ADDR = 32'h00000000, parameter bit [31:0] UPPER_BOUND = 32'hFFFFFFFF);
|
||||
|
@ -285,6 +272,7 @@ interface addr_utils_interface #(parameter bit [31:0] BASE_ADDR = 32'h00000000,
|
|||
|
||||
localparam int unsigned BIT_RANGE = bit_range();
|
||||
|
||||
/* verilator lint_off SELRANGE */
|
||||
function address_range_check (input logic[31:0] addr);
|
||||
return (BIT_RANGE == 0) ? 1 : (addr[31:32-BIT_RANGE] == BASE_ADDR[31:32-BIT_RANGE]);
|
||||
endfunction
|
||||
|
@ -327,21 +315,31 @@ interface unsigned_division_interface #(parameter DATA_WIDTH = 32);
|
|||
modport divider (output remainder, quotient, done, input dividend, dividend_CLZ, divisor, divisor_CLZ, divisor_is_zero, start);
|
||||
endinterface
|
||||
|
||||
interface renamer_interface #(parameter NUM_WB_GROUPS = 2);
|
||||
import cva5_config::*;
|
||||
//Signal bundle between a requester and a square-root unit.
//  DATA_WIDTH : width of radicand, remainder and result (default 32)
//The requester modport drives radicand and start and reads remainder, result
//and done; the sqrt modport has the opposite directions.
//NOTE(review): handshake timing (e.g. how long start/done are held) is not
//defined here; see the module that implements the sqrt modport.
interface unsigned_sqrt_interface #(parameter DATA_WIDTH = 32);
|
||||
logic start;
|
||||
logic [DATA_WIDTH-1:0] radicand;
|
||||
logic [DATA_WIDTH-1:0] remainder;
|
||||
logic [DATA_WIDTH-1:0] result;
|
||||
logic done;
|
||||
|
||||
modport requester (input remainder, result, done, output radicand, start);
|
||||
modport sqrt (output remainder, result, done, input radicand, start);
|
||||
endinterface
|
||||
|
||||
interface renamer_interface #(parameter NUM_WB_GROUPS = 3, parameter READ_PORTS = 2);
|
||||
import riscv_types::*;
|
||||
import cva5_types::*;
|
||||
|
||||
rs_addr_t rd_addr;
|
||||
rs_addr_t rs_addr [REGFILE_READ_PORTS];
|
||||
rs_addr_t rs_addr [READ_PORTS];
|
||||
logic [$clog2(NUM_WB_GROUPS)-1:0] rd_wb_group;
|
||||
logic uses_rd;
|
||||
id_t id;
|
||||
|
||||
phys_addr_t phys_rs_addr [REGFILE_READ_PORTS];
|
||||
phys_addr_t phys_rs_addr [READ_PORTS];
|
||||
phys_addr_t phys_rd_addr;
|
||||
|
||||
logic [$clog2(NUM_WB_GROUPS)-1:0] rs_wb_group [REGFILE_READ_PORTS];
|
||||
logic [$clog2(NUM_WB_GROUPS)-1:0] rs_wb_group [READ_PORTS];
|
||||
|
||||
modport renamer (
|
||||
input rd_addr, rs_addr, rd_wb_group, uses_rd, id,
|
||||
|
@ -353,16 +351,14 @@ interface renamer_interface #(parameter NUM_WB_GROUPS = 2);
|
|||
);
|
||||
endinterface
|
||||
|
||||
interface register_file_issue_interface #(parameter NUM_WB_GROUPS = 2);
|
||||
import cva5_config::*;
|
||||
import riscv_types::*;
|
||||
interface register_file_issue_interface #(parameter NUM_WB_GROUPS = 3, parameter DATA_WIDTH = 32, parameter READ_PORTS = 2);
|
||||
import cva5_types::*;
|
||||
|
||||
//read interface
|
||||
phys_addr_t phys_rs_addr [REGFILE_READ_PORTS];
|
||||
logic [$clog2(NUM_WB_GROUPS)-1:0] rs_wb_group [REGFILE_READ_PORTS];
|
||||
logic [31:0] data [REGFILE_READ_PORTS];
|
||||
logic inuse [REGFILE_READ_PORTS];
|
||||
phys_addr_t phys_rs_addr [READ_PORTS];
|
||||
logic [$clog2(NUM_WB_GROUPS)-1:0] rs_wb_group [READ_PORTS];
|
||||
logic [DATA_WIDTH-1:0] data [READ_PORTS];
|
||||
logic inuse [READ_PORTS];
|
||||
|
||||
//issue write interface
|
||||
phys_addr_t phys_rd_addr;
|
||||
|
@ -377,3 +373,36 @@ interface register_file_issue_interface #(parameter NUM_WB_GROUPS = 2);
|
|||
input data, inuse
|
||||
);
|
||||
endinterface
|
||||
|
||||
//Writeback-style interface carrying a floating-point result together with
//additional side-band fields (flags, rounding mode, grs bits, shift amounts
//and several status bits).
//The unit modport drives everything except ack; the wb modport drives only
//ack and reads the rest.
//NOTE(review): the name suggests the result is still "intermediate", i.e.
//normalization/rounding happens on the wb side using these fields - confirm
//against the module connected to the wb modport.
interface fp_intermediate_wb_interface;
|
||||
import cva5_types::*;
|
||||
import fpu_types::*;
|
||||
|
||||
logic ack;
|
||||
|
||||
id_t id;
|
||||
logic done;
|
||||
fp_t rd;
|
||||
logic expo_overflow;
|
||||
fflags_t fflags;
|
||||
rm_t rm;
|
||||
logic carry;
|
||||
logic safe;
|
||||
logic hidden;
|
||||
grs_t grs;
|
||||
fp_shift_amt_t clz;
|
||||
logic right_shift;
|
||||
fp_shift_amt_t right_shift_amt;
|
||||
logic subnormal;
|
||||
logic ignore_max_expo;
|
||||
logic d2s;
|
||||
|
||||
modport unit (
|
||||
input ack,
|
||||
output id, done, rd, expo_overflow, fflags, rm, hidden, grs, clz, carry, safe, subnormal, right_shift, right_shift_amt, ignore_max_expo, d2s
|
||||
);
|
||||
modport wb (
|
||||
output ack,
|
||||
input id, done, rd, expo_overflow, fflags, rm, hidden, grs, clz, carry, safe, subnormal, right_shift, right_shift_amt, ignore_max_expo, d2s
|
||||
);
|
||||
|
||||
endinterface
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2020 Eric Matthews, Lesley Shannon
|
||||
* Copyright © 2017-2020 Eric Matthews, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
@ -20,23 +20,7 @@
|
|||
* Eric Matthews <ematthew@sfu.ca>
|
||||
*/
|
||||
|
||||
module illegal_instruction_checker
|
||||
|
||||
import cva5_config::*;
|
||||
import riscv_types::*;
|
||||
|
||||
# (
|
||||
parameter cpu_config_t CONFIG = EXAMPLE_CONFIG
|
||||
)
|
||||
|
||||
(
|
||||
input logic [31:0] instruction,
|
||||
output logic illegal_instruction
|
||||
);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Instruction Patterns for Illegal Instruction Checking
|
||||
|
||||
package opcodes;
|
||||
//Base ISA
|
||||
localparam [31:0] BEQ = 32'b?????????????????000?????1100011;
|
||||
localparam [31:0] BNE = 32'b?????????????????001?????1100011;
|
||||
|
@ -98,6 +82,62 @@ module illegal_instruction_checker
|
|||
localparam [31:0] REM = 32'b0000001??????????110?????0110011;
|
||||
localparam [31:0] REMU = 32'b0000001??????????111?????0110011;
|
||||
|
||||
//Single precision
|
||||
localparam [31:0] SP_FLW = 32'b?????????????????010?????0000111;
|
||||
localparam [31:0] SP_FSW = 32'b?????????????????010?????0100111;
|
||||
localparam [31:0] SP_FMADD = 32'b?????00??????????????????1000011;
|
||||
localparam [31:0] SP_FMSUB = 32'b?????00??????????????????1000111;
|
||||
localparam [31:0] SP_FNMSUB = 32'b?????00??????????????????1001011;
|
||||
localparam [31:0] SP_FNMADD = 32'b?????00??????????????????1001111;
|
||||
localparam [31:0] SP_FADD = 32'b0000000??????????????????1010011;
|
||||
localparam [31:0] SP_FSUB = 32'b0000100??????????????????1010011;
|
||||
localparam [31:0] SP_FMUL = 32'b0001000??????????????????1010011;
|
||||
localparam [31:0] SP_FDIV = 32'b0001100??????????????????1010011;
|
||||
localparam [31:0] SP_FSQRT = 32'b010110000000?????????????1010011;
|
||||
localparam [31:0] SP_FSGNJ = 32'b0010000??????????000?????1010011;
|
||||
localparam [31:0] SP_FSGNJN = 32'b0010000??????????001?????1010011;
|
||||
localparam [31:0] SP_FSGNJX = 32'b0010000??????????010?????1010011;
|
||||
localparam [31:0] SP_FMIN = 32'b0010100??????????000?????1010011;
|
||||
localparam [31:0] SP_FMAX = 32'b0010100??????????001?????1010011;
|
||||
localparam [31:0] SP_FCVT_W_S = 32'b110000000000?????????????1010011;
|
||||
localparam [31:0] SP_FCVT_WU_S = 32'b110000000001?????????????1010011;
|
||||
localparam [31:0] SP_FMV_X_W = 32'b111000000000?????000?????1010011;
|
||||
localparam [31:0] SP_FEQ = 32'b1010000??????????010?????1010011;
|
||||
localparam [31:0] SP_FLT = 32'b1010000??????????001?????1010011;
|
||||
localparam [31:0] SP_FLE = 32'b1010000??????????000?????1010011;
|
||||
localparam [31:0] SP_FCLASS = 32'b111000000000?????001?????1010011;
|
||||
localparam [31:0] SP_FCVT_S_W = 32'b110100000000?????????????1010011;
|
||||
localparam [31:0] SP_FCVT_S_WU = 32'b110100000001?????????????1010011;
|
||||
localparam [31:0] SP_FMV_W_X = 32'b111100000000?????000?????1010011;
|
||||
|
||||
//Double precision
|
||||
localparam [31:0] DP_FLD = 32'b?????????????????011?????0000111;
|
||||
localparam [31:0] DP_FSD = 32'b?????????????????011?????0100111;
|
||||
localparam [31:0] DP_FMADD = 32'b?????01??????????????????1000011;
|
||||
localparam [31:0] DP_FMSUB = 32'b?????01??????????????????1000111;
|
||||
localparam [31:0] DP_FNMSUB = 32'b?????01??????????????????1001011;
|
||||
localparam [31:0] DP_FNMADD = 32'b?????01??????????????????1001111;
|
||||
localparam [31:0] DP_FADD = 32'b0000001??????????????????1010011;
|
||||
localparam [31:0] DP_FSUB = 32'b0000101??????????????????1010011;
|
||||
localparam [31:0] DP_FMUL = 32'b0001001??????????????????1010011;
|
||||
localparam [31:0] DP_FDIV = 32'b0001101??????????????????1010011;
|
||||
localparam [31:0] DP_FSQRT = 32'b010110100000?????????????1010011;
|
||||
localparam [31:0] DP_FSGNJ = 32'b0010001??????????000?????1010011;
|
||||
localparam [31:0] DP_FSGNJN = 32'b0010001??????????001?????1010011;
|
||||
localparam [31:0] DP_FSGNJX = 32'b0010001??????????010?????1010011;
|
||||
localparam [31:0] DP_FMIN = 32'b0010101??????????000?????1010011;
|
||||
localparam [31:0] DP_FMAX = 32'b0010101??????????001?????1010011;
|
||||
localparam [31:0] DP_FCVT_S_D = 32'b010000000001?????????????1010011;
|
||||
localparam [31:0] DP_FCVT_D_S = 32'b010000100000?????????????1010011;
|
||||
localparam [31:0] DP_FEQ = 32'b1010001??????????010?????1010011;
|
||||
localparam [31:0] DP_FLT = 32'b1010001??????????001?????1010011;
|
||||
localparam [31:0] DP_FLE = 32'b1010001??????????000?????1010011;
|
||||
localparam [31:0] DP_FCLASS = 32'b111000100000?????001?????1010011;
|
||||
localparam [31:0] DP_FCVT_W_D = 32'b110000100000?????????????1010011;
|
||||
localparam [31:0] DP_FCVT_WU_D = 32'b110000100001?????????????1010011;
|
||||
localparam [31:0] DP_FCVT_D_W = 32'b110100100000?????????????1010011;
|
||||
localparam [31:0] DP_FCVT_D_WU = 32'b110100100001?????????????1010011;
|
||||
|
||||
//AMO
|
||||
localparam [31:0] AMO_ADD = 32'b00000????????????010?????0101111;
|
||||
localparam [31:0] AMO_XOR = 32'b00100????????????010?????0101111;
|
||||
|
@ -117,93 +157,12 @@ module illegal_instruction_checker
|
|||
localparam [31:0] SFENCE_VMA = 32'b0001001??????????000000001110011;
|
||||
localparam [31:0] WFI = 32'b00010000010100000000000001110011;
|
||||
|
||||
logic base_legal;
|
||||
logic csr_legal;
|
||||
logic csr_addr_base;
|
||||
logic csr_addr_machine;
|
||||
logic csr_addr_supervisor;
|
||||
logic csr_addr_debug;
|
||||
logic mul_legal;
|
||||
logic div_legal;
|
||||
logic ifence_legal;
|
||||
logic amo_legal;
|
||||
logic machine_legal;
|
||||
logic supervisor_legal;
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
//Cache
|
||||
localparam [31:0] CBO_INVAL = 32'b000000000000?????010000000001111;
|
||||
localparam [31:0] CBO_CLEAN = 32'b000000000001?????010000000001111;
|
||||
localparam [31:0] CBO_FLUSH = 32'b000000000010?????010000000001111;
|
||||
|
||||
assign base_legal = instruction inside {
|
||||
BEQ, BNE, BLT, BGE, BLTU, BGEU, JALR, JAL, LUI, AUIPC,
|
||||
ADDI, SLLI, SLTI, SLTIU, XORI, SRLI, SRAI, ORI, ANDI,
|
||||
ADD, SUB, SLL, SLT, SLTU, XOR, SRL, SRA, OR, AND,
|
||||
LB, LH, LW, LBU, LHU, SB, SH, SW,
|
||||
FENCE
|
||||
};
|
||||
|
||||
assign csr_addr_base = instruction[31:20] inside {
|
||||
FFLAGS, FRM, FCSR,
|
||||
CYCLE, TIME, INSTRET, CYCLEH, TIMEH, INSTRETH
|
||||
};
|
||||
localparam [31:0] CUSTOM = 32'b?????????????????????????1111011;
|
||||
|
||||
assign csr_addr_machine = instruction[31:20] inside {
|
||||
MVENDORID, MARCHID, MIMPID, MHARTID,
|
||||
MSTATUS, MISA, MEDELEG, MIDELEG, MIE, MTVEC, MCOUNTEREN,
|
||||
MSCRATCH, MEPC, MCAUSE, MTVAL, MIP,
|
||||
MCYCLE, MINSTRET, MCYCLEH, MINSTRETH
|
||||
};
|
||||
|
||||
assign csr_addr_supervisor = instruction[31:20] inside {
|
||||
SSTATUS, SEDELEG, SIDELEG, SIE, STVEC, SCOUNTEREN,
|
||||
SSCRATCH, SEPC, SCAUSE, STVAL, SIP,
|
||||
SATP
|
||||
};
|
||||
|
||||
assign csr_addr_debug = instruction[31:20] inside {
|
||||
DCSR, DPC, DSCRATCH
|
||||
};
|
||||
|
||||
//Privilege check done later on instruction issue
|
||||
//Here we just check instruction encoding and valid CSR address
|
||||
assign csr_legal = instruction inside {
|
||||
CSRRW, CSRRS, CSRRC, CSRRWI, CSRRSI, CSRRCI
|
||||
} && (
|
||||
csr_addr_base |
|
||||
(CONFIG.INCLUDE_M_MODE & csr_addr_machine) |
|
||||
(CONFIG.INCLUDE_S_MODE & csr_addr_supervisor)
|
||||
);
|
||||
|
||||
assign mul_legal = instruction inside {
|
||||
MUL, MULH, MULHSU, MULHU
|
||||
};
|
||||
|
||||
assign div_legal = instruction inside {
|
||||
DIV, DIVU, REM, REMU
|
||||
};
|
||||
|
||||
assign ifence_legal = instruction inside {FENCE_I};
|
||||
|
||||
assign amo_legal = instruction inside {
|
||||
AMO_ADD, AMO_XOR, AMO_OR, AMO_AND, AMO_MIN, AMO_MAX, AMO_MINU, AMO_MAXU, AMO_SWAP,
|
||||
LR, SC
|
||||
};
|
||||
|
||||
assign machine_legal = instruction inside {
|
||||
MRET, ECALL, EBREAK
|
||||
};
|
||||
|
||||
assign supervisor_legal = instruction inside {
|
||||
SRET, SFENCE_VMA, WFI
|
||||
};
|
||||
|
||||
assign illegal_instruction = ~(
|
||||
base_legal |
|
||||
(CONFIG.INCLUDE_CSRS & csr_legal) |
|
||||
(CONFIG.INCLUDE_MUL & mul_legal) |
|
||||
(CONFIG.INCLUDE_DIV & div_legal) |
|
||||
(CONFIG.INCLUDE_AMO & amo_legal) |
|
||||
(CONFIG.INCLUDE_IFENCE & ifence_legal) |
|
||||
(CONFIG.INCLUDE_M_MODE & machine_legal) |
|
||||
(CONFIG.INCLUDE_S_MODE & supervisor_legal)
|
||||
);
|
||||
|
||||
endmodule
|
||||
endpackage
|
|
@ -35,7 +35,8 @@ package riscv_types;
|
|||
logic [4:0] rs1_addr;
|
||||
logic [2:0] fn3;
|
||||
logic [4:0] rd_addr;
|
||||
logic [6:0] opcode;
|
||||
logic [6:2] upper_opcode;
|
||||
logic [1:0] lower_opcode;
|
||||
} common_instruction_t;
|
||||
|
||||
typedef enum logic [4:0] {
|
||||
|
@ -51,7 +52,14 @@ package riscv_types;
|
|||
FENCE_T = 5'b00011,
|
||||
AMO_T = 5'b01011,
|
||||
SYSTEM_T = 5'b11100,
|
||||
//end of RV32I
|
||||
FPU_LOAD_T = 5'b00001,
|
||||
FPU_STORE_T = 5'b01001,
|
||||
FPU_MADD_T = 5'b10000,
|
||||
FPU_MSUB_T = 5'b10001,
|
||||
FPU_NMSUB_T = 5'b10010,
|
||||
FPU_NMADD_T = 5'b10011,
|
||||
FPU_OP_T = 5'b10100,
|
||||
//end of RV32IMD
|
||||
CUSTOM_T = 5'b11110
|
||||
} opcodes_trimmed_t;
|
||||
|
||||
|
@ -70,7 +78,7 @@ package riscv_types;
|
|||
LS_B_fn3 = 3'b000,
|
||||
LS_H_fn3 = 3'b001,
|
||||
LS_W_fn3 = 3'b010,
|
||||
//unused 011
|
||||
LS_D_fn3 = 3'b011,
|
||||
L_BU_fn3 = 3'b100,
|
||||
L_HU_fn3 = 3'b101
|
||||
//unused 110
|
||||
|
@ -282,4 +290,39 @@ package riscv_types;
|
|||
logic [XLEN-1:0] t5;
|
||||
logic [XLEN-1:0] t6;
|
||||
} simulation_named_regfile;
|
||||
|
||||
//Named view of 32 floating-point registers, each FLEN bits wide.
//Fields are listed in the order of the RISC-V ABI names for f0..f31:
//ft0-ft7, fs0-fs1, fa0-fa7, fs2-fs11, ft8-ft11.
//The struct is packed, so the first field listed (ft0) occupies the most
//significant FLEN bits and ft11 the least significant.
//NOTE(review): per the type name this is for simulation/debug visibility;
//how it is mapped onto the physical register file is not shown here.
typedef struct packed{
|
||||
logic [FLEN-1:0] ft0;
|
||||
logic [FLEN-1:0] ft1;
|
||||
logic [FLEN-1:0] ft2;
|
||||
logic [FLEN-1:0] ft3;
|
||||
logic [FLEN-1:0] ft4;
|
||||
logic [FLEN-1:0] ft5;
|
||||
logic [FLEN-1:0] ft6;
|
||||
logic [FLEN-1:0] ft7;
|
||||
logic [FLEN-1:0] fs0;
|
||||
logic [FLEN-1:0] fs1;
|
||||
logic [FLEN-1:0] fa0;
|
||||
logic [FLEN-1:0] fa1;
|
||||
logic [FLEN-1:0] fa2;
|
||||
logic [FLEN-1:0] fa3;
|
||||
logic [FLEN-1:0] fa4;
|
||||
logic [FLEN-1:0] fa5;
|
||||
logic [FLEN-1:0] fa6;
|
||||
logic [FLEN-1:0] fa7;
|
||||
logic [FLEN-1:0] fs2;
|
||||
logic [FLEN-1:0] fs3;
|
||||
logic [FLEN-1:0] fs4;
|
||||
logic [FLEN-1:0] fs5;
|
||||
logic [FLEN-1:0] fs6;
|
||||
logic [FLEN-1:0] fs7;
|
||||
logic [FLEN-1:0] fs8;
|
||||
logic [FLEN-1:0] fs9;
|
||||
logic [FLEN-1:0] fs10;
|
||||
logic [FLEN-1:0] fs11;
|
||||
logic [FLEN-1:0] ft8;
|
||||
logic [FLEN-1:0] ft9;
|
||||
logic [FLEN-1:0] ft10;
|
||||
logic [FLEN-1:0] ft11;
|
||||
} fp_simulation_named_regfile;
|
||||
endpackage
|
|
@ -23,111 +23,57 @@
|
|||
module writeback
|
||||
|
||||
import cva5_config::*;
|
||||
import riscv_types::*;
|
||||
import cva5_types::*;
|
||||
|
||||
# (
|
||||
parameter cpu_config_t CONFIG = EXAMPLE_CONFIG,
|
||||
parameter int unsigned NUM_UNITS [CONFIG.NUM_WB_GROUPS] = '{1, 4},
|
||||
parameter int unsigned NUM_WB_UNITS = 5
|
||||
parameter int unsigned NUM_WB_UNITS = 5,
|
||||
parameter unit_id_enum_t [MAX_NUM_UNITS-1:0] WB_INDEX = '{0: ALU_ID, 1: MUL_ID, 2: DIV_ID, 3: LS_ID, 4: CSR_ID, 5: FPU_ID, default: NON_WRITEBACK_ID}
|
||||
)
|
||||
|
||||
(
|
||||
input logic clk,
|
||||
input logic rst,
|
||||
//Unit writeback
|
||||
unit_writeback_interface.wb unit_wb[NUM_WB_UNITS],
|
||||
unit_writeback_interface.wb unit_wb[MAX_NUM_UNITS],
|
||||
//WB output
|
||||
output wb_packet_t wb_packet [CONFIG.NUM_WB_GROUPS],
|
||||
//Snoop interface (LS unit)
|
||||
output wb_packet_t wb_snoop
|
||||
output wb_packet_t wb_packet
|
||||
);
|
||||
|
||||
//Writeback
|
||||
logic [NUM_WB_UNITS-1:0] unit_ack [CONFIG.NUM_WB_GROUPS];
|
||||
//aliases for write-back-interface signals
|
||||
id_t [NUM_WB_UNITS-1:0] unit_instruction_id [CONFIG.NUM_WB_GROUPS];
|
||||
logic [NUM_WB_UNITS-1:0] unit_done [CONFIG.NUM_WB_GROUPS];
|
||||
id_t [NUM_WB_UNITS-1:0] unit_instruction_id;
|
||||
logic [NUM_WB_UNITS-1:0] unit_done;
|
||||
logic [31:0] unit_rd [NUM_WB_UNITS];
|
||||
logic [NUM_WB_UNITS-1:0] unit_ack;
|
||||
|
||||
typedef logic [XLEN-1:0] unit_rd_t [NUM_WB_UNITS];
|
||||
unit_rd_t unit_rd [CONFIG.NUM_WB_GROUPS];
|
||||
//Per-ID muxes for commit buffer
|
||||
logic [$clog2(NUM_WB_UNITS)-1:0] unit_sel [CONFIG.NUM_WB_GROUPS];
|
||||
localparam int unsigned LOG2_NUM_WB_UNITS = (NUM_WB_UNITS == 1) ? 1 : $clog2(NUM_WB_UNITS);
|
||||
logic [LOG2_NUM_WB_UNITS-1:0] unit_sel;
|
||||
|
||||
typedef int unsigned unit_count_t [CONFIG.NUM_WB_GROUPS];
|
||||
|
||||
function unit_count_t get_cumulative_unit_count();
|
||||
unit_count_t counts;
|
||||
int unsigned cumulative_count = 0;
|
||||
for (int i = 0; i < CONFIG.NUM_WB_GROUPS; i++) begin
|
||||
counts[i] = cumulative_count;
|
||||
cumulative_count += NUM_UNITS[i];
|
||||
end
|
||||
return counts;
|
||||
endfunction
|
||||
|
||||
localparam unit_count_t CUMULATIVE_NUM_UNITS = get_cumulative_unit_count();
|
||||
|
||||
genvar i, j;
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
//Re-assigning interface inputs to array types so that they can be dynamically indexed
|
||||
generate
|
||||
for (i = 0; i < CONFIG.NUM_WB_GROUPS; i++) begin : gen_wb_group_unpacking
|
||||
for (j = 0; j < NUM_UNITS[i]; j++) begin : gen_wb_unit_unpacking
|
||||
assign unit_instruction_id[i][j] = unit_wb[CUMULATIVE_NUM_UNITS[i] + j].id;
|
||||
assign unit_done[i][j] = unit_wb[CUMULATIVE_NUM_UNITS[i] + j].done;
|
||||
assign unit_wb[CUMULATIVE_NUM_UNITS[i] + j].ack = unit_ack[i][j];
|
||||
end
|
||||
end
|
||||
endgenerate
|
||||
|
||||
//As units are selected for commit ports based on their unit ID,
|
||||
//for each additional commit port one unit can be skipped for the commit mux
|
||||
generate
|
||||
for (i = 0; i < CONFIG.NUM_WB_GROUPS; i++) begin : gen_wb_port_grouping
|
||||
for (j = 0; j < NUM_UNITS[i]; j++) begin : gen_wb_unit_grouping
|
||||
assign unit_rd[i][j] = unit_wb[CUMULATIVE_NUM_UNITS[i] + j].rd;
|
||||
end
|
||||
end
|
||||
endgenerate
|
||||
generate for (genvar i = 0; i < NUM_WB_UNITS; i++) begin : gen_wb_unit_unpacking
|
||||
assign unit_instruction_id[i] = unit_wb[WB_INDEX[i]].id;
|
||||
assign unit_done[i] = unit_wb[WB_INDEX[i]].done;
|
||||
assign unit_rd[i] = unit_wb[WB_INDEX[i]].rd;
|
||||
assign unit_wb[WB_INDEX[i]].ack = unit_ack[i];
|
||||
end endgenerate
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Unit select for register file
|
||||
//Iterating through all commit ports:
|
||||
// Search for complete units (in fixed unit order)
|
||||
// Assign to a commit port, mask that unit and commit port
|
||||
generate for (i = 0; i < CONFIG.NUM_WB_GROUPS; i++) begin : gen_wb_mux
|
||||
priority_encoder
|
||||
#(.WIDTH(NUM_UNITS[i]))
|
||||
unit_done_encoder
|
||||
(
|
||||
.priority_vector (unit_done[i][NUM_UNITS[i]-1 : 0]),
|
||||
.encoded_result (unit_sel[i][NUM_UNITS[i] == 1 ? 0 : ($clog2(NUM_UNITS[i])-1) : 0])
|
||||
);
|
||||
assign wb_packet[i].valid = |unit_done[i];
|
||||
assign wb_packet [i].id = unit_instruction_id[i][unit_sel[i]];
|
||||
assign wb_packet[i].data = unit_rd[i][unit_sel[i]];
|
||||
priority_encoder #(.WIDTH(NUM_WB_UNITS))
|
||||
unit_done_encoder
|
||||
(
|
||||
.priority_vector (unit_done),
|
||||
.encoded_result (unit_sel)
|
||||
);
|
||||
assign wb_packet = '{
|
||||
valid : |unit_done,
|
||||
id : unit_instruction_id[unit_sel],
|
||||
data : unit_rd[unit_sel]
|
||||
};
|
||||
|
||||
assign unit_ack[i] = NUM_WB_UNITS'(wb_packet[i].valid) << unit_sel[i];
|
||||
end endgenerate
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Store Forwarding Support
|
||||
//TODO: support additional writeback groups
|
||||
//currently limited to one writeback group with the
|
||||
//assumption that writeback group zero has single-cycle
|
||||
//operation
|
||||
//Registers the valid flag of writeback port index 1 into wb_snoop.valid.
//The flag is cleared on a clock edge where rst is asserted.
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
wb_snoop.valid <= 0;
|
||||
else
|
||||
wb_snoop.valid <= wb_packet[1].valid;
|
||||
end
|
||||
//Registers the data and id of writeback port index 1 into wb_snoop on every
//clock edge; these two fields have no reset term.
always_ff @ (posedge clk) begin
|
||||
wb_snoop.data <= wb_packet[1].data;
|
||||
wb_snoop.id <= wb_packet[1].id;
|
||||
end
|
||||
assign unit_ack = NUM_WB_UNITS'(wb_packet.valid) << unit_sel;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//End of Implementation
|
||||
|
|
|
@ -37,13 +37,10 @@ module l1_to_wishbone
|
|||
|
||||
localparam MAX_REQUESTS = 32;
|
||||
|
||||
fifo_interface #(.DATA_WIDTH($bits(l2_request_t))) request_fifo ();
|
||||
fifo_interface #(.DATA_WIDTH($bits(l2_data_request_t))) data_fifo ();
|
||||
fifo_interface #(.DATA_TYPE(l2_request_t)) request_fifo ();
|
||||
fifo_interface #(.DATA_TYPE(l2_data_request_t)) data_fifo ();
|
||||
|
||||
l2_request_t request_in;
|
||||
l2_request_t request;
|
||||
|
||||
l2_data_request_t data_request_in;
|
||||
l2_data_request_t data_request;
|
||||
|
||||
logic request_complete;
|
||||
|
@ -53,34 +50,34 @@ module l1_to_wishbone
|
|||
assign cpu.data_full = data_fifo.full;
|
||||
|
||||
//Repack input attributes
|
||||
assign request_in.addr = cpu.addr;
|
||||
assign request_in.rnw = cpu.rnw;
|
||||
assign request_in.is_amo = cpu.is_amo;
|
||||
assign request_in.amo_type_or_burst_size = cpu.amo_type_or_burst_size;
|
||||
assign request_in.sub_id = cpu.sub_id;
|
||||
|
||||
assign request_fifo.data_in = '{
|
||||
addr : cpu.addr,
|
||||
rnw : cpu.rnw,
|
||||
is_amo : cpu.is_amo,
|
||||
amo_type_or_burst_size : cpu.amo_type_or_burst_size,
|
||||
sub_id : cpu.sub_id
|
||||
};
|
||||
assign request_fifo.push = cpu.request_push;
|
||||
assign request_fifo.potential_push = cpu.request_push;
|
||||
assign request_fifo.pop = request_complete;
|
||||
assign request_fifo.data_in = request_in;
|
||||
assign request = request_fifo.data_out;
|
||||
|
||||
assign data_request_in.data = cpu.wr_data;
|
||||
assign data_request_in.be = cpu.wr_data_be;
|
||||
|
||||
assign data_fifo.push = cpu.wr_data_push;
|
||||
assign data_fifo.potential_push = cpu.wr_data_push;
|
||||
assign data_fifo.pop = wishbone.we & wishbone.ack;
|
||||
assign data_fifo.data_in = data_request_in;
|
||||
//Packs the CPU-side write data and byte enables into the data FIFO input.
//Both fields are members of the cpu interface; the byte enables come from
//cpu.wr_data_be (not a standalone signal named cpu_wr_data_be).
assign data_fifo.data_in = '{
|
||||
data : cpu.wr_data,
|
||||
be : cpu.wr_data_be
|
||||
};
|
||||
assign data_request = data_fifo.data_out;
|
||||
|
||||
cva5_fifo #(.DATA_WIDTH($bits(l2_request_t)), .FIFO_DEPTH(MAX_REQUESTS))
|
||||
cva5_fifo #(.DATA_TYPE(l2_request_t), .FIFO_DEPTH(MAX_REQUESTS))
|
||||
request_fifo_block (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.fifo (request_fifo)
|
||||
);
|
||||
cva5_fifo #(.DATA_WIDTH($bits(l2_data_request_t)), .FIFO_DEPTH(MAX_REQUESTS))
|
||||
cva5_fifo #(.DATA_TYPE(l2_data_request_t), .FIFO_DEPTH(MAX_REQUESTS))
|
||||
data_fifo_block (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
|
|
|
@ -74,15 +74,29 @@ module litex_wrapper
|
|||
input logic idbus_err
|
||||
);
|
||||
|
||||
|
||||
//Writeback group assignment referenced by the WB_GROUP field that accompanies
//NUM_WB_GROUPS : 2 in this file:
//  group 0 : ALU_ID
//  group 1 : LS_ID, CSR_ID
//Every remaining slot and group is filled with NON_WRITEBACK_ID.
localparam wb_group_config_t MINIMAL_WB_GROUP_CONFIG = '{
|
||||
0 : '{0: ALU_ID, default : NON_WRITEBACK_ID},
|
||||
1 : '{0: LS_ID, 1: CSR_ID, default : NON_WRITEBACK_ID},
|
||||
default : '{default : NON_WRITEBACK_ID}
|
||||
};
|
||||
|
||||
localparam cpu_config_t MINIMAL_CONFIG = '{
|
||||
//ISA options
|
||||
INCLUDE_M_MODE : 1,
|
||||
INCLUDE_S_MODE : 0,
|
||||
INCLUDE_U_MODE : 0,
|
||||
INCLUDE_MUL : 0,
|
||||
INCLUDE_DIV : 0,
|
||||
INCLUDE_UNIT : '{
|
||||
ALU : 1,
|
||||
LS : 1,
|
||||
MUL : 0,
|
||||
DIV : 0,
|
||||
CSR : 1,
|
||||
CUSTOM : 0,
|
||||
BR : 1,
|
||||
IEC : 1
|
||||
},
|
||||
INCLUDE_IFENCE : 0,
|
||||
INCLUDE_CSRS : 1,
|
||||
INCLUDE_AMO : 0,
|
||||
//CSR constants
|
||||
CSRS : '{
|
||||
|
@ -102,6 +116,7 @@ module litex_wrapper
|
|||
},
|
||||
//Memory Options
|
||||
SQ_DEPTH : 2,
|
||||
INCLUDE_FORWARDING_TO_STORES : 0,
|
||||
INCLUDE_ICACHE : 0,
|
||||
ICACHE_ADDR : '{
|
||||
L: 32'h40000000,
|
||||
|
@ -171,7 +186,15 @@ module litex_wrapper
|
|||
RAS_ENTRIES : 8
|
||||
},
|
||||
//Writeback Options
|
||||
NUM_WB_GROUPS : 2
|
||||
NUM_WB_GROUPS : 2,
|
||||
WB_GROUP : MINIMAL_WB_GROUP_CONFIG
|
||||
};
|
||||
|
||||
//Writeback group assignment referenced by the WB_GROUP field that accompanies
//NUM_WB_GROUPS : 3 in this file:
//  group 0 : ALU_ID
//  group 1 : LS_ID
//  group 2 : MUL_ID, DIV_ID, CSR_ID, CUSTOM_ID
//Every remaining slot and group is filled with NON_WRITEBACK_ID.
localparam wb_group_config_t STANDARD_WB_GROUP_CONFIG = '{
|
||||
0 : '{0: ALU_ID, default : NON_WRITEBACK_ID},
|
||||
1 : '{0: LS_ID, default : NON_WRITEBACK_ID},
|
||||
2 : '{0: MUL_ID, 1: DIV_ID, 2: CSR_ID, 3: CUSTOM_ID, default : NON_WRITEBACK_ID},
|
||||
default : '{default : NON_WRITEBACK_ID}
|
||||
};
|
||||
|
||||
localparam cpu_config_t STANDARD_CONFIG = '{
|
||||
|
@ -179,10 +202,17 @@ module litex_wrapper
|
|||
INCLUDE_M_MODE : 1,
|
||||
INCLUDE_S_MODE : 0,
|
||||
INCLUDE_U_MODE : 0,
|
||||
INCLUDE_MUL : 1,
|
||||
INCLUDE_DIV : 1,
|
||||
INCLUDE_UNIT : '{
|
||||
ALU : 1,
|
||||
LS : 1,
|
||||
MUL : 1,
|
||||
DIV : 1,
|
||||
CSR : 1,
|
||||
CUSTOM : 0,
|
||||
BR : 1,
|
||||
IEC : 1
|
||||
},
|
||||
INCLUDE_IFENCE : 0,
|
||||
INCLUDE_CSRS : 1,
|
||||
INCLUDE_AMO : 0,
|
||||
//CSR constants
|
||||
CSRS : '{
|
||||
|
@ -202,6 +232,7 @@ module litex_wrapper
|
|||
},
|
||||
//Memory Options
|
||||
SQ_DEPTH : 4,
|
||||
INCLUDE_FORWARDING_TO_STORES : 1,
|
||||
INCLUDE_ICACHE : 1,
|
||||
ICACHE_ADDR : '{
|
||||
L : 32'h00000000,
|
||||
|
@ -271,7 +302,8 @@ module litex_wrapper
|
|||
RAS_ENTRIES : 8
|
||||
},
|
||||
//Writeback Options
|
||||
NUM_WB_GROUPS : 2
|
||||
NUM_WB_GROUPS : 3,
|
||||
WB_GROUP : STANDARD_WB_GROUP_CONFIG
|
||||
};
|
||||
|
||||
function cpu_config_t config_select (input integer variant);
|
||||
|
@ -290,7 +322,6 @@ module litex_wrapper
|
|||
avalon_interface m_avalon();
|
||||
local_memory_interface instruction_bram();
|
||||
local_memory_interface data_bram();
|
||||
trace_outputs_t tr;
|
||||
interrupt_t s_interrupt;
|
||||
|
||||
//L2 to Wishbone
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue