Initial commit

This commit is contained in:
Eric Matthews 2017-08-31 19:28:31 -07:00
commit 56c889adb9
53 changed files with 7289 additions and 0 deletions

6
README.md Normal file
View file

@ -0,0 +1,6 @@
Taiga
================
Taiga is a 32-bit RISC-V processor designed for FPGAs supporting the Multiply/Divide and Atomic extensions (RV32IMA). The processor is written in SystemVerilog and has been designed to be both highly extensible and highly configurable.
Taiga is licensed under the Mozilla Public License, v. 2.0 (http://mozilla.org/MPL/2.0/).

93
core/alu_unit.sv Normal file
View file

@ -0,0 +1,93 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//Integer ALU: add/subtract, set-less-than, bitwise logic and shifts.
//All four candidate results are computed combinationally from alu_inputs and
//alu_inputs.op selects which one is driven onto alu_wb.rd.
//A single 'done' flag tracks whether a result is waiting for writeback.
module alu_unit(
        input logic clk,
        input logic rst,
        func_unit_ex_interface.unit alu_ex,
        unit_writeback_interface.unit alu_wb,
        input alu_inputs_t alu_inputs
        );

    logic [XLEN:0] add_sub_result;
    logic [XLEN-1:0] logic_result;
    logic [XLEN-1:0] result;
    logic done;

    logic [XLEN:0] add_sub_1;
    logic [XLEN:0] add_sub_2;
    logic [XLEN-1:0] shifter_result;

    //Operands are widened to XLEN+1 bits. The extra bit is the operand's sign
    //bit unless alu_inputs.sltu is set, in which case it is zero.
    assign add_sub_1 = {(alu_inputs.in1[XLEN-1] & ~alu_inputs.sltu), alu_inputs.in1};
    assign add_sub_2 = {(alu_inputs.in2[XLEN-1] & ~alu_inputs.sltu), alu_inputs.in2};

    //Add sub op
    assign add_sub_result = alu_inputs.add ? add_sub_1 + add_sub_2 : add_sub_1 - add_sub_2;

    //Barrel Shifter (initial bit flipping occurs in decode/issue stage)
    barrel_shifter shifter (
            .shifter_input(alu_inputs.shifter_in),
            .shift_amount(alu_inputs.in2[4:0]),
            .arith(alu_inputs.arith),
            .left_shift(alu_inputs.left_shift),
            .shifted_result(shifter_result)
        );

    //Logic ops
    always_comb begin
        unique case (alu_inputs.fn3)// <- only 3 of 8 cases
            XOR_fn3 : logic_result = alu_inputs.in1 ^ alu_inputs.in2;
            OR_fn3 : logic_result = alu_inputs.in1 | alu_inputs.in2;
            AND_fn3 : logic_result = alu_inputs.in1 & alu_inputs.in2;
        endcase
    end

    //Result mux
    always_comb begin
        case (alu_inputs.op)
            //Set-less-than result is the top (XLEN) bit of the widened add/sub
            //result, zero-extended to XLEN bits (previously a hard-coded 31'b0)
            ALU_SLT : result = {{(XLEN-1){1'b0}}, add_sub_result[XLEN]};
            ALU_LOGIC : result = logic_result;
            ALU_SHIFT : result = shifter_result;
            ALU_ADD_SUB : result = add_sub_result[XLEN-1:0];
        endcase
    end

    //Ready whenever no result is pending, or the pending one is accepted this cycle
    assign alu_ex.ready = ~done | (done & alu_wb.accepted);

    assign alu_wb.rd = result;

    //done: set by a new request from decode, cleared on writeback accept.
    //A new request has priority over accept, so back-to-back requests keep done high.
    always_ff @(posedge clk) begin
        if (rst) begin
            done <= 0;
        end else if (alu_ex.new_request_dec) begin
            done <= 1;
        end else if (alu_wb.accepted) begin
            done <= 0;
        end
    end

    assign alu_wb.done = done;
    //early_done is the value 'done' will take on the next clock edge (outside reset)
    assign alu_wb.early_done = alu_ex.new_request_dec | (done & ~alu_wb.accepted);

endmodule

42
core/amo_alu.sv Normal file
View file

@ -0,0 +1,42 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//Combinational ALU for atomic memory operations (AMOs).
//  amo_alu_inputs.rs1_load : value read from memory
//  amo_alu_inputs.rs2      : register operand
//  amo_alu_inputs.op       : AMO operation selector
//  result                  : value to be written back to memory
module amo_alu(
        input amo_alu_inputs_t amo_alu_inputs,
        output logic[31:0] result
        );

    //Comparison flags shared by the min/max operations
    logic load_lt_rs2_signed;
    logic load_lt_rs2_unsigned;

    assign load_lt_rs2_signed = signed'(amo_alu_inputs.rs1_load) < signed'(amo_alu_inputs.rs2);
    assign load_lt_rs2_unsigned = unsigned'(amo_alu_inputs.rs1_load) < unsigned'(amo_alu_inputs.rs2);

    always_comb begin
        unique case (amo_alu_inputs.op)// <--unique as not all codes are in use
            AMO_SWAP : result = amo_alu_inputs.rs2;
            AMO_ADD : result = amo_alu_inputs.rs1_load + amo_alu_inputs.rs2;
            AMO_XOR : result = amo_alu_inputs.rs1_load ^ amo_alu_inputs.rs2;
            AMO_AND : result = amo_alu_inputs.rs1_load & amo_alu_inputs.rs2;
            AMO_OR : result = amo_alu_inputs.rs1_load | amo_alu_inputs.rs2;
            //Min/max previously returned placeholder constants; they now select
            //the smaller/larger of the loaded value and rs2
            AMO_MIN : result = load_lt_rs2_signed ? amo_alu_inputs.rs1_load : amo_alu_inputs.rs2;
            AMO_MAX : result = load_lt_rs2_signed ? amo_alu_inputs.rs2 : amo_alu_inputs.rs1_load;
            AMO_MINU : result = load_lt_rs2_unsigned ? amo_alu_inputs.rs1_load : amo_alu_inputs.rs2;
            AMO_MAXU : result = load_lt_rs2_unsigned ? amo_alu_inputs.rs2 : amo_alu_inputs.rs1_load;
        endcase
    end

endmodule

147
core/avalon_master.vhd Normal file
View file

@ -0,0 +1,147 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.numeric_std.ALL;
USE ieee.math_real.log2;
USE ieee.math_real.ceil;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
-- Single-outstanding-request bus master bridging the load/store (L/S)
-- interface to an Avalon-style memory-mapped bus.
--
-- A request is accepted when new_request is high: address, direction and
-- (for writes) data/byte enables are registered and avread or avwrite is
-- raised until the bus deasserts waitrequest.
--
-- NOTE(review): readdatavalid and writeresponsevalid are declared but never
-- read by this architecture; read data is captured whenever the last
-- registered request was a read and waitrequest is low.
entity avalon_master is
    port (
        clk : in std_logic;
        rst : in std_logic;

        --Bus ports
        addr : out std_logic_vector(31 downto 0);
        avread : out std_logic;
        avwrite : out std_logic;
        byteenable : out std_logic_vector(3 downto 0);
        readdata : in std_logic_vector(31 downto 0);
        writedata : out std_logic_vector(31 downto 0);
        waitrequest : in std_logic;
        readdatavalid : in std_logic;
        writeresponsevalid : in std_logic;

        --L/S interface
        addr_in : in std_logic_vector(31 downto 0);
        data_in : in std_logic_vector(31 downto 0);
        data_out : out std_logic_vector(31 downto 0);
        data_valid : out std_logic;
        ready : out std_logic;
        new_request : in std_logic;
        rnw : in std_logic;
        be : in std_logic_vector(3 downto 0);
        data_ack : in std_logic
    );
end avalon_master;

architecture Behavioral of avalon_master is
    -- Direction of the most recently accepted request ('1' = read, '0' = write)
    signal rnw_r : std_logic;
begin

    -- Data-path registers: none of these are reset.
    datapath_regs : process (clk) is
    begin
        if (clk'event and clk = '1') then
            -- Latch the request attributes when a new request arrives
            if (new_request = '1') then
                rnw_r <= rnw;
                addr <= addr_in;
                -- Write payload is only captured for write requests
                if (rnw = '0') then
                    writedata <= data_in;
                    byteenable <= be;
                end if;
            end if;

            -- Capture bus read data while the registered request is a read
            -- and the bus is not stalling
            if (rnw_r = '1' and waitrequest = '0') then
                data_out <= readdata;
            end if;
        end if;
    end process datapath_regs;

    -- Control flags: synchronous reset, each flag keeps its own priority chain.
    control_regs : process (clk) is
    begin
        if (clk'event and clk = '1') then
            if (rst = '1') then
                ready <= '1';
                avread <= '0';
                avwrite <= '0';
                data_valid <= '0';
            else
                -- ready: dropped on a new request, restored when a read is
                -- acknowledged by the L/S side or a write is no longer stalled
                if (new_request = '1') then
                    ready <= '0';
                elsif ((data_ack = '1' and rnw_r = '1') or (waitrequest = '0' and rnw_r = '0')) then
                    ready <= '1';
                end if;

                -- avread: raised for a new read, held until waitrequest is low
                if (new_request = '1' and rnw = '1') then
                    avread <= '1';
                elsif (waitrequest = '0') then
                    avread <= '0';
                end if;

                -- avwrite: raised for a new write, held until waitrequest is low
                if (new_request = '1' and rnw = '0') then
                    avwrite <= '1';
                elsif (waitrequest = '0') then
                    avwrite <= '0';
                end if;

                -- data_valid: cleared by data_ack (priority), otherwise set
                -- under the same condition that captures data_out
                if (data_ack = '1') then
                    data_valid <= '0';
                elsif (rnw_r = '1' and waitrequest = '0') then
                    data_valid <= '1';
                end if;
            end if;
        end if;
    end process control_regs;

end Behavioral;

113
core/axi_master.sv Normal file
View file

@ -0,0 +1,113 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//Single-beat AXI master for the load/store unit.
//One request is outstanding at a time: ls.new_request registers the address,
//size, write data and strobes, then raises the address-valid (and, for stores,
//write-data-valid) signals until the matching ready is seen.
module axi_master
        (
        input logic clk,
        input logic rst,

        axi_interface.master m_axi,
        input logic [2:0] size,
        output logic[31:0] data_out,

        input data_access_shared_inputs_t ls_inputs,
        ls_sub_unit_interface.sub_unit ls
        );

    logic ready;
    logic start_load;
    logic start_store;

    assign start_load = ls.new_request & ls_inputs.load;
    assign start_store = ls.new_request & ls_inputs.store;

    //Constant channel attributes ------------------------------------------
    //read: one beat per request, burst type does not matter, always accept data
    assign m_axi.arlen = 0;
    assign m_axi.arburst = 0;
    assign m_axi.rready = 1;
    //write: one beat per request, always accept the write response
    assign m_axi.awlen = 0;
    assign m_axi.awburst = 0;
    assign m_axi.bready = 1;

    //Request attributes, captured for both channels regardless of direction
    always_ff @ (posedge clk) begin
        if (ls.new_request) begin
            m_axi.araddr <= ls_inputs.addr;
            m_axi.awaddr <= ls_inputs.addr;
            m_axi.arsize <= size;
            m_axi.awsize <= size;
            m_axi.wdata <= ls_inputs.data_in;
            m_axi.wstrb <= ls_inputs.be;
        end
    end

    //L/S handshake --------------------------------------------------------
    //Busy from a new request until the L/S ack or the AXI write response
    always_ff @ (posedge clk) begin
        if (rst) begin
            ready <= 1;
        end
        else if (ls.new_request) begin
            ready <= 0;
        end
        else if (ls.ack | m_axi.bvalid) begin
            ready <= 1;
        end
    end
    assign ls.ready = ready;

    //data_valid: set by returning read data (priority), cleared by ack
    always_ff @ (posedge clk) begin
        if (rst) begin
            ls.data_valid <= 0;
        end
        else if (m_axi.rvalid) begin
            ls.data_valid <= 1;
        end
        else if (ls.ack) begin
            ls.data_valid <= 0;
        end
    end

    //Read channel ---------------------------------------------------------
    always_ff @ (posedge clk) begin
        if (rst) begin
            m_axi.arvalid <= 0;
        end
        else if (start_load) begin
            m_axi.arvalid <= 1;
        end
        else if (m_axi.arready) begin
            m_axi.arvalid <= 0;
        end
    end

    //Returned data is held until the next read beat arrives
    always_ff @ (posedge clk) begin
        if (m_axi.rvalid) begin
            data_out <= m_axi.rdata;
        end
    end

    //Write channel --------------------------------------------------------
    //Address and data valid are raised together and dropped independently
    always_ff @ (posedge clk) begin
        if (rst) begin
            m_axi.awvalid <= 0;
            m_axi.wvalid <= 0;
        end
        else begin
            if (start_store) begin
                m_axi.awvalid <= 1;
            end
            else if (m_axi.awready) begin
                m_axi.awvalid <= 0;
            end

            if (start_store) begin
                m_axi.wvalid <= 1;
            end
            else if (m_axi.wready) begin
                m_axi.wvalid <= 0;
            end
        end
    end

    //Single-beat writes: every valid beat is also the last
    assign m_axi.wlast = m_axi.wvalid;

endmodule

257
core/axi_to_arb.sv Normal file
View file

@ -0,0 +1,257 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
import l2_config_and_types::*;
//Bridge between the L2 arbiter's memory-side interface and an AXI master port.
//Reads are issued as INCR bursts starting at the aligned line address; writes
//are issued as INCR bursts from the request address. AMO read-modify-write
//requests first read the line, feed the addressed word through amo_alu, and
//then write the single resulting word back.
//
//NOTE(review): axi_arlen is 4 bits and axi_awlen 8 bits while the burst count
//is 5 bits, so assignments truncate or zero-extend implicitly - confirm the
//intended AXI version/widths.
module axi_to_arb
        (
        input logic clk,
        input logic rst,

        //read addr channel
        input logic axi_arready,
        output logic axi_arvalid,
        output logic[31:0] axi_araddr,
        output logic[3:0] axi_arlen,
        output logic[2:0] axi_arsize,
        output logic[1:0] axi_arburst,
        output logic[2:0] axi_arprot,
        output logic[3:0] axi_arcache,
        output logic[4:0] axi_arid,

        //read data channel
        output logic axi_rready,
        input logic axi_rvalid,
        input logic[31:0] axi_rdata,
        input logic[1:0] axi_rresp,
        input logic axi_rlast,
        input logic[4:0] axi_rid,

        //write addr channel
        input logic axi_awready,
        output logic axi_awvalid,
        output logic [31:0] axi_awaddr,
        output logic [7:0] axi_awlen,
        output logic [2:0] axi_awsize,
        output logic [1:0] axi_awburst,
        output logic[3:0] axi_awcache,
        output logic[2:0] axi_awprot,

        //write data
        input logic axi_wready,
        output logic axi_wvalid,
        output logic [31:0] axi_wdata,
        output logic [3:0] axi_wstrb,
        output logic axi_wlast,

        //write response
        output logic axi_bready,
        input logic axi_bvalid,
        input logic [1:0] axi_bresp,

        //arb interface
        l2_memory_interface.memory l2
        );

    logic read_modify_write;
    logic read_modify_write_in_progress;
    logic address_phase_complete;
    logic [31:0] amo_result;
    logic [31:0] amo_result_r;
    logic [DCACHE_SUB_LINE_ADDR_W-1:0] read_count;
    logic amo_word_returned;
    logic amo_write_ready;
    logic[4:0] write_reference_burst_count;

    amo_alu_inputs_t amo_alu_inputs;

    logic write_in_progress;
    logic write_transfer_complete;

    logic pop;

    logic[4:0] write_burst_count;
    logic[4:0] burst_count;
    logic on_last_burst;

    //AMO read modify write support ****************************************************
    //An AMO is a read-modify-write unless it is LR or SC. (The original test
    //used '||' between the two inequalities, which is true for every value.)
    assign read_modify_write = l2.request.is_amo && (l2.request.amo_type_or_burst_size != AMO_LR && l2.request.amo_type_or_burst_size != AMO_SC);

    always_ff @ (posedge clk) begin
        if (rst)
            read_modify_write_in_progress <= 0;
        else if (axi_bvalid)
            read_modify_write_in_progress <= 0;
        else if (l2.request_valid & read_modify_write)
            read_modify_write_in_progress <= 1;
    end

    //Set once the read address has been accepted, so that an RMW request (which
    //is not popped on the read) does not re-issue its read; cleared on pop
    always_ff @ (posedge clk) begin
        if (rst)
            address_phase_complete <= 0;
        else if (pop)
            address_phase_complete <= 0;
        else if (axi_arvalid & axi_arready)
            address_phase_complete <= 1;
    end

    //TODO: if the data cache is made non-blocking on a miss then we could capture a previous request here instead of the one we just issued
    //safe under current circumstances as subsequent load won't be issued until the first one returns.
    //Counts returned read beats for the current request id (wraps naturally)
    always_ff @ (posedge clk) begin
        if (rst)
            read_count <= 0;
        else if (axi_rvalid && (axi_rid == l2.request.id))
            read_count <= read_count + 1;
    end

    assign amo_alu_inputs.rs1_load = axi_rdata;
    assign amo_alu_inputs.rs2 = l2.wr_data;
    assign amo_alu_inputs.op = l2.request.amo_type_or_burst_size;

    amo_alu amo_unit (.*, .result(amo_result));

    //High on the read beat carrying the word addressed by the request.
    //The word-within-line index is the low DCACHE_SUB_LINE_ADDR_W bits of the
    //address, matching the width of read_count. (The original slice was one
    //bit wider, so the compare could never match when that extra bit was set.)
    assign amo_word_returned = axi_rvalid && (read_count == l2.request.addr[DCACHE_SUB_LINE_ADDR_W-1:0]);

    //TODO: assumption that all data caches have same line size, would have to update wrt the burst size to be safe if they have different line lengths
    //also update araddr
    always_ff @ (posedge clk) begin
        if (amo_word_returned)
            amo_result_r <= amo_result;
    end

    always_ff @ (posedge clk) begin
        if (rst)
            amo_write_ready <= 0;
        else if (pop)
            amo_write_ready <= 0;
        else if (l2.request.is_amo && amo_word_returned)
            amo_write_ready <= 1;
    end
    //End AMO

    assign burst_count = l2.request.amo_type_or_burst_size;

    //read constants
    assign axi_arlen = burst_count; //
    assign axi_arburst = 2'b01;// INCR
    assign axi_rready = 1; //always ready to receive data
    assign axi_arsize = 3'b010;//4 bytes
    assign axi_arcache = 4'b0011; //bufferable cacheable memory
    assign axi_arprot = '0; //was misspelled 'axi_arport', leaving the port undriven
    assign axi_arid = l2.request.id;

    //Reads start at the line-aligned address (sub-line word index zeroed)
    assign axi_araddr ={l2.request.addr[29:DCACHE_SUB_LINE_ADDR_W], {DCACHE_SUB_LINE_ADDR_W{1'b0}}, 2'b00};

    //An RMW writes back a single word regardless of the read burst length
    assign write_reference_burst_count = read_modify_write ? 0 : burst_count;

    //write constants
    assign axi_awlen = write_reference_burst_count;
    assign axi_awburst = 2'b01;// INCR
    assign axi_awsize = 3'b010;//4 bytes
    assign axi_bready = 1;
    assign axi_awcache = 4'b0011;//bufferable cacheable memory
    assign axi_awprot = '0; //was misspelled 'axi_awport', leaving the port undriven

    assign axi_awaddr ={l2.request.addr, 2'b00};

    assign axi_wdata = read_modify_write ? amo_result_r : l2.wr_data;

    assign axi_wstrb =read_modify_write ? '1 : l2.request.be;

    //Done when read request sent, or slave ack on write data
    assign pop = (axi_arvalid & axi_arready & ~read_modify_write) | (axi_awvalid & axi_awready);
    assign l2.request_pop = pop;

    //read channel
    always_ff @ (posedge clk) begin
        if (rst)
            axi_arvalid <= 0;
        else if (axi_arvalid & axi_arready)
            axi_arvalid <= 0;
        else if (l2.request_valid & l2.request.rnw & ~address_phase_complete)
            axi_arvalid <= 1;
    end

    //write channel
    always_ff @ (posedge clk) begin
        if (rst)
            axi_awvalid <= 0;
        else if (l2.wr_data_valid & l2.request_valid & (~l2.request.rnw | amo_write_ready) & ~write_in_progress)
            axi_awvalid <= 1;
        else if (axi_awready)
            axi_awvalid <= 0;
    end

    //Held from the start of a write until its response arrives
    always_ff @ (posedge clk) begin
        if (rst)
            write_in_progress <= 0;
        else if (axi_bvalid)
            write_in_progress <= 0;
        else if (l2.wr_data_valid & l2.request_valid & (~l2.request.rnw | amo_write_ready))
            write_in_progress <= 1;
    end

    //Number of write beats transferred so far; saturates at the reference count
    always_ff @ (posedge clk) begin
        if (rst)
            write_burst_count <= 0;
        else if (axi_bvalid)
            write_burst_count <= 0;
        else if (axi_wvalid && axi_wready && write_reference_burst_count != write_burst_count)
            write_burst_count <= write_burst_count+1;
    end

    always_ff @ (posedge clk) begin
        if (rst)
            on_last_burst <= 0;
        else if (axi_bvalid)
            on_last_burst <= 0;
        else if ((~write_in_progress && write_reference_burst_count == 0) || write_in_progress && write_reference_burst_count == write_burst_count)
            on_last_burst <= 1;
    end

    //Blocks further data beats between the last beat and the write response
    always_ff @ (posedge clk) begin
        if (rst)
            write_transfer_complete <= 0;
        else if (axi_bvalid)
            write_transfer_complete <= 0;
        else if (axi_wlast && axi_wready)
            write_transfer_complete <= 1;
    end

    assign axi_wvalid = write_in_progress & l2.wr_data_valid & ~write_transfer_complete;
    assign axi_wlast = on_last_burst & write_in_progress & l2.wr_data_valid & ~write_transfer_complete;

    assign l2.wr_data_read = write_in_progress & axi_wready & ~write_transfer_complete;

    //read response
    assign l2.rd_data = axi_rdata;
    assign l2.rd_id = axi_rid;
    assign l2.rd_data_valid = axi_rvalid;

endmodule

58
core/barrel_shifter.sv Normal file
View file

@ -0,0 +1,58 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//Shared right-shifter used for both shift directions.
//  shifter_input  : value to shift
//  shift_amount   : number of bit positions to shift right
//  arith          : bit placed above the MSB before the arithmetic shift, i.e.
//                   the fill bit shifted in from the top
//  left_shift     : when set, the bit-reversed shift result is output
//  shifted_result : shifted value
//This module only reverses the output for left shifts; the matching input
//reversal is not performed here (the commented-out version was dropped).
module barrel_shifter (
        input logic[XLEN-1:0] shifter_input,
        input logic[4:0] shift_amount,
        input logic arith,
        input logic left_shift,
        output logic[XLEN-1:0]shifted_result
        );

    logic[XLEN-1:0] lshifted;
    logic[XLEN:0] shifted;

    //XLEN+1 bit signed value so that '>>>' replicates 'arith' into vacated bits
    assign shifted = signed'({arith,shifter_input}) >>> shift_amount;

    //Bit-reverse the low XLEN bits of the shift result.
    //Loop bounds follow XLEN instead of the previous hard-coded 32/31.
    always_comb begin
        for (int i = 0; i < XLEN; i++) begin
            lshifted[i] = shifted[XLEN-1-i];
        end
    end
    //assign lshifted = {<<{shifted}};//if stream operator supported

    assign shifted_result = left_shift ? lshifted : shifted[XLEN-1:0];

endmodule

138
core/branch_table.sv Normal file
View file

@ -0,0 +1,138 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//Direct-mapped branch target table.
//Two RAMs are indexed by PC bits [ADDR_W+1:2]: one holds {valid, tag,
//prediction, use_ras}, the other the predicted target address. Both are
//written when a branch executes (bt.branch_ex) and read when fetch issues a
//new memory request (bt.new_mem_request).
module branch_table(
        input logic clk,
        input logic rst,
        branch_table_interface.branch_table bt
        );

    parameter ADDR_W = $clog2(BRANCH_TABLE_ENTRIES);
    parameter BTAG_W = 30 - ADDR_W;

    typedef struct packed {
        logic valid;
        logic [BTAG_W-1:0] tag;
        logic prediction;
        logic use_ras;
    } branch_table_entry_t;

    (* RAM_STYLE="BLOCK" *)
    logic[$bits(branch_table_entry_t)-1:0] branch_table_tag_ram [0:BRANCH_TABLE_ENTRIES-1];
    branch_table_entry_t if_entry;
    branch_table_entry_t ex_entry;

    (* RAM_STYLE="BLOCK" *)
    logic [31:0] branch_table_addr_ram [0:BRANCH_TABLE_ENTRIES-1];
    logic [31:0] predicted_pc;

    logic [31:0] miss_predict_br;
    logic [31:0] miss_predict_jal;
    logic [31:0] miss_predict_ret;
    logic [31:0] miss_predict_jalr;

    logic miss_predict;
    logic miss_predict2;

    logic tag_match;
    logic bt_on;

    //Table indices for the write (execute) and read (fetch) sides
    logic [ADDR_W-1:0] ex_index;
    logic [ADDR_W-1:0] if_index;
    //Mispredict terms for the taken and not-taken outcomes
    logic taken_target_wrong;
    logic not_taken_target_wrong;

    assign ex_index = bt.ex_pc[ADDR_W+1:2];
    assign if_index = bt.next_pc[ADDR_W+1:2];

    //Both RAMs start zeroed, so every entry is initially invalid
    initial begin
        for(int i=0; i<BRANCH_TABLE_ENTRIES; i=i+1) begin
            branch_table_tag_ram[i] = 0;
            branch_table_addr_ram[i] = 0;
        end
    end

    //Tag/prediction RAM: write port (execute), read port (fetch)
    always_ff @(posedge clk) begin
        if (bt.branch_ex) begin
            branch_table_tag_ram[ex_index] <= ex_entry;
        end
    end

    always_ff @(posedge clk) begin
        if (bt.new_mem_request) begin
            if_entry <= branch_table_tag_ram[if_index];
        end
    end

    //Target address RAM: stores the address the branch actually went to
    always_ff @(posedge clk) begin
        if (bt.branch_ex) begin
            branch_table_addr_ram[ex_index] <= ( bt.branch_taken ? bt.jump_pc : bt.njump_pc);
        end
    end

    always_ff @(posedge clk) begin
        if (bt.new_mem_request) begin
            predicted_pc <= branch_table_addr_ram[if_index];
        end
    end

    //Predict next branch to same location/direction as current
    always_comb begin
        ex_entry.valid = 1;
        ex_entry.tag = bt.ex_pc[31:32-BTAG_W];
        ex_entry.prediction = bt.branch_taken;
        ex_entry.use_ras = bt.is_return_ex;
    end

    //A mispredict is an executing branch whose resolved next address differs
    //from the PC currently in decode
    assign taken_target_wrong = bt.branch_taken && (bt.dec_pc != bt.jump_pc);
    assign not_taken_target_wrong = ~bt.branch_taken && (bt.dec_pc != bt.njump_pc);
    assign miss_predict = bt.branch_ex && (taken_target_wrong || not_taken_target_wrong);

    //Entry matches when its valid bit equals (next_pc_valid & bt_on) and its
    //tag equals the upper bits of the fetch PC
    assign tag_match = (if_entry.valid == (bt.next_pc_valid & bt_on)) &&
        (if_entry.tag == bt.if_pc[31:32-BTAG_W]);

    assign bt.predicted_pc = predicted_pc;
    assign bt.prediction = if_entry.prediction;
    assign bt.use_ras = if_entry.use_ras;

    //Predictions are suppressed after reset until the first branch has executed
    always_ff @(posedge clk) begin
        if (rst) begin
            bt_on <= 0;
        end
        else if (bt.branch_ex) begin
            bt_on <= 1;
        end
    end

    generate if (USE_BRANCH_PREDICTOR) begin
        assign bt.use_prediction = bt_on & tag_match;
        assign bt.flush = miss_predict;
    end else begin
        //Without the predictor every taken branch flushes
        assign bt.use_prediction = 0;
        assign bt.flush = bt.branch_ex & bt.branch_taken;
    end endgenerate

    //Mispredict counters, split by whether the executing branch is a return
    always_ff @(posedge clk) begin
        if (rst) begin
            miss_predict_br <= 0;
            miss_predict_ret <= 0;
        end
        else begin
            if (miss_predict & ~bt.is_return_ex) begin
                miss_predict_br <= miss_predict_br+1;
            end
            if (miss_predict & bt.is_return_ex) begin
                miss_predict_ret <= miss_predict_ret+1;
            end
        end
    end

endmodule

192
core/branch_unit.sv Normal file
View file

@ -0,0 +1,192 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//Branch/jump unit.
//On branch_ex.new_request_dec the comparison results, branch type and both
//candidate next addresses (taken / fall-through) are registered; the taken
//decision is then produced from the registered values. JAL/JALR with a
//non-x0 destination also write PC+4 back through branch_wb.
module branch_unit(
        input logic clk,
        input logic rst,
        func_unit_ex_interface.unit branch_ex,
        input branch_inputs_t branch_inputs,
        branch_table_interface.branch_unit bt,
        ras_interface.branch_unit ras,
        unit_writeback_interface.unit branch_wb
        );

    logic result;
    logic equal;
    logic lessthan;

    logic equal_ex;
    logic lessthan_ex;

    logic [XLEN:0] sub_result;
    logic [31:0] pc_offset;
    logic [31:0] jump_base;
    logic [31:0] jump_target;
    logic [2:0] fn3_ex;
    logic[31:0] rd_ex;

    logic [31:0] jump_count;
    logic [31:0] call_count;
    logic [31:0] ret_count;
    logic [31:0] br_count;

    logic jump_ex;
    logic bcomp_ex;

    logic done;
    logic new_jal_jalr_dec;

    assign equal = (branch_inputs.rs1 == branch_inputs.rs2);
    // assign sub_result = signed'({branch_inputs.rs1[XLEN-1] & branch_inputs.use_signed, branch_inputs.rs1}) - signed'({branch_inputs.rs2[XLEN-1] & branch_inputs.use_signed, branch_inputs.rs2});

    //Operands are extended by one bit (sign bit when use_signed, else zero) so a
    //single signed compare serves both signed and unsigned branches
    assign lessthan = signed'({branch_inputs.rs1[XLEN-1] & branch_inputs.use_signed, branch_inputs.rs1}) <
        signed'({branch_inputs.rs2[XLEN-1] & branch_inputs.use_signed, branch_inputs.rs2});
    // sub_result[XLEN];

    always_comb begin
        unique case (fn3_ex) // <-- 010, 011 unused
            BEQ_fn3 : result = equal_ex;
            BNE_fn3 : result = ~equal_ex;
            BLT_fn3 : result = lessthan_ex;
            BGE_fn3 : result = ~lessthan_ex;
            BLTU_fn3 : result = lessthan_ex;
            BGEU_fn3 : result = ~lessthan_ex;
        endcase
    end

    //Taken if a conditional branch's comparison holds, or unconditionally for jumps
    assign bt.branch_taken = (bcomp_ex & result) | jump_ex;

    //Sign-extended offset for the instruction type being issued
    always_comb begin
        if (branch_inputs.jal)
            pc_offset = 32'(signed'({branch_inputs.jal_imm, 1'b0}));
        else if (branch_inputs.jalr)
            pc_offset = 32'(signed'(branch_inputs.jalr_imm));
        else
            pc_offset = 32'(signed'({branch_inputs.br_imm, 1'b0}));
    end

    //JALR is relative to rs1; JAL and conditional branches are PC relative
    assign jump_base = branch_inputs.jalr ? branch_inputs.rs1 : branch_inputs.dec_pc;
    assign jump_target = jump_base + signed'(pc_offset);

    assign bt.prediction_dec = branch_inputs.prediction;

    assign bt.branch_ex = branch_ex.new_request;

    always_ff @(posedge clk) begin
        if (branch_ex.new_request_dec) begin
            fn3_ex <= branch_inputs.fn3;
            equal_ex <= equal;
            lessthan_ex <= lessthan;
            bt.ex_pc <= branch_inputs.dec_pc;
            bcomp_ex <= branch_inputs.branch_compare;
            jump_ex <= branch_inputs.jal | branch_inputs.jalr;
            //The RISC-V spec requires the least-significant bit of a JALR
            //target to be cleared; other targets are passed through unchanged
            bt.jump_pc <= {jump_target[31:1], jump_target[0] & ~branch_inputs.jalr};
            bt.njump_pc <= branch_inputs.dec_pc + 4;
            //bt.prediction_dec <= branch_inputs.prediction;
        end
    end

    //A jump that writes a link value (destination is not x0)
    assign new_jal_jalr_dec = branch_ex.new_request_dec & (branch_inputs.jal | branch_inputs.jalr) & ~branch_inputs.rdx0;

    always_ff @(posedge clk) begin
        if (new_jal_jalr_dec) begin
            rd_ex <= branch_inputs.dec_pc + 4;
        end
    end

    /*********************************
     * RAS support
     *********************************/
    logic rs1_link, rs1_eq_rd, rd_link;
    logic is_call;
    logic is_return;

    //Wildcard compare matches register numbers 1 and 5 (5'b00001 / 5'b00101)
    assign rs1_link = (branch_inputs.rs1_addr ==? 5'b00?01);
    assign rd_link = (branch_inputs.rd_addr ==? 5'b00?01);
    assign rs1_eq_rd = (branch_inputs.rs1_addr == branch_inputs.rd_addr);

    always_ff @(posedge clk) begin
        if (branch_ex.new_request_dec) begin
            //Call: any jump whose destination is a link register
            is_call <= ( (branch_inputs.jal & rd_link) | (branch_inputs.jalr & rd_link) );
            //Return: JALR through a link register, unless rd is the same link register
            is_return <= ( (branch_inputs.jalr & ((rs1_link & ~rd_link) | (rs1_link & rd_link & ~rs1_eq_rd))) );
        end
    end

    assign ras.push = is_call & branch_ex.new_request;
    assign ras.pop = is_return & branch_ex.new_request;
    assign ras.new_addr = rd_ex;
    assign bt.is_return_ex = is_return;

    /*********************************
     * Output
     *********************************/
    assign branch_ex.ready = ~done | (done & branch_wb.accepted);
    assign branch_wb.rd = rd_ex;

    //done: set when a link-writing jump issues, cleared on writeback accept
    always_ff @(posedge clk) begin
        if (rst) begin
            done <= 0;
        end else if (new_jal_jalr_dec) begin
            done <= 1;
        end else if (branch_wb.accepted) begin
            done <= 0;
        end
    end

    assign branch_wb.done = done;
    assign branch_wb.early_done = new_jal_jalr_dec | (done & ~branch_wb.accepted);

    /*********************************************/
    //---------- Simulation counters
    // always_ff @(posedge clk) begin
    // if (rst) begin
    // jump_count <= 0;
    // end else if (branch_ex.new_request & jump_ex & ~is_call & ~is_return) begin
    // jump_count <= jump_count+1;
    // end
    // end
    // always_ff @(posedge clk) begin
    // if (rst) begin
    // call_count <= 0;
    // end else if (is_call & branch_ex.new_request) begin
    // call_count <= call_count+1;
    // end
    // end
    // always_ff @(posedge clk) begin
    // if (rst) begin
    // ret_count <= 0;
    // end else if (is_return & branch_ex.new_request) begin
    // ret_count <= ret_count+1;
    // end
    // end
    // always_ff @(posedge clk) begin
    // if (rst) begin
    // br_count <= 0;
    // end else if (branch_ex.new_request_dec & branch_inputs.branch_compare) begin
    // br_count <= br_count+1;
    // end
    // end

endmodule

48
core/byte_en_BRAM.sv Normal file
View file

@ -0,0 +1,48 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//Vendor-neutral wrapper around a two-port RAM with per-byte enables.
//Selects xilinx_byte_enable_ram or intel_byte_enable_ram at elaboration time
//based on FPGA_VENDOR and forwards all parameters and ports unchanged.
//
//Parameters (passed positionally to the vendor module):
//  LINES            : number of XLEN-bit words; sets the address width ($clog2(LINES))
//  preload_file     : file name handed to the vendor RAM
//                     NOTE(review): presumably a memory-initialisation file - confirm in the vendor modules
//  USE_PRELOAD_FILE : flag handed to the vendor RAM alongside preload_file
//
//Ports: two identical port groups (suffix _a / _b), each with an address,
//enable, XLEN/8-bit byte enable, write data and read data.
module byte_en_BRAM #(
parameter LINES = 8192,
parameter preload_file = "",
parameter USE_PRELOAD_FILE = 0
)
(
input logic clk,
//Port A
input logic[$clog2(LINES)-1:0] addr_a,
input logic en_a,
input logic[XLEN/8-1:0] be_a,
input logic[XLEN-1:0] data_in_a,
output logic[XLEN-1:0] data_out_a,
//Port B
input logic[$clog2(LINES)-1:0] addr_b,
input logic en_b,
input logic[XLEN/8-1:0] be_b,
input logic[XLEN-1:0] data_in_b,
output logic[XLEN-1:0] data_out_b
);
//Exactly one implementation is instantiated; any FPGA_VENDOR value other than
//"xilinx" falls through to the Intel implementation.
//Ports are connected by name via .*, so the vendor modules must use the same
//port names as this wrapper.
generate
if(FPGA_VENDOR == "xilinx")
xilinx_byte_enable_ram #(LINES, preload_file, USE_PRELOAD_FILE) ram_block (.*);
else
intel_byte_enable_ram #(LINES, preload_file, USE_PRELOAD_FILE) ram_block (.*);
endgenerate
endmodule

687
core/csr_unit.sv Normal file
View file

@ -0,0 +1,687 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
module csr_unit (
input logic clk,
input logic rst,
func_unit_ex_interface.unit csr_ex,
unit_writeback_interface.unit csr_wb, // writeback_unit_interface_dummy.unit csr_wb, // writeback_unit_interface_dummy.unit csr_wb,
//Decode
csr_inputs_interface.unit csr_inputs,
input logic instruction_issued_no_rd,
//exception_control
csr_exception_interface.csr csr_exception,
//TLBs
output logic tlb_on,
output logic [9:0] asid,
//MMUs
mmu_interface.csr immu,
mmu_interface.csr dmmu,
//WB
input logic instruction_complete,
input logic return_from_exception,
//External
input logic interrupt
);
typedef struct packed {
logic [2:0] rw_bits;
logic [2:0] privilege;
logic [7:0] sub_addr;
} csr_addr_t;
//Constant registers
typedef struct packed {
logic[1:0] base; //RV32I
logic[2:0] reserved;
logic Z;
logic Y;
logic X;
logic W;
logic V;
logic U; //User mode
logic T;
logic S; //Supervisor mode
logic R;
logic Q;
logic P;
logic O;
logic N;
logic M; //multiply divide
logic L;
logic K;
logic J;
logic I; //Base
logic H;
logic G;
logic F;
logic E;
logic D;
logic C;
logic B;
logic A; //Atomic
} misa_t;
misa_t misa;
bit [XLEN-1:0] mvendorid = 0;
bit [XLEN-1:0] marchid = 0;
bit [XLEN-1:0] mimpid = 0;
bit [XLEN-1:0] mhartid = CPU_ID;
typedef struct packed {
logic sd;
logic [XLEN-2:29] zero_bits1;
logic [4:0] vm;
logic [23:20] zero_bits2;
logic mxr;
logic pum;
logic mprv;
logic [1:0] xs;
logic [1:0] fs;
logic [1:0] mpp;
logic [1:0] hpp;
logic spp;
logic mpie;
logic hpie;
logic spie;
logic upie;
logic mie;
logic hie;
logic sie;
logic uie;
} mstatus_t;
mstatus_t mstatus, mstatus_write, mstatus_exception, mstatus_return, mstatus_next, mstatus_mask, mstatus_mmask, mstatus_smask;
typedef struct packed {
logic [XLEN-1:12] zeros;
logic meip;
logic heip;
logic seip;
logic ueip;
logic mtip;
logic htip;
logic stip;
logic utip;
logic msip;
logic hsip;
logic ssip;
logic usip;
} mip_t;
typedef struct packed {
logic [XLEN-1:12] zeros;
logic meie;
logic heie;
logic seie;
logic ueie;
logic mtie;
logic htie;
logic stie;
logic utie;
logic msie;
logic hsie;
logic ssie;
logic usie;
} mie_t;
struct packed {
logic [9:0] asid;
logic [21:0] ppn;
} sptbr;
//Non-constant registers
//scratch ram
logic[XLEN-1:0] scratch_regs [15:0];//Only 0x1 and 0x3 used by supervisor and machine mode respectively
logic[XLEN-1:0] scratch_out;
logic[XLEN-1:0] mtvec;
logic[XLEN-1:0] medeleg;
logic[XLEN-1:0] mideleg;
mip_t mip, mip_mask;
mie_t mie_reg, mie_mask;
logic[XLEN-1:0] mepc;
logic[XLEN-1:0] mtimecmp;
logic[XLEN-1:0] mcause;
logic[XLEN-1:0] mbadaddr;
mip_t sip_mask;
mie_t sie_mask;
logic[XLEN-1:0] sepc;
logic[XLEN-1:0] stime;
logic[XLEN-1:0] stimecmp;
logic[XLEN-1:0] scause;
logic[XLEN-1:0] sbadaddr;
logic[XLEN-1:0] sstatus;
logic[XLEN-1:0] stvec;
logic[TIMER_W-1:0] mcycle;
logic[TIMER_W-1:0] mtime;
logic[TIMER_W-1:0] minst_ret;
logic [1:0] inst_ret_inc;
//write_logic
logic user_write;
logic supervisor_write;
logic machine_write;
//Control logic
csr_addr_t csr_addr;
logic privilege_exception;
logic msr_write;
logic msr_update;
logic [1:0] privilege_level;
logic [1:0] next_privilege_level;
logic [31:0] selected_csr;
logic [31:0] updated_csr;
logic invalid_addr;
logic machine_trap;
logic supervisor_trap;
//******************************************************************
//TLB status --- used to mux physical/virtual address
assign tlb_on = mstatus.vm[3]; //We only support Sv32 or Mbare so only need to check one bit
assign asid = sptbr.asid;
//******************
//MMU interface
assign immu.mxr = mstatus.mxr;
assign dmmu.mxr = mstatus.mxr;
assign immu.pum = mstatus.pum;
assign dmmu.pum = mstatus.pum;
assign immu.privilege = privilege_level;
assign dmmu.privilege = mstatus.mprv ? mstatus.mpp : privilege_level;
assign immu.ppn = sptbr.ppn;
assign dmmu.ppn = sptbr.ppn;
//******************
//Machine ISA register
assign misa = '{default:0, base:1, U:1, S:1, M:1, I:1};
//assign exception = interrupt | misaligned_fetch | instruction_fault | illegal_opcode | unaligned_load | unaligned_store | load_fault | store_fault;
//convert addr into packed struct form
assign csr_addr = csr_inputs.csr_addr;
assign privilege_exception = csr_ex.new_request && (csr_addr.privilege > privilege_level);
assign user_write = !csr_inputs.system_op && !privilege_exception && (csr_addr.rw_bits != CSR_READ_ONLY && csr_addr.privilege == USER);
assign supervisor_write = !csr_inputs.system_op && !privilege_exception && (csr_addr.rw_bits != CSR_READ_ONLY && csr_addr.privilege == SUPERVISOR);
assign machine_write = !csr_inputs.system_op && !privilege_exception && (csr_addr.rw_bits != CSR_READ_ONLY && csr_addr.privilege == MACHINE);
assign csr_exception.illegal_instruction = invalid_addr | privilege_exception;
assign machine_trap = csr_exception.valid && next_privilege_level == MACHINE;
assign supervisor_trap = csr_exception.valid && next_privilege_level == SUPERVISOR;
always_ff @(posedge clk) begin
if (rst) begin
csr_ex.ready <= 1;
end else if (csr_ex.new_request_dec) begin
csr_ex.ready <= 0;
end else if (csr_wb.accepted) begin
csr_ex.ready <= 1;
end
end
always_ff @(posedge clk) begin
if (csr_ex.new_request) begin
csr_wb.rd <= selected_csr;
end
end
assign csr_wb.early_done = csr_ex.new_request | (csr_wb.done & ~csr_wb.accepted);
always_ff @(posedge clk) begin
if (rst) begin
csr_wb.done <= 0;
end else if (csr_ex.new_request) begin
csr_wb.done <= 1;
end else if (csr_wb.accepted) begin
csr_wb.done <= 0;
end
end
always_comb begin
case (csr_inputs.csr_op)
// CSR_RW : updated_csr = csr_inputs.rs1;
CSR_RS : updated_csr = selected_csr | csr_inputs.rs1;
CSR_RC : updated_csr = selected_csr & ~csr_inputs.rs1;
default : updated_csr = csr_inputs.rs1;//selected_csr;
endcase
end
//In progress---------------------------
// always_comb begin
// next_privilege_level = MACHINE;
// if (interrupt) begin
// next_privilege_level = MACHINE;
// end
// else if (csr_exception.valid) begin
// if (medeleg[csr_exception.code])
// next_privilege_level = SUPERVISOR;
// end
// else if (return_from_exception) begin
// next_privilege_level = USER;
// end
// end
// //Current privilege level
// always_ff @(posedge clk) begin
// if (rst) begin
// privilege_level <= MACHINE;
// end else if (csr_exception.valid | return_from_exception) begin
// privilege_level <= next_privilege_level;
// end
// end
// //save previous interrupt and privilege info on exception
// always_comb begin
// mstatus_exception = mstatus;
// unique case (next_privilege_level)
// SUPERVISOR: begin
// mstatus_exception.spie = (privilege_level == SUPERVISOR) ? mstatus.sie : mstatus.uie;
// mstatus_exception.sie = 0;
// mstatus_exception.spp = privilege_level[0]; //one if from supervisor-mode, zero if from user-mode
// end
// MACHINE: begin
// mstatus_exception.mpie = (privilege_level == MACHINE) ? mstatus.mie : ((privilege_level == SUPERVISOR) ? mstatus.sie : mstatus.uie);
// mstatus_exception.mie = 0;
// mstatus_exception.mpp = privilege_level; //machine,supervisor or user
// end
// endcase
// end
// //return from trap
// always_comb begin
// mstatus_return = mstatus;
// unique case (next_privilege_level)
// SUPERVISOR: begin
// if (mstatus.spp) begin //supervisor
// mstatus_return.sie = mstatus.spie;
// mstatus_return.spie = 1;
// mstatus_return.spp = 0;
// end
// else begin //user
// mstatus_return.spie = 1;
// mstatus_return.spp = 0;
// end
// end
// MACHINE: begin
// unique case(mstatus.mpp)
// USER: begin
// mstatus_return.mpie = 1;
// mstatus_return.mpp = USER;
// end
// SUPERVISOR: begin
// mstatus_return.sie = mstatus.mpie;
// mstatus_return.mpie = 1;
// mstatus_return.mpp = USER;
// end
// MACHINE: begin
// mstatus_return.mie = mstatus.mpie;
// mstatus_return.mpie = 1;
// mstatus_return.mpp = USER;
// end
// endcase
// end
// endcase
// end
// //machine status mask
// assign mstatus_mmask = '{default:0, vm:SV32, mxr:1, pum:1, mprv:1, mpp:'1, spp:1, mpie:1, spie:1, mie:1, sie:1};
// //supervisor status mask
// assign mstatus_smask = '{default:0, pum:1, spp:1, spie:1, sie:1};
// assign mstatus_mask = machine_write ? mstatus_mmask : mstatus_smask;
// assign mstatus_write = (mstatus & ~mstatus_mask) | (updated_csr & mstatus_mask);
// assign msr_write = (machine_write && csr_addr.sub_addr == MSTATUS[7:0]) | (supervisor_write && csr_addr.sub_addr == SSTATUS[7:0]);
// //read_write portion of machine status register
// always_ff @(posedge clk) begin
// if (rst) begin
// mstatus. vm <= BARE;
// mstatus.mxr <= 0;
// mstatus.pum <= 0;
// mstatus.mprv <= 0;
// mstatus.mpp <= MACHINE;
// mstatus.spp <= 0;
// mstatus.mpie <= 0;
// mstatus.spie <= 0;
// mstatus.mie <= 0;
// mstatus.sie <= 0;
// //*****************
// // Constant zeros
// //*****************
// //No FPU or custom extensions with state
// mstatus.sd <= 0;
// mstatus. zero_bits1 <= 0;
// mstatus. zero_bits2 <= 0;
// mstatus.xs <= 0;
// mstatus.fs <= 0;
// //No hypervisor
// mstatus.hpp <= 0;
// mstatus.hpie <= 0;
// mstatus.hie <= 0;
// //No user mode interrupts
// mstatus.upie <= 0;
// mstatus.uie <= 0;
// end
// else if (csr_exception.valid)
// mstatus <= mstatus_exception;
// else if (return_from_exception)
// mstatus <= mstatus_return;
// else if (msr_write)
// mstatus <= mstatus_write;
// end
// //mtvec
// always_ff @(posedge clk) begin
// if (rst) begin
// mtvec <= {RESET_VEC[XLEN-1:2], 2'b00};
// end else if (machine_write && csr_addr.sub_addr == MTVEC[7:0]) begin
// mtvec <= {updated_csr[XLEN-1:2], 2'b00};
// end
// end
// //medeleg
// //assign medeleg_mask = '{default:0, seip:1, stip:1, ssip:1};
// always_ff @(posedge clk) begin
// if (rst) begin
// medeleg <= '0;
// end else if (machine_write && csr_addr.sub_addr == MEDELEG[7:0]) begin
// medeleg <= updated_csr;
// end
// end
// //mideleg
// always_ff @(posedge clk) begin
// if (rst) begin
// mideleg <= '0;
// end else if (machine_write && csr_addr.sub_addr == MIDELEG[7:0]) begin
// // mideleg <= (mideleg & ~mideleg_mask) | (updated_csr & mideleg_mask);
// end
// end
// //mip
// assign mip_mask = '{default:0, stip:1, ssip:1};
// always_ff @(posedge clk) begin
// if (rst) begin
// mip <= 0;
// end
// else if (machine_write && csr_addr.sub_addr == MIP[7:0]) begin
// mip <= (mip & ~mip_mask) | (updated_csr & mip_mask);
// end
// end
// //mie
// assign mie_mask = '{default:0, meie:1, seie:1, mtie:1, stie:1, msie:1, ssie:1};
// assign sie_mask = '{default:0, seie:1, stie:1, ssie:1};
// always_ff @(posedge clk) begin
// if (rst) begin
// mie_reg <= '0;
// end
// else if (machine_write && csr_addr.sub_addr == MIE[7:0]) begin
// mie_reg <= (mie_reg & ~mie_mask) | (updated_csr & mie_mask);
// end
// else if (supervisor_write && csr_addr.sub_addr == SIE[7:0]) begin
// mie_reg <= (mie_reg & ~sie_mask) | (updated_csr & sie_mask);
// end
// end
// //mtimecmp
// // always_ff @(posedge clk) begin
// // if (rst) begin
// // mtimecmp <= '0;
// // end else if (machine_write && csr_addr.sub_addr == MTIMECMP[7:0]) begin
// // mtimecmp <= updated_csr;
// // end
// //end
// //mepc
// always_ff @(posedge clk) begin
// if (machine_trap) begin
// mepc <= csr_exception.pc;
// end
// else if (machine_write && csr_addr.sub_addr == MEPC[7:0]) begin
// mepc <= {updated_csr[XLEN-1:2], 2'b00};
// end
// end
// //mcause
// assign mcause[XLEN-1:ECODE_W] = 0;
// always_ff @(posedge clk) begin
// if (machine_trap) begin
// mcause[ECODE_W-1:0] = csr_exception.code;
// end
// else if (machine_write && csr_addr.sub_addr == MCAUSE[7:0]) begin
// mcause[ECODE_W-1:0] <= updated_csr[ECODE_W-1:0];
// end
// end
// //mbadaddr
// always_ff @(posedge clk) begin
// if (machine_trap) begin
// mbadaddr <= csr_exception.addr;
// end
// else if (machine_write && csr_addr.sub_addr == MBADADDR[7:0]) begin
// mbadaddr <= updated_csr;
// end
// end
// //END OF MACHINE REGS
// //scratch regs
// always_ff @(posedge clk) begin
// if ((machine_write && csr_addr.sub_addr == MSCRATCH[7:0]) || (supervisor_write && csr_addr.sub_addr == SSCRATCH[7:0])) begin
// scratch_regs[csr_addr.privilege] <= updated_csr;
// end
// end
// assign scratch_out = scratch_regs[csr_addr.privilege];
// //BEGIN OF SUPERVISOR REGS
// assign sip_mask = '{default:0, seip:1, stip:1, ssip:1};
// //sepc
// always_ff @(posedge clk) begin
// if (supervisor_trap) begin
// sepc <= csr_exception.pc;
// end
// else if (supervisor_write && csr_addr.sub_addr == SEPC[7:0]) begin
// sepc <= updated_csr;
// end
// end
// //scause
// assign scause[XLEN-1:ECODE_W] = 0;
// always_ff @(posedge clk) begin
// if (supervisor_trap) begin
// scause[ECODE_W-1:0] = csr_exception.code;
// end
// else if (supervisor_write && csr_addr.sub_addr == SCAUSE[7:0]) begin
// scause[ECODE_W-1:0] <= updated_csr[ECODE_W-1:0];
// end
// end
// //sbadaddr
// always_ff @(posedge clk) begin
// if (supervisor_trap) begin
// sbadaddr <= csr_exception.addr;
// end
// else if (supervisor_write && csr_addr.sub_addr == SBADADDR[7:0]) begin
// sbadaddr <= updated_csr;
// end
// end
// //sptbr
// always_ff @(posedge clk) begin
// if (rst) begin
// sptbr <= 0;
// end else if (supervisor_write && csr_addr.sub_addr == SPTBR[7:0]) begin
// sptbr <= updated_csr;
// end
// end
//Timers and Counters
//Register increment for instructions completed
always_ff @(posedge clk) begin
if (rst) begin
inst_ret_inc <= 0;
end else begin
if (instruction_complete & instruction_issued_no_rd)
inst_ret_inc <= 2;
else if (instruction_complete | instruction_issued_no_rd)
inst_ret_inc <= 1;
else
inst_ret_inc <= 0;
end
end
always_ff @(posedge clk) begin
if (rst) begin
mcycle <= 0;
mtime <= 0;
minst_ret <= 0;
end else begin
mcycle <= mcycle + 1;
mtime <= mtime + 1;
minst_ret <= minst_ret + inst_ret_inc;
end
end
always_comb begin
invalid_addr = 0;
case(csr_addr)
//Machine info
MISA : selected_csr = misa;
MVENDORID : selected_csr = mvendorid;
MARCHID : selected_csr = marchid;
MIMPID : selected_csr = mimpid;
MHARTID : selected_csr = mhartid;
//Machine trap setup
MSTATUS : selected_csr = mstatus;
MEDELEG : selected_csr = medeleg;
MIDELEG : selected_csr = mideleg;
MIE : selected_csr = mie_reg;
MTVEC : selected_csr = mtvec;
//Machine trap handling
MSCRATCH : selected_csr = scratch_out;
MEPC : selected_csr = mepc;
MCAUSE : selected_csr = mcause;
MBADADDR : selected_csr = mbadaddr;
MIP : selected_csr = mip;
//Machine Timers and Counters
MCYCLE : selected_csr = mcycle[XLEN-1:0];
MTIME : selected_csr = mtime[XLEN-1:0];
MINSTRET : selected_csr = minst_ret[XLEN-1:0];
MCYCLEH : selected_csr = mcycle[TIMER_W-1:XLEN];
MTIMEH : selected_csr = mtime[TIMER_W-1:XLEN];
MINSTRETH : selected_csr = minst_ret[TIMER_W-1:XLEN];
//Counter enables
MUCOUNTEREN : selected_csr = 0;
MSCOUNTEREN : selected_csr = 0;
//counter deltas
MUCYCLE_DELTA : selected_csr = 0;
MUTIME_DELTA : selected_csr = 0;
MUINSTRET_DELTA : selected_csr = 0;
MSCYCLE_DELTA : selected_csr = 0;
MSTIME_DELTA : selected_csr = 0;
MSINSTRET_DELTA : selected_csr = 0;
MUCYCLE_DELTAH : selected_csr = 0;
MUTIME_DELTAH : selected_csr = 0;
MUINSTRET_DELTAH : selected_csr = 0;
MSCYCLE_DELTAH : selected_csr = 0;
MSTIME_DELTAH : selected_csr = 0;
MSINSTRET_DELTAH : selected_csr = 0;
//Supervisor Trap Setup
SSTATUS : selected_csr = (mstatus & mstatus_smask);
SEDELEG : selected_csr = 0;
SIDELEG : selected_csr = 0;
SIE : selected_csr = (mie_reg & sie_mask);
STVEC : selected_csr = stvec;
//Supervisor trap handling
SSCRATCH : selected_csr = scratch_out;
SEPC : selected_csr = sepc;
SCAUSE : selected_csr = scause;
SBADADDR : selected_csr = sbadaddr;
SIP : selected_csr = (mip & sip_mask);
//Supervisor Protection and Translation
SPTBR : selected_csr = sptbr;
//Supervisor user shadows
SCYCLE : selected_csr = mcycle[XLEN-1:0];
STIME : selected_csr = mtime[XLEN-1:0];
SINSTRET : selected_csr = minst_ret[XLEN-1:0];
SCYCLEH : selected_csr = mcycle[TIMER_W-1:XLEN];
STIMEH : selected_csr = mtime[TIMER_W-1:XLEN];
SINSTRETH : selected_csr = minst_ret[TIMER_W-1:XLEN];
//User status
//Floating point
//User Counter Timers
CYCLE : selected_csr = mcycle[XLEN-1:0];
TIME : selected_csr = mtime[XLEN-1:0];
INSTRET : selected_csr = minst_ret[XLEN-1:0];
CYCLEH : selected_csr = mcycle[TIMER_W-1:XLEN];
TIMEH : selected_csr = mtime[TIMER_W-1:XLEN];
INSTRETH : selected_csr = minst_ret[TIMER_W-1:XLEN];
default : begin selected_csr = 0; invalid_addr = 1; end
endcase
end
endmodule

48
core/cycler.sv Normal file
View file

@ -0,0 +1,48 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
//One-hot ring counter.
//After reset bit 0 is set; every clock cycle with en high the set bit moves one
//position towards the MSB and wraps from bit C_WIDTH-1 back to bit 0.
//With C_WIDTH == 1 there is nothing to rotate and the output is tied high.
module cycler #(
        parameter C_WIDTH = 2
    ) (
        input logic clk,
        input logic rst,
        input logic en,
        output logic [C_WIDTH-1:0] one_hot
    );

    generate
        if (C_WIDTH != 1) begin
            always_ff @(posedge clk) begin
                if (rst)
                    //Only the least significant bit is set out of reset
                    one_hot <= {{(C_WIDTH-1){1'b0}}, 1'b1};
                else if (en)
                    //Rotate left by one: the old MSB re-enters at bit 0
                    one_hot <= {one_hot[C_WIDTH-2:0], one_hot[C_WIDTH-1]};
            end
        end
        else begin
            //Single entry: always selected
            assign one_hot = 1'b1;
        end
    endgenerate

endmodule

50
core/dbram.sv Normal file
View file

@ -0,0 +1,50 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//Load-store sub-unit that connects the request directly to a block RAM port.
//The word address, byte enables and write data are passed straight through;
//the only state kept here is ls.data_valid, which marks a load whose result
//has not yet been acknowledged.
module dbram(
        input logic clk,
        input logic rst,
        input data_access_shared_inputs_t ls_inputs,
        ls_sub_unit_interface.sub_unit ls,
        output logic[31:0] data_out,
        bram_interface.user data_bram
    );

    logic load_issued;  //a new request that is a load
    logic result_taken; //an outstanding load result is being acknowledged this cycle

    assign load_issued = ls.new_request & ls_inputs.load;
    assign result_taken = ls.data_valid & ls.ack;

    //BRAM port wiring (addr[1:0] is dropped: the RAM is word addressed)
    assign data_bram.en = ls.new_request;
    assign data_bram.addr = ls_inputs.addr[31:2];
    assign data_bram.be = ls_inputs.be;
    assign data_bram.data_in = ls_inputs.data_in;
    assign data_out = data_bram.data_out;

    //Ready when no load result is outstanding, or when the outstanding one is acked right now
    assign ls.ready = ~ls.data_valid | result_taken;

    //A newly issued load takes priority over a simultaneous ack of the previous one
    always_ff @(posedge clk) begin
        if (rst)
            ls.data_valid <= 1'b0;
        else if (load_issued)
            ls.data_valid <= 1'b1;
        else if (ls.ack)
            ls.data_valid <= 1'b0;
    end

endmodule

376
core/dcache.sv Normal file
View file

@ -0,0 +1,376 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//Data cache with a two-cycle access pipeline.
//Cycle 1: a new request (ls.new_request) presents its address to the tag banks and its attributes are
//captured into the stage2_* registers.
//Cycle 2 (second_cycle): the tag comparison result is used. A load that hits (and is allowed to hit) is
//served from the data bank; every other request is forwarded to the L1 arbiter.
//Stores additionally update the data bank when their tag hits (write_hit_be). tag_update is only raised
//for stage2_load requests that miss, so a store miss does not allocate a line.
module dcache(
input logic clk,
input logic rst,
//Cache enable: when low, read hits and tag updates are disabled and requests are forwarded to the L1 arbiter
input logic dcache_on,
//Request / response channels to the L1 arbiter
l1_arbiter_request_interface.requester l1_request,
l1_arbiter_return_interface.requester l1_response,
//Store-conditional outcome; every use of sc_success in this module is qualified by sc_complete
input sc_complete,
input sc_success,
//Clears the LR reservation flag
input clear_reservation,
//Address, write data, byte enables, load/store flags and fn3 of the incoming request
input data_access_shared_inputs_t ls_inputs,
//Read data for the current request (see Output Muxing below)
output logic[31:0] data_out,
//Atomic qualifiers of the incoming request (is_amo excludes lr/sc)
input logic lr,
input logic sc,
input logic is_amo,
input logic [4:0] amo_op,
//Alternative store data, selected instead of the captured data_in when use_forwarded_data was set on the request
input logic [31:0] forwarded_data,
input logic use_forwarded_data,
//Handshake with the requester (new_request / ready / data_valid / ack)
ls_sub_unit_interface.sub_unit ls
);
//Tag lookup results and way selection (one-hot and binary encoded forms)
logic tag_hit;
logic [DCACHE_WAYS-1:0] tag_hit_way;
logic [$clog2(DCACHE_WAYS)-1:0] tag_hit_way_int;
logic tag_update;
logic [DCACHE_WAYS-1:0] tag_update_way;
logic [DCACHE_WAYS-1:0] replacement_way;
logic [$clog2(DCACHE_WAYS)-1:0] replacement_way_int;
logic [$clog2(DCACHE_WAYS)-1:0] tag_update_way_int;
//Word position within a line: fill counter, SC write position and the one actually used on port B
logic [DCACHE_SUB_LINE_ADDR_W-1:0] word_count;
logic [DCACHE_SUB_LINE_ADDR_W-1:0] sc_write_index;
logic [DCACHE_SUB_LINE_ADDR_W-1:0] update_word_index;
logic line_complete;
logic reservation;
//Request attributes captured when ls.new_request is asserted
logic [31:0] stage2_addr;
logic stage2_load;
logic stage2_store;
logic [3:0] stage2_be;
logic [2:0] stage2_fn3;
logic [31:0] stage2_data_in;
logic stage2_use_forwarded_data;
//NOTE: unlike the other stage2_* signals this one is combinational (derived from the forwarded_data input)
logic [31:0] stage2_forwarded_data;
logic stage2_lr;
logic stage2_sc;
logic stage2_is_amo;
logic [4:0] stage2_amo_op;
//Data paths
logic [31:0] dbank_data_out;
logic [31:0] hit_data;
logic [31:0] miss_data;
logic [31:0] new_line_data;
logic [31:0] amo_result;
logic [31:0] amo_rs2;
//Control / status
logic miss;
logic[3:0] write_hit_be;
logic second_cycle;
logic request;
logic memory_complete;
logic hit_allowed;
logic read_hit_allowed;
logic address_range_valid;
logic idle;
logic read_miss_complete;
logic store_complete;
logic store_conditional_complete;
amo_alu_inputs_t amo_alu_inputs;
//Event counters; they do not drive any other logic in this module
logic[47:0] write_request_count;
logic[47:0] read_request_count;
logic[47:0] read_request_count_internal;
logic[47:0] read_request_count_complete;
//Not referenced by any other statement in this module
const bit[DCACHE_SUB_LINE_ADDR_W-1:0] SUBLINE_PADDING= '0;
/*************************************
* 2nd cycle signals
*************************************/
//Capture the attributes of each accepted request; they are held until the next new_request
always_ff @ (posedge clk) begin
if (ls.new_request) begin
stage2_addr <= ls_inputs.addr;
stage2_data_in <= ls_inputs.data_in;
stage2_be <= ls_inputs.be;
stage2_load <= ls_inputs.load;
stage2_store <= ls_inputs.store;
stage2_use_forwarded_data <= use_forwarded_data;
stage2_lr <= lr;
stage2_is_amo <= is_amo; //excludes lr/sc
stage2_sc <= sc;
stage2_amo_op <= amo_op;
stage2_fn3 <= ls_inputs.fn3;
end
end
//Counts accepted store requests
always_ff @ (posedge clk) begin
if(rst)
write_request_count <= 0;
else if (ls.new_request & ls_inputs.store) begin
write_request_count <= write_request_count + 1;
end
end
//Counts loads that were allowed to hit but missed in the tag banks
always_ff @ (posedge clk) begin
if(rst)
read_request_count <= 0;
else if (~tag_hit & read_hit_allowed) begin
read_request_count <= read_request_count + 1;
end
end
//Counts accepted load requests
always_ff @ (posedge clk) begin
if(rst)
read_request_count_internal <= 0;
else if (ls.new_request & ls_inputs.load) begin
read_request_count_internal <= read_request_count_internal + 1;
end
end
//Counts acknowledged results
always_ff @ (posedge clk) begin
if(rst)
read_request_count_complete <= 0;
else if (ls.ack) begin
read_request_count_complete <= read_request_count_complete + 1;
end
end
/*************************************
* General Control Logic
*************************************/
//LR and AMO ops are forced misses (if there is a tag hit they will reuse the same way however)
//Signal is valid only for a single cycle, RAM enables are used to hold outputs in case of pipeline stalls
always_ff @ (posedge clk) begin
if (rst)
read_hit_allowed <= 0;
else
read_hit_allowed <= ls.new_request & ls_inputs.load & dcache_on & ~(lr | is_amo);
end
//LR reservation, cleared on exceptions
//In the second cycle of any request the flag is loaded with stage2_lr (set by LR, cleared by anything else);
//this assignment has priority over the sc_complete / clear_reservation clear
always_ff @ (posedge clk) begin
if (rst)
reservation <= 0;
else if (second_cycle)
reservation <= stage2_lr;
else if (sc_complete | clear_reservation)
reservation <= 0;
end
//High for the single cycle following an accepted request
always_ff @ (posedge clk) begin
if (rst)
second_cycle <= 0;
else
second_cycle <= ls.new_request;
end
//Records whether the request was served from the data bank (0) or must take its data from miss_data (1);
//sampled in the second cycle and held until the second cycle of the next request
always_ff @ (posedge clk) begin
if (rst)
miss <= 0;
else if (second_cycle)
miss <= ~(tag_hit & read_hit_allowed);
end
//One-cycle pulse, one cycle after second_cycle, requesting a tag write for a missing load
always_ff @ (posedge clk) begin
if (rst)
tag_update <= 0;
else if (second_cycle)
tag_update <= dcache_on & stage2_load & ~tag_hit; //Cache enabled, read miss
else
tag_update <= 0;
end
/*************************************
* L1 Arbiter Interface
*************************************/
assign l1_request.addr = {stage2_addr[31:2], 2'b0} ;//Memory interface aligns request to burst size (done there to support AMO line-read word-write)
assign l1_request.data = stage2_use_forwarded_data ? stage2_forwarded_data : stage2_data_in;
assign l1_request.rnw = stage2_load;
assign l1_request.be = stage2_be;
assign l1_request.size = stage2_load ? (DCACHE_LINE_W-1) : 0;//LR and AMO ops are included in load
assign l1_request.is_amo = (stage2_is_amo | stage2_lr | stage2_sc);
assign l1_request.amo = stage2_amo_op;
//Index of the word currently being returned by the arbiter.
//It is never explicitly cleared after a line: presumably DCACHE_LINE_W == 2**DCACHE_SUB_LINE_ADDR_W so that it wraps to zero - TODO confirm
always_ff @ (posedge clk) begin
if (rst)
word_count <= 0;
else if (l1_response.data_valid)
word_count <= word_count + 1;
end
//Holds the arbiter request from the cycle after second_cycle until it is acknowledged.
//The combinational term below raises the request during second_cycle itself.
always_ff @ (posedge clk) begin
if (rst)
request <= 0;
else if (second_cycle & ~l1_request.ack)
request <= ~(tag_hit & read_hit_allowed) | ~dcache_on;
else if (l1_request.ack)
request <= 0;
end
assign l1_request.request = request | (second_cycle & (~(tag_hit & read_hit_allowed) | ~dcache_on));
/*************************************
* Cache Components
*************************************/
//Free running one hot cycler.
//Its output (advanced every cycle, en tied high) selects the way to replace
cycler #(DCACHE_WAYS) replacement_policy (.*, .en(1'b1), .one_hot(replacement_way));
//One-hot tag hit / update logic to binary int
one_hot_to_integer #(DCACHE_WAYS) hit_way_conv (.one_hot(tag_hit_way), .int_out(tag_hit_way_int));
one_hot_to_integer #(DCACHE_WAYS) update_way_conv (.one_hot(replacement_way), .int_out(replacement_way_int));
//If atomic load (LR or AMO op) and there's a tag hit reuse same line
//Sampled in the second cycle so the selected way stays stable for the whole line fill
always_ff @ (posedge clk) begin
if (second_cycle) begin
tag_update_way<= ((stage2_is_amo | stage2_lr) & tag_hit) ? tag_hit_way : replacement_way;
tag_update_way_int <= ((stage2_is_amo | stage2_lr) & tag_hit) ? tag_hit_way_int : replacement_way_int;
end
end
//Tag banks
//Remaining ports (e.g. clk, rst and the tag_hit outputs) are bound by name through .*
dtag_banks dcache_tag_banks (.*,
.stage1_addr(ls_inputs.addr),
.stage2_addr(stage2_addr),
.inv_addr({l1_response.inv_addr, 2'b00}),
.update_way(tag_update_way),
.update(tag_update),
.stage1_adv(ls.new_request),
.stage1_inv(1'b0),//For software invalidation
.extern_inv(l1_response.inv_valid),
.extern_inv_complete(l1_response.inv_ack)
);
//Byte enables for port A of the data bank: only active when the tag hits
assign write_hit_be = stage2_be & {4{tag_hit}};
//AMO op processing on incoming data
//amo_rs2 is a copy of stage2_data_in delayed by one cycle
always_ff @ (posedge clk) begin
amo_rs2 <= stage2_data_in; //Only forwarding on STORE opcode
end
assign amo_alu_inputs.rs1_load = l1_response.data;
assign amo_alu_inputs.rs2 = amo_rs2;
assign amo_alu_inputs.op = stage2_amo_op;
amo_alu amo_unit (.*, .result(amo_result));
//Data written through port B of the data bank:
// - the AMO result replaces the addressed word while the line is being filled
// - a successful SC writes its store data
// - otherwise the word returned by the arbiter is written unchanged
always_comb begin
if (stage2_is_amo && stage2_addr[DCACHE_SUB_LINE_ADDR_W+1:2] == word_count)
new_line_data = amo_result;
else if (sc_complete & sc_success)
new_line_data = stage2_data_in;//Only forwarding on STORE opcode
else
new_line_data = l1_response.data;
end
//Word position for the port B write: the addressed word for SC, the fill counter otherwise
assign sc_write_index = stage2_addr[DCACHE_SUB_LINE_ADDR_W+1:2];
assign update_word_index = sc_complete ? sc_write_index : word_count;
////////////////////////////////////////////////////////
//Replicate the forwarded byte / halfword across the word so the byte enables can select any lane
//NOTE(review): there is no default branch, so stage2_forwarded_data is not assigned for the other fn3 encodings
always_comb begin
unique case(stage2_fn3) //<--011, 110, 111, 100, 101 unused
LS_B_fn3 : stage2_forwarded_data = {4{forwarded_data[7:0]}};
LS_H_fn3 : stage2_forwarded_data = {2{forwarded_data[15:0]}};
LS_W_fn3 : stage2_forwarded_data =forwarded_data;
endcase
end
//Data Bank(s)
//Port A: accessed in the second cycle at {line, word, hit way}; returns read data and applies store-hit byte writes
//Port B: write only, at {line, update_word_index, update way}; used for line fills and successful SC writes
ddata_bank #(DCACHE_LINES*DCACHE_LINE_W*DCACHE_WAYS) data_bank (
.clk(clk),
.addr_a({stage2_addr[DCACHE_LINE_ADDR_W+DCACHE_SUB_LINE_ADDR_W+2-1:2], tag_hit_way_int}),
.addr_b({stage2_addr[DCACHE_LINE_ADDR_W+DCACHE_SUB_LINE_ADDR_W+2-1:DCACHE_SUB_LINE_ADDR_W+2], update_word_index, tag_update_way_int}),
.en_a(second_cycle),
.en_b(l1_response.data_valid | (sc_complete & sc_success)),
.be_a(write_hit_be),
.data_in_a(stage2_use_forwarded_data ? stage2_forwarded_data : stage2_data_in),
.data_in_b(new_line_data),
.data_out_a(dbank_data_out)
);
/*************************************
* Output Muxing
*************************************/
//miss_data captures the addressed word as it passes by during a line fill,
//or the SC status word {31'b0, sc_success} when a store-conditional completes
always_ff @ (posedge clk) begin
if (l1_response.data_valid && stage2_addr[DCACHE_SUB_LINE_ADDR_W+1:2] == word_count)
miss_data <= l1_response.data;
else if (sc_complete)
miss_data <= {31'b0, sc_success};
end
assign data_out = miss ? miss_data : dbank_data_out;
/*************************************
* Pipeline Advancement
*************************************/
assign line_complete = (l1_response.data_valid && (word_count == (DCACHE_LINE_W-1))); //covers load, LR, AMO
assign store_complete = l1_request.ack & stage2_store & ~stage2_sc;
assign store_conditional_complete = sc_complete;
//Result-valid flag towards the requester; held until acknowledged
always_ff @ (posedge clk) begin
if (rst)
memory_complete <= 0;
else if (line_complete | (read_hit_allowed & tag_hit) | sc_complete) //read hit OR line fill OR SC complete
memory_complete <= 1;
else if (ls.ack)
memory_complete <= 0;
end
assign ls.data_valid = memory_complete;
//Set when a line fill has finished and its result has not yet been acknowledged
always_ff @ (posedge clk) begin
if (rst)
read_miss_complete <= 0;
else if (line_complete) //line fill complete (load, LR or AMO)
read_miss_complete <= 1;
else if (ls.ack)
read_miss_complete <= 0;
end
//Ready for a new request: on a read hit, when a store has been accepted by the arbiter,
//when a completed line fill is being acknowledged, or while idle
assign ls.ready = (read_hit_allowed & tag_hit) | store_complete | (read_miss_complete & ls.ack) | idle;
//idle is cleared by every accepted request and set again once that request has finished
always_ff @ (posedge clk) begin
if (rst)
idle <= 1;
else if (ls.new_request)
idle <= 0;
else if ((read_hit_allowed & tag_hit) | (read_miss_complete & ls.ack) | store_complete | store_conditional_complete ) //read hit OR acked line fill OR store accepted OR SC complete
idle <= 1;
end
endmodule

40
core/ddata_bank.sv Normal file
View file

@ -0,0 +1,40 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//Data storage for the data cache, built on a dual-port byte-enable block RAM.
//Port A is a read/write port with per-byte write enables.
//Port B is used as a write-only port that always writes a full 32-bit word.
module ddata_bank #(
        parameter LINES = 1024
    ) (
        input logic clk,

        //Port A: read / byte-enabled write
        input logic[$clog2(LINES)-1:0] addr_a,
        input logic en_a,
        input logic[3:0] be_a,
        input logic[31:0] data_in_a,
        output logic[31:0] data_out_a,

        //Port B: write only
        input logic[$clog2(LINES)-1:0] addr_b,
        input logic en_b,
        input logic[31:0] data_in_b
    );

    //Port B has no per-byte control: enabling the port enables all four byte lanes
    logic [3:0] port_b_byte_enables;
    assign port_b_byte_enables = {4{en_b}};

    //Ports with matching names are bound through .*; the port B read data is left unconnected
    byte_en_BRAM #(LINES, "", 0) ram_block (
        .*,
        .be_b(port_b_byte_enables),
        .data_out_b()
    );

endmodule

395
core/decode.sv Normal file
View file

@ -0,0 +1,395 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
module decode(
input logic clk,
input logic rst,
input logic flush,
branch_table_interface.decode bt,
instruction_buffer_interface.decode ib,
id_generator_interface.decode id_gen,
register_file_decode_interface.decode rf_decode,
inflight_queue_interface.decode iq,
output alu_inputs_t alu_inputs,
output load_store_inputs_t ls_inputs,
output branch_inputs_t branch_inputs,
csr_inputs_interface.decode csr_inputs,
output mul_inputs_t mul_inputs,
output div_inputs_t div_inputs,
func_unit_ex_interface.decode alu_ex,
func_unit_ex_interface.decode ls_ex,
func_unit_ex_interface.decode branch_ex,
func_unit_ex_interface.decode csr_ex,
func_unit_ex_interface.decode mul_ex,
func_unit_ex_interface.decode div_ex,
output instruction_issued_no_rd,
input logic instruction_complete,
output logic dec_advance,
output logic [31:0] dec_pc,
output logic illegal_instruction
);
logic [2:0] fn3;
logic [6:0] opcode;
logic [4:0] shamt;
assign fn3 = ib.data_out.instruction[14:12];
assign opcode = ib.data_out.instruction[6:0];
assign shamt = ib.data_out.instruction[24:20];
logic uses_rs1;
logic uses_rs2;
logic uses_rd;
logic [4:0] rs1_addr;
logic [4:0] rs2_addr;
logic [4:0] future_rd_addr;
logic issue_valid;
logic store_issued_with_forwarding;
logic load_store_operands_ready;
logic operands_ready;
logic csr_imm_op;
logic sys_op;
logic mult_div_op;
logic branch_compare;
logic [NUM_WB_UNITS-1:0] new_request;
logic [NUM_WB_UNITS-1:0] issue;
logic unit_available;
logic advance;
logic [XLEN-1:0] alu_rs1_data;
logic [XLEN-1:0] alu_rs2_data;
logic [1:0] alu_op;
logic [2:0] alu_fn3;
logic [31:0] ls_offset;
logic [31:0] virtual_address;
logic [4:0] load_rd;
logic last_ls_request_was_load;
logic load_store_forward;
logic [4:0] prev_div_rs1_addr;
logic [4:0] prev_div_rs2_addr;
logic prev_div_result_valid;
//-----------------------------------------------------------------------------
assign dec_pc = ib.data_out.pc;
assign csr_imm_op = (opcode == SYSTEM) && fn3[2];
assign sys_op = (opcode == SYSTEM) && (fn3 == 0);
assign uses_rs1 = ib.data_out.uses_rs1;
assign uses_rs2 = ib.data_out.uses_rs2;
assign uses_rd = ib.data_out.uses_rd;
assign rs1_addr = ib.data_out.instruction[19:15];
assign rs2_addr = ib.data_out.instruction[24:20];
assign future_rd_addr = ib.data_out.instruction[11:7];
//Register File interface inputs
assign rf_decode.rs1_addr = rs1_addr;
assign rf_decode.rs2_addr = rs2_addr;
assign rf_decode.future_rd_addr = future_rd_addr;
assign rf_decode.instruction_issued = advance & uses_rd;
assign rf_decode.id = id_gen.issue_id;
//Issue logic
//Illegal instruction detection.
//Opcodes not listed below are flagged as illegal. For the ARITH opcode, instruction[25]
//marks a multiply/divide operation and fn3[2] separates the divide group (set) from the
//multiply group (clear), matching the MUL/DIV unit request decoding in this module.
//A multiply/divide operation is illegal when the unit that would execute it is not
//present (USE_MUL / USE_DIV).
always_comb begin
    case (opcode)
        LUI : illegal_instruction = 1'b0;
        AUIPC : illegal_instruction = 1'b0;
        JAL : illegal_instruction = 1'b0;
        JALR : illegal_instruction = 1'b0;
        BRANCH : illegal_instruction = 1'b0;
        LOAD : illegal_instruction = 1'b0;
        STORE : illegal_instruction = 1'b0;
        ARITH_IMM : illegal_instruction = 1'b0;
        ARITH : begin
            if (!USE_MUL && !USE_DIV)
                //Neither unit present: every multiply/divide operation is illegal
                illegal_instruction = ib.data_out.instruction[25];
            else if (!USE_MUL && USE_DIV)
                //Divider only: the multiply group is illegal
                illegal_instruction = ib.data_out.instruction[25] & ~fn3[2];
            else if (USE_MUL && !USE_DIV)
                //Multiplier only: the divide group is illegal
                //(this condition previously repeated the first branch and was unreachable)
                illegal_instruction = ib.data_out.instruction[25] & fn3[2];
            else
                //Both units present
                illegal_instruction = 1'b0;
        end
        FENCE : illegal_instruction = 1'b0;
        AMO : illegal_instruction = 1'b0;
        SYSTEM : illegal_instruction = 1'b0;
        default : illegal_instruction = 1'b1;
    endcase
end
one_hot_to_integer #(NUM_WB_UNITS) iq_id (.one_hot(new_request), .int_out(iq.data_in.unit_id));
assign iq.data_in.rd_addr = future_rd_addr;
assign iq.data_in.id = id_gen.issue_id;
assign iq.new_issue = advance & uses_rd;
assign id_gen.advance = advance & uses_rd;
assign bt.dec_pc = ib.data_out.pc;
assign issue_valid = ib.valid & ((~uses_rd) | (uses_rd & id_gen.id_avaliable)) & ~flush; //~(|delay) &
assign operands_ready = !(
(uses_rs1 && rf_decode.rs1_conflict) ||
(uses_rs2 && rf_decode.rs2_conflict));
assign load_store_forward =((opcode == STORE) && last_ls_request_was_load && (rs2_addr == load_rd));
assign load_store_operands_ready = !(
(uses_rs1 && rf_decode.rs1_conflict) ||
(uses_rs2 && rf_decode.rs2_conflict && ~load_store_forward));
assign mult_div_op = (opcode == ARITH) && ib.data_out.instruction[25];
assign branch_compare = (opcode == BRANCH);
assign new_request[BRANCH_UNIT_ID] = ((opcode == BRANCH) || (opcode == JAL) || (opcode == JALR));
assign new_request[ALU_UNIT_ID] = (((opcode == ARITH) && ~ib.data_out.instruction[25]) || (opcode== ARITH_IMM) || (opcode == AUIPC) || (opcode == LUI));
assign new_request[LS_UNIT_ID] = (opcode == LOAD || opcode == STORE || opcode == AMO);
assign new_request[CSR_UNIT_ID] = (opcode == SYSTEM);
generate if (USE_MUL)
assign new_request[MUL_UNIT_ID] = mult_div_op & ~fn3[2] ;
else
assign new_request[MUL_UNIT_ID] = 0 ;
endgenerate
generate if (USE_DIV)
assign new_request[DIV_UNIT_ID] =mult_div_op & fn3[2] ;
else
assign new_request[DIV_UNIT_ID] = 0 ;
endgenerate
assign issue[BRANCH_UNIT_ID] = issue_valid & operands_ready & new_request[BRANCH_UNIT_ID] & (branch_ex.ready | ~uses_rd);//| ~uses_rd
assign issue[ALU_UNIT_ID] = issue_valid & operands_ready & new_request[ALU_UNIT_ID] & alu_ex.ready;
assign issue[LS_UNIT_ID] = issue_valid & load_store_operands_ready & new_request[LS_UNIT_ID] & ls_ex.ready;
assign issue[CSR_UNIT_ID] = issue_valid & operands_ready & new_request[CSR_UNIT_ID] & csr_ex.ready ;
assign issue[MUL_UNIT_ID] = issue_valid & operands_ready & new_request[MUL_UNIT_ID] & mul_ex.ready;
assign issue[DIV_UNIT_ID] = issue_valid & operands_ready & new_request[DIV_UNIT_ID] & div_ex.ready;
assign advance = |issue;
assign ib.pop = advance;
assign dec_advance = advance;
assign instruction_issued_no_rd = advance & ~uses_rd;
//----------------------------------------------------------------------------------
//ALU unit inputs
//----------------------------------------------------------------------------------
//Request strobe to the ALU, asserted in the cycle the instruction issues
assign alu_ex.new_request_dec = issue[ALU_UNIT_ID];

//First ALU operand: the pc for AUIPC, zero for LUI, the rs1 register value otherwise
always_comb begin
    alu_rs1_data = rf_decode.rs1_data;
    if (opcode == AUIPC)
        alu_rs1_data = ib.data_out.pc;
    else if (opcode == LUI)
        alu_rs1_data = '0;
end

//Second ALU operand: upper immediate (AUIPC/LUI), sign-extended 12-bit immediate
//(ARITH_IMM), or the rs2 register value for ARITH instructions
always_comb begin
    alu_rs2_data = rf_decode.rs2_data;
    if ((opcode == AUIPC) || (opcode == LUI))
        alu_rs2_data = {ib.data_out.instruction[31:12], 12'b0};
    else if (opcode == ARITH_IMM)
        alu_rs2_data = 32'(signed'(ib.data_out.instruction[31:20]));
end

//put lui and auipc through adder path
assign alu_fn3 = ((opcode == AUIPC) || (opcode == LUI))
    ? ADD_SUB_fn3
    : fn3;
//Maps the (possibly overridden) fn3 field onto the ALU result-select class.
//Label order matches the original one-label-per-line form.
//NOTE(review): there is no default arm; this relies on the labels covering every
//fn3 encoding - confirm against the fn3 constant definitions.
always_comb begin
    case (alu_fn3)
        SLT_fn3, SLTU_fn3        : alu_op = ALU_SLT;
        SLL_fn3                  : alu_op = ALU_SHIFT;
        XOR_fn3, OR_fn3, AND_fn3 : alu_op = ALU_LOGIC;
        SRA_fn3                  : alu_op = ALU_SHIFT;
        ADD_SUB_fn3              : alu_op = ALU_ADD_SUB;
    endcase
end
//Bit-reversed copy of rs1, selected as the shifter input for left shifts
logic [XLEN-1:0] left_shift_in;
//assign left_shift_in = {<<{rf_decode.rs1_data}}; //Bit reverse not supported by Altera
always_comb begin
    //Walk the source bits and drop each one into the mirrored position
    for (int src = 0; src < XLEN; src++) begin
        left_shift_in[XLEN-1-src] = rf_decode.rs1_data[src];
    end
end
//Registers the ALU inputs in the cycle an instruction issues to the ALU.
//The registers hold their values otherwise (no reset).
always_ff @(posedge clk) begin
if (issue[ALU_UNIT_ID]) begin
alu_inputs.in1 <= alu_rs1_data;
alu_inputs.in2 <= alu_rs2_data;
//Raw instruction fn3 is stored here (not alu_fn3, which is overridden for LUI/AUIPC)
alu_inputs.fn3 <= fn3;
//add is cleared (subtract) for ARITH with instruction[30] set and for SLT/SLTU compares
alu_inputs.add <= ~((opcode == ARITH && ib.data_out.instruction[30]) || ((opcode == ARITH || opcode == ARITH_IMM) && (fn3 ==SLTU_fn3 || fn3 ==SLT_fn3)));//SUB instruction
//Bit shifted in: operand sign bit when instruction[30] is set, zero otherwise
alu_inputs.arith <= alu_rs1_data[XLEN-1] & ib.data_out.instruction[30];//shift in bit
alu_inputs.left_shift <= ~fn3[2];
//fn3[2] set selects rs1 unmodified; fn3[2] clear selects the bit-reversed rs1
alu_inputs.shifter_in <= fn3[2] ? rf_decode.rs1_data : left_shift_in;
alu_inputs.sltu <= fn3[0];//(fn3 ==SLTU_fn3);
alu_inputs.op <= alu_op;
end
end
//----------------------------------------------------------------------------------
//----------------------------------------------------------------------------------
//Load Store unit inputs
//----------------------------------------------------------------------------------
//Request strobe to the load/store unit, asserted in the cycle the instruction issues
assign ls_ex.new_request_dec = issue[LS_UNIT_ID];

//Address offset:
//  opcode[5] set   -> store-format immediate {instruction[31:25], instruction[11:7]}
//  opcode[5] clear -> load-format immediate instruction[31:20]
//The AMO opcode also has opcode[5] set, but AMO/LR/SC instructions carry no immediate:
//those bits hold funct5/aq/rl/rd and the access address is rs1 alone. The offset is
//therefore forced to zero for AMO so the address is not corrupted.
assign ls_offset = (opcode == AMO) ? '0 :
    32'(signed'(opcode[5] ? {ib.data_out.instruction[31:25], ib.data_out.instruction[11:7]} : ib.data_out.instruction[31:20]));
assign ls_inputs.virtual_address = rf_decode.rs1_data + ls_offset;//rf_decode.rs1_data;
assign ls_inputs.rs2 = rf_decode.rs2_data;
assign ls_inputs.pc = ib.data_out.pc;
//AMO requests use LS_W_fn3 in place of the instruction's fn3
assign ls_inputs.fn3 = ls_inputs.is_amo ? LS_W_fn3 : fn3;
//assign ls_inputs.imm = opcode[5] ? {ib.data_out.instruction[31:25], ib.data_out.instruction[11:7]} : ib.data_out.instruction[31:20];
//AMO operation type is taken from instruction[31:27]
assign ls_inputs.amo = ib.data_out.instruction[31:27];
assign ls_inputs.is_amo = (opcode == AMO);
assign ls_inputs.load = (opcode == LOAD) || ((opcode == AMO) && (ls_inputs.amo != AMO_SC)); //LR and AMO_ops perform a read operation as well
assign ls_inputs.store = (opcode == STORE);
//A store that issues while rs2 still has a conflict takes its data via forwarding
assign ls_inputs.load_store_forward = (opcode == STORE) && rf_decode.rs2_conflict;
assign ls_inputs.id = id_gen.issue_id;
//Remembers the destination register of the most recent load/store unit request
always_ff @(posedge clk) begin
    if (issue[LS_UNIT_ID]) begin
        load_rd <= future_rd_addr;
    end
end

//Tracks whether the most recent load/store unit request was a load. The flag is
//dropped when a later instruction (issued to any unit) writes the same destination
//register, as load_rd then no longer refers to the load's result.
always_ff @(posedge clk) begin
    if (rst) begin
        last_ls_request_was_load <= 0;
    end
    else begin
        if (issue[LS_UNIT_ID]) begin
            last_ls_request_was_load <= ls_inputs.load;
        end
        else if (advance && uses_rd && (load_rd == future_rd_addr)) begin
            last_ls_request_was_load <= 0;
        end
    end
end
//----------------------------------------------------------------------------------
//----------------------------------------------------------------------------------
//Branch unit inputs
//----------------------------------------------------------------------------------
//Request strobe to the branch unit, asserted in the cycle the instruction issues
assign branch_ex.new_request_dec = issue[BRANCH_UNIT_ID];
assign branch_inputs.rs1 = rf_decode.rs1_data;
assign branch_inputs.rs2 = rf_decode.rs2_data;
assign branch_inputs.fn3 = fn3;
assign branch_inputs.dec_pc = ib.data_out.pc;
//Comparison is unsigned only for BLTU/BGEU
assign branch_inputs.use_signed = !((fn3 == BLTU_fn3) || (fn3 == BGEU_fn3));
assign branch_inputs.rdx0 = ~uses_rd;//(future_rd_addr == 0); jal jalr x0
assign branch_inputs.rs1_addr = rs1_addr;
assign branch_inputs.rd_addr = future_rd_addr;
assign branch_inputs.prediction = ib.data_out.prediction;
//Instruction type is decoded from opcode bits 3:2 only; this is only meaningful
//for the BRANCH/JAL/JALR opcodes that are routed to the branch unit
assign branch_inputs.jal = opcode[3];//(opcode == JAL);
assign branch_inputs.jalr = ~opcode[3] & opcode[2];//(opcode == JALR);
assign branch_inputs.branch_compare = (opcode[3:2] == 0) ;//(opcode == BRANCH);
//Immediates are reassembled from the instruction fields, without the implicit low zero bit:
//jal_imm = {instr[31], instr[19:12], instr[20], instr[30:21]}
assign branch_inputs.jal_imm = {ib.data_out.instruction[31], ib.data_out.instruction[19:12], ib.data_out.instruction[20], ib.data_out.instruction[30:21]};
assign branch_inputs.jalr_imm = ib.data_out.instruction[31:20];
//br_imm = {instr[31], instr[7], instr[30:25], instr[11:8]}
assign branch_inputs.br_imm = {ib.data_out.instruction[31], ib.data_out.instruction[7], ib.data_out.instruction[30:25], ib.data_out.instruction[11:8]};
//----------------------------------------------------------------------------------
//----------------------------------------------------------------------------------
//CSR unit inputs
//----------------------------------------------------------------------------------
//Request strobe to the CSR unit, asserted in the cycle the instruction issues
assign csr_ex.new_request_dec = issue[CSR_UNIT_ID];

//CSR unit inputs are captured on issue and held until the next CSR issue (no reset)
always_ff @(posedge clk) begin
    if (issue[CSR_UNIT_ID]) begin
        csr_inputs.csr_op <= fn3;
        csr_inputs.csr_addr <= ib.data_out.instruction[31:20];
        //immediate mode or rs1_addr reg: immediate forms pass the zero-extended
        //5-bit rs1 field, register forms pass the rs1 register value
        csr_inputs.rs1 <= csr_imm_op ? {27'b0, rs1_addr} : rf_decode.rs1_data;
    end
end
//----------------------------------------------------------------------------------
//----------------------------------------------------------------------------------
//Mul Div unit inputs
//----------------------------------------------------------------------------------
//Multiplier inputs are passed through combinationally; op is the low two fn3 bits
assign mul_ex.new_request_dec = issue[MUL_UNIT_ID];
assign mul_inputs.rs1 = rf_decode.rs1_data;
assign mul_inputs.rs2 = rf_decode.rs2_data;
assign mul_inputs.op = fn3[1:0];

//If a subsequent div request uses the same inputs then
//don't rerun div operation
//Source register addresses of the most recently issued div request
always_ff @(posedge clk) begin
    if (issue[DIV_UNIT_ID]) begin
        prev_div_rs1_addr <= rs1_addr;
        prev_div_rs2_addr <= rs2_addr;
    end
end

//prev_div_result_valid indicates that the registers named by prev_div_rs1_addr and
//prev_div_rs2_addr still hold the operands of the last division.
//  - On a div issue the flag is set only if the div does not overwrite one of its own
//    sources. It must be actively cleared otherwise: the prev addresses are replaced
//    in the same cycle, so leaving an earlier '1' in place would let a following div
//    on the same registers reuse a result computed from the overwritten operand.
//  - Any other issued instruction that writes one of the tracked sources clears it.
always_ff @(posedge clk) begin
    if (rst)
        prev_div_result_valid <= 0;
    else if (issue[DIV_UNIT_ID])
        prev_div_result_valid <= !(rs1_addr == future_rd_addr || rs2_addr == future_rd_addr);
    else if (advance && uses_rd && (prev_div_rs1_addr == future_rd_addr || prev_div_rs2_addr == future_rd_addr))
        prev_div_result_valid <= 0;
end

//Divider inputs are passed through combinationally
assign div_ex.new_request_dec = issue[DIV_UNIT_ID];
assign div_inputs.rs1 = rf_decode.rs1_data;
assign div_inputs.rs2 = rf_decode.rs2_data;
assign div_inputs.op = fn3[1:0];
//Reuse is requested when the tracked operands are intact and both source registers match
assign div_inputs.reuse_result = prev_div_result_valid && (prev_div_rs1_addr == rs1_addr) && (prev_div_rs2_addr == rs2_addr);
assign div_inputs.div_zero = (rf_decode.rs2_data == 0);
//----------------------------------------------------------------------------------
//Registered copy of each unit's issue strobe (new_request follows issue by one cycle).
//The reset assignments come last so that they override the defaults while rst is asserted.
always_ff @(posedge clk) begin
    branch_ex.new_request <= issue[BRANCH_UNIT_ID];
    alu_ex.new_request <= issue[ALU_UNIT_ID];
    ls_ex.new_request <= issue[LS_UNIT_ID];
    csr_ex.new_request <= issue[CSR_UNIT_ID];
    mul_ex.new_request <= issue[MUL_UNIT_ID];
    div_ex.new_request <= issue[DIV_UNIT_ID];

    if (rst) begin
        branch_ex.new_request <= 0;
        alu_ex.new_request <= 0;
        ls_ex.new_request <= 0;
        csr_ex.new_request <= 0;
        mul_ex.new_request <= 0;
        div_ex.new_request <= 0;
    end
end
endmodule

147
core/div_unit.sv Normal file
View file

@ -0,0 +1,147 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//Divide unit.
//Requests are buffered in an input FIFO, processed one at a time by the selected
//divider core and the results are buffered in an output FIFO for writeback.
//Division by zero and result reuse complete without starting the divider core.
module div_unit(
        input logic clk,
        input logic rst,
        func_unit_ex_interface.unit div_ex,
        input div_inputs_t div_inputs,
        unit_writeback_interface.unit div_wb
        );

    logic div_complete;
    logic div_done;

    logic [31:0] quotient;
    logic [31:0] remainder;
    logic [31:0] result;

    logic signed_divop;
    logic quotient_signed;
    logic remainder_signed;
    logic dividend_signed;
    logic divisor_signed;

    logic div_abort;

    logic start;
    logic in_progress;

    logic [31:0] complementerA;
    logic [31:0] complementerB;

    logic negateA;
    logic negateB;

    logic [31:0] inA;
    logic [31:0] inB;

    logic [31:0] div_result_muxed;
    logic [31:0] div_result_sign_corrected;
    logic [31:0] wb_div_result;

    div_inputs_t stage1;

    fifo_interface #(.DATA_WIDTH($bits(div_inputs_t))) input_fifo();
    fifo_interface #(.DATA_WIDTH(XLEN)) wb_fifo();

    /*********************************
     *  Input FIFO
     *********************************/
    lutram_fifo #(.DATA_WIDTH($bits(div_inputs_t)), .FIFO_DEPTH(DIV_INPUT_BUFFER_DEPTH)) div_input_fifo (.fifo(input_fifo), .*);

    assign input_fifo.data_in = div_inputs;
    assign input_fifo.push = div_ex.new_request_dec;
    assign div_ex.ready = ~input_fifo.full;
    //The request at the head of the FIFO is only removed once its result has been produced
    assign input_fifo.pop = div_done;
    assign stage1 = input_fifo.data_out;
    /*********************************************/

    assign start = input_fifo.valid & ( ~in_progress);
    //Abort prevents divider circuit from starting in the case that we are done in one cycle
    assign div_abort = input_fifo.valid & (stage1.div_zero | stage1.reuse_result);

    //A result is only handed over when the output FIFO has room for it
    assign div_done = (div_complete | div_abort) & ~wb_fifo.full;

    //If more than one cycle, set in_progress so that multiple start signals are not sent to the div unit.  Also in progress if an abort occurs but the output FIFO is full
    always_ff @(posedge clk) begin
        if (rst) begin
            in_progress <= 0;
        end else if (start & ((div_abort & wb_fifo.full) | (~div_abort))) begin
            in_progress <= 1;
        end else if (div_done) begin
            in_progress <= 0;
        end
    end

    //Input and output sign determination
    //op[0] clear selects the signed variants
    assign signed_divop = ~stage1.op[0];

    assign dividend_signed = signed_divop & stage1.rs1[31];
    assign divisor_signed = signed_divop & stage1.rs2[31];

    //Quotient is negative when operand signs differ; remainder takes the dividend's sign
    assign quotient_signed = signed_divop & (stage1.rs1[31] ^ stage1.rs2[31]);
    assign remainder_signed = signed_divop & (stage1.rs1[31]);

    // Shared adders for sign conversion of inputs and outputs as they never occur on the same cycle
    //(div_complete | stage1.reuse_result) instead of div_done as other signals are not relevant for sign conversion
    //************
    assign inA = (div_complete | stage1.reuse_result) ? quotient : stage1.rs1;
    assign inB = (div_complete | stage1.reuse_result) ? remainder : stage1.rs2;
    assign negateA = (div_complete | stage1.reuse_result) ? quotient_signed : dividend_signed;
    assign negateB = (div_complete | stage1.reuse_result) ? remainder_signed : divisor_signed;
    //Conditional two's complement: invert and add one when negate is set
    assign complementerA = (negateA ? ~inA : inA) + negateA;
    assign complementerB = (negateB ? ~inB : inB) + negateB;
    //*************

    //Synthesis time algorithm choice for divider
    generate
        if(USE_VARIABLE_LATENCY_DIV)
            quickdiv #(XLEN) div (.*, .start(start & ~div_abort), .A(complementerA), .B(complementerB), .Q(quotient), .R(remainder), .complete(div_complete), .ack(div_done));
        else
            normdiv #(XLEN) div (.*, .start(start & ~div_abort), .A(complementerA), .B(complementerB), .Q(quotient), .R(remainder), .complete(div_complete), .ack(div_done));
    endgenerate

    //Output muxing
    //Division by zero returns all ones for the quotient and the dividend for the remainder.
    //Blocking assignments are used as this is combinational logic.
    always_comb begin
        case (stage1.op)
            DIV_fn3[1:0] : div_result_muxed = stage1.div_zero ? '1 : complementerA;
            DIVU_fn3[1:0] : div_result_muxed = stage1.div_zero ? '1 : complementerA;
            REM_fn3[1:0] : div_result_muxed = stage1.div_zero ? stage1.rs1 : complementerB;
            REMU_fn3[1:0] : div_result_muxed = stage1.div_zero ? stage1.rs1 : complementerB;
        endcase
    end

    /*********************************
     *  Output FIFO
     *********************************/
    lutram_fifo #(.DATA_WIDTH(XLEN), .FIFO_DEPTH(DIV_OUTPUT_BUFFER_DEPTH)) output_fifo (.fifo(wb_fifo), .*);

    assign wb_fifo.data_in = div_result_muxed;
    assign wb_fifo.push = div_done;
    assign wb_fifo.pop = div_wb.accepted;
    assign div_wb.rd = wb_fifo.data_out;
    /*********************************************/

    assign div_wb.done = wb_fifo.valid;
    assign div_wb.early_done = wb_fifo.early_valid;//div_done | (div_wb.done & ~div_wb.accepted);

endmodule

110
core/dtag_banks.sv Normal file
View file

@ -0,0 +1,110 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//Data cache tag storage: one tag bank per way.
//Port A of each bank serves the pipeline lookup (stage1 address, optional invalidate write).
//Port B is shared between tag updates for stage2_addr and external invalidation lookups
//for inv_addr; updates take priority over external invalidations.
module dtag_banks(
        input logic clk,
        input logic rst,

        input logic[31:0] stage1_addr,
        input logic[31:0] stage2_addr,
        input logic[31:0] inv_addr,

        input logic[0:DCACHE_WAYS-1] update_way,
        input logic update,

        input logic stage1_adv,
        input logic stage1_inv,

        input logic extern_inv,
        output logic extern_inv_complete,

        output tag_hit,
        output logic[0:DCACHE_WAYS-1] tag_hit_way
        );

    //Stored entry: valid bit (MSB) followed by the tag
    typedef logic [DCACHE_TAG_W : 0] dtag_entry_t;

    //Upper DCACHE_TAG_W address bits
    function logic[DCACHE_TAG_W-1:0] getTag(logic[31:0] addr);
        return addr[31 : 32 - DCACHE_TAG_W];
    endfunction

    //Line index bits, located above the sub-line and byte offset bits
    function logic[DCACHE_LINE_ADDR_W-1:0] getLineAddr(logic[31:0] addr);
        return addr[DCACHE_LINE_ADDR_W + DCACHE_SUB_LINE_ADDR_W + 1 : DCACHE_SUB_LINE_ADDR_W + 2];
    endfunction

    dtag_entry_t tag_line[DCACHE_WAYS - 1:0];
    dtag_entry_t inv_tag_line[DCACHE_WAYS - 1:0];

    dtag_entry_t stage2_tag;
    dtag_entry_t new_tag;

    logic miss_or_extern_invalidate;
    logic [DCACHE_WAYS - 1:0] update_tag_way;

    logic inv_tags_accessed;

    logic[0:DCACHE_WAYS-1] inv_hit_way;
    logic[0:DCACHE_WAYS-1] inv_hit_way_r;

    logic [DCACHE_LINE_ADDR_W-1:0] update_port_addr;

    //Entry compared against on lookup (valid bit set) and entry written on port B.
    //new_tag has its valid bit clear when the write is an external invalidation.
    assign stage2_tag = {1'b1, getTag(stage2_addr)};
    assign new_tag = {update, getTag(stage2_addr)};

    //Port B control: enabled for updates and external invalidations, update address wins
    assign miss_or_extern_invalidate = update | extern_inv;
    assign update_port_addr = update ? getLineAddr(stage2_addr) : getLineAddr(inv_addr);

    //Set one cycle after an external invalidation had access to port B
    always_ff @ (posedge clk) begin
        inv_tags_accessed <= extern_inv & ~update;
        if (rst)
            inv_tags_accessed <= 0;
    end

    //Invalidation completes on the second consecutive cycle it owns port B
    assign extern_inv_complete = (extern_inv & ~update) & inv_tags_accessed;

    genvar way;
    generate
        for (way = 0; way < DCACHE_WAYS; way++) begin : tag_bank_gen
            //Lookup hit for the pipeline and for the external invalidation address
            assign tag_hit_way[way] = (stage2_tag == tag_line[way]);
            assign inv_hit_way[way] = ({1'b1, getTag(inv_addr)} == inv_tag_line[way]);

            //A way is written on an update to it, or when a completing invalidation hit it
            assign update_tag_way[way] = update_way[way] | (inv_hit_way[way] & extern_inv_complete);

            tag_bank #(DCACHE_TAG_W+1, DCACHE_LINES) dtag_bank (.*,
                    .en_a(stage1_adv), .wen_a(stage1_inv),
                    .addr_a(getLineAddr(stage1_addr)),
                    .data_in_a('0), .data_out_a(tag_line[way]),

                    .en_b(miss_or_extern_invalidate), .wen_b(update_tag_way[way]),
                    .addr_b(update_port_addr),
                    .data_in_b(new_tag), .data_out_b(inv_tag_line[way])
                );
        end
    endgenerate

    assign tag_hit = |tag_hit_way;

endmodule

225
core/fetch.sv Normal file
View file

@ -0,0 +1,225 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//Instruction fetch stage.
//Generates the fetch pc (sequential, predicted, or redirected on a flush), translates
//it through the TLB, issues the request to the scratch BRAM and/or instruction cache
//and pushes returned instructions, with early-decoded register usage flags, into the
//instruction buffer.
module fetch(
        input logic clk,
        input logic rst,

        input logic dec_advance,
        input logic exception,

        branch_table_interface.fetch bt,
        ras_interface.fetch ras,

        tlb_interface.mem tlb,
        bram_interface.user instruction_bram,
        input logic icache_on,
        l1_arbiter_request_interface.requester l1_request,
        l1_arbiter_return_interface.requester l1_response,

        instruction_buffer_interface.fetch ib,

        output logic[31:0] if2_pc,
        output logic flush

        );

    localparam BRAM_ID = 0;
    localparam ICACHE_ID = 1;

    fetch_sub_unit_interface fetch_sub[1:0]();

    logic cache_access;
    logic bram_access;

    logic mem_ready;

    logic [31:0] offset;
    logic [31:0] next_pc_source;
    logic [31:0] next_pc;
    logic [31:0] if_pc;
    logic stage1_prediction;

    logic new_mem_request;
    logic fetch_flush;
    logic new_issue;
    logic mem_valid;

    logic delayed_flush;

    logic [31:0] stage2_phys_address;
    logic stage2_valid;
    logic stage2_prediction;
    logic stage2_cache_access;

    logic pc_valid;

    logic[6:0] opcode;
    logic[2:0] fn3;

    logic csr_imm_op;
    logic sys_op;
    logic jal_jalr_x0;

    assign flush = bt.flush | exception;

    //pc_valid is low during reset and high from the first cycle after reset is released
    always_ff @(posedge clk) begin
        if (rst) begin
            pc_valid <= 0;
        end else begin
            pc_valid <= 1;
        end
    end

    assign bt.next_pc_valid = pc_valid;

    //Fetch PC
    //Updated whenever a new request is made or a flush redirects the fetch stream.
    //The two low pc bits are forced to zero.
    always_ff @(posedge clk) begin
        if (rst) begin
            if_pc <= RESET_VEC;
            stage1_prediction <= 0;
        end
        else if (new_mem_request | flush) begin
            if_pc <= {next_pc[31:2], 2'b0};
            stage1_prediction <= bt.use_prediction & bt.prediction;
        end
    end

    //Next pc selection, highest priority first:
    //exception, branch flush (resolved target), prediction (RAS or branch table), pc + 4
    always_comb begin
        if (exception)
            next_pc = RESET_VEC;
        else if (bt.flush)
            next_pc = bt.branch_taken ? bt.jump_pc : bt.njump_pc;
        else if (bt.use_prediction) begin
            if (bt.use_ras & ras.valid)
                next_pc = ras.addr;
            else
                next_pc = bt.predicted_pc;
        end
        else
            next_pc = if_pc + 4;
    end

    assign bt.new_mem_request = new_mem_request | bt.flush;
    assign bt.next_pc = next_pc;

    assign if2_pc = if_pc;
    assign bt.if_pc = if_pc;
    /*************************************
     * TLB
     *************************************/
    assign tlb.virtual_address = if_pc;
    assign tlb.execute = 1;
    assign tlb.rnw = 0;

    //stage2_valid is set while a request is outstanding in the second stage and is
    //cleared once its instruction has been issued to the instruction buffer.
    //Reset has priority: without the else, a request seen during reset would override it.
    always_ff @(posedge clk) begin
        if (rst)
            stage2_valid <= 0;
        else if (new_mem_request)
            stage2_valid <= new_mem_request;
        else if (new_issue)
            stage2_valid <= 0;
    end

    //Second stage copies of the request attributes
    always_ff @(posedge clk) begin
        if (new_mem_request) begin
            stage2_phys_address <= tlb.physical_address;
            stage2_prediction <= stage1_prediction;//not taken if no valid prediction
            stage2_cache_access <= cache_access;
        end
    end

    //////////////////////////////////////////////

    //Cache check done before cache access
    assign cache_access = tlb.physical_address[31:32-MEMORY_BIT_CHECK] == MEMORY_ADDR_L[31:32-MEMORY_BIT_CHECK];
    //BRAM check can be done a cycle later, can be used for address checking
    assign bram_access = stage2_phys_address[31:32-SCRATCH_BIT_CHECK] == SCRATCH_ADDR_L[31:32-SCRATCH_BIT_CHECK];

    assign mem_ready = fetch_sub[ICACHE_ID].ready;

    assign fetch_flush = (bt.flush | exception);

    //A new request is made when translation is complete, no flush is occurring, the
    //memory is ready and the instruction buffer has room. The stricter early_full
    //check is used while a previous request is still outstanding in stage 2.
    assign new_mem_request = pc_valid & tlb.complete & ~fetch_flush & ((stage2_valid & ~ib.early_full) | (~stage2_valid & ~ib.full)) & mem_ready;

    //The BRAM is accessed on every request; the cache only for addresses in its range
    assign fetch_sub[BRAM_ID].new_request = new_mem_request;
    assign fetch_sub[ICACHE_ID].new_request = new_mem_request & cache_access;

    assign fetch_sub[BRAM_ID].stage1_addr = tlb.physical_address;
    assign fetch_sub[ICACHE_ID].stage1_addr = tlb.physical_address;

    assign fetch_sub[BRAM_ID].stage2_addr = stage2_phys_address;
    assign fetch_sub[ICACHE_ID].stage2_addr = stage2_phys_address;

    //Memory interfaces
    generate if (USE_SCRATCH_MEM)
            ibram i_bram (.*, .fetch_sub(fetch_sub[BRAM_ID]));
        else
            assign fetch_sub[BRAM_ID].ready = 1;
    endgenerate
    generate if (USE_ICACHE)
            icache i_cache (.*, .fetch_sub(fetch_sub[ICACHE_ID]));
        else
            assign fetch_sub[ICACHE_ID].ready = 1;
    endgenerate

    //TODO potentially move support into cache so that we're not stalled on a request we no longer need due to a flush
    //If the cache is processing a miss when a flush occurs we need to discard the result once complete
    always_ff @(posedge clk) begin
        if (rst)
            delayed_flush <= 0;
        else if ((bt.flush | exception) & stage2_cache_access & ~fetch_sub[ICACHE_ID].data_valid)//& ~fetch_sub[ICACHE_ID].ready
            delayed_flush <= 1;
        else if (fetch_sub[ICACHE_ID].data_valid)
            delayed_flush <= 0;
    end

    //Returned data is discarded during a flush or while a flushed cache request is pending
    assign mem_valid = ~(bt.flush | exception | delayed_flush);
    assign new_issue = mem_valid & ((fetch_sub[BRAM_ID].data_valid & ~stage2_cache_access) | fetch_sub[ICACHE_ID].data_valid);
    assign ib.push = new_issue;
    //NOTE(review): the buffer is flushed on bt.flush only, whereas the flush output
    //also includes exception - confirm this is intended
    assign ib.flush = bt.flush;

    //Select the instruction word from the sub-unit that serviced the stage 2 request
    assign ib.data_in.instruction =
        ({32{~stage2_cache_access}} & fetch_sub[BRAM_ID].data_out) |
        ({32{stage2_cache_access}} & fetch_sub[ICACHE_ID].data_out);

    //NOTE(review): the pc passed on is the translated (physical) address - confirm
    assign ib.data_in.pc = stage2_phys_address;
    assign ib.data_in.prediction = stage2_prediction;

    //Early decode
    //Register usage flags are derived here and stored with the instruction
    assign fn3 =ib.data_in.instruction[14:12];
    assign opcode = ib.data_in.instruction[6:0];

    assign csr_imm_op = (opcode == SYSTEM) && fn3[2];
    assign sys_op = (opcode == SYSTEM) && (fn3 == 0);

    //JAL/JALR with rd == x0 do not write a destination register
    assign jal_jalr_x0 = ((opcode == JAL) || (opcode == JALR)) && (ib.data_in.instruction[11:7] == 0);

    assign ib.data_in.uses_rs1 = !((opcode == LUI) || (opcode == AUIPC) || (opcode == JAL) || (opcode == FENCE) || csr_imm_op || sys_op);
    assign ib.data_in.uses_rs2 = ((opcode == BRANCH) || (opcode == STORE) || (opcode == ARITH) || (opcode == AMO));
    assign ib.data_in.uses_rd = !((opcode == BRANCH) || (opcode == STORE) || (opcode == FENCE) || sys_op || jal_jalr_x0);

endmodule

59
core/ibram.sv Normal file
View file

@ -0,0 +1,59 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//Instruction fetch sub-unit backed by a block RAM.
//Always ready; read data is flagged valid in the cycle after each request.
module ibram(
        input logic clk,
        input logic rst,

        fetch_sub_unit_interface.sub_unit fetch_sub,
        bram_interface.user instruction_bram
        );

    logic stage2_adv;
    logic address_range_valid;

    //The BRAM can accept a request every cycle
    assign fetch_sub.ready = 1;

    //One cycle delayed copy of the request strobe (reset overrides the default)
    always_ff @ (posedge clk) begin
        stage2_adv <= fetch_sub.new_request;
        if (rst) begin
            stage2_adv <= 0;
        end
    end

    //Read-only port: word address from the stage 1 address, no byte enables, no write data
    assign instruction_bram.en = fetch_sub.new_request;
    assign instruction_bram.addr = fetch_sub.stage1_addr[31:2];
    assign instruction_bram.be = '0;
    assign instruction_bram.data_in = '0;

    assign fetch_sub.data_out = instruction_bram.data_out;

    //data_valid is high for exactly the cycle following a request
    always_ff @ (posedge clk) begin
        if (fetch_sub.new_request)
            fetch_sub.data_valid <= 1;
        else
            fetch_sub.data_valid <= 0;

        if (rst)
            fetch_sub.data_valid <= 0;
    end

endmodule

200
core/icache.sv Normal file
View file

@ -0,0 +1,200 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//Instruction cache fetch sub-unit.
//A request reads the tag and data banks with the stage 1 address. In the following
//cycle (second_cycle) the tags are compared against the stage 2 address: on a hit
//with the cache enabled the data is returned, otherwise a line request is sent to
//the L1 arbiter and the requested word is captured from the returned line.
module icache(
input logic clk,
input logic rst,
input logic icache_on,
l1_arbiter_request_interface.requester l1_request,
l1_arbiter_return_interface.requester l1_response,
fetch_sub_unit_interface.sub_unit fetch_sub
);
logic tag_hit;
logic [ICACHE_WAYS-1:0] tag_hit_way;
logic tag_update;
logic [ICACHE_WAYS-1:0] replacement_way;
logic [ICACHE_WAYS-1:0] tag_update_way;
logic [$clog2(ICACHE_LINE_W)-1:0] word_count;
logic line_complete;
logic [31:0] data_out [ICACHE_WAYS-1:0];
logic [31:0] miss_data;
logic miss;
logic second_cycle;
logic idle;
logic memory_complete;
logic hit_allowed;
/*************************************
* General Control Logic
*************************************/
//A hit may only be reported in the cycle after a request, and only with the cache enabled
always_ff @ (posedge clk) begin
if (rst) begin
hit_allowed <= 0;
end
else begin
hit_allowed <= fetch_sub.new_request & icache_on;
end
end
//Marks the cycle after a request, in which the tag comparison result is available
always_ff @ (posedge clk) begin
if (rst)
second_cycle <= 0;
else
second_cycle <= fetch_sub.new_request;
end
//Set on a tag miss and held until the memory request completes; gates miss_data onto the output
always_ff @ (posedge clk) begin
if (rst | memory_complete)
miss <= 0;
else if (second_cycle)
miss <= ~tag_hit;
end
//Single cycle tag write strobe following a miss with the cache enabled
always_ff @ (posedge clk) begin
if (rst)
tag_update <= 0;
else if (second_cycle)
tag_update <= icache_on & ~tag_hit; //Cache enabled, read miss
else
tag_update <= 0;
end
/*************************************
* L1 Arbiter Interface
*************************************/
//Read-only requests for the stage 2 address; size is ICACHE_LINE_W-1
assign l1_request.addr = fetch_sub.stage2_addr;
assign l1_request.data = 0;
assign l1_request.rnw = 1;
assign l1_request.be = 0;
assign l1_request.size = (ICACHE_LINE_W-1);
assign l1_request.is_amo = 0;
assign l1_request.amo = 0;
//Counts the words returned by the arbiter; used as the sub-line write address
always_ff @ (posedge clk) begin
if (rst)
word_count <= 0;
else if (l1_response.data_valid)
word_count <= word_count + 1;
end
//request registered
//Raised on a miss, or on every access while the cache is disabled; held until acknowledged
always_ff @ (posedge clk) begin
if (rst)
l1_request.request <= 0;
else if (second_cycle)
l1_request.request <= ~tag_hit | ~icache_on;
else if (l1_request.ack)
l1_request.request <= 0;
end
/*************************************
* Cache Components
*************************************/
//Free running one hot cycler.
cycler #(ICACHE_WAYS) replacement_policy (.*, .en(1'b1), .one_hot(replacement_way));
//The way to replace is sampled in the tag comparison cycle and held for the line fill
always_ff @ (posedge clk) begin
if (second_cycle) begin
tag_update_way<= replacement_way;
end
end
//Tag banks
//Remaining ports are connected by name through .* (includes tag_hit and tag_hit_way
//if the tag bank module exposes ports with those names)
itag_banks icache_tag_banks (.*,
.stage1_addr(fetch_sub.stage1_addr),
.stage2_addr(fetch_sub.stage2_addr),
.update_way(tag_update_way),
.update(tag_update),
.stage1_adv(fetch_sub.new_request)
);
//Data Banks
//Port A: read for the fetch request. Port B: line fill writes to the selected way.
genvar i;
generate
for (i=0; i < ICACHE_WAYS; i=i+1) begin : data_bank_gen
byte_en_BRAM #(ICACHE_LINES*ICACHE_LINE_W) data_bank (
.clk(clk),
.addr_a(fetch_sub.stage1_addr[ICACHE_LINE_ADDR_W+ICACHE_SUB_LINE_ADDR_W+2-1:2]),
.addr_b({fetch_sub.stage2_addr[ICACHE_LINE_ADDR_W+ICACHE_SUB_LINE_ADDR_W+2-1:ICACHE_SUB_LINE_ADDR_W+2], word_count}),
.en_a(fetch_sub.new_request),
.en_b(tag_update_way[i] & l1_response.data_valid),
.be_a('0),
.be_b({4{l1_response.data_valid}}),
.data_in_a('0),
.data_in_b(l1_response.data),
.data_out_a(data_out[i]),
.data_out_b()
);
end
endgenerate
/*************************************
* Output Muxing
*************************************/
//Capture the requested word as it passes by during the line fill
always_ff @ (posedge clk) begin
if (l1_response.data_valid && fetch_sub.stage2_addr[ICACHE_SUB_LINE_ADDR_W+1:2] == word_count)
miss_data <= l1_response.data;
end
//AND-OR mux: the captured miss word, or the data of whichever way hit
always_comb begin
fetch_sub.data_out = miss_data & {32{miss}};
for (int i =0; i < ICACHE_WAYS; i++) begin
fetch_sub.data_out = fetch_sub.data_out | (data_out[i] & {32{tag_hit_way[i]}});
end
end
/*************************************
* Pipeline Advancement
*************************************/
//The last word of the line has been returned
assign line_complete = (l1_response.data_valid && (word_count == (ICACHE_LINE_W-1)));
//Single cycle pulse in the cycle after the line fill completes
always_ff @ (posedge clk) begin
if (rst)
memory_complete <= 0;
else if (fetch_sub.new_request | memory_complete)
memory_complete <= 0;
else if (line_complete) //line fill from memory complete
memory_complete <= 1;
end
//Data is valid after a completed line fill or on an allowed tag hit
assign fetch_sub.data_valid = memory_complete | (hit_allowed & tag_hit);
//Ready for a new request when the current one completes this cycle or nothing is outstanding
assign fetch_sub.ready = (hit_allowed & tag_hit) | memory_complete | idle;//~(second_cycle & ~tag_hit) & ~miss;
//idle is cleared by a request and set again once that request has completed
always_ff @ (posedge clk) begin
if (rst)
idle <= 1;
else if (fetch_sub.new_request)
idle <= 0;
else if (memory_complete | (hit_allowed & tag_hit)) //line fill complete OR cache hit
idle <= 1;
end
endmodule

65
core/id_generator.sv Normal file
View file

@ -0,0 +1,65 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//Instruction ID allocator.
//Tracks which IDs are in use, offers a free ID (or the ID completing this cycle)
//for the next issued instruction and reports whether any ID is available.
module id_generator (
        input logic clk,
        input logic rst,

        id_generator_interface.generator id_gen
        );

    logic inuse [0:INFLIGHT_QUEUE_DEPTH-1];

    //An ID becomes in use when it is issued and is released when it completes.
    //Issue takes priority so that an ID completing and being reissued in the same
    //cycle stays in use. Reset has priority over both (the reset branch was
    //previously not chained with an else, letting an issue override the reset).
    always_ff @ (posedge clk) begin
        for (int i=0; i <INFLIGHT_QUEUE_DEPTH; i=i+1) begin
            //foreach(inuse[i]) begin
            if(rst)
                inuse[i] <= 0;
            else begin
                if(id_gen.advance && id_gen.issue_id == i)
                    inuse[i] <= 1;
                else if (id_gen.complete && id_gen.complete_id == i)
                    inuse[i] <= 0;
            end
        end
    end

    //Default to the completing ID; any free ID overrides it. Later loop iterations
    //overwrite earlier ones, so the highest-numbered free ID is selected.
    always_comb begin
        id_gen.issue_id = id_gen.complete_id;
        for (int i=0; i <INFLIGHT_QUEUE_DEPTH; i=i+1) begin
            //foreach(inuse[i]) begin
            if(~inuse[i])
                id_gen.issue_id = i;
        end
    end

    //An ID is available if one is completing this cycle or any ID is not in use
    always_comb begin
        id_gen.id_avaliable = id_gen.complete;
        for (int i=0; i <INFLIGHT_QUEUE_DEPTH; i=i+1) begin
            //foreach(inuse[i]) begin
            if(~inuse[i])
                id_gen.id_avaliable = 1;
        end
    end

endmodule

79
core/inflight_queue.sv Normal file
View file

@ -0,0 +1,79 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//No protection on push to full queue or pop from empty
//Shift-register queue of in-flight instruction packets.
//New entries enter at index 0 and move towards higher indices. Entries can be popped
//from any position; the entries behind a popped or empty slot shift forward to fill it.
module inflight_queue
        (
        input logic clk,
        input logic rst,
        input logic instruction_complete,
        inflight_queue_interface.queue iq
        );

    logic[$bits(inflight_queue_packet)-1:0] shift_reg[INFLIGHT_QUEUE_DEPTH-1:0];

    //implementation
    //shift_pop[i] is set when slot i may be overwritten: the slot itself, or any slot
    //further along the queue, is being popped or is empty.
    //All bits are written from this single always_comb block. Writing the top bit from
    //a separate continuous assignment conflicts with the always_comb single-writer
    //rule, as the loop index below is not a static index.
    always_comb begin
        iq.shift_pop[INFLIGHT_QUEUE_DEPTH-1] = iq.pop[INFLIGHT_QUEUE_DEPTH-1] | ~iq.valid[INFLIGHT_QUEUE_DEPTH-1];
        for (int i=INFLIGHT_QUEUE_DEPTH-2; i >=0; i--) begin
            iq.shift_pop[i] = iq.shift_pop[i+1] | (iq.pop[i] | ~iq.valid[i]);
        end
    end

    //Entry slot: takes the new issue whenever it is free to be overwritten
    always_ff @ (posedge clk) begin
        if (rst)
            iq.valid[0] <= 0;
        else if (iq.shift_pop[0])
            iq.valid[0] <= iq.new_issue;
    end

    always_ff @ (posedge clk) begin
        if (iq.shift_pop[0])
            shift_reg[0] <= iq.data_in;
    end

    //Remaining slots take the previous slot's entry, unless that entry is being popped
    genvar i;
    generate
        for (i=1 ; i < INFLIGHT_QUEUE_DEPTH; i++) begin : iq_valid_g
            always_ff @ (posedge clk) begin
                if (rst)
                    iq.valid[i] <= 0;
                else if (iq.shift_pop[i]) begin
                    iq.valid[i] <= iq.valid[i-1] & ~iq.pop[i-1];
                end
            end
        end
    endgenerate

    //Data portion
    assign iq.data_out[0] = shift_reg[0];
    generate
        for (i=1 ; i < INFLIGHT_QUEUE_DEPTH; i++) begin : shift_reg_gen
            assign iq.data_out[i] = shift_reg[i];

            always_ff @ (posedge clk) begin
                if (iq.shift_pop[i])
                    shift_reg[i] <= shift_reg[i-1];
            end
        end
    endgenerate

endmodule

View file

@ -0,0 +1,76 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//Circular buffer for instruction buffer. Isolates push and pop signals so that critical paths can be separated
//Instruction buffer implemented as a circular buffer with separate read and write
//indices. Occupancy is tracked with a one-hot vector (count_v[n] set means n entries
//are stored) so that the full/early_full/valid flags are simple bit selects.
module instruction_buffer
        (
        input logic clk,
        input logic rst,
        instruction_buffer_interface.buffer ib
        );

    logic[$bits(instruction_buffer_packet)-1:0] shift_reg[FETCH_BUFFER_DEPTH-1:0];
    logic[$bits(instruction_buffer_packet)-1:0] shift_reg_in;
    instruction_buffer_packet shift_reg_out;

    logic [$clog2(FETCH_BUFFER_DEPTH)-1:0] write_index;
    logic [$clog2(FETCH_BUFFER_DEPTH)-1:0] read_index;

    logic count_v [FETCH_BUFFER_DEPTH:0];

    //implementation
    //Indices advance by one on pop/push; reset or flush (applied last) returns both to zero
    always_ff @ (posedge clk) begin
        read_index <= read_index + ib.pop;
        write_index <= write_index + ib.push;

        if (rst | ib.flush) begin
            write_index <= 0;
            read_index <= 0;
        end
    end

    //Status flags taken directly from the one-hot occupancy vector
    assign ib.valid = ~count_v[0];
    assign ib.full = count_v[FETCH_BUFFER_DEPTH];
    assign ib.early_full = count_v[FETCH_BUFFER_DEPTH-1] | count_v[FETCH_BUFFER_DEPTH];

    //One-hot occupancy: moves up one position on a push without a pop, down one
    //position on a pop without a push, and is unchanged otherwise
    always_ff @ (posedge clk) begin
        if (rst | ib.flush) begin
            //Empty: only position zero is set
            for (int n = 0; n <= FETCH_BUFFER_DEPTH; n++)
                count_v[n] <= (n == 0);
        end
        else if (ib.push & ~ib.pop) begin
            count_v[0] <= 1'b0;
            for (int n = 1; n <= FETCH_BUFFER_DEPTH; n++)
                count_v[n] <= count_v[n-1];
        end
        else if (~ib.push & ib.pop) begin
            count_v[FETCH_BUFFER_DEPTH] <= 1'b0;
            for (int n = 0; n < FETCH_BUFFER_DEPTH; n++)
                count_v[n] <= count_v[n+1];
        end
    end

    //Storage: written at write_index on a push, read combinationally at read_index
    always_ff @ (posedge clk) begin
        if (ib.push)
            shift_reg[write_index] <= ib.data_in;
    end

    assign ib.data_out = shift_reg[read_index];

endmodule

View file

@ -0,0 +1,72 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
// Dual-port RAM with per-byte write enables on both ports.
// Each port, on every rising clock edge:
//   - writes the bytes of data_in selected by be into ram[addr] when en is set
//   - registers ram[addr] onto data_out (the read is not gated by en)
// Parameters:
//   LINES            - number of words in the memory
//   preload_file     - hex file passed to $readmemh when USE_PRELOAD_FILE is non-zero
//   USE_PRELOAD_FILE - enables the initial $readmemh preload
// NOTE(review): the storage word is hard-coded as 4 bytes ([3:0][7:0]) and the byte
// writes index data_in[31:0], while the ports are sized with XLEN; this assumes
// XLEN == 32 - confirm against taiga_config.
module altera_byte_enable_ram #(
parameter LINES = 8192,
parameter preload_file = "",
parameter USE_PRELOAD_FILE = 0
)
(
input logic clk,
//Port A
input logic[$clog2(LINES)-1:0] addr_a,
input logic en_a,
input logic[XLEN/8-1:0] be_a,
input logic[XLEN-1:0] data_in_a,
output logic[XLEN-1:0] data_out_a,
//Port B
input logic[$clog2(LINES)-1:0] addr_b,
input logic en_b,
input logic[XLEN/8-1:0] be_b,
input logic[XLEN-1:0] data_in_b,
output logic[XLEN-1:0] data_out_b
);
//Word storage organised as four individually writable bytes.
//NOTE(review): "no_rw_check" is presumably the Quartus attribute that relaxes
//read-during-write checking for the inferred RAM - confirm in the vendor docs.
(* ramstyle = "no_rw_check" *) logic [3:0][7:0] ram [LINES-1:0];
//Optional preload of the full address range [0, LINES-1] from a hex file
initial
begin
if(USE_PRELOAD_FILE)
$readmemh(preload_file,ram, 0, LINES-1);
end
//Port A: byte-enabled write followed by an unconditional registered read.
//The write uses non-blocking assignments, so in simulation the read samples
//the word as it was before this edge's write.
always_ff @(posedge clk) begin
if (en_a) begin
if (be_a[0]) ram[addr_a][0] <= data_in_a[7:0];
if (be_a[1]) ram[addr_a][1] <= data_in_a[15:8];
if (be_a[2]) ram[addr_a][2] <= data_in_a[23:16];
if (be_a[3]) ram[addr_a][3] <= data_in_a[31:24];
end
data_out_a <= ram[addr_a];
end
//Port B: same structure as port A
always_ff @(posedge clk) begin
if (en_b) begin
if (be_b[0]) ram[addr_b][0] <= data_in_b[7:0];
if (be_b[1]) ram[addr_b][1] <= data_in_b[15:8];
if (be_b[2]) ram[addr_b][2] <= data_in_b[23:16];
if (be_b[3]) ram[addr_b][3] <= data_in_b[31:24];
end
data_out_b <= ram[addr_b];
end
endmodule

436
core/interfaces.sv Normal file
View file

@ -0,0 +1,436 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
import l2_config_and_types::*;
// Single-port block RAM connection with byte enables.
interface bram_interface;
    // Request side (driven through the user modport)
    logic [29:0] addr;
    logic en;
    logic [XLEN/8-1:0] be;
    logic [XLEN-1:0] data_in;
    // Response side (driven through the bram modport)
    logic [XLEN-1:0] data_out;

    // Memory side: receives the request, drives the read data
    modport bram (
        input addr, en, be, data_in,
        output data_out
    );

    // Requesting side: drives the request, receives the read data
    modport user (
        output addr, en, be, data_in,
        input data_out
    );
endinterface
// Signals shared between the branch table, fetch, decode and the branch unit.
interface branch_table_interface;
    // Program counters supplied to the branch table
    logic [31:0] if_pc;
    logic [31:0] dec_pc;
    logic [31:0] ex_pc;
    logic [31:0] jump_pc;
    logic [31:0] njump_pc;
    logic [31:0] next_pc;
    logic next_pc_valid;

    // Branch resolution information (driven via the branch_unit modport)
    logic branch_taken;
    logic branch_ex;
    logic prediction_dec;
    logic is_return_ex;

    logic new_mem_request;

    // Branch table outputs
    logic [31:0] predicted_pc;
    logic prediction;
    logic use_prediction;
    logic use_ras;
    logic flush;

    modport branch_table (
        input if_pc, dec_pc, ex_pc, next_pc, njump_pc, jump_pc,
              branch_taken, branch_ex, is_return_ex, prediction_dec,
              next_pc_valid, new_mem_request,
        output predicted_pc, prediction, use_prediction, use_ras, flush
    );

    modport fetch (
        input predicted_pc, prediction, use_prediction, branch_taken, flush,
              njump_pc, jump_pc, use_ras,
        output if_pc, next_pc, next_pc_valid, new_mem_request
    );

    modport decode (
        output dec_pc
    );

    modport branch_unit (
        output branch_taken, prediction_dec, branch_ex, is_return_ex,
               ex_pc, njump_pc, jump_pc
    );
endinterface
// Issue handshake between decode and a functional unit.
interface func_unit_ex_interface;
    logic new_request_dec;
    logic new_request;
    logic ready;

    // Decode side: observes ready, raises the request signals
    modport decode (
        input ready,
        output new_request_dec, new_request
    );

    // Functional unit side: reports ready, receives the request signals
    modport unit (
        output ready,
        input new_request_dec, new_request
    );
endinterface
// Return address stack (RAS) connections.
interface ras_interface;
    // Stack operations
    logic push;
    logic pop;
    logic [31:0] new_addr;
    // Stack output
    logic [31:0] addr;
    logic valid;

    // Branch unit issues push/pop and supplies the address to push
    modport branch_unit (
        output push, pop, new_addr
    );

    // The stack itself
    modport self (
        input push, pop, new_addr,
        output addr, valid
    );

    // Fetch reads the stack output
    modport fetch (
        input addr, valid
    );
endinterface
// SystemVerilog does not permit dynamic indexing of arrays of interfaces, so to mux between signals
// of an array of interfaces it is necessary to aggregate them in one interface and mux the aggregate signals instead.
// The units instantiate a generic interface whose signal names must match those below in the unit modport.
// An alternative would be to use structs instead.
//interface writeback_unit_interface;
// logic done [NUM_WB_UNITS-1:0];
// logic early_done [NUM_WB_UNITS-1:0];
//
// logic accepted [NUM_WB_UNITS-1:0];
// logic [XLEN-1:0] rd [NUM_WB_UNITS-1:0];
//
// genvar i;
// for (i=0; i< NUM_WB_UNITS; i++) begin : member
// modport unit (output .done(done[i]), .early_done(early_done[i]), .rd(rd[i]), input .accepted(accepted[i]));
// end
//
// modport writeback (input done, early_done, rd, output accepted);
//endinterface
//Dummy interface can be used to compile a single unit as top-level for development purposes
// Result hand-off from one functional unit to the writeback stage.
interface unit_writeback_interface;
    // Driven by the unit
    logic done;
    logic early_done;
    logic [XLEN-1:0] rd;
    // Driven by writeback
    logic accepted;

    modport writeback (
        input done, early_done, rd,
        output accepted
    );

    modport unit (
        output done, early_done, rd,
        input accepted
    );
endinterface
//********************************
// Exception reporting between exception control (econtrol) and the CSR unit.
interface csr_exception_interface;
    // Exception description, driven via the econtrol modport
    logic valid;
    exception_code_t code;
    logic [31:0] pc;
    logic [31:0] addr;

    // CSR unit responses
    logic illegal_instruction; //invalid CSR, invalid CSR op, or privilege
    logic [31:0] csr_pc;

    modport csr (
        input valid, code, pc, addr,
        output illegal_instruction, csr_pc
    );

    modport econtrol (
        output valid, code, pc, addr,
        input illegal_instruction, csr_pc
    );
endinterface
// Operands and control passed from decode to the CSR unit.
interface csr_inputs_interface;
    logic [XLEN-1:0] rs1;
    logic [11:0] csr_addr;
    logic [1:0] csr_op;
    logic system_op;    //fn3 == 3'b000
    logic zero_operand; //(rs1 == x0 or zimm[4:0] == 5'b00000) and RS/RC or RSI/RCI

    modport decode (
        output rs1, csr_addr, system_op, csr_op, zero_operand
    );

    modport unit (
        input rs1, csr_addr, system_op, csr_op, zero_operand
    );
endinterface
// Register file read port and conflict reporting as seen from decode.
interface register_file_decode_interface;
    // Addresses and issue information, driven by decode
    logic [4:0] future_rd_addr; //if not a storing instruction required to be zero
    logic [4:0] rs1_addr;
    logic [4:0] rs2_addr;       //if not used required to be zero
    instruction_id_t id;
    logic instruction_issued;

    // Register file responses
    logic [XLEN-1:0] rs1_data;
    logic [XLEN-1:0] rs2_data;
    logic rs1_conflict;
    logic rs2_conflict;
    logic rd_conflict;

    modport decode (
        output future_rd_addr, rs1_addr, rs2_addr, instruction_issued, id,
        input rs1_conflict, rs2_conflict, rd_conflict, rs1_data, rs2_data
    );

    modport unit (
        input future_rd_addr, rs1_addr, rs2_addr, instruction_issued, id,
        output rs1_conflict, rs2_conflict, rd_conflict, rs1_data, rs2_data
    );
endinterface
// Register file write port driven by the writeback stage. Address, valid and
// id each have a second "early" copy alongside the regular one.
interface register_file_writeback_interface;
    // Regular write information
    logic [4:0] rd_addr;
    logic valid_write;
    logic [XLEN-1:0] rd_data;
    instruction_id_t id;

    // Early copies
    logic [4:0] rd_addr_early;
    logic valid_write_early;
    instruction_id_t id_early;

    modport writeback (
        output rd_addr, rd_addr_early, valid_write, valid_write_early,
               rd_data, id, id_early
    );

    modport unit (
        input rd_addr, rd_addr_early, valid_write, valid_write_early,
              rd_data, id, id_early
    );
endinterface
// Exception information passed from writeback to the CSR unit.
interface exception_wb_interface_wb;
    logic exception;
    logic [3:0] exception_code;
    logic [31:0] exception_addr;

    modport wb (
        output exception, exception_code, exception_addr
    );

    modport csr (
        input exception, exception_code, exception_addr
    );
endinterface
// Connections to the in-flight instruction queue: decode inserts entries,
// writeback inspects every entry and pops them.
interface inflight_queue_interface;
    // New entry from decode
    logic new_issue;
    inflight_queue_packet data_in;

    // Per-entry state, one bit/packet per queue position
    logic [INFLIGHT_QUEUE_DEPTH-1:0] pop;
    logic [INFLIGHT_QUEUE_DEPTH-1:0] shift_pop;
    logic [INFLIGHT_QUEUE_DEPTH-1:0] valid;
    inflight_queue_packet [INFLIGHT_QUEUE_DEPTH-1:0] data_out;

    modport queue (
        input pop, data_in, new_issue,
        output data_out, shift_pop, valid
    );

    modport decode (
        output data_in, new_issue
    );

    modport wb (
        input data_in, shift_pop, valid, data_out,
        output pop
    );
endinterface
// Instruction ID allocation: decode takes IDs, writeback reports completed IDs.
interface id_generator_interface;
    // Completion, reported by writeback
    logic complete;
    instruction_id_t complete_id;

    // Allocation, between decode and the generator
    logic advance;
    instruction_id_t issue_id;
    logic id_avaliable;

    modport generator (
        input complete, complete_id, advance,
        output issue_id, id_avaliable
    );

    modport decode (
        input issue_id, id_avaliable,
        output advance
    );

    modport wb (
        output complete, complete_id
    );
endinterface
// Instruction buffer connections: fetch pushes and flushes, decode pops.
interface instruction_buffer_interface;
    // Control
    logic push;
    logic pop;
    logic flush;

    // Data
    instruction_buffer_packet data_in;
    instruction_buffer_packet data_out;

    // Status
    logic valid;
    logic full;
    logic early_full;

    modport buffer (
        input push, pop, flush, data_in,
        output data_out, valid, full, early_full
    );

    modport fetch (
        input full, early_full,
        output push, data_in, flush
    );

    modport decode (
        input valid, data_out,
        output pop
    );

    //modport exception_control (output flush);
endinterface
// Generic FIFO connection with separate enqueue and dequeue views.
// DATA_WIDTH sets the width of data_in and data_out.
interface fifo_interface #(parameter DATA_WIDTH = 32);//#(parameter type data_type = logic[31:0]);
    // Enqueue side
    logic push;
    logic [DATA_WIDTH-1:0] data_in;
    logic full;
    logic early_full;
    logic empty;

    // Dequeue side
    logic pop;
    logic [DATA_WIDTH-1:0] data_out;
    logic valid;
    logic early_valid;

    modport enqueue (
        input early_full, full, empty,
        output data_in, push
    );

    modport dequeue (
        input early_valid, valid, data_out,
        output pop
    );

    // The FIFO implementation itself
    modport structure (
        input push, pop, data_in,
        output data_out, early_valid, valid, early_full, full, empty
    );
endinterface
// AXI bus signals grouped by channel, with master and slave views.
interface axi_interface;
    //Read channel
    //read address
    logic arready;
    logic arvalid;
    logic [C_M_AXI_ADDR_WIDTH-1:0] araddr;
    logic [7:0] arlen;
    logic [2:0] arsize;
    logic [1:0] arburst;
    logic [3:0] arcache;
    logic [5:0] arid;

    //read data
    logic rready;
    logic rvalid;
    logic [C_M_AXI_DATA_WIDTH-1:0] rdata;
    logic [1:0] rresp;
    logic rlast;
    logic [5:0] rid;

    //Write channel
    //write address
    logic awready;
    logic awvalid;
    logic [C_M_AXI_ADDR_WIDTH-1:0] awaddr;
    logic [7:0] awlen;
    logic [2:0] awsize;
    logic [1:0] awburst;
    logic [3:0] awcache;
    logic [5:0] awid;

    //write data
    logic wready;
    logic wvalid;
    logic [C_M_AXI_DATA_WIDTH-1:0] wdata;
    logic [(C_M_AXI_DATA_WIDTH/8)-1:0] wstrb;
    logic wlast;

    //write response
    logic bready;
    logic bvalid;
    logic [1:0] bresp;
    logic [5:0] bid;

    modport master (
        input  arready,
               rvalid, rdata, rresp, rlast, rid,
               awready,
               wready,
               bvalid, bresp, bid,
        output arvalid, araddr, arlen, arsize, arburst, arcache, arid,
               rready,
               awvalid, awaddr, awlen, awsize, awburst, awcache, awid,
               wvalid, wdata, wstrb, wlast,
               bready
    );

    modport slave (
        input  arvalid, araddr, arlen, arsize, arburst, arcache, arid,
               rready,
               awvalid, awaddr, awlen, awsize, awburst, awcache, awid,
               wvalid, wdata, wstrb, wlast,
               bready,
        output arready,
               rvalid, rdata, rresp, rlast, rid,
               awready,
               wready,
               bvalid, bresp, bid
    );
endinterface
// Avalon bus signals with master and slave views.
interface avalon_interface;
    // Driven by the master
    logic [31:0] addr;
    logic read;
    logic write;
    logic [3:0] byteenable;
    logic [31:0] writedata;

    // Driven by the slave
    logic [31:0] readdata;
    logic waitrequest;
    logic readdatavalid;
    logic writeresponsevalid;

    modport master (
        input readdata, waitrequest, readdatavalid, writeresponsevalid,
        output addr, read, write, byteenable, writedata
    );

    modport slave (
        output readdata, waitrequest, readdatavalid, writeresponsevalid,
        input addr, read, write, byteenable, writedata
    );
endinterface
// Request path from one L1 requester to the L1 arbiter.
interface l1_arbiter_request_interface;
    // Request description, driven by the requester
    logic [31:0] addr;
    logic [31:0] data;
    logic [3:0] be;
    logic rnw;
    logic [4:0] size;
    logic is_amo;
    logic [4:0] amo;

    // Handshake
    logic request;
    logic ack;

    // Packs the current request fields into an l2_request_t tagged with sub_id.
    // Only the word address (addr[31:2]) is forwarded; the shared field carries
    // amo when is_amo is set and size otherwise.
    function l2_request_t to_l2 (input bit[L2_SUB_ID_W-1:0] sub_id);
        to_l2.addr = addr[31:2];
        to_l2.rnw = rnw;
        to_l2.be = be;
        to_l2.is_amo = is_amo;
        to_l2.amo_type_or_burst_size = is_amo ? amo : size;
        to_l2.sub_id = sub_id;
    endfunction

    modport requester (
        output addr, data, rnw, be, size, is_amo, amo, request,
        input ack
    );

    modport arb (
        import to_l2,
        input addr, data, rnw, be, size, is_amo, amo, request,
        output ack
    );
endinterface
// Return path from the L1 arbiter to one L1 requester: read data plus an
// invalidation channel.
interface l1_arbiter_return_interface;
    // Invalidation channel
    logic [31:2] inv_addr;
    logic inv_valid;
    logic inv_ack;

    // Read data
    logic [31:0] data;
    logic data_valid;

    modport requester (
        input inv_addr, inv_valid, data, data_valid,
        output inv_ack
    );

    modport arb (
        output inv_addr, inv_valid, data, data_valid,
        input inv_ack
    );
endinterface
// Connections between an MMU, its TLB and the CSR unit.
interface mmu_interface;
    //From TLB
    logic new_request;
    logic execute;
    logic rnw;
    logic [31:0] virtual_address;

    //TLB response
    logic write_entry;
    logic [19:0] new_phys_addr;

    //From CSR
    logic [21:0] ppn;
    logic mxr; //Make eXecutable Readable
    logic pum; //Protect User Memory
    logic privilege;

    modport mmu (
        input virtual_address, new_request, execute, rnw,
              ppn, mxr, pum, privilege,
        output write_entry, new_phys_addr
    );

    modport tlb (
        input write_entry, new_phys_addr,
        output new_request, virtual_address, execute, rnw
    );

    modport csr (
        output ppn, mxr, pum, privilege
    );
endinterface
// Address translation requests to a TLB, plus a flush handshake.
interface tlb_interface;
    // Translation request
    logic new_request;
    logic [31:0] virtual_address;
    logic rnw;
    logic execute;

    // Translation response
    logic complete;
    logic [31:0] physical_address;

    // Flush handshake
    logic flush;
    logic flush_complete;

    modport tlb (
        input virtual_address, new_request, flush, rnw, execute,
        output complete, physical_address, flush_complete
    );

    modport mem (
        output new_request, virtual_address, rnw, execute,
        input complete, physical_address
    );

    modport fence (
        output flush,
        input flush_complete
    );
endinterface
// Handshake between the load/store unit and one of its memory sub-units.
interface ls_sub_unit_interface;
    // Driven by the sub-unit
    logic ready;
    logic data_valid;
    // Driven by the load/store unit
    logic new_request;
    logic ack;

    modport sub_unit (
        input ack, new_request,
        output data_valid, ready
    );

    modport ls (
        output ack, new_request,
        input data_valid, ready
    );
endinterface
// Connection between the fetch unit and one of its memory sub-units.
interface fetch_sub_unit_interface;
    // Driven by fetch
    logic [31:0] stage1_addr;
    logic [31:0] stage2_addr;
    logic new_request;

    // Driven by the sub-unit
    logic [31:0] data_out;
    logic data_valid;
    logic ready;

    modport sub_unit (
        input stage1_addr, stage2_addr, new_request,
        output data_out, data_valid, ready
    );

    modport fetch (
        output stage1_addr, stage2_addr, new_request,
        input data_out, data_valid, ready
    );
endinterface

74
core/itag_banks.sv Normal file
View file

@ -0,0 +1,74 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
// Instruction cache tag storage: one tag bank per way.
// Port A of every bank reads the tag entry for the stage-1 line address when
// stage1_adv is set. Port B writes {valid = 1, tag(stage2_addr)} to the way
// selected by update_way when update is set. A way hits when its read entry
// equals the stage-2 tag with the valid bit set.
module itag_banks(
        input logic clk,
        input logic rst,
        input logic[31:0] stage1_addr,
        input logic[31:0] stage2_addr,
        input logic[0:ICACHE_WAYS-1] update_way,
        input logic update,
        input logic stage1_adv,
        output tag_hit,
        output logic[0:ICACHE_WAYS-1] tag_hit_way
        );

    // Stored entry: valid bit (MSB) followed by the tag
    typedef logic [ICACHE_TAG_W : 0] itag_entry_t;

    // Tag = the top ICACHE_TAG_W bits of the address
    function logic[ICACHE_TAG_W-1:0] getTag(logic[31:0] addr);
        return addr[31 -: ICACHE_TAG_W];
    endfunction

    // Line index = ICACHE_LINE_ADDR_W bits located above the sub-line index
    // and the two byte-offset bits
    function logic[ICACHE_LINE_ADDR_W-1:0] getLineAddr(logic[31:0] addr);
        return addr[ICACHE_SUB_LINE_ADDR_W + 2 +: ICACHE_LINE_ADDR_W];
    endfunction

    itag_entry_t stage2_tag;
    itag_entry_t tag_line[0:ICACHE_WAYS - 1];

    // Entry that is both written on update and compared against for a hit
    assign stage2_tag = {1'b1, getTag(stage2_addr)};

    generate
        for (genvar way = 0; way < ICACHE_WAYS; way++) begin : tag_bank_gen
            tag_bank #(ICACHE_TAG_W+1, ICACHE_LINES) itag_bank (.*,
                    //Port A: read only
                    .en_a(stage1_adv),
                    .wen_a('0),
                    .addr_a(getLineAddr(stage1_addr)),
                    .data_in_a('0),
                    .data_out_a(tag_line[way]),
                    //Port B: write only
                    .en_b(update),
                    .wen_b(update_way[way]),
                    .addr_b(getLineAddr(stage2_addr)),
                    .data_in_b(stage2_tag),
                    .data_out_b()
                );

            assign tag_hit_way[way] = (tag_line[way] == stage2_tag);
        end
    endgenerate

    // Hit in any way
    assign tag_hit = |tag_hit_way;

endmodule

101
core/l1_arbiter.sv Normal file
View file

@ -0,0 +1,101 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
import l2_config_and_types::*;
// Fixed-priority arbiter between four L1 requesters and the L2 interface.
// Priority, highest first: data cache, data MMU, instruction cache,
// instruction MMU. Read data is broadcast to every requester and qualified per
// requester by the returned sub id. Invalidations and write data are only
// connected to the data cache port.
module l1_arbiter
        (
        input logic clk,
        input logic rst,
        l2_requester_interface.requester l2,
        output sc_complete,
        output sc_success,
        l1_arbiter_request_interface.arb l1_request[3:0],
        l1_arbiter_return_interface.arb l1_response[3:0]
        );

    l2_request_t[3:0] l2_requests;
    logic push_ready;

    // Local copies of the four request lines, to keep the priority chain readable
    logic dcache_req;
    logic dmmu_req;
    logic icache_req;
    logic immu_req;

    assign dcache_req = l1_request[L1_DCACHE_ID].request;
    assign dmmu_req   = l1_request[L1_DMMU_ID].request;
    assign icache_req = l1_request[L1_ICACHE_ID].request;
    assign immu_req   = l1_request[L1_IMMU_ID].request;

    ////////////////////////////////////////////////////////
    // Store-conditional results and L2 acknowledgements
    assign sc_complete = l2.con_valid;
    assign sc_success = l2.con_result;

    assign l2.rd_data_ack = l2.rd_data_valid;
    assign l2.inv_ack = l1_response[L1_DCACHE_ID].inv_ack;

    ////////////////////////////////////////////////////////
    // Request arbitration
    //arbiter can pop address FIFO at a different rate than the data FIFO, so check that both have space.
    assign push_ready = ~l2.request_full & ~l2.data_full;

    // A requester is acknowledged when there is space and no higher-priority request is pending
    assign l1_request[L1_DCACHE_ID].ack = dcache_req & push_ready;
    assign l1_request[L1_DMMU_ID].ack = dmmu_req & push_ready & ~dcache_req;
    assign l1_request[L1_ICACHE_ID].ack = icache_req & push_ready & ~dcache_req & ~dmmu_req;
    assign l1_request[L1_IMMU_ID].ack = immu_req & push_ready & ~dcache_req & ~dmmu_req & ~icache_req;

    assign l2.request_push = push_ready & (dcache_req | dmmu_req | icache_req | immu_req);
    assign l2.wr_data_push = push_ready & dcache_req & ~l1_request[L1_DCACHE_ID].rnw; //Assumes data cache has highest priority

    ////////////////////////////////////////////////////////
    // Conversion of each L1 request into the L2 request format.
    // The data cache entry is expanded field by field; it mirrors what to_l2() does.
    always_comb begin
        l2_requests[L1_DCACHE_ID].addr = l1_request[L1_DCACHE_ID].addr[31:2];
        l2_requests[L1_DCACHE_ID].rnw = l1_request[L1_DCACHE_ID].rnw;
        l2_requests[L1_DCACHE_ID].be = l1_request[L1_DCACHE_ID].be;
        l2_requests[L1_DCACHE_ID].is_amo = l1_request[L1_DCACHE_ID].is_amo;
        l2_requests[L1_DCACHE_ID].amo_type_or_burst_size = l1_request[L1_DCACHE_ID].is_amo ? l1_request[L1_DCACHE_ID].amo : l1_request[L1_DCACHE_ID].size;
        l2_requests[L1_DCACHE_ID].sub_id = L1_DCACHE_ID;
    end
    // assign l2_requests[L1_DCACHE_ID] = l1_request[L1_DCACHE_ID].to_l2(L1_DCACHE_ID);
    assign l2_requests[L1_DMMU_ID] = l1_request[L1_DMMU_ID].to_l2(L1_DMMU_ID);
    assign l2_requests[L1_ICACHE_ID] = l1_request[L1_ICACHE_ID].to_l2(L1_ICACHE_ID);
    assign l2_requests[L1_IMMU_ID] = l1_request[L1_IMMU_ID].to_l2(L1_IMMU_ID);

    // Select the highest-priority pending request; the instruction MMU entry is
    // the fall-through when nothing else is requesting
    always_comb begin
        if (dcache_req) begin
            l2.request = l2_requests[L1_DCACHE_ID];
        end
        else if (dmmu_req) begin
            l2.request = l2_requests[L1_DMMU_ID];
        end
        else if (icache_req) begin
            l2.request = l2_requests[L1_ICACHE_ID];
        end
        else begin
            l2.request = l2_requests[L1_IMMU_ID];
        end
    end

    // Only the data cache supplies write data
    assign l2.wr_data = l1_request[L1_DCACHE_ID].data;

    ////////////////////////////////////////////////////////
    // Return path: data is shared, valid is steered by the returned sub id
    assign l1_response[L1_DCACHE_ID].data = l2.rd_data;
    assign l1_response[L1_DCACHE_ID].data_valid = l2.rd_data_valid && (l2.rd_sub_id == L1_DCACHE_ID);

    assign l1_response[L1_DMMU_ID].data = l2.rd_data;
    assign l1_response[L1_DMMU_ID].data_valid = l2.rd_data_valid && (l2.rd_sub_id == L1_DMMU_ID);

    assign l1_response[L1_ICACHE_ID].data = l2.rd_data;
    assign l1_response[L1_ICACHE_ID].data_valid = l2.rd_data_valid && (l2.rd_sub_id == L1_ICACHE_ID);

    assign l1_response[L1_IMMU_ID].data = l2.rd_data;
    assign l1_response[L1_IMMU_ID].data_valid = l2.rd_data_valid && (l2.rd_sub_id == L1_IMMU_ID);

    // Invalidations are forwarded to the data cache only
    assign l1_response[L1_DCACHE_ID].inv_addr = l2.inv_addr;
    assign l1_response[L1_DCACHE_ID].inv_valid = l2.inv_valid;

endmodule

314
core/load_store_unit.sv Normal file
View file

@ -0,0 +1,314 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
// Load/store unit.
// Requests from decode are queued in an input FIFO, translated through the
// TLB interface and steered by physical address range to one of three
// sub-units: scratch BRAM, the external bus (AXI or Avalon) or the data cache.
// For each issued load, the selected sub-unit, fn3 and byte offset are queued
// in an attributes FIFO so that the returning data can be muxed, byte-aligned
// and sign/zero extended before being pushed to the writeback FIFO.
//
// Changes relative to the original implementation:
//  - both "unique case" statements now have a default arm, so fn3 encodings
//    without an explicit arm (e.g. LBU/LHU on the store-data path) no longer
//    raise unique-case violations or hold stale values in simulation.
//  - when the scratch memory or data cache is configured out, data_valid and
//    the unit's load data are tied to zero in addition to ready being tied
//    high, so no undriven (X) value reaches data_valid / load_complete.
module load_store_unit (
        input logic clk,
        input logic rst,
        input load_store_inputs_t ls_inputs,
        func_unit_ex_interface.unit ls_ex,
        input logic dcache_on,
        input logic clear_reservation,
        tlb_interface.mem tlb,
        l1_arbiter_request_interface.requester l1_request,
        l1_arbiter_return_interface.requester l1_response,
        input sc_complete,
        input sc_success,
        axi_interface.master m_axi,
        avalon_interface.master m_avalon,
        bram_interface.user data_bram,
        output logic inorder,
        unit_writeback_interface.unit ls_wb
        );

    localparam NUM_SUB_UNITS = 3;
    localparam NUM_SUB_UNITS_W = $clog2(NUM_SUB_UNITS);

    localparam BRAM_ID = 0;
    localparam BUS_ID = 1;
    localparam DCACHE_ID = 2;

    //Should be equal to pipeline depth of longest load/store subunit
    localparam ATTRIBUTES_DEPTH = 4;

    typedef enum bit [2:0] {BU = 3'b000, HU = 3'b001, BS = 3'b010, HS = 3'b011, W = 3'b100} sign_type;

    data_access_shared_inputs_t d_inputs;
    ls_sub_unit_interface ls_sub[NUM_SUB_UNITS-1:0]();

    logic issue_request;
    logic data_valid;
    logic load_complete;

    logic [31:0] virtual_address;
    logic [3:0]be;

    logic [31:0] unit_muxed_load_data;
    logic [31:0] aligned_load_data;
    logic [31:0] final_load_data;

    logic [31:0] rs2_muxed;
    logic [31:0] most_recent_load;
    logic [31:0] forwarded_data;
    logic [31:0] previous_load;
    logic [31:0] stage1_raw_data;

    logic unaligned_addr;
    logic bus_access;
    logic bram_access;
    logic cache_access;

    logic [31:0] unit_data_array [NUM_SUB_UNITS-1:0];

    //AMO support
    //LR -- invalidates line if tag hit
    //SC -- blocks until response
    //AMO ops -- invalidates line if tag hit, forwards old value to writeback and updates value before writing to cache
    logic reservation;
    logic lr;
    logic sc;
    logic is_amo;
    logic [4:0] amo_op;

    //Per-load information needed to post-process the returning data
    typedef struct packed{
        logic[1:0] unit_id;
        logic [2:0] fn3;
        logic[1:0] byte_addr;
    } load_attributes_t;
    load_attributes_t load_attributes_in, stage2_attr;

    load_store_inputs_t stage1;

    //FIFOs
    fifo_interface #(.DATA_WIDTH($bits(load_store_inputs_t))) input_fifo();
    fifo_interface #(.DATA_WIDTH($bits(load_attributes_t))) load_attributes();
    fifo_interface #(.DATA_WIDTH(XLEN)) wb_fifo();
    /////////////////////////////////////////

    /*********************************
     *  Primary control signals
     *********************************/
    //A request issues when the input FIFO has an entry and all sub-units are ready.
    //A request that uses forwarded load data additionally waits until that data is
    //returning this cycle or no load is outstanding.
    assign issue_request = ((stage1.load_store_forward & (data_valid | ~load_attributes.valid)) | (~stage1.load_store_forward)) & input_fifo.valid & ls_sub[DCACHE_ID].ready & ls_sub[BRAM_ID].ready & ls_sub[BUS_ID].ready;
    assign data_valid = ls_sub[DCACHE_ID].data_valid | ls_sub[BRAM_ID].data_valid | ls_sub[BUS_ID].data_valid;
    assign load_complete = data_valid & ~wb_fifo.full;

    //Sub-unit selection by comparing the upper physical address bits with each region
    assign bram_access = tlb.physical_address[31:32-SCRATCH_BIT_CHECK] == SCRATCH_ADDR_L[31:32-SCRATCH_BIT_CHECK];
    assign bus_access = tlb.physical_address[31:32-BUS_BIT_CHECK] == BUS_ADDR_L[31:32-BUS_BIT_CHECK];
    assign cache_access = tlb.physical_address[31:32-MEMORY_BIT_CHECK] == MEMORY_ADDR_L[31:32-MEMORY_BIT_CHECK];

    assign ls_sub[BRAM_ID].new_request = bram_access & issue_request;
    assign ls_sub[BUS_ID].new_request = bus_access & issue_request;
    assign ls_sub[DCACHE_ID].new_request = cache_access & issue_request;

    //Returned data is only acknowledged when the writeback FIFO can take it
    assign ls_sub[BRAM_ID].ack = ls_sub[BRAM_ID].data_valid & ~wb_fifo.full;
    assign ls_sub[BUS_ID].ack = ls_sub[BUS_ID].data_valid & ~wb_fifo.full;
    assign ls_sub[DCACHE_ID].ack = ls_sub[DCACHE_ID].data_valid & ~wb_fifo.full;
    /*********************************************/

    /*********************************
     *  Input FIFO
     *********************************/
    lutram_fifo #(.DATA_WIDTH($bits(load_store_inputs_t)), .FIFO_DEPTH(LS_INPUT_BUFFER_DEPTH)) ls_input_fifo (.fifo(input_fifo), .*);

    assign input_fifo.data_in = ls_inputs;
    assign input_fifo.push = ls_ex.new_request_dec;
    assign ls_ex.ready = ~input_fifo.full;
    assign input_fifo.pop = issue_request;
    assign inorder = input_fifo.valid;
    assign stage1 = input_fifo.data_out;

    /*********************************
     *  TLB interface
     *********************************/
    assign virtual_address = stage1.virtual_address;// + 32'(signed'(stage1.imm)); <-- In decode/issue stage

    assign tlb.virtual_address = virtual_address;
    assign tlb.new_request = input_fifo.valid;
    assign tlb.execute = 0;
    assign tlb.rnw = stage1.load & ~stage1.store;
    /*********************************************/

    /*********************************
     *  Alignment Exception
     *********************************/
    //Computed here but not consumed by any other statement in this module
    always_comb begin
        case(stage1.fn3)
            LS_H_fn3 : unaligned_addr = virtual_address[0];
            LS_W_fn3 : unaligned_addr = |virtual_address[1:0];
            default : unaligned_addr = 0;
        endcase
    end
    /*********************************************/

    /*********************************
     *  Input Processing
     *  (byte enables, input muxing)
     *********************************/
    /*Byte enable generation
     * Only set on store
     *   SW: all bytes
     *   SH: upper or lower half of bytes
     *   SB: specific byte
     */
    always_comb begin
        for (integer i = 0; i < XLEN/8; i = i+ 1) begin
            be[i] = stage1.store && (
                    ((stage1.fn3 == LS_W_fn3)) ||
                    ((stage1.fn3 == LS_H_fn3) && (virtual_address[1] == i[1])) ||
                    ((stage1.fn3 == LS_B_fn3) && (virtual_address[1:0] == i)));
        end
    end

    //Store data is either rs2 or the most recent load result (returning this cycle, or the saved copy)
    assign most_recent_load = data_valid ? final_load_data : previous_load;
    assign stage1_raw_data = stage1.load_store_forward ? most_recent_load : stage1.rs2;

    //AMO identification for dcache
    assign lr = stage1.is_amo && (stage1.amo == AMO_LR);
    assign sc = stage1.is_amo && (stage1.amo == AMO_SC);
    assign is_amo = stage1.is_amo & ~(lr | sc);
    assign amo_op = stage1.amo;

    //Shared inputs
    assign d_inputs.addr = tlb.physical_address;
    assign d_inputs.load = stage1.load;
    assign d_inputs.store = stage1.store;
    assign d_inputs.be = be;
    assign d_inputs.fn3 = stage1.fn3;

    //Replicate the store data across the word so that the byte enables select the right lane
    always_comb begin
        unique case(stage1.fn3) //<--011, 110, 111, 100, 101 have no dedicated arm
            LS_B_fn3 : d_inputs.data_in = {4{stage1_raw_data[7:0]}};
            LS_H_fn3 : d_inputs.data_in = {2{stage1_raw_data[15:0]}};
            LS_W_fn3 : d_inputs.data_in = stage1_raw_data;
            //Remaining encodings (including the unsigned loads) pass the raw data through,
            //keeping the block fully assigned and free of unique-case violations
            default : d_inputs.data_in = stage1_raw_data;
        endcase
    end

    /*********************************
     *  Load attributes FIFO
     *********************************/
    lutram_fifo #(.DATA_WIDTH($bits(load_attributes_t)), .FIFO_DEPTH(ATTRIBUTES_DEPTH)) attributes_fifo (.fifo(load_attributes), .*);
    assign load_attributes.pop = load_complete;
    assign load_attributes.push = issue_request & stage1.load;
    assign load_attributes.data_in = load_attributes_in;

    assign stage2_attr = load_attributes.data_out;

    assign load_attributes_in.unit_id = cache_access ? DCACHE_ID : (bus_access ? BUS_ID : BRAM_ID);
    assign load_attributes_in.fn3 = stage1.fn3;
    assign load_attributes_in.byte_addr = virtual_address[1:0];

    /*********************************
     *  Unit Instantiation
     *********************************/
    //BRAM
    generate if (USE_SCRATCH_MEM)
            dbram d_bram (.clk(clk), .rst(rst), .ls_inputs(d_inputs), .ls(ls_sub[BRAM_ID]), .data_out(unit_data_array[BRAM_ID]), .*);
        else begin
            //No scratch memory: always ready, never returns data
            assign ls_sub[BRAM_ID].ready = 1;
            assign ls_sub[BRAM_ID].data_valid = 0;
            assign unit_data_array[BRAM_ID] = '0;
        end
    endgenerate

    generate
        if(FPGA_VENDOR == "xilinx") //AXI BUS
            axi_master axi_bus (.clk(clk), .rst(rst), .ls_inputs(d_inputs), .size({1'b0,stage1.fn3[1:0]}), .m_axi(m_axi),.ls(ls_sub[BUS_ID]), .data_out(unit_data_array[BUS_ID])); //Lower two bits of fn3 match AXI specification for request size (byte/halfword/word)
        else begin //Avalon bus
            avalon_master avalon_bus(.clk(clk), .rst(rst),
                    .addr(m_avalon.addr),
                    .avread(m_avalon.read),
                    .avwrite(m_avalon.write),
                    .byteenable(m_avalon.byteenable),
                    .readdata(m_avalon.readdata),
                    .writedata(m_avalon.writedata),
                    .waitrequest(m_avalon.waitrequest),
                    .readdatavalid(m_avalon.readdatavalid),
                    .writeresponsevalid(m_avalon.writeresponsevalid),
                    .addr_in(d_inputs.addr),
                    .data_in(d_inputs.data_in),
                    .data_out(unit_data_array[BUS_ID]),
                    .data_valid(ls_sub[BUS_ID].data_valid),
                    .ready(ls_sub[BUS_ID].ready),
                    .new_request(ls_sub[BUS_ID].new_request),
                    .rnw(d_inputs.load),
                    .be(d_inputs.be),
                    .data_ack(ls_sub[BUS_ID].ack)
                );
        end
    endgenerate

    //Cache
    generate if (USE_DCACHE)
            dcache data_cache (.clk(clk), .rst(rst), .ls_inputs(d_inputs), .ls(ls_sub[DCACHE_ID]), .is_amo(is_amo), .use_forwarded_data( stage1.load_store_forward), .forwarded_data(most_recent_load), .data_out(unit_data_array[DCACHE_ID]), .*);
        else begin
            //No data cache: always ready, never returns data
            assign ls_sub[DCACHE_ID].ready = 1;
            assign ls_sub[DCACHE_ID].data_valid = 0;
            assign unit_data_array[DCACHE_ID] = '0;
        end
    endgenerate

    /*************************************
     * Output Muxing
     *************************************/
    //unit mux
    assign unit_muxed_load_data = unit_data_array[stage2_attr.unit_id];

    //Byte select: moves the addressed byte/halfword down to the low bits
    always_comb begin
        aligned_load_data[31:16] = unit_muxed_load_data[31:16];
        aligned_load_data[15:8] = (stage2_attr.byte_addr == 2'b00) ? unit_muxed_load_data[15:8] : unit_muxed_load_data[31:24];
        case(stage2_attr.byte_addr)
            2'b00 : aligned_load_data[7:0] = unit_muxed_load_data[7:0];
            2'b01 : aligned_load_data[7:0] = unit_muxed_load_data[15:8];
            2'b10 : aligned_load_data[7:0] = unit_muxed_load_data[23:16];
            2'b11 : aligned_load_data[7:0] = unit_muxed_load_data[31:24];
        endcase
    end

    //Sign extending
    always_comb begin
        unique case(stage2_attr.fn3)
            LS_B_fn3 : final_load_data = 32'(signed'(aligned_load_data[7:0]));
            LS_H_fn3 : final_load_data = 32'(signed'(aligned_load_data[15:0]));
            LS_W_fn3 : final_load_data = aligned_load_data;
            //unused 011
            L_BU_fn3 : final_load_data = 32'(unsigned'(aligned_load_data[7:0]));
            L_HU_fn3 : final_load_data = 32'(unsigned'(aligned_load_data[15:0]));
            //unused 110
            //unused 111
            //Unused encodings pass the aligned word through, keeping the block
            //fully assigned and free of unique-case violations
            default : final_load_data = aligned_load_data;
        endcase
    end

    //Keep the last load result so that a later store can use it as forwarded data
    always_ff @ (posedge clk) begin
        if (data_valid)
            previous_load <= final_load_data;
    end

    /*********************************
     *  Output FIFO
     *********************************/
    lutram_fifo #(.DATA_WIDTH(XLEN), .FIFO_DEPTH(LS_OUTPUT_BUFFER_DEPTH)) output_fifo (.fifo(wb_fifo), .*);

    assign wb_fifo.data_in = final_load_data;
    assign wb_fifo.push = load_complete;
    assign wb_fifo.pop = ls_wb.accepted;
    assign ls_wb.rd = wb_fifo.data_out;
    assign ls_wb.done = wb_fifo.valid;
    assign ls_wb.early_done = wb_fifo.early_valid;
    /*********************************************/

endmodule

49
core/lut_ram.sv Normal file
View file

@ -0,0 +1,49 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
// Small memory with one synchronous write port and one asynchronous
// (combinational) read port.
//   WIDTH - bits per entry
//   DEPTH - number of entries
module lut_ram #(
        parameter WIDTH = 32,
        parameter DEPTH = 32
        )
        (
        input logic clk,
        input logic[$clog2(DEPTH)-1:0] waddr,
        input logic[$clog2(DEPTH)-1:0] raddr,
        input logic ram_write,
        input logic[WIDTH-1:0] new_ram_data,
        output logic[WIDTH-1:0] ram_data_out
        );

    (* ramstyle = "MLAB, no_rw_check" *) logic [WIDTH-1:0] ram [DEPTH-1:0];

    // Read port: the addressed entry is visible without a clock edge
    assign ram_data_out = ram[raddr];

    // Every entry starts out as zero
    initial begin
        for (int entry = 0; entry < DEPTH; entry++)
            ram[entry] = '0;
    end

    // Write port
    always_ff @ (posedge clk) begin
        if (ram_write) begin
            ram[waddr] <= new_ram_data;
        end
    end

endmodule

80
core/lutram_fifo.sv Normal file
View file

@ -0,0 +1,80 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
/*
* LUT RAM FIFO implementation. Not underflow/overflow safe.
* Intended for small FIFO depths.
*/
//Small FIFO whose storage is a LUT RAM with asynchronous read.
//There is no protection against pushing when full or popping when empty.
//Occupancy is tracked with a one-hot vector rather than a binary counter:
//count_v[k] set means the FIFO currently holds k entries.
module lutram_fifo #(parameter DATA_WIDTH = 42, parameter FIFO_DEPTH = 4)
    (
        input logic clk,
        input logic rst,
        fifo_interface.structure fifo
    );

    logic[DATA_WIDTH-1:0] lut_ram[FIFO_DEPTH-1:0];
    logic[$clog2(FIFO_DEPTH)-1:0] write_index;
    logic[$clog2(FIFO_DEPTH)-1:0] read_index;
    logic count_v [FIFO_DEPTH:0];

    logic push_only;
    logic pop_only;
    ////////////////////////////////////////////////////////
    //implementation
    assign push_only = fifo.push & ~fifo.pop;
    assign pop_only = ~fifo.push & fifo.pop;

    //Storage: synchronous write at write_index, combinational read at read_index
    always_ff @ (posedge clk) begin
        if (fifo.push)
            lut_ram[write_index] <= fifo.data_in;
    end

    assign fifo.data_out = lut_ram[read_index];

    //Pointers advance by one on every push/pop and wrap with their natural width
    always_ff @ (posedge clk) begin
        if (rst) begin
            write_index <= '0;
            read_index <= '0;
        end
        else begin
            write_index <= write_index + fifo.push;
            read_index <= read_index + fifo.pop;
        end
    end

    //One-hot occupancy. A push alone moves the set bit one position up,
    //a pop alone moves it one position down; push and pop together leave it.
    always_ff @ (posedge clk) begin
        if (rst) begin
            for (int i = 0; i <= FIFO_DEPTH; i++)
                count_v[i] <= (i == 0);
        end
        else if (push_only) begin
            count_v[0] <= 1'b0;
            for (int i = 1; i <= FIFO_DEPTH; i++)
                count_v[i] <= count_v[i-1];
        end
        else if (pop_only) begin
            count_v[FIFO_DEPTH] <= 1'b0;
            for (int i = 0; i < FIFO_DEPTH; i++)
                count_v[i] <= count_v[i+1];
        end
    end

    //Status flags derived from the occupancy vector
    assign fifo.valid = ~count_v[0];
    assign fifo.full = count_v[FIFO_DEPTH];
    assign fifo.early_full = count_v[FIFO_DEPTH] | count_v[FIFO_DEPTH-1];

    //Asserted when pushing, or when non-empty and either more than one entry
    //is held or no pop is happening
    assign fifo.early_valid = fifo.push | (~count_v[0] & (~count_v[1] | ~fifo.pop));

endmodule

146
core/mmu.sv Normal file
View file

@ -0,0 +1,146 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//Page-table walker.
//On mmu.new_request it issues a read through the L1 arbiter request interface,
//inspects the returned 32-bit page table entry and then either
//  - asserts mmu.write_entry (translation available in mmu.new_phys_addr),
//  - asserts mmu_exception (invalid entry or failed permission check), or
//  - issues one further read for the next table level.
//At most two reads are issued per walk: a non-leaf entry returned by the
//second read is treated as invalid.
module mmu
(
input logic clk,
input logic rst,
mmu_interface.mmu mmu,
l1_arbiter_request_interface.requester l1_request,
l1_arbiter_return_interface.requester l1_response,
output mmu_exception
);
//Layout applied to l1_response.data (32 bits total).
//NOTE(review): field order looks like the RISC-V Sv32 PTE format - confirm
//against the privileged spec version this core targets.
typedef struct packed{
logic [11:0] ppn1;
logic [9:0] ppn0;
logic [1:0] reserved;
logic d;
logic a;
logic g;
logic u;
logic x;
logic w;
logic r;
logic v;
} pte_t;
logic [31:0] request_addr;
logic [19:0] request_addr_input_a;
logic [9:0] request_addr_input_b;
logic privilege_check;
logic permissions_check;
//Set once the walk has moved on to the second table level
logic second_request;
logic access_exception;
typedef enum logic[1:0] {IDLE, REQUEST, WAIT} mmu_state_t;
mmu_state_t mmu_state;
//The walker only ever issues plain (non-atomic) reads with all byte enables set
assign l1_request.rnw = 1;
assign l1_request.be = '1;
assign l1_request.size = '0;
assign l1_request.is_amo = 0;
assign l1_request.amo = 0;
pte_t pte;
//pte is a combinational view of the response bus
assign pte = l1_response.data;
assign mmu_exception = access_exception;
//assign request_addr = (mmu_state == IDLE) ? {mmu.ppn[19:0], 12'd0} + {mmu.virtual_address[31:22], 2'b00} : {pte.ppn1,pte.ppn0, 12'd0} + {mmu.virtual_address[21:12], 2'b00};
//Request address = (table page number << 12) + (table index << 2).
//In IDLE the page number comes from mmu.ppn and the index from VA[31:22];
//otherwise the page number comes from the returned entry (only the low 10 bits
//of ppn1 are used) and the index from VA[21:12].
assign request_addr_input_a = (mmu_state == IDLE) ? mmu.ppn[19:0] : {pte.ppn1[9:0],pte.ppn0};
assign request_addr_input_b = (mmu_state == IDLE) ? mmu.virtual_address[31:22] : mmu.virtual_address[21:12];
assign request_addr = {request_addr_input_a, 12'd0} + {request_addr_input_b, 2'b00};
always_ff @ (posedge clk) begin
//Upper half of the translated page number always comes from the entry
mmu. new_phys_addr[19:10] <= pte.ppn1[9:0];
//The request address is only updated while no request is outstanding
if (~l1_request.request)
l1_request.addr <= request_addr;
//Lower half: from the entry on a second-level result, otherwise passed
//through from the virtual address (presumably the superpage case - confirm)
if (second_request)
mmu. new_phys_addr[9:0] <= pte.ppn0;
else
mmu. new_phys_addr[9:0] <= mmu.virtual_address[21:12];
end
//Not ((user-mode and non-user page) OR (supervisor-mode and user-page and user protected))
assign privilege_check = !(
((mmu.privilege == USER) && ~pte.u) |
((mmu.privilege == SUPERVISOR) && pte.u && mmu.pum)
);
assign permissions_check = privilege_check & ((mmu.execute & pte.x) | //execute and exec bit set
(~mmu.execute & //load-store
((mmu.rnw & (pte.r | (pte.x & mmu.mxr))) | //read and (read bit set or (execute and MXR))
(~mmu.rnw & pte.w))));
//Walk state machine: IDLE -> REQUEST -> WAIT -> (IDLE | REQUEST)
always_ff @ (posedge clk) begin
if (rst) begin
mmu_state <= IDLE;
l1_request.request <= 0;
mmu.write_entry <= 0;
second_request <= 0;
access_exception <= 0;
end
else begin
unique case (mmu_state)
IDLE: begin
//write_entry and access_exception are cleared here, so each is high
//for the first IDLE cycle after a walk finishes
mmu.write_entry <= 0;
second_request <= 0;
access_exception <= 0;
if (mmu.new_request & ~mmu.write_entry) begin //~mmu.write_entry for handshaking
mmu_state <= REQUEST;
l1_request.request <= 1;
end
end
REQUEST: begin
//Hold the request until it is acknowledged
if (l1_request.ack) begin
mmu_state <= WAIT;
l1_request.request <= 0;
end
end
WAIT: begin
if (l1_response.data_valid) begin
if (~pte.v | (~pte.r & pte.w) | (~pte.r & ~pte.x & second_request)) begin //invalid pte
mmu_state <= IDLE;
access_exception <= 1;
end
else if (pte.r | pte.x) begin //leaf pte found
mmu_state <= IDLE;
if (permissions_check)
mmu.write_entry <= 1;
else
access_exception <= 1;
end
else begin //Non-leaf pte, request next level
mmu_state <= REQUEST;
l1_request.request <= 1;
second_request <= 1;
end
end
end
endcase
end
end
endmodule

44
core/msb.sv Normal file
View file

@ -0,0 +1,44 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
//Returns the index (0-31) of the most significant set bit of a 32-bit value.
//An all-zero input yields 0.
//The input is split into four bytes that are scanned in parallel; the result
//is taken from the highest byte containing a set bit.
module msb
    (
        input logic [31:0] msb_input,
        output logic [4:0] msb
    );

    logic [2:0] seg_msb [3:0];   //highest set bit position within each byte
    logic [3:0] seg_nonzero;     //byte contains at least one set bit

    //Per-byte scan
    always_comb begin
        for (int seg = 0; seg < 4; seg++) begin
            seg_nonzero[seg] = |msb_input[seg*8 +: 8];
            seg_msb[seg] = '0;
            for (int pos = 1; pos < 8; pos++) begin
                if (msb_input[(seg*8) + pos])
                    seg_msb[seg] = 3'(pos);
            end
        end
    end

    //Highest non-empty byte wins; byte 0 is the fallback
    always_comb begin
        if (seg_nonzero[3])
            msb = {2'd3, seg_msb[3]};
        else if (seg_nonzero[2])
            msb = {2'd2, seg_msb[2]};
        else if (seg_nonzero[1])
            msb = {2'd1, seg_msb[1]};
        else
            msb = {2'd0, seg_msb[0]};
    end

endmodule

66
core/mul.sv Normal file
View file

@ -0,0 +1,66 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
//Pipelined 32x32 multiplier producing a 64-bit product.
//Operands are registered and extended to 33 bits (sign- or zero-extended
//depending on op), multiplied as signed values, and the product then travels
//through CYCLES result registers. done/completed_op follow the same path, so
//they line up with P, CYCLES+1 clocks after new_request.
//op encoding as used here: 2'b11 treats A as unsigned; op[1] set treats B as unsigned.
//CYCLES is expected to be at least 1.
module mul
    #(
        parameter CYCLES = 4
    )
    (
        input logic clk,
        input logic new_request,
        input logic [1:0] op,
        output logic done,
        output logic [1:0] completed_op,
        input logic [31:0] A,
        input logic [31:0] B,
        output logic [63:0] P
    );

    logic signed [32:0] a_ext;
    logic signed [32:0] b_ext;
    logic [65:0] product_pipe [0:CYCLES-1];
    logic valid_pipe [0:CYCLES];
    logic [1:0] op_pipe [0:CYCLES];

    logic a_is_unsigned;
    logic b_is_unsigned;

    assign a_is_unsigned = (op == 2'b11);
    assign b_is_unsigned = op[1];

    //Datapath: operand extension, multiply, result delay line
    always_ff @ (posedge clk) begin
        a_ext <= {A[31] & ~a_is_unsigned, A};
        b_ext <= {B[31] & ~b_is_unsigned, B};
        product_pipe[0] <= a_ext * b_ext;
        for (int stage = 1; stage < CYCLES; stage++)
            product_pipe[stage] <= product_pipe[stage-1];
    end

    //Control: valid flag and operation type travel alongside the data
    always_ff @ (posedge clk) begin
        valid_pipe[0] <= new_request;
        op_pipe[0] <= op;
        for (int stage = 1; stage <= CYCLES; stage++) begin
            valid_pipe[stage] <= valid_pipe[stage-1];
            op_pipe[stage] <= op_pipe[stage-1];
        end
    end

    assign done = valid_pipe[CYCLES];
    assign completed_op = op_pipe[CYCLES];
    assign P = product_pipe[CYCLES-1][63:0];

endmodule

93
core/mul_unit.sv Normal file
View file

@ -0,0 +1,93 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//Multiply functional unit.
//Wraps the pipelined `mul` core, picks the upper or lower 32 bits of the
//product according to the operation that just completed, and queues results
//in an output FIFO until writeback accepts them.
//  mul_ex     : issue-side handshake (new_request_dec in, ready out)
//  mul_inputs : operands (rs1, rs2) and 2-bit operation select
//  mul_wb     : writeback-side handshake (rd/done/early_done out, accepted in)
module mul_unit(
        input logic clk,
        input logic rst,
        func_unit_ex_interface.unit mul_ex,
        input mul_inputs_t mul_inputs,
        unit_writeback_interface.unit mul_wb//writeback_unit_interface_dummy.unit mul_wb//
    );

    parameter MUL_CYCLES = 1;
    parameter FIFO_DEPTH = 2;

    logic [$clog2(FIFO_DEPTH+1)-1:0] inflight_count;

    struct packed{
        logic [31:0] upper;
        logic [31:0] lower;
    } mul_result;

    logic [31:0] result;
    logic [1:0] mul_done_op;
    logic mul_done;

    fifo_interface #(.DATA_WIDTH(XLEN)) wb_fifo();

    //Multiplies issued but not yet accepted by writeback.
    //Unchanged when a new request and an accept happen in the same cycle.
    always_ff @(posedge clk) begin
        if (rst)
            inflight_count <= 0;
        else if (mul_ex.new_request_dec & ~mul_wb.accepted)
            inflight_count <= inflight_count + 1;
        else if (~mul_ex.new_request_dec & mul_wb.accepted)
            inflight_count <= inflight_count - 1;
    end

    //Multiply pathway fully pipelined.
    //ready drops when the request being issued brings the in-flight count up
    //to FIFO_DEPTH, and is restored by the next accept.
    always_ff @(posedge clk) begin
        if (rst)
            mul_ex.ready <= 1;
        else if (mul_ex.new_request_dec && ~mul_wb.accepted && inflight_count == (FIFO_DEPTH-1))
            mul_ex.ready <= 0;
        else if (mul_wb.accepted)
            mul_ex.ready <= 1;
    end

    mul #(MUL_CYCLES) multiplier (.*, .A(mul_inputs.rs1), .B(mul_inputs.rs2),
        .P(mul_result), .new_request(mul_ex.new_request_dec), .op(mul_inputs.op),
        .done(mul_done), .completed_op(mul_done_op));

    //Result select: MUL returns the low word, the MULH* variants the high word.
    //Blocking assignments are used because this is combinational logic.
    always_comb begin
        case (mul_done_op)
            MUL_fn3[1:0] : result = mul_result.lower;
            MULH_fn3[1:0] : result = mul_result.upper;
            MULHSU_fn3[1:0] : result = mul_result.upper;
            MULHU_fn3[1:0] : result = mul_result.upper;
        endcase
    end

    /*********************************
     * Output FIFO
     *********************************/
    lutram_fifo #(.DATA_WIDTH(XLEN), .FIFO_DEPTH(FIFO_DEPTH)) output_fifo (.fifo(wb_fifo), .*);

    assign wb_fifo.data_in = result;
    assign wb_fifo.push = mul_done;
    assign wb_fifo.pop = mul_wb.accepted;
    assign mul_wb.rd = wb_fifo.data_out;
    assign mul_wb.done = wb_fifo.valid;
    assign mul_wb.early_done = wb_fifo.early_valid;//mul_done | (mul_wb.done & ~mul_wb.accepted);
    /*********************************************/
endmodule

88
core/normdiv.sv Normal file
View file

@ -0,0 +1,88 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
//Iterative unsigned divider producing one quotient bit per clock
//(C_WIDTH iterations after start).
//  start    : loads A (dividend) and begins a new division using B (divisor)
//  ack      : clears `complete` once the result has been consumed
//  Q, R     : quotient and remainder, valid while `complete` is high
//  complete : set when the iteration count reaches zero
//B is read directly from the port on every iteration, so it must be held
//stable for the whole division. Division by zero is not treated specially.
module normdiv
    #(
        parameter C_WIDTH = 32
    )
    (
        input logic clk,
        input logic rst,
        input logic start,
        input logic ack,
        input logic [C_WIDTH-1:0] A,
        input logic [C_WIDTH-1:0] B,
        output logic [C_WIDTH-1:0] Q,
        output logic [C_WIDTH-1:0] R,
        output logic complete
    );

    logic running;
    logic terminate;

    //Partial remainder (one bit wider than the operands) and trial subtraction
    logic [C_WIDTH:0] new_PR;
    logic [C_WIDTH:0] PR;

    logic [$clog2(C_WIDTH):0] count;

    //Trial subtraction at the full partial-remainder width.
    //Bit C_WIDTH of the difference is set when PR < B (subtraction went negative).
    assign new_PR = PR - {1'b0, B};

    always_ff @ (posedge clk) begin
        if (start) begin
            count <= C_WIDTH;
            //PR starts with the dividend's top bit; Q holds the remaining
            //dividend bits and fills with quotient bits from the right
            PR <= {{C_WIDTH{1'b0}}, A[C_WIDTH-1]};
            Q <= {A[C_WIDTH-2:0], 1'b0};
        end
        else if (~terminate) begin
            if (new_PR[C_WIDTH]) begin
                //Divisor does not fit: keep PR, shift in next dividend bit, quotient bit 0
                PR <= {PR[C_WIDTH-1:0], Q[C_WIDTH-1]};
                Q <= {Q[C_WIDTH-2:0], 1'b0};
            end
            else begin
                //Divisor fits: keep the difference, shift in next dividend bit, quotient bit 1
                PR <= {new_PR[C_WIDTH-1:0], Q[C_WIDTH-1]};
                Q <= {Q[C_WIDTH-2:0], 1'b1};
            end
            count <= count - 1;
        end
    end

    //PR is shifted once more than needed on the final iteration, so the
    //remainder sits one bit up
    assign R = PR[C_WIDTH:1];
    assign terminate = (count == 0);

    //Status: running from start until the count expires, then complete until ack
    always_ff @ (posedge clk) begin
        if (rst) begin
            running <= 0;
            complete <= 0;
        end
        else begin
            if (start) begin
                running <= 1;
                complete <= 0;
            end
            else if (running & terminate) begin
                running <= 0;
                complete <= 1;
            end
            else if (ack) begin
                running <= 0;
                complete <= 0;
            end
        end
    end

endmodule

View file

@ -0,0 +1,33 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
//Converts a one-hot vector to the index of its set bit.
//If several bits are set, the highest index is returned; if none is set
//(or only bit 0), the result is 0.
module one_hot_to_integer
    #(
        parameter C_WIDTH = 32
    )
    (
        input logic [C_WIDTH-1:0] one_hot,
        output logic [$clog2(C_WIDTH)-1:0] int_out
    );

    localparam OUT_W = $clog2(C_WIDTH);

    always_comb begin
        int_out = '0;
        //Ascending scan: a later (higher) set bit overrides an earlier one
        for (int idx = 1; idx < C_WIDTH; idx++) begin
            if (one_hot[idx])
                int_out = OUT_W'(idx);
        end
    end

endmodule

115
core/quickdiv.sv Normal file
View file

@ -0,0 +1,115 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
//Variable-latency unsigned divider.
//Each iteration aligns the divisor with the most significant set bit of the
//current remainder and subtracts it (or half of it when the aligned divisor is
//too large), setting the matching quotient bit. The number of iterations
//therefore depends on the operands.
//  start    : loads A (dividend) and B (divisor) and begins a division
//  ack      : clears `complete` once the result has been consumed
//  Q, R     : quotient and remainder, valid while `complete` is high
//  complete : set once the remainder is smaller than the divisor
//NOTE(review): the `msb` helper instantiated here has a fixed 32-bit input, so
//C_WIDTH values other than 32 are not supported by it.
//NOTE(review): with B == 0 the terminate condition (R < B_r) can never be
//true; presumably divide-by-zero is filtered out by the caller - confirm.
module quickdiv
    #(
        parameter C_WIDTH = 32
    )
    (
        input logic clk,
        input logic rst,
        input logic start,
        input logic ack,
        input logic [C_WIDTH-1:0] A,
        input logic [C_WIDTH-1:0] B,
        output logic [C_WIDTH-1:0] Q,
        output logic [C_WIDTH-1:0] R,
        output logic complete
    );

    logic running;
    logic terminate;

    logic [C_WIDTH:0] A1;           //R minus aligned divisor, with borrow in bit C_WIDTH
    logic [C_WIDTH-1:0] A2;         //R minus half the aligned divisor
    logic [C_WIDTH - 1:0] new_R;
    logic [C_WIDTH - 1:0] new_Q_bit;
    logic [C_WIDTH-1:0] Q_bit1;
    logic [C_WIDTH-1:0] Q_bit2;
    logic [C_WIDTH-1:0] B1;         //divisor shifted up to the remainder's MSB
    logic [C_WIDTH-1:0] B2;         //B1 shifted back down by one
    logic [C_WIDTH-1:0] B_r;
    logic [$clog2(C_WIDTH)-1:0] R_MSB;
    logic [$clog2(C_WIDTH)-1:0] B_MSB, B_MSB_r;
    logic [$clog2(C_WIDTH)-1:0] MSB_delta;

    msb msb_r (.msb_input(R), .msb(R_MSB));
    msb msb_b (.msb_input(B), .msb(B_MSB));

    //Distance between the remainder's and the divisor's most significant bits
    assign MSB_delta = R_MSB - B_MSB_r;

    assign Q_bit1 = (1'b1 << MSB_delta);
    assign Q_bit2 = {1'b0, Q_bit1[C_WIDTH-1:1]};

    //If the fully aligned subtraction borrows, fall back to the half-aligned one
    assign new_Q_bit = (A1[C_WIDTH] ? Q_bit2 : Q_bit1);
    assign new_R = A1[C_WIDTH] ? A2 : A1[C_WIDTH-1:0];

    assign B1 = (B_r << MSB_delta);
    assign B2 = {1'b0,B1[C_WIDTH-1:1]};

    assign A1 = R - B1;
    assign A2 = R - B2;

    //Status: running from start until the remainder drops below the divisor,
    //then complete until ack
    always_ff @ (posedge clk) begin
        if (rst) begin
            running <= 0;
            complete <= 0;
        end
        else begin
            if (start) begin
                running <= 1;
                complete <= 0;
            end
            else if (running & terminate) begin
                running <= 0;
                complete <= 1;
            end
            else if (ack) begin
                running <= 0;
                complete <= 0;
            end
        end
    end

    assign terminate = (R < B_r);

    //Capture the divisor's MSB position together with B_r so the two always
    //describe the same divisor, even if the B port changes mid-division
    always_ff @ (posedge clk) begin
        if (start)
            B_MSB_r <= B_MSB;
    end

    always_ff @ (posedge clk) begin
        if (start) begin
            Q <= 0;
            R <= A;
            B_r <= B;
        end
        else if (~terminate) begin
            //Set the quotient bit chosen this iteration; works for any C_WIDTH
            Q <= Q | new_Q_bit;
            R <= new_R;
        end
    end

endmodule

69
core/ras.sv Normal file
View file

@ -0,0 +1,69 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//Return address stack with RAS_DEPTH entries stored in LUT RAM.
//The stack wraps around when more than RAS_DEPTH addresses are pushed, so only
//the most recent entries remain correct. A per-entry valid flag tells the
//consumer whether the entry at the top of the stack holds a pushed address.
module ras (
        input logic clk,
        input logic rst,
        ras_interface.self ras
    );

    logic[31:0] lut_ram [RAS_DEPTH-1:0];
    logic[$clog2(RAS_DEPTH)-1:0] read_index;
    logic[$clog2(RAS_DEPTH)-1:0] write_index;
    logic valid_chain[RAS_DEPTH-1:0];
    logic valid_chain_update;
    ///////////////////////////////////////////////////////
    //For simulation purposes
    initial begin
        for (int entry = 0; entry < RAS_DEPTH; entry++) begin
            lut_ram[entry] = 0;
            valid_chain[entry] = 0;
        end
    end
    ///////////////////////////////////////////////////////

    //Top of stack is always visible combinationally
    assign ras.addr = lut_ram[read_index];
    assign ras.valid = valid_chain[read_index];

    //A push without a pop writes one slot above the current top when the top
    //is valid (or into the top itself when it is not). In every other case
    //the write position is the current top.
    always_comb begin
        write_index = read_index;
        if (ras.push & ~ras.pop)
            write_index = read_index + valid_chain[read_index];
    end

    always_ff @ (posedge clk) begin
        if (ras.push)
            lut_ram[write_index] <= ras.new_addr;
    end

    //Top-of-stack pointer: follows the write position on a push, steps down on
    //a pop, and stays put when both or neither are asserted
    always_ff @ (posedge clk) begin
        if (rst)
            read_index <= 0;
        else begin
            case ({ras.push, ras.pop})
                2'b10 : read_index <= write_index;
                2'b01 : read_index <= read_index - 1;
                default : ;
            endcase
        end
    end

    //The slot at the write position becomes valid on a push, invalid on a pop alone
    assign valid_chain_update = ras.push | ras.pop;

    always_ff @ (posedge clk) begin
        if (valid_chain_update)
            valid_chain[write_index] <= ras.push;
    end

endmodule

79
core/register_file.sv Normal file
View file

@ -0,0 +1,79 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//32-entry register file with per-register "in use" tracking.
//  rf_decode : two combinational read ports plus conflict flags, and the
//              destination register / id of the instruction being issued
//  rf_wb     : single write port (rd_addr, rd_data, id, valid_write)
//  inorder   : forces a write to commit even when its id is no longer the
//              latest owner of the destination register
//Each register remembers the id of the most recently issued instruction that
//will write it (in_use_by); a writeback only clears the in-use flag when its
//id matches that owner.
module register_file(
input logic clk,
input logic rst,
input logic inorder,
register_file_writeback_interface.unit rf_wb,
register_file_decode_interface.unit rf_decode
);
(* ramstyle = "MLAB, no_rw_check" *) logic [XLEN-1:0] register [0:31];
//Set while an issued instruction that writes the register has not yet written back
logic inuse [0:31];
//id of the latest issued instruction targeting each register
(* ramstyle = "MLAB, no_rw_check" *) logic [$clog2(INFLIGHT_QUEUE_DEPTH)-1:0] in_use_by [0:31];
logic rs1_feedforward;
logic rs2_feedforward;
//End of signal declarations
//Forward the value being written back this cycle when it targets the register
//being read and comes from that register's current owner.
//NOTE(review): these conditions do not exclude rd_addr == 0; confirm that
//writeback never presents a valid write to register 0 with a matching id.
assign rs1_feedforward = (rf_decode.rs1_addr == rf_wb.rd_addr) && rf_wb.valid_write && (in_use_by[rf_wb.rd_addr] == rf_wb.id);
assign rs2_feedforward = (rf_decode.rs2_addr == rf_wb.rd_addr) && rf_wb.valid_write && (in_use_by[rf_wb.rd_addr] == rf_wb.id);
//Assign zero to r0 and initialize all registers to zero
initial begin
for (integer i=0; i<32; i=i+1) begin
register[i] = '0;
inuse[i] = 0;
in_use_by[i] = '0;
end
end
//Register write: never to register 0, and only from the current owner unless inorder is set
always_ff @ (posedge clk) begin
if (rf_wb.valid_write && rf_wb.rd_addr != 0 && (in_use_by[rf_wb.rd_addr] == rf_wb.id || inorder)) //inorder needed for case when multiple outstanding writes to this register (common pattern: load, store, load) where the first load hasn't completed by the second causes an exception. Without inorder we wouldn't commit the first load
register[rf_wb.rd_addr] <= rf_wb.rd_data;
end
//Record the new owner of the destination register at issue time
always_ff @ (posedge clk) begin
if (rf_decode.instruction_issued && rf_decode.future_rd_addr != 0 )
in_use_by[rf_decode.future_rd_addr] <= rf_decode.id;
end
//In-use flags for registers 1-31 (register 0 keeps its initial value of 0).
//Issue takes priority over a writeback to the same register in the same cycle.
genvar i;
generate
for (i= 1; i < 32; i=i+1) begin : inuse_g
always_ff @ (posedge clk) begin
if (rst)
inuse[i] <= 0;
else if (rf_decode.instruction_issued && rf_decode.future_rd_addr == i)
inuse[i] <= 1;
else if ( rf_wb.valid_write && (rf_wb.rd_addr == i) && (in_use_by[rf_wb.rd_addr] == rf_wb.id))// || inorder <-- when exception has occurred
inuse[i] <= 0;
end
end
endgenerate
//Read ports: forwarded writeback data wins over the stored value
assign rf_decode.rs1_data = rs1_feedforward ? rf_wb.rd_data : register[rf_decode.rs1_addr];
assign rf_decode.rs2_data = rs2_feedforward ? rf_wb.rd_data : register[rf_decode.rs2_addr];
//A source conflicts when its register is in use and the value is not being forwarded this cycle
assign rf_decode.rs1_conflict = inuse[rf_decode.rs1_addr] & ~rs1_feedforward;
assign rf_decode.rs2_conflict = inuse[rf_decode.rs2_addr] & ~rs2_feedforward;
endmodule

69
core/tag_bank.sv Normal file
View file

@ -0,0 +1,69 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//Dual-port synchronous memory used for cache tags.
//Both ports are identical: when the port is enabled, a write stores data_in at
//addr, otherwise the entry at addr is registered onto data_out. data_out holds
//its previous value during writes and while the port is disabled.
//All entries are initialised to zero. rst is not used inside this module.
module tag_bank #(
        parameter WIDTH = 32,
        parameter LINES = 512
    )
    (
        input logic clk,
        input logic rst,
        input logic[$clog2(LINES)-1:0] addr_a,
        input logic[$clog2(LINES)-1:0] addr_b,
        input logic en_a,
        input logic en_b,
        input logic wen_a,
        input logic wen_b,
        input logic [WIDTH-1:0] data_in_a,
        input logic [WIDTH-1:0] data_in_b,
        output logic [WIDTH-1:0] data_out_a,
        output logic [WIDTH-1:0] data_out_b
    );

    (* ramstyle = "no_rw_check" *) logic [WIDTH-1:0] tag_entry [LINES-1:0];

    //Power-up contents: every line cleared
    initial begin
        for (int line = 0; line < LINES; line++)
            tag_entry[line] = 0;
    end

    //Port A
    always_ff @ (posedge clk) begin
        if (en_a) begin
            if (wen_a) begin
                tag_entry[addr_a] <= data_in_a;
            end
            else begin
                data_out_a <= tag_entry[addr_a];
            end
        end
    end

    //Port B
    always_ff @ (posedge clk) begin
        if (en_b) begin
            if (wen_b) begin
                tag_entry[addr_b] <= data_in_b;
            end
            else begin
                data_out_b <= tag_entry[addr_b];
            end
        end
    end

endmodule

175
core/taiga.sv Normal file
View file

@ -0,0 +1,175 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//Top level of the Taiga core.
//Instantiates the fetch front end, decode/issue logic, register file, the
//functional units and the writeback mux, and wires them together through the
//interfaces declared below. Most connections are made by name with `.*`, so
//the signal and interface names declared here must match the sub-module ports.
//  instruction_bram / data_bram : local memory ports
//  m_axi / m_avalon             : bus master ports
//  l2                           : port used by the L1 arbiter (when generated)
//  interrupt                    : external interrupt input
//  if2_pc_debug / dec_pc_debug  : copies of the fetch and decode PCs
module taiga (
        input logic clk,
        input logic rst,
        bram_interface.user instruction_bram,
        bram_interface.user data_bram,
        axi_interface.master m_axi,
        avalon_interface.master m_avalon,
        l2_requester_interface.requester l2,
        input logic interrupt,
        //debug
        output logic[31:0] if2_pc_debug,
        output logic[31:0] dec_pc_debug
    );

    //One request/response pair per L1 arbiter client, indexed by l1_connection_id
    l1_arbiter_request_interface l1_request[3:0]();
    l1_arbiter_return_interface l1_response[3:0]();
    logic sc_complete;
    logic sc_success;

    branch_table_interface bt();
    ras_interface ras();
    register_file_decode_interface rf_decode();

    alu_inputs_t alu_inputs;
    load_store_inputs_t ls_inputs;
    branch_inputs_t branch_inputs;
    mul_inputs_t mul_inputs;
    div_inputs_t div_inputs;
    csr_inputs_interface csr_inputs();

    func_unit_ex_interface branch_ex();
    func_unit_ex_interface alu_ex();
    func_unit_ex_interface ls_ex();
    func_unit_ex_interface csr_ex();
    func_unit_ex_interface mul_ex();
    func_unit_ex_interface div_ex();

    instruction_buffer_interface ib();
    inflight_queue_interface iq();
    id_generator_interface id_gen();

    //One writeback port per functional unit, indexed by unit_ids
    unit_writeback_interface unit_wb [NUM_WB_UNITS-1:0]();
    //writeback_unit_interface unit_wb();
    register_file_writeback_interface rf_wb();

    csr_exception_interface csr_exception();

    tlb_interface itlb();
    tlb_interface dtlb();
    logic tlb_on;
    logic [9:0] asid;
    logic return_from_exception;

    mmu_interface immu();
    mmu_interface dmmu();

    logic inorder;

    //Branch Unit and Fetch Unit
    logic branch_taken;
    logic [31:0] pc_offset;
    logic[31:0] jalr_rs1;
    logic jalr;

    //Decode Unit and Fetch Unit
    logic [31:0] if2_pc;
    logic [31:0] instruction;
    logic dec_advance;
    logic flush;
    logic illegal_instruction;

    logic [31:0] dec_pc;
    logic [31:0] pc_ex;

    //Declared explicitly rather than relying on the implicit net that the
    //continuous assignment below would otherwise create
    logic instruction_issued;
    logic instruction_issued_no_rd;
    logic instruction_complete;

    assign instruction_issued = dec_advance;

    assign if2_pc_debug = if2_pc;
    assign dec_pc_debug = dec_pc;

    /*************************************
     * Memory Interface
     *************************************/
    //The arbiter is only generated when at least one of its clients is enabled
    generate if (USE_MMU || USE_ICACHE || USE_DCACHE)
        l1_arbiter arb(.*);
    endgenerate

    /*************************************
     * CPU Front end
     *************************************/
    fetch fetch_block (.*, .icache_on('1), .tlb(itlb), .l1_request(l1_request[L1_ICACHE_ID]), .l1_response(l1_response[L1_ICACHE_ID]), .exception(1'b0));
    branch_table bt_block (.*);
    ras ras_block(.*);

    //Without an MMU the instruction TLB is replaced by an identity mapping
    //that always reports completion
    generate if (USE_MMU) begin
        tlb_lut_ram #(ITLB_WAYS, ITLB_DEPTH) i_tlb (.*, .tlb(itlb), .mmu(immu));
        mmu i_mmu (.*, .mmu(immu) , .l1_request(l1_request[L1_IMMU_ID]), .l1_response(l1_response[L1_IMMU_ID]), .mmu_exception());
    end
    else begin
        assign itlb.complete = 1;
        assign itlb.physical_address = itlb.virtual_address;
    end
    endgenerate

    instruction_buffer inst_buffer(.*);

    /*************************************
     * Decode/Issue/Control
     *************************************/
    decode decode_block (.*);
    register_file register_file_block (.*);
    id_generator id_gen_block (.*);
    inflight_queue inst_queue(.*);

    /*************************************
     * Units
     *************************************/
    branch_unit branch_unit_block (.*, .branch_wb(unit_wb[BRANCH_UNIT_ID].unit));
    alu_unit alu_unit_block (.*, .alu_wb(unit_wb[ALU_UNIT_ID].unit));
    load_store_unit load_store_unit_block (.*, .dcache_on(1'b1), .clear_reservation(1'b0), .tlb(dtlb), .ls_wb(unit_wb[LS_UNIT_ID].unit), .l1_request(l1_request[L1_DCACHE_ID]), .l1_response(l1_response[L1_DCACHE_ID]));

    //Without an MMU the data TLB is replaced by an identity mapping
    //that always reports completion
    generate if (USE_MMU) begin
        tlb_lut_ram #(DTLB_WAYS, DTLB_DEPTH) d_tlb (.*, .tlb(dtlb), .mmu(dmmu));
        mmu d_mmu (.*, .mmu(dmmu), .l1_request(l1_request[L1_DMMU_ID]), .l1_response(l1_response[L1_DMMU_ID]), .mmu_exception());
    end
    else begin
        assign dtlb.complete = 1;
        assign dtlb.physical_address = dtlb.virtual_address;
    end
    endgenerate

    csr_unit csr_unit_block (.*, .csr_wb(unit_wb[CSR_UNIT_ID].unit));

    //Multiply and divide units are optional
    generate if (USE_MUL)
        mul_unit mul_unit_block (.*, .mul_wb(unit_wb[MUL_UNIT_ID].unit));
    endgenerate

    generate if (USE_DIV)
        div_unit div_unit_block (.*, .div_wb(unit_wb[DIV_UNIT_ID].unit));
    endgenerate

    /*************************************
     * Writeback Mux
     *************************************/
    write_back write_back_mux (.*);

endmodule

118
core/taiga_config.sv Normal file
View file

@ -0,0 +1,118 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
//Build-time configuration for the Taiga core: data/address widths, optional
//unit enables, buffer depths, address map, and cache/TLB geometry.
package taiga_config;
parameter FPGA_VENDOR = "xilinx"; //xilinx or intel
parameter XLEN = 32;
parameter ADDR_W = 32;
parameter CPU_ID = 0;//32 bit value
parameter bit[31:0] RESET_VEC = 32'h80000000;
//NOTE(review): taiga.sv declares its asid signal as 10 bits wide - confirm which width is intended
parameter ASIDLEN = 7;//pid
parameter PAGE_ADDR_W = 12;
parameter TIMER_W = 48; //32 days @ 100MHz
//Optional functional units (taiga.sv generates mul_unit / div_unit only when set)
parameter USE_DIV = 1;
parameter USE_MUL = 1;
parameter USE_VARIABLE_LATENCY_DIV = 1;
//Number of functional units feeding the writeback mux; must cover every id in unit_ids
parameter NUM_WB_UNITS = 6;
parameter WB_UNITS_WIDTH = $clog2(NUM_WB_UNITS);
//Index of each unit's port in the unit_wb interface array of taiga.sv
typedef enum {//bit [WB_UNITS_WIDTH-1:0] {
ALU_UNIT_ID = 0,
BRANCH_UNIT_ID=1,
CSR_UNIT_ID = 2,
LS_UNIT_ID = 3,
MUL_UNIT_ID = 4,
DIV_UNIT_ID = 5
} unit_ids;
//Queue and buffer depths (entries)
parameter INFLIGHT_QUEUE_DEPTH = 4;
parameter FETCH_BUFFER_DEPTH = 4;
parameter LS_INPUT_BUFFER_DEPTH=4;
parameter LS_OUTPUT_BUFFER_DEPTH=2;
parameter DIV_INPUT_BUFFER_DEPTH=2;
parameter DIV_OUTPUT_BUFFER_DEPTH=2;
//Address space
//Each region has an inclusive low/high address and a *_BIT_CHECK value.
//Presumably *_BIT_CHECK is the number of upper address bits compared when
//decoding the region - TODO confirm against the address decode logic.
parameter USE_SCRATCH_MEM = 1;
parameter SCRATCH_ADDR_L = 32'h80000000;
parameter SCRATCH_ADDR_H = 32'h8000FFFF;
parameter SCRATCH_BIT_CHECK = 16;
//NOTE(review): 0x20000000-0x3FFFFFFF covers two different values of the top
//four address bits (0x2 and 0x3); if MEMORY_BIT_CHECK is an upper-bit compare
//width, 4 would only match half of this range - confirm.
parameter MEMORY_ADDR_L = 32'h20000000;
parameter MEMORY_ADDR_H = 32'h3FFFFFFF;
parameter MEMORY_BIT_CHECK = 4;
parameter BUS_ADDR_L = 32'h60000000;
parameter BUS_ADDR_H = 32'h6FFFFFFF;
parameter BUS_BIT_CHECK = 4;
//Bus
parameter C_M_AXI_ADDR_WIDTH = 32;
parameter C_M_AXI_DATA_WIDTH = 32;
//When set, taiga.sv instantiates TLBs and page-table walkers; otherwise
//virtual addresses are passed through unchanged
parameter USE_MMU = 1;
//Caches
//Size in bytes: (DCACHE_LINES * DCACHE_WAYS * DCACHE_LINE_W * 4)
parameter USE_DCACHE = 1;
parameter DCACHE_LINES = 128;
parameter DCACHE_WAYS = 2;
parameter DCACHE_LINE_ADDR_W = $clog2(DCACHE_LINES);
parameter DCACHE_LINE_W = 8; //In words
parameter DCACHE_SUB_LINE_ADDR_W = $clog2(DCACHE_LINE_W);
//Tag = address bits left after the line index, word-in-line index and 2 byte-offset bits
parameter DCACHE_TAG_W = ADDR_W - DCACHE_LINE_ADDR_W - DCACHE_SUB_LINE_ADDR_W - 2;
parameter DTLB_WAYS = 2;
parameter DTLB_DEPTH = 32;
//Size in bytes: (ICACHE_LINES * ICACHE_WAYS * ICACHE_LINE_W * 4)
//For optimal BRAM packing lines should not be less than 512
parameter USE_ICACHE = 1;
parameter ICACHE_LINES = 128;
parameter ICACHE_WAYS = 2;
parameter ICACHE_LINE_ADDR_W = $clog2(ICACHE_LINES);
parameter ICACHE_LINE_W = 8; //In words
parameter ICACHE_SUB_LINE_ADDR_W = $clog2(ICACHE_LINE_W);
//Tag = address bits left after the line index, word-in-line index and 2 byte-offset bits
parameter ICACHE_TAG_W = ADDR_W - ICACHE_LINE_ADDR_W - ICACHE_SUB_LINE_ADDR_W - 2;
//Branch prediction
parameter USE_BRANCH_PREDICTOR = 1;
parameter BRANCH_TABLE_ENTRIES = 1024;
parameter RAS_DEPTH = 8; //return address stack entries
parameter ITLB_WAYS = 2;
parameter ITLB_DEPTH = 32;
//Index of each client in the l1_request / l1_response interface arrays of taiga.sv
typedef enum bit [1:0] {
L1_DCACHE_ID = 2'd0,
L1_DMMU_ID = 2'd1,
L1_ICACHE_ID = 2'd2,
L1_IMMU_ID = 2'd3
} l1_connection_id;
endpackage

358
core/taiga_types.sv Normal file
View file

@ -0,0 +1,358 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
//Shared enumerations and packed structures used throughout the core:
//instruction field encodings, CSR addresses, trap codes and the input bundles
//passed to the individual execution units.
package taiga_types;
    import taiga_config::*;

    //7-bit major opcode encodings
    typedef enum bit [6:0] {
        LUI = 7'b0110111,
        AUIPC = 7'b0010111,
        JAL = 7'b1101111,
        JALR = 7'b1100111,
        BRANCH = 7'b1100011,
        LOAD = 7'b0000011,
        STORE = 7'b0100011,
        ARITH_IMM = 7'b0010011,
        ARITH = 7'b0110011,//includes mul/div
        FENCE = 7'b0001111,
        AMO = 7'b0101111,
        SYSTEM = 7'b1110011
        //end of RV32I
    } opcodes_t;

    //fn3 encodings for the ARITH/ARITH_IMM opcodes
    typedef enum bit [2:0] {
        ADD_SUB_fn3 = 3'b000,
        SLL_fn3 = 3'b001,
        SLT_fn3 = 3'b010,
        SLTU_fn3 = 3'b011,
        XOR_fn3 = 3'b100,
        OR_fn3 = 3'b110,
        SRA_fn3 = 3'b101,//right shifts (SRL and SRA share this fn3 encoding in RISC-V)
        AND_fn3 = 3'b111
    } fn3_arith_t;

    //fn3 encodings for loads and stores (B/H/W are shared, the unsigned forms are load-only)
    typedef enum bit [2:0] {
        LS_B_fn3 = 3'b000,
        LS_H_fn3 = 3'b001,
        LS_W_fn3 = 3'b010,
        //unused 011
        L_BU_fn3 = 3'b100,
        L_HU_fn3 = 3'b101
        //unused 110
        //unused 111
    } fn3_ls_t;

    //fn3 encodings for conditional branches
    typedef enum bit [2:0] {
        BEQ_fn3 = 3'b000,
        BNE_fn3 = 3'b001,
        //010 unused
        //011 unused
        BLT_fn3 = 3'b100,
        BGE_fn3 = 3'b101,
        BLTU_fn3 = 3'b110,
        BGEU_fn3 = 3'b111
    } fn3_branch_t;

    //fn3 encodings for the multiply/divide instructions
    typedef enum bit [2:0] {
        MUL_fn3 = 3'b000,
        MULH_fn3 = 3'b001,
        MULHSU_fn3 = 3'b010,
        MULHU_fn3 = 3'b011,
        DIV_fn3 = 3'b100,
        DIVU_fn3 = 3'b101,
        REM_fn3 = 3'b110,
        REMU_fn3 = 3'b111
    } fn3_mul_div_t;

    //12-bit CSR addresses
    typedef enum bit [11:0] {
        //Machine info
        MVENDORID = 12'hF11,
        MARCHID = 12'hF12,
        MIMPID = 12'hF13,
        MHARTID = 12'hF14,
        //Machine trap setup
        MSTATUS = 12'h300,
        MISA = 12'h301,
        MEDELEG = 12'h302,
        MIDELEG = 12'h303,
        MIE = 12'h304,
        MTVEC = 12'h305,
        //Machine trap handling
        MSCRATCH = 12'h340,
        MEPC = 12'h341,
        MCAUSE = 12'h342,
        MBADADDR = 12'h343,
        MIP = 12'h344,
        //Machine Counters
        MCYCLE = 12'hB00,
        MTIME = 12'hB01,
        MINSTRET = 12'hB02,
        MCYCLEH = 12'hB80,
        MTIMEH = 12'hB81,
        MINSTRETH = 12'hB82,
        //Machine Counter setup
        MUCOUNTEREN = 12'h310,
        MSCOUNTEREN = 12'h311,
        //Deltas
        MUCYCLE_DELTA = 12'h700,
        MUTIME_DELTA = 12'h701,
        MUINSTRET_DELTA = 12'h702,
        MSCYCLE_DELTA = 12'h704,
        MSTIME_DELTA = 12'h705,
        MSINSTRET_DELTA = 12'h706,
        MUCYCLE_DELTAH = 12'h780,
        MUTIME_DELTAH = 12'h781,
        MUINSTRET_DELTAH = 12'h782,
        MSCYCLE_DELTAH = 12'h784,
        MSTIME_DELTAH = 12'h785,
        MSINSTRET_DELTAH = 12'h786,
        //Supervisor regs
        //Supervisor Trap Setup
        SSTATUS = 12'h100,
        SEDELEG = 12'h102,
        SIDELEG = 12'h103,
        SIE = 12'h104,
        STVEC = 12'h105,
        //Supervisor trap handling
        //scause/sbadaddr are supervisor read/write registers located next to
        //sscratch/sepc/sip in the CSR layout the rest of this table follows
        //(SEDELEG at 0x102, STVEC at 0x105). Addresses in the 0xDxx range have
        //bits [11:10] == 2'b11 (CSR_READ_ONLY), which would make them unwritable.
        SSCRATCH = 12'h140,
        SEPC = 12'h141,
        SCAUSE = 12'h142,
        SBADADDR = 12'h143,
        SIP = 12'h144,
        //Supervisor Protection and Translation
        SPTBR = 12'h180,
        //Supervisor counters
        SCYCLE = 12'hD00,
        STIME = 12'hD01,
        SINSTRET = 12'hD02,
        SCYCLEH = 12'hD80,
        STIMEH = 12'hD81,
        SINSTRETH = 12'hD82,
        //User regs
        //USER Floating Point
        FFLAGS = 12'h001,
        FRM = 12'h002,
        FCSR = 12'h003,
        //User Counter Timers
        CYCLE = 12'hC00,
        TIME = 12'hC01,
        INSTRET = 12'hC02,
        CYCLEH = 12'hC80,
        TIMEH = 12'hC81,
        INSTRETH = 12'hC82
    } csr_t;

    //fn3 encodings for the SYSTEM opcode
    typedef enum bit [2:0] {
        NONCSR_fn3 = 3'b000,
        RW_fn3 = 3'b001,
        RS_fn3 = 3'b010,
        RC_fn3 = 3'b011,
        // unused 3'b100,
        RWI_fn3 = 3'b101,
        RSI_fn3 = 3'b110,
        RCI_fn3 = 3'b111
    } fn3_csr_t;

    //CSR operation; values match the low two bits of the fn3_csr_t encodings
    typedef enum bit [1:0] {
        CSR_RW = 2'b01,
        CSR_RS = 2'b10,
        CSR_RC = 2'b11
    } csr_op_t;

    //2-bit code identifying read-only CSRs
    //NOTE(review): presumably compared against CSR address bits [11:10] - confirm in the CSR unit
    const bit[1:0] CSR_READ_ONLY = 2'b11;

    //Privilege levels
    typedef enum bit [1:0] {
        USER = 2'b00,
        SUPERVISOR = 2'b01,
        HYPERVISOR =2'b10,
        MACHINE = 2'b11
    } privilege_t;

    //Virtual memory schemes
    typedef enum bit [4:0] {
        BARE = 5'd0,
        SV32 = 5'd8
    } vm_t;

    //Synchronous exception cause codes
    typedef enum bit [3:0] {
        INST_ADDR_MISSALIGNED = 4'd0,
        INST_FAULT = 4'd1,
        ILLEGAL_INST = 4'd2,
        BREAK = 4'd3,
        LOAD_ADDR_MISSALIGNED = 4'd4,
        LOAD_FAULT = 4'd5,
        STORE_AMO_ADDR_MISSALIGNED = 4'd6,
        STORE_AMO_FAULT = 4'd7,
        ECALL_U = 4'd8,
        ECALL_S = 4'd9,
        ECALL_H = 4'd10,
        ECALL_M = 4'd11
    } exception_code_t;

    //Width of the exception/interrupt code enumerations
    parameter ECODE_W = 4;

    //Interrupt cause codes
    typedef enum bit [3:0] {
        U_SOFTWARE_INTERRUPT = 4'd0,
        S_SOFTWARE_INTERRUPT = 4'd1,
        H_SOFTWARE_INTERRUPT = 4'd2,
        M_SOFTWARE_INTERRUPT = 4'd3,
        U_TIMER_INTERRUPT = 4'd4,
        S_TIMER_INTERRUPT = 4'd5,
        H_TIMER_INTERRUPT = 4'd6,
        M_TIMER_INTERRUPT = 4'd7,
        U_EXTERNAL_INTERRUPT = 4'd8,
        S_EXTERNAL_INTERRUPT = 4'd9,
        H_EXTERNAL_INTERRUPT = 4'd10,
        M_EXTERNAL_INTERRUPT = 4'd11
    } interrupt_code_t;

    //ALU result selection
    typedef enum bit [1:0] {
        ALU_SLT = 2'b00,
        ALU_LOGIC = 2'b01,
        ALU_SHIFT =2'b10,
        ALU_ADD_SUB = 2'b11
    } alu_op_t;

    //Identifier for an in-flight instruction, sized by the inflight queue depth
    typedef logic[$clog2(INFLIGHT_QUEUE_DEPTH)-1:0] instruction_id_t;

    //Entry stored in the inflight queue
    typedef struct packed{
        logic [WB_UNITS_WIDTH-1:0] unit_id;
        logic [4:0] rd_addr;
        instruction_id_t id;
    } inflight_queue_packet;

    //Entry stored in the instruction buffer
    typedef struct packed{
        logic [31:0] instruction;
        logic [31:0] pc;
        logic uses_rs1;
        logic uses_rs2;
        logic uses_rd;
        logic prediction;
    } instruction_buffer_packet;

    //Inputs to the ALU unit
    typedef struct packed{
        logic [XLEN-1:0] in1;
        logic [XLEN-1:0] in2;
        logic [2:0] fn3;
        logic add;
        logic arith;
        logic left_shift;
        logic [XLEN-1:0] shifter_in;
        logic sltu;
        logic [1:0] op;
    }alu_inputs_t;

    //Inputs to the branch unit
    typedef struct packed{
        logic [XLEN-1:0] rs1;
        logic [XLEN-1:0] rs2;
        logic [2:0] fn3;
        logic [31:0] dec_pc;
        logic use_signed;
        logic jal;
        logic jalr;
        logic rdx0;
        logic[4:0] rs1_addr;
        logic[4:0] rd_addr;
        logic branch_compare;
        logic[19:0] jal_imm;
        logic[11:0] jalr_imm;
        logic[11:0] br_imm;
        logic prediction;
    } branch_inputs_t;

    //5-bit atomic operation codes
    typedef enum bit [4:0] {
        AMO_LR = 5'b00010,
        AMO_SC = 5'b00011,
        AMO_SWAP = 5'b00001,
        AMO_ADD = 5'b00000,
        AMO_XOR = 5'b00100,
        AMO_AND = 5'b01100,
        AMO_OR = 5'b01000,
        AMO_MIN = 5'b10000,
        AMO_MAX = 5'b10100,
        AMO_MINU = 5'b11000,
        AMO_MAXU = 5'b11100
    } amo_t;

    //Inputs to the ALU used for atomic read-modify-write operations
    typedef struct packed{
        logic [XLEN-1:0] rs1_load;
        logic [XLEN-1:0] rs2;
        logic [4:0] op;
    }amo_alu_inputs_t;

    //Inputs to the load-store unit
    typedef struct packed{
        logic [XLEN-1:0] virtual_address;
        logic [XLEN-1:0] rs2;
        logic [2:0] fn3;
        logic [4:0] amo;
        logic is_amo;
        logic load;
        logic store;
        logic load_store_forward;
        //exception support
        logic [31:0] pc;
        instruction_id_t id;
    } load_store_inputs_t;

    //Inputs to the multiply unit
    typedef struct packed{
        logic [XLEN-1:0] rs1;
        logic [XLEN-1:0] rs2;
        logic [1:0] op;
    } mul_inputs_t;

    //Inputs to the divide unit
    typedef struct packed{
        logic [XLEN-1:0] rs1;
        logic [XLEN-1:0] rs2;
        logic [1:0] op;
        logic reuse_result;
        logic div_zero;
    } div_inputs_t;

    //Request sent towards the L1 arbiter (word address in addr[31:2])
    typedef struct packed{
        logic [31:2] addr;
        logic [31:0] data;
        logic rnw;
        logic [0:3] be;//note: ascending bit range, unlike the [3:0] byte enables used elsewhere in this package
        logic [2:0] size;
        logic con;
    } to_l1_arbiter_packet;

    //Signals shared between the data-access sub-units (unpacked struct)
    typedef struct {
        logic [31:0] addr;
        logic load;
        logic store;
        logic [3:0] be;
        logic [2:0] fn3;
        logic [31:0] data_in;
    } data_access_shared_inputs_t;

endpackage

141
core/tlb_lut_ram.sv Normal file
View file

@ -0,0 +1,141 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//Set-associative TLB built from LUT RAMs (one lut_ram instance per way).
//A lookup is indexed by the virtual page number's low bits and tag-compared
//against every way; on a miss with the TLB enabled a request is raised to the
//MMU, whose reply is written into the way selected by the cycler.
//While tlb.flush is asserted the write port is used to walk through the
//entries and clear their valid bits.
module tlb_lut_ram #(
        parameter WAYS = 2,
        parameter DEPTH = 32
        )
        (
        input logic clk,
        input logic rst,
        input logic tlb_on,
        input logic [ASIDLEN-1:0] asid,
        mmu_interface.tlb mmu,
        tlb_interface.tlb tlb
        );

    //Tag = virtual address bits above the 12-bit page offset and the index bits
    localparam TLB_TAG_W = 32-12-$clog2(DEPTH);

    typedef struct packed {
        logic valid;
        logic [TLB_TAG_W-1:0] tag;
        logic [19:0] phys_addr;
    } tlb_entry_t;

    logic [$clog2(DEPTH)-1:0] tlb_read_addr;
    logic [$clog2(DEPTH)-1:0] tlb_write_addr;

    logic [TLB_TAG_W-1:0] virtual_tag;

    //The next two declarations are not referenced by any logic in this module
    tlb_entry_t ram [0:DEPTH-1][0:WAYS-1];
    logic [0:DEPTH-1] valid [0:WAYS-1];

    logic [0:WAYS-1] tag_hit;
    logic [0:WAYS-1] replacement_way;

    tlb_entry_t ram_data [0:WAYS-1];
    tlb_entry_t new_entry;

    logic flush_in_progress;
    logic [$clog2(DEPTH)-1:0] flush_addr;

    logic hit;
    logic [0:WAYS-1] tlb_write;

    ////////////////////////////////////////////////////
    //Address split
    assign tlb_read_addr = tlb.virtual_address[$clog2(DEPTH)+11:12];
    assign virtual_tag = tlb.virtual_address[31:32-TLB_TAG_W];

    //Write port: flush walker when flushing, otherwise the MMU fill path
    assign tlb_write_addr = tlb.flush ? flush_addr : tlb_read_addr;
    assign tlb_write = tlb.flush ? {WAYS{flush_in_progress}} : (replacement_way & {WAYS{mmu.write_entry}});

    //Entry written on a fill (valid) or during a flush (invalid)
    assign new_entry.valid = ~tlb.flush;
    assign new_entry.tag = virtual_tag;
    assign new_entry.phys_addr = mmu.new_phys_addr;

    ////////////////////////////////////////////////////
    //Storage: one LUT RAM per way, all ways read and written at the same index
    genvar i;
    generate
        for (i=0; i<WAYS; i=i+1) begin : lut_rams
            lut_ram #(.WIDTH($bits(tlb_entry_t)), .DEPTH(DEPTH)) ram (
                    .clk(clk),
                    .waddr(tlb_write_addr),
                    .ram_write(tlb_write[i]),
                    .new_ram_data(new_entry),
                    .raddr(tlb_read_addr),
                    .ram_data_out(ram_data[i])
                );
        end
    endgenerate

    //One-hot selector of the way to replace; advances every cycle (en tied high)
    cycler #(.C_WIDTH(WAYS)) replacement_policy (.*, .en(1'b1), .one_hot(replacement_way));

    ////////////////////////////////////////////////////
    //Flush sequencing
    always_ff @ (posedge clk) begin
        if (rst | tlb.flush_complete)
            flush_in_progress <= 0;
        else if (tlb.flush)
            flush_in_progress <= 1;
    end

    always_ff @ (posedge clk) begin
        if (rst)
            flush_addr <= 0;
        else if (flush_in_progress)
            flush_addr <= flush_addr + 1;
    end

    //Registered: asserted the cycle after flush_addr holds the last index
    always_ff @ (posedge clk) begin
        if (rst)
            tlb.flush_complete <= 0;
        else
            tlb.flush_complete <= (flush_addr == DEPTH - 1);
    end

    ////////////////////////////////////////////////////
    //Hit detection: a way hits when its entry is valid and its tag matches
    always_comb begin
        for (int way = 0; way < WAYS; way = way + 1) begin
            tag_hit[way] = ram_data[way].valid && (ram_data[way].tag == virtual_tag);
        end
    end

    assign hit = |tag_hit;
    assign tlb.complete = hit | ~tlb_on;

    ////////////////////////////////////////////////////
    //MMU request: raised on a miss, held until the MMU writes the entry
    always_ff @ (posedge clk) begin
        if (rst | mmu.write_entry)
            mmu.new_request <= 0;
        else if (tlb_on & ~hit & tlb.new_request)
            mmu.new_request <= 1;
    end

    assign mmu.virtual_address = tlb.virtual_address;
    assign mmu.execute = tlb.execute;
    assign mmu.rnw = tlb.rnw;

    ////////////////////////////////////////////////////
    //Translation output: page offset always passes through; the page number is
    //replaced by the hitting way's physical page when the TLB is enabled
    //(if several ways hit, the highest-numbered way is used)
    always_comb begin
        tlb.physical_address[31:12] = tlb.virtual_address[31:12];
        tlb.physical_address[11:0] = tlb.virtual_address[11:0];
        for (int way = 0; way < WAYS; way = way + 1) begin
            if (tlb_on & tag_hit[way])
                tlb.physical_address[31:12] = ram_data[way].phys_addr;
        end
    end

endmodule

133
core/write_back.sv Normal file
View file

@ -0,0 +1,133 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//Write-back stage.
//Each cycle one inflight instruction is selected (from the inflight queue
//outputs, or the queue input when no queue entry qualifies). The selection is
//made combinationally from the units' early_done signals and then registered,
//so the register file write, the queue pop and the unit "accepted" handshake
//all take effect in the following cycle. The un-registered selection is also
//exported on the rf_wb "*_early" signals.
module write_back(
input logic clk,
input logic rst,
//When set, only the order of the queue is considered (no early_done check during selection)
input logic inorder,
unit_writeback_interface.writeback unit_wb[NUM_WB_UNITS-1:0],
register_file_writeback_interface.writeback rf_wb,
inflight_queue_interface.wb iq,
id_generator_interface.wb id_gen,
//Registered: the selected unit reported early_done in the previous cycle
output logic instruction_complete
);
//Per-unit copies of the interface signals (done is assigned but not read by any logic in this module)
logic done [NUM_WB_UNITS-1:0];
logic early_done [NUM_WB_UNITS-1:0];
logic accepted [NUM_WB_UNITS-1:0];
logic [XLEN-1:0] rd [NUM_WB_UNITS-1:0];
//Set when no queue output entry qualified and the queue input is selected instead
logic not_in_queue;
//Current selection and its registered (_r) copy (iq_index_r is registered but not read in this module)
logic [4:0] rd_addr, rd_addr_r;
logic [WB_UNITS_WIDTH-1:0] unit_id, unit_id_r;
logic [$clog2(INFLIGHT_QUEUE_DEPTH)-1:0] iq_index, iq_index_corrected, iq_index_r;
instruction_id_t issue_id, issue_id_r;
//Re-assigning interface inputs to array types so that they can be dynamically indexed
genvar i;
generate
for (i=0; i< NUM_WB_UNITS; i++) begin : interface_to_array_g
assign done[i] = unit_wb[i].done;
assign early_done[i] = unit_wb[i].early_done;
assign rd[i] = unit_wb[i].rd;
assign unit_wb[i].accepted = accepted[i];
end
endgenerate
//Unit output selection. Oldest unit with instruction complete if in out-of-order mode, otherwise oldest unit
//The loop runs from index 0 upwards and later matches overwrite earlier ones,
//so the qualifying entry with the highest queue index is the one selected.
//NOTE(review): this presumes higher queue indices hold older instructions - confirm against the inflight queue
always_comb begin
//queue input
not_in_queue = 1;
unit_id = iq.data_in.unit_id;
issue_id = iq.data_in.id;
rd_addr = iq.data_in.rd_addr;
iq_index = 0;
//queue outputs
for (int i=0; i<INFLIGHT_QUEUE_DEPTH; i=i+1) begin
if ( (iq.valid[i] && ~iq.pop[i]) //only consider valid entries and not the one completing this cycle
&& (inorder || (~inorder && early_done[iq.data_out[i].unit_id]))) begin //if inorder set find oldest valid instruction, otherwise find oldest instruction that is done
not_in_queue = 0;
unit_id = iq.data_out[i].unit_id;
issue_id = iq.data_out[i].id;
rd_addr = iq.data_out[i].rd_addr;
iq_index = i;
end
end
end
//Completion is registered so that it lines up with the registered selection below
always_ff @(posedge clk) begin
if (rst)
instruction_complete <= 0;
else
instruction_complete <= early_done[unit_id];
end
//As we decide our popping logic one cycle in advance we have to perform a correction in some cases
//When the selected queue entry is flagged in iq.shift_pop, the index is advanced by one
assign iq_index_corrected = (~not_in_queue & iq.shift_pop[iq_index]) ? iq_index + 1: iq_index;
//Registered copy of the selection (no reset: qualified by instruction_complete)
always_ff @(posedge clk) begin
iq_index_r <= iq_index_corrected;
unit_id_r <= unit_id;
issue_id_r <= issue_id;
rd_addr_r <= rd_addr;
end
//assign instruction_complete = unit_wb.done[unit_id];//iq.data_out[iq_index].valid & unit_wb.done[unit_id];
//Register file write port: registered selection, data muxed from the selected unit
assign rf_wb.rd_addr = rd_addr_r;
assign rf_wb.id = issue_id_r;
assign rf_wb.rd_data = rd[unit_id_r];
assign rf_wb.valid_write = instruction_complete;
//Early (un-registered) view of the same selection, one cycle ahead of the write
assign rf_wb.rd_addr_early = rd_addr;
assign rf_wb.id_early = issue_id;
assign rf_wb.valid_write_early = early_done[unit_id];
//Queue pop: registered one-hot on the corrected index of the completing entry
generate
for (i=0; i<INFLIGHT_QUEUE_DEPTH; i=i+1) begin : iq_pop
always_ff @(posedge clk) begin
if (rst)
iq.pop[i] <= 0;
else
iq.pop[i] <= early_done[unit_id] && (iq_index_corrected == i);
end
end
endgenerate
//Unit handshake: registered accept for the selected unit when it reported early_done
generate
for (i=0; i<NUM_WB_UNITS; i=i+1) begin : wb_mux
always_ff @(posedge clk) begin
if (rst)
accepted[i] <= 0;
else
accepted[i] <= early_done[i] && (unit_id == i);
end
end
endgenerate
//ID generator signals
assign id_gen.complete = instruction_complete;
assign id_gen.complete_id = issue_id_r;
endmodule

View file

@ -0,0 +1,81 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import taiga_config::*;
import taiga_types::*;
//True dual-port RAM with per-byte write enables, optionally preloaded from a
//hex file. Both ports share one clock and behave identically:
//  - en_x with any bit of be_x set: the enabled bytes of data_in_x are written,
//    data_out_x keeps its previous value
//  - en_x with be_x all zero: the addressed line is read into data_out_x
//
//Parameters:
//  LINES            - number of XLEN-bit lines
//  preload_file     - hex file passed to $readmemh
//  USE_PRELOAD_FILE - when non-zero the memory is initialised from preload_file
module xilinx_byte_enable_ram #(
        parameter LINES = 8192,
        parameter preload_file = "",
        parameter USE_PRELOAD_FILE = 0
        )
        (
        input logic clk,

        input logic[$clog2(LINES)-1:0] addr_a,
        input logic en_a,
        input logic[XLEN/8-1:0] be_a,
        input logic[XLEN-1:0] data_in_a,
        output logic[XLEN-1:0] data_out_a,

        input logic[$clog2(LINES)-1:0] addr_b,
        input logic en_b,
        input logic[XLEN/8-1:0] be_b,
        input logic[XLEN-1:0] data_in_b,
        output logic[XLEN-1:0] data_out_b
        );

    //Number of byte lanes; storage and loops follow the port widths instead of
    //assuming a 32-bit word
    localparam NUM_BYTES = XLEN/8;

    logic [XLEN-1:0] ram [LINES-1:0];

    initial
    begin
        if(USE_PRELOAD_FILE)
            $readmemh(preload_file,ram, 0, LINES-1);
    end

    //The two write processes use plain always blocks: both write the same
    //variable (ram), which always_ff does not permit from more than one process.

    //Port A: byte-enabled write
    always @(posedge clk) begin
        if (en_a) begin
            for (int i=0; i < NUM_BYTES; i++) begin
                if (be_a[i])
                    ram[addr_a][8*i+:8] <= data_in_a[8*i+:8];
            end
        end
    end

    //Port A: read only when no byte is being written (output holds otherwise)
    always_ff @(posedge clk) begin
        if (en_a) begin
            if(~|be_a)
                data_out_a <= ram[addr_a];
        end
    end

    //Port B: byte-enabled write
    always @(posedge clk) begin
        if (en_b) begin
            for (int i=0; i < NUM_BYTES; i++) begin
                if (be_b[i])
                    ram[addr_b][8*i+:8] <= data_in_b[8*i+:8];
            end
        end
    end

    //Port B: read only when no byte is being written (output holds otherwise)
    always_ff @(posedge clk) begin
        if (en_b) begin
            if(~|be_b)
                data_out_b <= ram[addr_b];
        end
    end

endmodule

278
l2_arbiter/l2_arbiter.sv Normal file
View file

@ -0,0 +1,278 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import l2_config_and_types::*;
import taiga_types::*;
//L2 arbiter.
//Accepts requests from L2_NUM_PORTS requesters, arbitrates between them
//(round robin) and forwards the winner to the memory interface. One cycle
//after a request is granted it passes through the reservation stage, which
//  - evaluates LR/SC reservations and reports the SC result to the requester,
//  - pushes an invalidation to every other port for stores,
//  - queues the attributes needed to move the write data to memory.
//Read data returned by memory is routed back to the originating port using
//the port number carried in the upper bits of the request id.
module l2_arbiter (
        input logic clk,
        input logic rst,

        l2_requester_interface.arbiter request [L2_NUM_PORTS-1:0],
        l2_memory_interface.arbiter mem
        );

    l2_arbitration_interface arb();

    //FIFO interfaces
    l2_fifo_interface #(.DATA_WIDTH($bits(l2_request_t))) input_fifos [L2_NUM_PORTS-1:0]();
    l2_fifo_interface #(.DATA_WIDTH(32)) input_data_fifos [L2_NUM_PORTS-1:0]();
    l2_fifo_interface #(.DATA_WIDTH(30)) inv_response_fifos [L2_NUM_PORTS-1:0]();
    l2_fifo_interface #(.DATA_WIDTH(32 + L2_SUB_ID_W)) returndata_fifos [L2_NUM_PORTS-1:0]();

    l2_fifo_interface #(.DATA_WIDTH($bits(l2_mem_request_t))) mem_addr_fifo();
    l2_fifo_interface #(.DATA_WIDTH(32)) mem_data_fifo();

    l2_fifo_interface #(.DATA_WIDTH($bits(l2_data_attributes_t))) data_attributes();
    l2_fifo_interface #(.DATA_WIDTH(32 + L2_ID_W)) mem_returndata_fifo();

    //A request is granted and moved to the memory address FIFO this cycle
    logic advance;

    l2_request_t arb_request;
    l2_mem_request_t mem_request;

    //Reservation stage: registered copy of the request granted in the previous cycle
    logic reserv_valid;
    logic reserv_lr;
    logic reserv_sc;
    logic reserv_store;

    l2_request_t requests [L2_NUM_PORTS-1:0];
    l2_request_t reserv_request;
    logic [$clog2(L2_NUM_PORTS)-1:0] reserv_id;
    logic [L2_NUM_PORTS-1:0] reserv_id_v;

    //Write data movement
    logic write_done;
    logic [4:0] burst_count;
    l2_data_attributes_t new_attr;
    l2_data_attributes_t current_attr;

    logic [31:0] input_data [L2_NUM_PORTS-1:0];

    //Read return path
    l2_mem_return_data_t mem_return_data;
    l2_return_data_t return_data [L2_NUM_PORTS-1:0];
    logic [L2_NUM_PORTS-1:0] return_push;

    //All FIFOs are synchronous; their wr_clk/rd_clk ports are tied to clk
    //(the names are required for the .* connections below)
    logic wr_clk, rd_clk;
    assign wr_clk = clk;
    assign rd_clk = clk;

    //Implementation
    //************************************

    /*************************************
     * Input Request FIFOs
     *************************************/
    genvar i;
    generate
        for (i=0; i < L2_NUM_PORTS; i++) begin
            //Requester FIFO side
            assign input_fifos[i].push = request[i].request_push;
            assign input_fifos[i].data_in = request[i].request;
            //Popped when this port wins arbitration and the memory address FIFO has room
            assign input_fifos[i].pop = input_fifos[i].valid & arb.grantee_v[i] & ~mem_addr_fifo.full;
            assign request[i].request_full = input_fifos[i].full;

            //FIFO instantiation
            l2_fifo #(.DATA_WIDTH($bits(l2_request_t)), .FIFO_DEPTH(L2_INPUT_FIFO_DEPTHS[i])) input_fifo (.*, .fifo(input_fifos[i]));

            //Arbiter FIFO side
            assign requests[i] = input_fifos[i].data_out;
            assign arb.requests[i] = input_fifos[i].valid;
        end
    endgenerate

    /*************************************
     * Input Data FIFOs
     *************************************/
    generate
        for (i=0; i < L2_NUM_PORTS; i++) begin
            //Requester FIFO side
            assign input_data_fifos[i].push = request[i].wr_data_push;
            assign input_data_fifos[i].data_in = request[i].wr_data;
            assign request[i].data_full = input_data_fifos[i].full;

            //FIFO instantiation
            l2_fifo #(.DATA_WIDTH(32), .FIFO_DEPTH(L2_INPUT_FIFO_DEPTHS[i])) input_data_fifo (.*, .fifo(input_data_fifos[i]));

            //Arbiter FIFO side
            //Drained while this port owns the write at the head of the attributes FIFO
            assign input_data_fifos[i].pop = (data_attributes.valid && (current_attr.id == i) && ~mem_data_fifo.full);
            assign input_data[i] = input_data_fifos[i].data_out;
        end
    endgenerate

    /*************************************
     * Arbitration
     *************************************/
    l2_round_robin rr (.*);

    assign advance = arb.grantee_valid & ~mem_addr_fifo.full;
    assign arb.strobe = advance;

    assign mem_addr_fifo.push = advance;
    assign mem_addr_fifo.pop = mem.request_pop;
    assign mem.request_valid = mem_addr_fifo.valid;

    assign arb_request = requests[arb.grantee_i];

    //Memory request: the granted request with the port number prepended to its sub id
    assign mem_request.addr = arb_request.addr;
    assign mem_request.be = arb_request.be;
    assign mem_request.rnw = arb_request.rnw;
    assign mem_request.is_amo = arb_request.is_amo;
    assign mem_request.amo_type_or_burst_size = arb_request.amo_type_or_burst_size;
    assign mem_request.id = {arb.grantee_i, arb_request.sub_id};

    assign mem_addr_fifo.data_in = mem_request;
    assign mem.request = mem_addr_fifo.data_out;

    l2_fifo #(.DATA_WIDTH($bits(l2_mem_request_t)), .FIFO_DEPTH(L2_MEM_ADDR_FIFO_DEPTH)) input_fifo (.*, .fifo(mem_addr_fifo));

    /*************************************
     * Reservation Support
     *************************************/
    //Capture the granted request; it is processed in the following cycle (reserv_valid)
    always_ff @(posedge clk) begin
        if (advance) begin
            reserv_request <= requests[arb.grantee_i];
            reserv_id <= arb.grantee_i;
            reserv_id_v <= arb.grantee_v;
        end
    end

    always_ff @(posedge clk) begin
        if (rst)
            reserv_valid <= 0;
        else
            reserv_valid <= advance;
    end

    assign reserv_lr = (reserv_request.is_amo && reserv_request.amo_type_or_burst_size == AMO_LR);
    assign reserv_sc = (reserv_request.is_amo && reserv_request.amo_type_or_burst_size == AMO_SC);
    //Anything that modifies memory: plain writes and every AMO except LR
    assign reserv_store = ~reserv_request.rnw | (reserv_request.is_amo && reserv_request.amo_type_or_burst_size != AMO_LR);

    l2_reservation_logic reserv (.*,
            .addr(reserv_request.addr),
            .id(reserv_id),
            .strobe(reserv_valid),
            .lr (reserv_lr),
            .sc (reserv_sc),
            .store (reserv_store),
            .abort(mem.abort)
        );

    //sc response
    generate
        for (i=0; i < L2_NUM_PORTS; i++) begin
            always_ff @(posedge clk) begin
                if (rst) begin
                    request[i].con_result <= 0;
                    request[i].con_valid <= 0;
                end
                else begin
                    request[i].con_result <= ~mem.abort;
                    request[i].con_valid <= reserv_sc & reserv_valid & reserv_id_v[i];
                end
            end
        end
    endgenerate

    //inv response
    generate
        for (i=0; i < L2_NUM_PORTS; i++) begin
            //Requester FIFO side
            assign inv_response_fifos[i].pop = request[i].inv_ack;
            assign request[i].inv_addr = inv_response_fifos[i].data_out;
            assign request[i].inv_valid = inv_response_fifos[i].valid;

            //FIFO instantiation
            l2_fifo #(.DATA_WIDTH(30), .FIFO_DEPTH(L2_INVALIDATION_FIFO_DEPTHS[i])) inv_response_fifo (.*, .fifo(inv_response_fifos[i]));

            //Arbiter side
            //Every port other than the one that issued the store is notified.
            //The address sent is that of the store in the reservation stage,
            //not whatever happens to be at the head of the notified port's own
            //request FIFO.
            assign inv_response_fifos[i].push = reserv_valid & reserv_store & ~reserv_id_v[i];
            assign inv_response_fifos[i].data_in = reserv_request.addr;
        end
    endgenerate

    /*************************************
     * Data stage
     *************************************/
    assign new_attr.id = reserv_id;
    assign new_attr.burst_size = reserv_request.amo_type_or_burst_size;
    assign new_attr.abort = mem.abort;

    assign data_attributes.data_in = new_attr;
    //Only non-aborted writes queue a data transfer
    assign data_attributes.push = reserv_valid & ~reserv_request.rnw & ~mem.abort;

    l2_fifo #(.DATA_WIDTH($bits(l2_data_attributes_t)), .FIFO_DEPTH(L2_DATA_ATTRIBUTES_FIFO_DEPTH)) data_attributes_fifo (.*, .fifo(data_attributes));

    assign data_attributes.pop = write_done;
    assign current_attr = data_attributes.data_out;

    //Counts the words moved for the write at the head of the attributes FIFO
    always_ff @(posedge clk) begin
        if (rst)
            burst_count <= 0;
        else if (write_done)
            burst_count <= 0;
        else if (data_attributes.valid & ~mem_data_fifo.full)
            burst_count <= burst_count + 1;
    end

    //The transfer ends on the word whose index equals burst_size
    assign write_done = data_attributes.valid & ~mem_data_fifo.full & (burst_count == current_attr.burst_size);

    l2_fifo #(.DATA_WIDTH(32), .FIFO_DEPTH(L2_MEM_ADDR_FIFO_DEPTH)) mem_data (.*, .fifo(mem_data_fifo));

    assign mem_data_fifo.push = data_attributes.valid & ~mem_data_fifo.full & ~current_attr.abort;
    assign mem_data_fifo.data_in = input_data[current_attr.id];

    assign mem.wr_data = mem_data_fifo.data_out;
    assign mem.wr_data_valid = mem_data_fifo.valid;
    assign mem_data_fifo.pop = mem.wr_data_read;

    /*************************************
     * Read response
     *************************************/
    l2_fifo # (.DATA_WIDTH(32 + L2_ID_W), .FIFO_DEPTH(L2_MEM_ADDR_FIFO_DEPTH)) mem_returndata (.*, .fifo(mem_returndata_fifo));

    assign mem_returndata_fifo.push = mem.rd_data_valid;
    assign mem_returndata_fifo.data_in = {mem.rd_id, mem.rd_data};
    assign mem_return_data = mem_returndata_fifo.data_out;
    //Forwarded to the per-port FIFO as soon as it is available
    assign mem_returndata_fifo.pop = mem_returndata_fifo.valid;

    //Steer the returned word to the port encoded in the upper id bits
    always_comb begin
        return_push = '0;
        return_push[mem_return_data.id] = mem_returndata_fifo.valid;
    end

    generate
        for (i=0; i < L2_NUM_PORTS; i++) begin
            //Requester FIFO side
            assign returndata_fifos[i].pop = request[i].rd_data_ack;
            assign return_data[i] = returndata_fifos[i].data_out;
            assign request[i].rd_data =return_data[i].data;
            assign request[i].rd_sub_id = return_data[i].sub_id;
            assign request[i].rd_data_valid = returndata_fifos[i].valid;

            //FIFO instantiation
            l2_fifo #(.DATA_WIDTH(32 + L2_SUB_ID_W), .FIFO_DEPTH(L2_READ_RETURN_FIFO_DEPTHS[i])) returndata_fifo (.*, .fifo(returndata_fifos[i]));

            //Arbiter side
            assign returndata_fifos[i].push = return_push[i];
            assign returndata_fifos[i].data_in = {mem_return_data.sub_id, mem_return_data.data};
        end
    endgenerate

endmodule

View file

@ -0,0 +1,78 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
//Configuration parameters and packed types shared by the L2 arbiter modules
//and interfaces.
package l2_config_and_types;

    //Number of requester ports and width of the per-port request tag
    parameter L2_NUM_PORTS = 2;
    parameter L2_SUB_ID_W = 2;

    //Per-port FIFO depths (eight entries are provided; a port uses the entry at its own index)
    parameter integer L2_INPUT_FIFO_DEPTHS [7 : 0]        = '{4, 4, 4, 4, 4, 4, 4, 4};
    parameter integer L2_INVALIDATION_FIFO_DEPTHS [7 : 0] = '{4, 4, 4, 4, 4, 4, 4, 4};
    parameter integer L2_READ_RETURN_FIFO_DEPTHS [7 : 0]  = '{1, 1, 1, 1, 1, 1, 1, 1};//depth 1, rd_ack will be trimmed

    //Depths of the FIFOs on the memory side
    parameter L2_MEM_ADDR_FIFO_DEPTH = 8;
    parameter L2_DATA_ATTRIBUTES_FIFO_DEPTH = 16;//Sized larger to remove need to check full status

    //Convenience derivative parameters
    //Full request id = port number followed by the requester's sub id
    parameter L2_ID_W = $clog2(L2_NUM_PORTS) + L2_SUB_ID_W;

    //Request as issued by a requester port
    typedef struct packed{
        logic [29:0] addr;
        logic [3:0] be;
        logic rnw;
        logic is_amo;
        logic [4:0] amo_type_or_burst_size;
        logic [L2_SUB_ID_W-1:0] sub_id;
    } l2_request_t;

    //Request as forwarded to memory: identical fields, but tagged with the full id
    typedef struct packed{
        logic [29:0] addr;
        logic [3:0] be;
        logic rnw;
        logic is_amo;
        logic [4:0] amo_type_or_burst_size;
        logic [L2_ID_W-1:0] id;
    } l2_mem_request_t;

    //Bookkeeping for a pending write-data transfer
    typedef struct packed{
        logic [$clog2(L2_NUM_PORTS)-1:0] id;
        logic [4:0] burst_size;
        logic abort;
    } l2_data_attributes_t;

    //Read data as returned by memory (port number, sub id, data)
    typedef struct packed{
        logic [$clog2(L2_NUM_PORTS)-1:0] id;
        logic [L2_SUB_ID_W-1:0] sub_id;
        logic [31:0] data;
    } l2_mem_return_data_t;

    //Read data as delivered to a requester port (sub id, data)
    typedef struct packed{
        logic [L2_SUB_ID_W-1:0] sub_id;
        logic [31:0] data;
    } l2_return_data_t;

endpackage

98
l2_arbiter/l2_fifo.sv Normal file
View file

@ -0,0 +1,98 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
//Synchronous FIFO used throughout the L2 arbiter.
//
//Parameters:
//  DATA_WIDTH - width of each entry
//  FIFO_DEPTH - number of entries. A depth of 1 selects a single register
//               stage whose valid flag lasts one cycle per push (pop is not
//               used); any other depth selects a LUT-RAM FIFO.
//  ASYNC      - dual-clock variant selector. No logic is generated when set
//               (the asynchronous implementation is not present), so the
//               interface outputs are left undriven in that configuration.
//
//The FIFO performs no overflow/underflow protection: callers must not push
//when full or pop when not valid.
//wr_clk and rd_clk are only relevant to the asynchronous variant; the
//synchronous implementations are clocked by clk.
module l2_fifo #(parameter DATA_WIDTH = 32, parameter FIFO_DEPTH = 4, parameter ASYNC = 0)
        (
        input logic clk,
        input logic wr_clk,
        input logic rd_clk,
        input logic rst,
        l2_fifo_interface.structure fifo
        );

    generate if (ASYNC) begin
        //Asynchronous FIFO: not implemented
    end
    else
    begin
        if (FIFO_DEPTH == 1) begin
            //Single register stage: data is presented for exactly one cycle after each push
            always_ff @ (posedge clk) begin
                if (rst)
                    fifo.valid <= 0;
                else if (fifo.push)
                    fifo.valid <= 1;
                else
                    fifo.valid <= 0;
            end

            always_ff @ (posedge clk) begin
                if (fifo.push)
                    fifo.data_out <= fifo.data_in;
            end

            //The stage accepts a push every cycle, so it never reports full
            assign fifo.full = 1'b0;
            assign fifo.empty = ~fifo.valid;
        end
        else begin
            //Index of the last storage location; pointers wrap back to zero after it,
            //which also covers depths that are not a power of two
            localparam logic [$clog2(FIFO_DEPTH)-1:0] LAST_INDEX = FIFO_DEPTH - 1;

            logic[DATA_WIDTH-1:0] lut_ram[FIFO_DEPTH-1:0];

            logic[$clog2(FIFO_DEPTH)-1:0] write_index;
            logic[$clog2(FIFO_DEPTH)-1:0] read_index;

            //One-hot occupancy counter: count_v[n] is set when n entries are stored
            logic count_v [FIFO_DEPTH:0];
            ////////////////////////////////////////////////////////
            //implementation
            assign fifo.data_out = lut_ram[read_index];

            always_ff @ (posedge clk) begin
                if (rst) begin
                    read_index <= '0;
                    write_index <= '0;
                end
                else begin
                    if (fifo.pop)
                        read_index <= (read_index == LAST_INDEX) ? '0 : read_index + 1;
                    if (fifo.push)
                        write_index <= (write_index == LAST_INDEX) ? '0 : write_index + 1;
                end
            end

            assign fifo.full = count_v[FIFO_DEPTH];
            assign fifo.valid = ~count_v[0];
            assign fifo.empty = count_v[0];

            always_ff @ (posedge clk) begin
                if (fifo.push)
                    lut_ram[write_index] <= fifo.data_in;
            end

            //Occupancy moves up on a push alone, down on a pop alone, and is
            //unchanged when both (or neither) occur
            always_ff @ (posedge clk) begin
                if (rst) begin
                    count_v[0] <= 1;
                    for (int i = 1; i <= FIFO_DEPTH; i++) count_v[i] <= 0;
                end
                else if (fifo.push & ~fifo.pop)
                    count_v <= {count_v[FIFO_DEPTH-1:0], 1'b0};
                else if (~fifo.push & fifo.pop)
                    count_v <= {1'b0, count_v[FIFO_DEPTH:1]};
            end
        end
    end
    endgenerate

endmodule

102
l2_arbiter/l2_interfaces.sv Normal file
View file

@ -0,0 +1,102 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import l2_config_and_types::*;
//Connection between one requester port and the L2 arbiter.
//The "requester" and "arbiter" modports are mirror images of each other.
interface l2_requester_interface;
    //Request channel
    l2_request_t request;
    logic request_push;
    logic request_full;

    //Invalidation channel (word address)
    logic [31:2] inv_addr;
    logic inv_valid;
    logic inv_ack;

    //Conditional (SC) result channel
    logic con_result;
    logic con_valid;

    //Write data channel
    logic [31:0] wr_data;
    logic wr_data_push;
    logic data_full;

    //Read data channel
    logic [31:0] rd_data;
    logic [L2_SUB_ID_W-1:0] rd_sub_id;
    logic rd_data_valid;
    logic rd_data_ack;

    modport requester (
            output request, request_push, inv_ack, wr_data, wr_data_push, rd_data_ack,
            input request_full, inv_addr, inv_valid, con_result, con_valid, data_full,
            rd_data, rd_sub_id, rd_data_valid
        );

    modport arbiter (
            input request, request_push, inv_ack, wr_data, wr_data_push, rd_data_ack,
            output request_full, inv_addr, inv_valid, con_result, con_valid, data_full,
            rd_data, rd_sub_id, rd_data_valid
        );

endinterface
//Connection between the L2 arbiter and the memory side.
//The "arbiter" and "memory" modports are mirror images of each other.
interface l2_memory_interface;
    //Request channel
    l2_mem_request_t request;
    logic request_pop;
    logic request_valid;

    //Abort indication, driven from the arbiter side
    logic abort;

    //Write data channel
    logic [31:0] wr_data;
    logic wr_data_valid;
    logic wr_data_read;

    //Read data channel, tagged with the full request id
    logic [31:0] rd_data;
    logic [L2_ID_W-1:0] rd_id;
    logic rd_data_valid;

    modport arbiter (
            output request, request_valid, abort, wr_data, wr_data_valid,
            input request_pop, wr_data_read, rd_data, rd_id, rd_data_valid
        );

    modport memory (
            input request, request_valid, abort, wr_data, wr_data_valid,
            output request_pop, wr_data_read, rd_data, rd_id, rd_data_valid
        );

endinterface
//Signal bundle for a FIFO with DATA_WIDTH-bit entries.
//  enqueue   - producer side view
//  dequeue   - consumer side view
//  structure - the FIFO implementation itself
interface l2_fifo_interface #(parameter DATA_WIDTH = 32);
    //Write side
    logic push;
    logic [DATA_WIDTH-1:0] data_in;
    logic full;
    logic empty;

    //Read side
    logic pop;
    logic [DATA_WIDTH-1:0] data_out;
    logic valid;

    modport enqueue (
            input full, empty,
            output data_in, push
        );

    modport dequeue (
            input valid, data_out,
            output pop
        );

    modport structure (
            input push, pop, data_in,
            output data_out, valid, full, empty
        );

endinterface
//Signals exchanged between the arbitration policy and the logic using it.
interface l2_arbitration_interface;
    //One request bit per port
    logic [L2_NUM_PORTS-1:0] requests;
    //Asserted by the requester side when the current grant is consumed
    logic strobe;

    //Granted port as an index and as a one-hot vector
    logic [$clog2(L2_NUM_PORTS)-1:0] grantee_i;
    logic [L2_NUM_PORTS-1:0] grantee_v;
    logic grantee_valid;

    modport arbiter (
            input requests, strobe,
            output grantee_i, grantee_v , grantee_valid
        );

    modport requester (
            output requests, strobe,
            input grantee_i, grantee_v , grantee_valid
        );

endinterface

View file

@ -0,0 +1,73 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import l2_config_and_types::*;
//LR/SC reservation tracking for the L2 arbiter.
//Each port owns one reservation (a valid bit plus a word address).
//
//Inputs (all qualified by strobe, except for the combinational abort output):
//  addr   - word address of the access being processed
//  id     - port issuing the access
//  lr     - access is a load-reserved: records addr as port id's reservation
//  sc     - access is a store-conditional
//  store  - access modifies memory (includes read-modify-write AMOs)
//Output:
//  abort  - combinational; set for an SC whose port does not hold a valid
//           reservation on addr
//
//Reservation removal is conservative: any SC clears the reservations of all
//ports, and any store clears every reservation on the matching address.
module l2_reservation_logic
        (
        input logic clk,
        input logic rst,
        input logic [31:2] addr,
        input logic [$clog2(L2_NUM_PORTS)-1:0] id,
        input logic strobe,
        input logic lr,
        input logic sc,
        input logic store, //includes read-modify-write AMOs

        output logic abort
        );

    logic [31:2] reservation_address [L2_NUM_PORTS-1:0];
    logic [L2_NUM_PORTS-1:0] reservation;

    logic [L2_NUM_PORTS-1:0] address_match;
    logic [L2_NUM_PORTS-1:0] revoke_reservation;

    always_comb begin
        for (int i = 0; i < L2_NUM_PORTS; i++) begin
            address_match[i] = (reservation_address[i] == addr);
            revoke_reservation[i] = sc | (store & address_match[i]);
        end
    end

    always_ff @(posedge clk) begin
        for (int i = 0; i < L2_NUM_PORTS; i++) begin
            if (rst)
                reservation[i] <= 0;
            else if (strobe) begin
                if (revoke_reservation[i])
                    reservation[i] <= 0;
                //Only the port that issued the LR obtains a reservation; the
                //other ports' reservation bits (and addresses) are left alone
                else if (lr && (id == i))
                    reservation[i] <= 1;
            end
        end
    end

    //The reserved address is recorded only for the issuing port
    always_ff @(posedge clk) begin
        if (strobe & lr)
            reservation_address[id] <= addr;
    end

    //An SC succeeds only when its own port holds a reservation on this address
    assign abort = sc && !(reservation[id] && address_match[id]);

endmodule

View file

@ -0,0 +1,75 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
import l2_config_and_types::*;
//Round-robin arbitration policy.
//The most recently granted port (state) has the lowest priority; priority then
//increases through state+1, state+2, ... (modulo the port count), so the port
//just below the current state has the highest priority.
//With a single port the request is passed straight through.
module l2_round_robin
        (
        input logic clk,
        input logic rst,
        l2_arbitration_interface.arbiter arb
        );

    //Last granted port
    logic [$clog2(L2_NUM_PORTS)-1:0] state;
    //muxes[s]: port that would be granted if the current state were s
    logic[$clog2(L2_NUM_PORTS)-1:0] muxes [L2_NUM_PORTS-1:0];

    generate if(L2_NUM_PORTS == 1)
    begin
        assign arb.grantee_i = 0;
        assign arb.grantee_v = arb.requests;
        assign arb.grantee_valid = arb.requests[0];
    end
    else
    begin
        //any valid request
        assign arb.grantee_valid = |arb.requests;

        //Lowest priority to current state
        always_ff @(posedge clk) begin
            if (rst)
                state <= '0;
            else if (arb.strobe)
                state <= arb.grantee_i;
        end

        //ex: state 0, highest priority to L2_NUM_PORTS-1
        //For every possible state, scan the ports in increasing priority order;
        //the last requesting port seen wins. The state's own port is the
        //default and is therefore chosen only when no other port requests.
        always_comb begin
            for (int base = 0; base < L2_NUM_PORTS; base++) begin
                muxes[base] = base;
                for (int offset = 1; offset < L2_NUM_PORTS; offset++) begin
                    if (arb.requests[(base + offset) % L2_NUM_PORTS])
                        muxes[base] = (base + offset) % L2_NUM_PORTS;
                end
            end
        end

        //Select mux output based on current state
        assign arb.grantee_i = muxes[state];

        //Integer to one-hot
        always_comb begin
            arb.grantee_v = '0;
            arb.grantee_v[arb.grantee_i] = 1;
        end
    end
    endgenerate

endmodule