Preliminary AMO implementation

This commit is contained in:
Florian Zaruba 2018-09-15 21:59:41 +02:00
parent 60cff9edcb
commit a3773b1364
No known key found for this signature in database
GPG key ID: E742FFE8EC38A792
14 changed files with 508 additions and 226 deletions

View file

@ -17,7 +17,7 @@ test_case ?= core_test
# QuestaSim Version
questa_version ?= ${QUESTASIM_VERSION}
# verilator version
verilator ?= ${VERILATOR_ROOT}/bin/verilator
verilator ?= verilator
# target options
target-options ?=
# additional defines

View file

@ -186,17 +186,52 @@ package ariane_pkg;
// ----------------------
// Extract Bytes from Op
// ----------------------
// TODO: Add atomics
// Map a load/store/atomic operator to its 2-bit transfer-size code.
//
//   2'b11 : double-word ops (LD, SD and every AMO_*D operator)
//   2'b10 : word ops        (LW, LWU, SW and every AMO_*W operator)
//   2'b01 : half-word ops   (LH, LHU, SH)
//   2'b00 : byte ops        (LB, LBU, SB)
//
// Going by the RISC-V mnemonics the code is log2 of the access size in bytes.
// Any operator not listed falls through to the default and is reported as a
// double-word access.
function automatic logic [1:0] extract_transfer_size (fu_op op);
    case (op)
        // each operator is listed exactly once so no case item is shadowed
        LD, SD,
        AMO_LRD,   AMO_SCD,
        AMO_SWAPD, AMO_ADDD,
        AMO_ANDD,  AMO_ORD,
        AMO_XORD,  AMO_MAXD,
        AMO_MAXDU, AMO_MIND,
        AMO_MINDU: begin
            return 2'b11;
        end
        LW, LWU, SW,
        AMO_LRW,   AMO_SCW,
        AMO_SWAPW, AMO_ADDW,
        AMO_ANDW,  AMO_ORW,
        AMO_XORW,  AMO_MAXW,
        AMO_MAXWU, AMO_MINW,
        AMO_MINWU: begin
            return 2'b10;
        end
        LH, LHU, SH: return 2'b01;
        LB, SB, LBU: return 2'b00;
        default:     return 2'b11;
    endcase
endfunction
// Classify an operator: yields 1'b1 for every atomic memory operation
// (LR/SC and the read-modify-write AMOs, in both word and double-word
// flavours) and 1'b0 for anything else.
function automatic logic is_amo_op (fu_op op);
    logic amo;
    // assume "not an AMO" until one of the listed operators matches
    amo = 1'b0;
    case (op)
        // word-sized atomics
        AMO_LRW,   AMO_SCW,   AMO_SWAPW, AMO_ADDW,
        AMO_ANDW,  AMO_ORW,   AMO_XORW,  AMO_MAXW,
        AMO_MAXWU, AMO_MINW,  AMO_MINWU,
        // double-word-sized atomics
        AMO_LRD,   AMO_SCD,   AMO_SWAPD, AMO_ADDD,
        AMO_ANDD,  AMO_ORD,   AMO_XORD,  AMO_MAXD,
        AMO_MAXDU, AMO_MIND,  AMO_MINDU: amo = 1'b1;
        default: ;
    endcase
    return amo;
endfunction
typedef struct packed {
logic valid;
logic [63:0] vaddr;
@ -245,7 +280,8 @@ package ariane_pkg;
// Atomics
// --------------------
// Size-agnostic atomic operation selector carried on the data-cache request
// port; AMO_NONE marks an ordinary (non-atomic) request.
// The enumerators are listed exactly once: repeating the list inside the same
// enum would redeclare every name.
typedef enum logic [3:0] {
    AMO_NONE, AMO_LR, AMO_SC, AMO_SWAP, AMO_ADD, AMO_AND,
    AMO_OR, AMO_XOR, AMO_MAX, AMO_MAXU, AMO_MIN, AMO_MINU
} amo_t;
typedef struct packed {

View file

@ -127,12 +127,25 @@ package riscv;
logic [6:0] opcode;
} utype_t;
// atomic instructions
// Field view of a 32-bit instruction word in the atomic (AMO) format.
// The field widths must add up to exactly 32 bits, otherwise the type cannot
// be a member of the packed instruction union.
typedef struct packed {
    logic [31:27] funct5;  // selects the atomic operation
    logic         aq;      // instruction bit 26
    logic         rl;      // instruction bit 25
    logic [24:20] rs2;     // second source register
    logic [19:15] rs1;     // first source register
    logic [14:12] funct3;  // 3-bit field (was mis-declared as [14:20], i.e. 7 bits)
    logic [11:7]  rd;      // destination register
    logic [6:0]   opcode;
} atype_t;
// Packed union offering several views of the same instruction word:
// the raw 32 bits (instr) and one field layout per instruction format.
// Being a packed union, every member has to be exactly as wide as instr.
typedef union packed {
logic [31:0] instr;
rtype_t rtype;
itype_t itype;
stype_t stype;
utype_t utype;
atype_t atype;
} instruction_t;
// --------------------

View file

@ -201,6 +201,10 @@ module ariane #(
// ----------------
dcache_req_i_t [2:0] dcache_req_ports_ex_cache;
dcache_req_o_t [2:0] dcache_req_ports_cache_ex;
logic amo_commit;
logic amo_valid;
logic amo_sc_succ;
logic amo_flush;
// --------------
// Frontend
@ -383,6 +387,8 @@ module ariane #(
// Commit
// ---------
commit_stage commit_stage_i (
.clk_i,
.rst_ni,
.halt_i ( halt_ctrl ),
.flush_dcache_i ( dcache_flush_ctrl_cache ),
.exception_o ( ex_commit ),
@ -395,6 +401,9 @@ module ariane #(
.waddr_o ( waddr_commit_id ),
.wdata_o ( wdata_commit_id ),
.we_o ( we_commit_id ),
.amo_commit_o ( amo_commit ),
.amo_valid_i ( amo_valid ),
.amo_sc_succ_i ( amo_sc_succ ),
.commit_lsu_o ( lsu_commit_commit_ex ),
.commit_lsu_ready_i ( lsu_commit_ready_ex_commit ),
.commit_csr_o ( csr_commit_commit_ex ),
@ -487,6 +496,7 @@ module ariane #(
.flush_tlb_o ( flush_tlb_ctrl_ex ),
.flush_dcache_o ( dcache_flush_ctrl_cache ),
.flush_dcache_ack_i ( dcache_flush_ack_cache_ctrl ),
.flush_amo_o ( amo_flush ),
.halt_csr_i ( halt_csr_ctrl ),
.halt_o ( halt_ctrl ),
@ -525,12 +535,13 @@ module ariane #(
.dcache_enable_i ( dcache_en_csr_nbdcache ),
.dcache_flush_i ( dcache_flush_ctrl_cache ),
.dcache_flush_ack_o ( dcache_flush_ack_cache_ctrl ),
// from PTW, Load Unit and Store Unit
.dcache_amo_commit_i ( 1'b0 ),
.dcache_amo_valid_o ( ),
.dcache_amo_result_o ( ),
.dcache_amo_flush_i ( 1'b0 ),
// to commit stage
.dcache_amo_commit_i ( amo_commit ),
.dcache_amo_valid_o ( amo_valid ),
.dcache_amo_sc_succ_o ( amo_sc_succ ),
.dcache_amo_flush_i ( amo_flush ),
.dcache_miss_o ( dcache_miss_cache_perf ),
// from PTW, Load Unit and Store Unit
.dcache_req_ports_i ( dcache_req_ports_ex_cache ),
.dcache_req_ports_o ( dcache_req_ports_cache_ex ),
// memory side

View file

@ -0,0 +1,56 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 15.09.2018
// Description: Combinatorial AMO unit
// Purely combinational ALU computing the result of an atomic memory operation.
//
//   amo_op        : operation selector
//   amo_operand_a : first operand
//   amo_operand_b : second operand
//   amo_result_o  : result of the selected operation; all-zero for selectors
//                   without a case below (AMO_NONE, AMO_LR, AMO_SC)
//
// All operations work on the full 64-bit operands.
// NOTE(review): word-sized AMOs presumably need their operands extended to
// 64 bit by the caller for the comparisons to be meaningful - confirm.
module amo_alu (
    // AMO interface
    input  ariane_pkg::amo_t amo_op,
    input  logic [63:0]      amo_operand_a,
    input  logic [63:0]      amo_operand_b,
    output logic [63:0]      amo_result_o // result of atomic memory operation
);
    // TODO(zarubaf) Very crude first implementation
    always_comb begin
        // default keeps the output fully assigned for every selector (no latch)
        amo_result_o = '0;
        case (amo_op)
            ariane_pkg::AMO_SWAP: begin
                // swap simply hands back operand b
                amo_result_o = amo_operand_b;
            end
            ariane_pkg::AMO_ADD: begin
                amo_result_o = $signed(amo_operand_a) + $signed(amo_operand_b);
            end
            ariane_pkg::AMO_AND: begin
                amo_result_o = amo_operand_a & amo_operand_b;
            end
            ariane_pkg::AMO_OR: begin
                // bitwise OR (previously computed an AND)
                amo_result_o = amo_operand_a | amo_operand_b;
            end
            ariane_pkg::AMO_XOR: begin
                amo_result_o = amo_operand_a ^ amo_operand_b;
            end
            ariane_pkg::AMO_MAX: begin
                // signed maximum
                amo_result_o = ($signed(amo_operand_a) > $signed(amo_operand_b)) ? amo_operand_a : amo_operand_b;
            end
            ariane_pkg::AMO_MAXU: begin
                // unsigned maximum
                amo_result_o = (amo_operand_a > amo_operand_b) ? amo_operand_a : amo_operand_b;
            end
            ariane_pkg::AMO_MIN: begin
                // signed minimum
                amo_result_o = ($signed(amo_operand_a) < $signed(amo_operand_b)) ? amo_operand_a : amo_operand_b;
            end
            ariane_pkg::AMO_MINU: begin
                // unsigned minimum (previously compared with '>' and returned the maximum)
                amo_result_o = (amo_operand_a < amo_operand_b) ? amo_operand_a : amo_operand_b;
            end
            default:;
        endcase
    end
endmodule

View file

@ -28,11 +28,14 @@ module cache_ctrl #(
input logic flush_i,
input logic bypass_i, // enable cache
output logic busy_o,
// Core request ports
input dcache_req_i_t req_port_i,
output dcache_req_o_t req_port_o,
// Atomic memory operations
input logic amo_commit_i,
input logic amo_flush_i,
output logic amo_valid_o,
output logic amo_sc_succ_o, // store conditional was successful
// SRAM interface
output logic [DCACHE_SET_ASSOC-1:0] req_o, // req is valid
output logic [DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array
@ -50,7 +53,7 @@ module cache_ctrl #(
input logic active_serving_i, // the miss unit is currently active for this unit, serving the miss
input logic [63:0] critical_word_i,
input logic critical_word_valid_i,
// bypass ports
input logic bypass_gnt_i,
input logic bypass_valid_i,
input logic [63:0] bypass_data_i,
@ -60,18 +63,18 @@ module cache_ctrl #(
input logic mshr_index_matches_i
);
// 0 IDLE
// 1 WAIT_TAG
// 2 WAIT_TAG_BYPASSED
// 3 STORE_REQ
// 4 WAIT_REFILL_VALID
// 5 WAIT_REFILL_GNT
// 6 WAIT_TAG_SAVED
// 7 WAIT_MSHR
// 8 WAIT_CRITICAL_WORD
enum logic [3:0] {
IDLE, WAIT_TAG, WAIT_TAG_BYPASSED, STORE_REQ, WAIT_REFILL_VALID, WAIT_REFILL_GNT, WAIT_TAG_SAVED, WAIT_MSHR, WAIT_CRITICAL_WORD
IDLE, // 0
WAIT_TAG, // 1
WAIT_TAG_BYPASSED, // 2
STORE_REQ, // 3
WAIT_REFILL_VALID, // 4
WAIT_REFILL_GNT, // 5
WAIT_TAG_SAVED, // 6
WAIT_MSHR, // 7
WAIT_CRITICAL_WORD, // 8
WAIT_AMO_COMMIT, // 9
STORE_AMO // 10
} state_d, state_q;
typedef struct packed {
@ -79,12 +82,25 @@ module cache_ctrl #(
logic [DCACHE_TAG_WIDTH-1:0] tag;
logic [7:0] be;
logic [1:0] size;
amo_t amo;
logic we;
logic [63:0] wdata;
logic bypass;
} mem_req_t;
logic [DCACHE_SET_ASSOC-1:0] hit_way_d, hit_way_q;
// the word we loaded previously, needed as an operand for the AMOs
logic [63:0] loaded_word_d, loaded_word_q;
logic load_loaded_word;
amo_t amo_op;
logic [63:0] amo_operand_a;
logic [63:0] amo_operand_b;
logic [63:0] amo_result_o;
assign amo_operand_a = loaded_word_q;
assign amo_operand_b = mem_req_q.wdata;
assign amo_op = mem_req_q.amo;
assign busy_o = (state_q != IDLE);
@ -101,6 +117,8 @@ module cache_ctrl #(
// cl_i = data_i[one_hot_to_bin(hit_way_i)].data;
end
// Next value of the saved load word: the 64-bit read data handed back on the
// request port. It is registered into loaded_word_q when load_loaded_word is
// set and then used as operand A of the AMO ALU. The 1-bit data_rvalid flag
// must not be used here - it would only ever capture 0 or 1.
assign loaded_word_d = req_port_o.data_rdata;
// --------------
// Cache FSM
// --------------
@ -109,12 +127,15 @@ module cache_ctrl #(
// incoming cache-line -> this is needed as synthesis is not supporting +: indexing in a multi-dimensional array
// cache-line offset -> multiple of 64
cl_offset = mem_req_q.index[DCACHE_BYTE_OFFSET-1:3] << 6; // shift by 6 to the left
load_loaded_word = 1'b0;
// default assignments
state_d = state_q;
mem_req_d = mem_req_q;
hit_way_d = hit_way_q;
amo_sc_succ_o = 1'b1;
amo_valid_o = 1'b0;
// output assignments
req_port_o.data_gnt = 1'b0;
req_port_o.data_rvalid = 1'b0;
@ -135,7 +156,7 @@ module cache_ctrl #(
IDLE: begin
// a new request arrived
if (req_port_i.data_req && !flush_i) begin
// request the cache line - we can do this specualtive
// request the cache line - we can do this speculatively
req_o = '1;
// save index, be and we
@ -145,6 +166,7 @@ module cache_ctrl #(
mem_req_d.size = req_port_i.data_size;
mem_req_d.we = req_port_i.data_we;
mem_req_d.wdata = req_port_i.data_wdata;
mem_req_d.amo = req_port_i.amo_op;
// Bypass mode, check for uncacheable address here as well
if (bypass_i) begin
@ -172,7 +194,8 @@ module cache_ctrl #(
WAIT_TAG, WAIT_TAG_SAVED: begin
// depending on where we come from
// For the store case the tag comes in the same cycle
tag_o = (state_q == WAIT_TAG_SAVED || mem_req_q.we) ? mem_req_q.tag : req_port_i.address_tag;
tag_o = (state_q == WAIT_TAG_SAVED || mem_req_q.we) ? mem_req_q.tag
: req_port_i.address_tag;
// we speculatively request another transfer
if (req_port_i.data_req && !flush_i) begin
req_o = '1;
@ -185,8 +208,8 @@ module cache_ctrl #(
// ------------
if (|hit_way_i) begin
// we can request another cache-line if this was a load
// make another request
if (req_port_i.data_req && !mem_req_q.we && !flush_i) begin
// make another request (if that one was no AMO)
if (req_port_i.data_req && !mem_req_q.we && !flush_i && mem_req_q.amo == AMO_NONE) begin
state_d = WAIT_TAG; // switch back to WAIT_TAG
mem_req_d.index = req_port_i.address_index;
mem_req_d.be = req_port_i.data_be;
@ -194,13 +217,14 @@ module cache_ctrl #(
mem_req_d.we = req_port_i.data_we;
mem_req_d.wdata = req_port_i.data_wdata;
mem_req_d.tag = req_port_i.address_tag;
mem_req_d.amo = req_port_i.amo_op;
mem_req_d.bypass = 1'b0;
req_port_o.data_gnt = gnt_i;
if (!gnt_i) begin
state_d = IDLE;
end
end else begin
state_d = IDLE;
end
@ -215,12 +239,18 @@ module cache_ctrl #(
// report data for a read
if (!mem_req_q.we) begin
req_port_o.data_rvalid = 1'b1;
// else this was a store so we need an extra step to handle it
end else begin
state_d = STORE_REQ;
hit_way_d = hit_way_i;
end
// we've got a hit and an AMO was requested
if (mem_req_q.amo != AMO_NONE && !amo_flush_i) begin
state_d = WAIT_AMO_COMMIT;
// save the load we just reported
load_loaded_word = 1'b1;
end
// ------------
// MISS CASE
// ------------
@ -273,7 +303,7 @@ module cache_ctrl #(
// ~> we are here as we need a second round of memory access for a store
STORE_REQ: begin
// check if the MSHR still doesn't match
mshr_addr_o = {mem_req_d.tag, mem_req_q.index};
mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
// We need to re-check for MSHR aliasing here as the store requires at least
// two memory look-ups on a single-ported SRAM and therefore is non-atomic
@ -293,7 +323,7 @@ module cache_ctrl #(
data_o.valid = 1'b1;
// got a grant ~> this is finished now
if (gnt_i) begin
if (gnt_i && mem_req_q.amo != AMO_NONE) begin
req_port_o.data_gnt = 1'b1;
state_d = IDLE;
end
@ -375,7 +405,7 @@ module cache_ctrl #(
req_port_o.data_rvalid = 1'b1;
req_port_o.data_rdata = critical_word_i;
// we can make another request
if (req_port_i.data_req) begin
if (req_port_i.data_req && mem_req_q.amo == AMO_NONE) begin
// save index, be and we
mem_req_d.index = req_port_i.address_index;
mem_req_d.be = req_port_i.data_be;
@ -383,7 +413,7 @@ module cache_ctrl #(
mem_req_d.we = req_port_i.data_we;
mem_req_d.wdata = req_port_i.data_wdata;
mem_req_d.tag = req_port_i.address_tag;
mem_req_d.amo = req_port_i.amo_op;
state_d = IDLE;
@ -393,10 +423,16 @@ module cache_ctrl #(
mem_req_d.bypass = 1'b0;
req_port_o.data_gnt = 1'b1;
end
end else begin
state_d = IDLE;
end
// we've got a hit and an AMO was requested
if (mem_req_q.amo != AMO_NONE && !amo_flush_i) begin
state_d = WAIT_AMO_COMMIT;
// save the load we just reported
load_loaded_word = 1'b1;
end
end
end
// ~> wait until the bypass request is valid
@ -407,6 +443,35 @@ module cache_ctrl #(
req_port_o.data_rvalid = 1'b1;
state_d = IDLE;
end
// potentially this request was an AMO
if (mem_req_q.amo != AMO_NONE && !amo_flush_i) begin
state_d = WAIT_AMO_COMMIT;
// save the load we just reported
load_loaded_word = 1'b1;
end
end
// ------------------------
// Atomic Memory Operation
// ------------------------
// ~> wait until we committed the AMO
WAIT_AMO_COMMIT: begin
if (amo_commit_i) begin
state_d = STORE_AMO;
end
// AMO was flushed, go back to IDLE
if (amo_flush_i) begin
state_d = IDLE;
end
end
// non-speculative store-part of AMO
STORE_AMO: begin
// address is still saved
mem_req_d.we = req_port_i.data_we;
mem_req_d.wdata = amo_result_o;
// the AMO will still be in the cache
state_d = STORE_REQ;
amo_valid_o = 1'b1;
end
endcase
@ -416,6 +481,13 @@ module cache_ctrl #(
end
end
amo_alu i_amo_alu (
.amo_op ( amo_op ),
.amo_operand_a ( amo_operand_a ),
.amo_operand_b ( amo_operand_b ),
.amo_result_o ( amo_result_o )
);
// --------------
// Registers
// --------------
@ -424,10 +496,14 @@ module cache_ctrl #(
state_q <= IDLE;
mem_req_q <= '0;
hit_way_q <= '0;
loaded_word_q <= '0;
end else begin
state_q <= state_d;
mem_req_q <= mem_req_d;
hit_way_q <= hit_way_d;
if (load_loaded_word) begin
loaded_word_q <= loaded_word_d;
end
end
end
@ -443,18 +519,3 @@ module cache_ctrl #(
`endif
`endif
endmodule
// Stub module: it only declares the AMO handshake ports (commit, valid,
// result, flush) together with clock and reset; the body is empty, so none of
// the outputs is driven.
module AMO_alu (
input logic clk_i,
input logic rst_ni,
// AMO interface
input logic amo_commit_i, // commit atomic memory operation
output logic amo_valid_o, // we have a valid AMO result
output logic [63:0] amo_result_o, // result of atomic memory operation
input logic amo_flush_i // forget about AMO
);
endmodule

View file

@ -48,7 +48,7 @@ module std_cache_subsystem #(
// AMO interface (not functional yet)
input logic dcache_amo_commit_i, // commit atomic memory operation
output logic dcache_amo_valid_o, // we have a valid AMO result
output logic [63:0] dcache_amo_result_o, // result of atomic memory operation
output logic dcache_amo_sc_succ_o, // result of store conditional
input logic dcache_amo_flush_i, // forget about AMO
// Request ports
input dcache_req_i_t [2:0] dcache_req_ports_i, // to/from LSU
@ -93,7 +93,7 @@ module std_cache_subsystem #(
.bypass_if ( dcache_bypass_if ),
.amo_commit_i ( dcache_amo_commit_i ),
.amo_valid_o ( dcache_amo_valid_o ),
.amo_result_o ( dcache_amo_result_o ),
.amo_sc_succ_o ( dcache_amo_sc_succ_o ),
.amo_flush_i ( dcache_amo_flush_i ),
.req_ports_i ( dcache_req_ports_i ),
.req_ports_o ( dcache_req_ports_o )

View file

@ -16,7 +16,7 @@ import ariane_pkg::*;
import std_cache_pkg::*;
module std_nbdcache #(
parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000
parameter logic [63:0] CACHE_START_ADDR = 64'h8000_0000
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
@ -24,12 +24,13 @@ module std_nbdcache #(
input logic enable_i, // from CSR
input logic flush_i, // high until acknowledged
output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
output logic miss_o, // we missed on a ld/st
output logic miss_o, // we missed on a LD/ST
// AMO interface
input logic amo_commit_i, // commit atomic memory operation
output logic amo_valid_o, // we have a valid AMO result
output logic [63:0] amo_result_o, // result of atomic memory operation
input logic amo_flush_i, // forget about AMO
output logic amo_sc_succ_o, // store conditional was successful
input logic amo_flush_i, // forget about pending AMO
// Request ports
input dcache_req_i_t [2:0] req_ports_i, // request ports
output dcache_req_o_t [2:0] req_ports_o, // request ports
@ -83,6 +84,13 @@ module std_nbdcache #(
cache_line_t [DCACHE_SET_ASSOC-1:0] rdata_ram;
cl_be_t be_ram;
// per-port AMO status bits, one per cache controller instance
logic [2:0] amo_valid;
logic [2:0] amo_sc_succ;
// only one unit can produce a result, so OR-reducing the per-port bits
// yields the module-level outputs
assign amo_valid_o   = |amo_valid;
// drive the output port; assigning the vector from its own reduction would
// create a combinational loop and leave amo_sc_succ_o undriven
assign amo_sc_succ_o = |amo_sc_succ;
// ------------------
// Cache Controller
// ------------------
@ -92,13 +100,15 @@ module std_nbdcache #(
.CACHE_START_ADDR ( CACHE_START_ADDR )
) i_cache_ctrl (
.bypass_i ( ~enable_i ),
.busy_o ( busy [i] ),
// from core
.req_port_i ( req_ports_i [i] ),
.req_port_o ( req_ports_o [i] ),
.amo_flush_i,
.amo_commit_i,
.amo_valid_o ( amo_valid [i] ),
.amo_sc_succ_o ( amo_sc_succ [i] ),
// to SRAM array
.req_o ( req [i+1] ),
.addr_o ( addr [i+1] ),
.gnt_i ( gnt [i+1] ),
@ -118,9 +128,9 @@ module std_nbdcache #(
.bypass_valid_i ( bypass_valid [i] ),
.bypass_data_i ( bypass_data [i] ),
.mshr_addr_o ( mshr_addr [i] ), // TODO
.mshr_addr_matches_i ( mshr_addr_matches [i] ), // TODO
.mshr_index_matches_i ( mshr_index_matches[i] ), // TODO
.mshr_addr_o ( mshr_addr [i] ),
.mshr_addr_matches_i ( mshr_addr_matches [i] ),
.mshr_index_matches_i ( mshr_index_matches[i] ),
.*
);
end
@ -132,6 +142,7 @@ module std_nbdcache #(
miss_handler #(
.NR_PORTS ( 3 )
) i_miss_handler (
.flush_i ( flush_i ),
.busy_i ( |busy ),
.miss_req_i ( miss_req ),
.miss_gnt_o ( miss_gnt ),
@ -150,6 +161,8 @@ module std_nbdcache #(
.be_o ( be [0] ),
.data_o ( wdata [0] ),
.we_o ( we [0] ),
.bypass_if,
.data_if,
.*
);

View file

@ -18,6 +18,7 @@ module commit_stage #(
parameter int unsigned NR_COMMIT_PORTS = 2
)(
input logic clk_i,
input logic rst_ni,
input logic halt_i, // request to halt the core
input logic flush_dcache_i, // request to flush dcache -> also flush the pipeline
output exception_t exception_o, // take exception to controller
@ -32,7 +33,10 @@ module commit_stage #(
output logic [NR_COMMIT_PORTS-1:0][4:0] waddr_o, // register file write address
output logic [NR_COMMIT_PORTS-1:0][63:0] wdata_o, // register file write data
output logic [NR_COMMIT_PORTS-1:0] we_o, // register file write enable
// Atomic memory operations
output logic amo_commit_o,
input logic amo_valid_i,
input logic amo_sc_succ_i,
// to CSR file and PC Gen (because on certain CSR instructions we'll need to flush the whole pipeline)
output logic [63:0] pc_o,
// to/from CSR file
@ -49,9 +53,14 @@ module commit_stage #(
output logic fence_o, // flush D$ and pipeline
output logic sfence_vma_o // flush TLBs and pipeline
);
// we need to wait for AMOS
logic comitting_amo_q, comitting_amo_d;
logic [4:0] amo_reg_addr_q, amo_reg_addr_d;
assign waddr_o[0] = commit_instr_i[0].rd[4:0];
assign waddr_o[1] = commit_instr_i[1].rd[4:0];
logic [NR_COMMIT_PORTS-1:0] is_amo;
for (genvar i = 0; i < NR_COMMIT_PORTS; i++) begin
assign is_amo[i] = is_amo_op(commit_instr_i[i].op);
end
assign pc_o = commit_instr_i[0].pc;
@ -69,7 +78,7 @@ module commit_stage #(
commit_lsu_o = 1'b0;
commit_csr_o = 1'b0;
wdata_o[0] = commit_instr_i[0].result;
// Write data for commit port 0: only while an AMO result is being reported
// (amo_valid_i) is the store-conditional status forwarded; in every other
// cycle the regular instruction result is written. With the arms the other
// way round, every ordinary commit wrote the 1-bit status flag.
// NOTE(review): the flag is zero-extended as-is; RISC-V SC returns 0 on
// success, so the polarity of amo_sc_succ_i should be confirmed.
wdata_o[0] = (amo_valid_i) ? {63'b0, amo_sc_succ_i} : commit_instr_i[0].result;
wdata_o[1] = commit_instr_i[1].result;
csr_op_o = ADD; // this corresponds to a CSR NOP
csr_wdata_o = 64'b0;
@ -80,7 +89,9 @@ module commit_stage #(
// we will not commit the instruction if we took an exception
// and we do not commit the instruction if we requested a halt
// furthermore if the debugger is requesting to debug do not commit this instruction if we are not yet in debug mode
if (commit_instr_i[0].valid && !halt_i && (!debug_req_i || debug_mode_i)) begin
// also check that there is no atomic memory operation committing, right now this is the only operation
// which will take longer than one cycle to commit
if (commit_instr_i[0].valid && !halt_i && (!debug_req_i || debug_mode_i) && !comitting_amo_q) begin
commit_ack_o[0] = 1'b1;
// register will be the all zero register.
@ -104,6 +115,15 @@ module commit_stage #(
end
end
// check whether instruction 0 is an AMO
if (is_amo[0]) begin
// do not write the instruction now
we_o[0] = 1'b0;
// we are committing an AMO, wait for the result
amo_commit_o = 1'b1;
// save the write address 0
amo_reg_addr_d = waddr_o[0];
end
// ---------
// CSR Logic
// ---------
@ -145,18 +165,46 @@ module commit_stage #(
end
end
// -----------------
// Commit Port 2
// -----------------
// check if the second instruction can be committed as well and the first wasn't a CSR instruction
// also if we are in single step mode don't retire the second instruction
if (commit_ack_o[0] && commit_instr_i[1].valid && !halt_i && !(commit_instr_i[0].fu inside {CSR}) && !flush_dcache_i && !single_step_i) begin
if (commit_ack_o[0] && commit_instr_i[1].valid
&& !halt_i
&& !(commit_instr_i[0].fu inside {CSR})
&& !flush_dcache_i
&& !single_step_i
&& !is_amo[0] // we are only retiring AMOs on port 0
&& !is_amo[1]) begin
// only if the first instruction didn't throw an exception and this instruction won't throw an exception
// and the operator is of type ALU, LOAD, CTRL_FLOW, MULT
if (!exception_o.valid && !commit_instr_i[1].ex.valid && (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT})) begin
if (!exception_o.valid && !commit_instr_i[1].ex.valid
&& (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT})) begin
we_o[1] = 1'b1;
commit_ack_o[1] = 1'b1;
end
end
end
// AMO Commit logic
always_comb begin
comitting_amo_d = comitting_amo_q;
waddr_o[0] = commit_instr_i[0].rd[4:0];
waddr_o[1] = commit_instr_i[1].rd[4:0];
// set the mutex and wait for a valid answer
if (amo_commit_o) begin
comitting_amo_d = 1'b1;
end
// reset the mutex
if (amo_valid_i) begin
comitting_amo_d = 1'b0;
waddr_o[0] = amo_reg_addr_q;
end
end
// -----------------------------
// Exception & Interrupt Logic
// -----------------------------
@ -205,4 +253,14 @@ module commit_stage #(
exception_o.valid = 1'b0;
end
end
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
comitting_amo_q <= 1'b0;
amo_reg_addr_q <= '0;
end else begin
comitting_amo_q <= comitting_amo_d;
amo_reg_addr_q <= amo_reg_addr_d;
end
end
endmodule

View file

@ -26,6 +26,7 @@ module controller (
output logic flush_dcache_o, // Flush DCache
input logic flush_dcache_ack_i, // Acknowledge the whole DCache Flush
output logic flush_tlb_o, // Flush TLBs
output logic flush_amo_o, // Flush all pending AMOs
input logic halt_csr_i, // Halt request from CSR (WFI instruction)
output logic halt_o, // Halt signal to commit stage
@ -56,6 +57,7 @@ module controller (
flush_tlb_o = 1'b0;
flush_dcache = 1'b0;
flush_icache_o = 1'b0;
flush_amo_o = 1'b0;
// ------------
// Mis-predict
// ------------
@ -77,6 +79,7 @@ module controller (
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
flush_amo_o = 1'b1;
flush_dcache = 1'b1;
fence_active_d = 1'b1;
@ -92,6 +95,7 @@ module controller (
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
flush_icache_o = 1'b1;
flush_amo_o = 1'b1;
flush_dcache = 1'b1;
fence_active_d = 1'b1;
@ -114,6 +118,8 @@ module controller (
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
flush_amo_o = 1'b1;
flush_tlb_o = 1'b1;
end
@ -125,6 +131,8 @@ module controller (
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_amo_o = 1'b1;
flush_ex_o = 1'b1;
end
@ -140,6 +148,7 @@ module controller (
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
flush_amo_o = 1'b1;
end
end

View file

@ -390,12 +390,13 @@ module decoder (
endcase
end
`ifdef ENABLE_ATOMICS
riscv::OpcodeAmo: begin
// we are going to use the load unit for AMOs
instruction_o.fu = LOAD;
instruction_o.rd[4:0] = instr.stype.imm0;
instruction_o.rs1[4:0] = instr.itype.rs1;
// register fields of the atomic format: each scoreboard field is taken from
// the matching instruction field (rs1 was previously fed from the rs2 bits)
instruction_o.rs1[4:0] = instr.atype.rs1;
instruction_o.rs2[4:0] = instr.atype.rs2;
instruction_o.rd[4:0]  = instr.atype.rd;
// TODO(zarubaf): Ordering
// words
if (instr.stype.funct3 == 3'h2) begin
unique case (instr.instr[31:27])
@ -432,7 +433,6 @@ module decoder (
illegal_instr = 1'b1;
end
end
`endif
// --------------------------------
// Control Flow Instructions

View file

@ -41,7 +41,9 @@ module load_unit (
input dcache_req_o_t req_port_i,
output dcache_req_i_t req_port_o
);
enum logic [2:0] {IDLE, WAIT_GNT, SEND_TAG, WAIT_PAGE_OFFSET, ABORT_TRANSACTION, WAIT_TRANSLATION, WAIT_FLUSH} NS, CS;
enum logic [2:0] { IDLE, WAIT_GNT, SEND_TAG, WAIT_PAGE_OFFSET,
ABORT_TRANSACTION, WAIT_TRANSLATION, WAIT_FLUSH
} state_d, state_q;
// in order to decouple the response interface from the request interface we need a
// a queue which can hold all outstanding memory requests
struct packed {
@ -56,7 +58,8 @@ module load_unit (
assign vaddr_o = lsu_ctrl_i.vaddr;
// this is a read-only interface so set the write enable to 0
assign req_port_o.data_we = 1'b0;
assign req_port_o.data_wdata = '0;
// we need operand b for the AMOs
assign req_port_o.data_wdata = lsu_ctrl_i.data;
// compose the queue data, control is handled in the FSM
assign in_data = {lsu_ctrl_i.trans_id, lsu_ctrl_i.vaddr[2:0], lsu_ctrl_i.operator};
// output address
@ -72,7 +75,7 @@ module load_unit (
// ---------------
always_comb begin : load_control
// default assignments
NS = CS;
state_d = state_q;
load_data_d = load_data_q;
translation_req_o = 1'b0;
req_port_o.data_req = 1'b0;
@ -83,7 +86,7 @@ module load_unit (
req_port_o.data_size = extract_transfer_size(lsu_ctrl_i.operator);
pop_ld_o = 1'b0;
case (CS)
case (state_q)
IDLE: begin
// we've got a new load request
if (valid_i) begin
@ -96,18 +99,18 @@ module load_unit (
req_port_o.data_req = 1'b1;
// we got no data grant so wait for the grant before sending the tag
if (!req_port_i.data_gnt) begin
NS = WAIT_GNT;
state_d = WAIT_GNT;
end else begin
if (dtlb_hit_i) begin
// we got a grant and a hit on the DTLB so we can send the tag in the next cycle
NS = SEND_TAG;
state_d = SEND_TAG;
pop_ld_o = 1'b1;
end else
NS = ABORT_TRANSACTION;
state_d = ABORT_TRANSACTION;
end
end else begin
// wait for the store buffer to train and the page offset to not match anymore
NS = WAIT_PAGE_OFFSET;
state_d = WAIT_PAGE_OFFSET;
end
end
end
@ -116,7 +119,7 @@ module load_unit (
WAIT_PAGE_OFFSET: begin
// we make a new request as soon as the page offset does not match anymore
if (!page_offset_matches_i) begin
NS = WAIT_GNT;
state_d = WAIT_GNT;
end
end
@ -127,14 +130,14 @@ module load_unit (
req_port_o.kill_req = 1'b1;
req_port_o.tag_valid = 1'b1;
// redo the request by going back to the wait gnt state
NS = WAIT_TRANSLATION;
state_d = WAIT_TRANSLATION;
end
WAIT_TRANSLATION: begin
translation_req_o = 1'b1;
// we've got a hit and we can continue with the request process
if (dtlb_hit_i)
NS = WAIT_GNT;
state_d = WAIT_GNT;
end
WAIT_GNT: begin
@ -146,17 +149,17 @@ module load_unit (
if (req_port_i.data_gnt) begin
// so we send the tag in the next cycle
if (dtlb_hit_i) begin
NS = SEND_TAG;
state_d = SEND_TAG;
pop_ld_o = 1'b1;
end else // should we not have hit on the TLB abort this transaction an retry later
NS = ABORT_TRANSACTION;
state_d = ABORT_TRANSACTION;
end
// otherwise we keep waiting on our grant
end
// we know for sure that the tag we want to send is valid
SEND_TAG: begin
req_port_o.tag_valid = 1'b1;
NS = IDLE;
state_d = IDLE;
// we can make a new request here if we got one
if (valid_i) begin
// start the translation process even though we do not know if the addresses match
@ -168,19 +171,19 @@ module load_unit (
req_port_o.data_req = 1'b1;
// we got no data grant so wait for the grant before sending the tag
if (!req_port_i.data_gnt) begin
NS = WAIT_GNT;
state_d = WAIT_GNT;
end else begin
// we got a grant so we can send the tag in the next cycle
if (dtlb_hit_i) begin
// we got a grant and a hit on the DTLB so we can send the tag in the next cycle
NS = SEND_TAG;
state_d = SEND_TAG;
pop_ld_o = 1'b1;
end else // we missed on the TLB -> wait for the translation
NS = ABORT_TRANSACTION;
state_d = ABORT_TRANSACTION;
end
end else begin
// wait for the store buffer to train and the page offset to not match anymore
NS = WAIT_PAGE_OFFSET;
state_d = WAIT_PAGE_OFFSET;
end
end
// ----------
@ -198,7 +201,7 @@ module load_unit (
req_port_o.kill_req = 1'b1;
req_port_o.tag_valid = 1'b1;
// we've killed the current request so we can go back to idle
NS = IDLE;
state_d = IDLE;
end
endcase
@ -206,8 +209,8 @@ module load_unit (
// we got an exception
if (ex_i.valid && valid_i) begin
// the next state will be the idle state
NS = IDLE;
// pop load - but only if we are not getting an rvalid in here - otherwise we will over-wright an incoming transaction
state_d = IDLE;
// pop load - but only if we are not getting an rvalid in here - otherwise we will over-write an incoming transaction
if (!req_port_i.data_rvalid)
pop_ld_o = 1'b1;
end
@ -219,7 +222,7 @@ module load_unit (
// if we just flushed and the queue is not empty or we are getting an rvalid this cycle wait in a extra stage
if (flush_i) begin
NS = WAIT_FLUSH;
state_d = WAIT_FLUSH;
end
end
@ -232,7 +235,7 @@ module load_unit (
// output the queue data directly, the valid signal is set corresponding to the process above
trans_id_o = load_data_q.trans_id;
// we got an rvalid and are currently not flushing and not aborting the request
if (req_port_i.data_rvalid && CS != WAIT_FLUSH) begin
if (req_port_i.data_rvalid && state_q != WAIT_FLUSH) begin
// we killed the request
if(!req_port_o.kill_req)
valid_o = 1'b1;
@ -249,7 +252,7 @@ module load_unit (
valid_o = 1'b1;
trans_id_o = lsu_ctrl_i.trans_id;
// if we are waiting for the translation to finish do not give a valid signal yet
end else if (CS == WAIT_TRANSLATION) begin
end else if (state_q == WAIT_TRANSLATION) begin
valid_o = 1'b0;
end
@ -259,10 +262,10 @@ module load_unit (
// latch physical address for the tag cycle (one cycle after applying the index)
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
CS <= IDLE;
state_q <= IDLE;
load_data_q <= '0;
end else begin
CS <= NS;
state_q <= state_d;
load_data_q <= load_data_d;
end
end
@ -272,31 +275,21 @@ module load_unit (
// ---------------
always_comb begin : amo_op_select
req_port_o.amo_op = AMO_NONE;
// map the operators to BUS AMOS the downstream circuit understands
// e.g.: remove the size field as this will be encoded in another signal
if (lsu_ctrl_i.valid) begin
case (lsu_ctrl_i.operator)
AMO_LRW: req_port_o.amo_op = AMO_LR;
AMO_LRD: req_port_o.amo_op = AMO_LR;
AMO_SCW: req_port_o.amo_op = AMO_SC;
AMO_SCD: req_port_o.amo_op = AMO_SC;
AMO_SWAPW: req_port_o.amo_op = AMO_SWAP;
AMO_ADDW: req_port_o.amo_op = AMO_ADD;
AMO_ANDW: req_port_o.amo_op = AMO_AND;
AMO_ORW: req_port_o.amo_op = AMO_OR;
AMO_XORW: req_port_o.amo_op = AMO_XOR;
AMO_MAXW: req_port_o.amo_op = AMO_MAX;
AMO_MAXWU: req_port_o.amo_op = AMO_MAXU;
AMO_MINW: req_port_o.amo_op = AMO_MIN;
AMO_MINWU: req_port_o.amo_op = AMO_MINU;
AMO_SWAPD: req_port_o.amo_op = AMO_SWAP;
AMO_ADDD: req_port_o.amo_op = AMO_ADD;
AMO_ANDD: req_port_o.amo_op = AMO_AND;
AMO_ORD: req_port_o.amo_op = AMO_OR;
AMO_XORD: req_port_o.amo_op = AMO_XOR;
AMO_MAXD: req_port_o.amo_op = AMO_MAX;
AMO_MAXDU: req_port_o.amo_op = AMO_MAXU;
AMO_MIND: req_port_o.amo_op = AMO_MIN;
AMO_MINDU: req_port_o.amo_op = AMO_MINU;
AMO_LRW, AMO_LRD: req_port_o.amo_op = AMO_LR;
AMO_SCW, AMO_SCD: req_port_o.amo_op = AMO_SC;
AMO_SWAPW, AMO_SWAPD: req_port_o.amo_op = AMO_SWAP;
AMO_ADDW, AMO_ADDD: req_port_o.amo_op = AMO_ADD;
AMO_ANDW, AMO_ANDD: req_port_o.amo_op = AMO_AND;
AMO_ORW, AMO_ORD: req_port_o.amo_op = AMO_OR;
AMO_XORW, AMO_XORD: req_port_o.amo_op = AMO_XOR;
AMO_MAXW, AMO_MAXD: req_port_o.amo_op = AMO_MAX;
AMO_MAXWU, AMO_MAXDU: req_port_o.amo_op = AMO_MAXU;
AMO_MINW, AMO_MIND: req_port_o.amo_op = AMO_MIN;
AMO_MINWU, AMO_MINDU: req_port_o.amo_op = AMO_MINU;
default: req_port_o.amo_op = AMO_NONE;
endcase
end
@ -305,7 +298,6 @@ module load_unit (
// ---------------
// Sign Extend
// ---------------
logic [63:0] shifted_data;
// realign as needed
@ -332,7 +324,8 @@ module load_unit (
// prepare these signals for faster selection in the next cycle
assign signed_d = load_data_d.operator inside {LW, LH, LB};
// all AMOs are sign extended
assign signed_d = load_data_d.operator inside {LW, LH, LB} | is_amo_op(load_data_d.operator);
assign fp_sign_d = 1'b0;
assign idx_d = (load_data_d.operator inside {LW}) ? load_data_d.address_offset + 3 :
(load_data_d.operator inside {LH}) ? load_data_d.address_offset + 1 :
@ -362,7 +355,15 @@ module load_unit (
// result mux
always_comb begin
unique case (load_data_q.operator)
LW, LWU: result_o = {{32{sign_bit}}, shifted_data[31:0]};
LW, LWU,
AMO_LRW, AMO_SCW,
AMO_SWAPW, AMO_ADDW,
AMO_ANDW, AMO_ORW,
AMO_XORW, AMO_MAXW,
AMO_MAXWU, AMO_MINW,
AMO_MINWU: begin
result_o = {{32{sign_bit}}, shifted_data[31:0]};
end
LH, LHU: result_o = {{48{sign_bit}}, shifted_data[15:0]};
LB, LBU: result_o = {{56{sign_bit}}, shifted_data[7:0]};
default: result_o = shifted_data;

View file

@ -73,6 +73,7 @@ module lsu #(
logic pop_st;
logic pop_ld;
// ------------------------------
// Address Generation Unit (AGU)
// ------------------------------
// virtual address as calculated by the AGU in the first cycle
@ -147,7 +148,7 @@ module lsu #(
.lsu_paddr_o ( mmu_paddr ),
.lsu_exception_o ( mmu_exception ),
.lsu_dtlb_hit_o ( dtlb_hit ), // send in the same cycle as the request
// connecting PTW to D$ IF (aka mem arbiter
// connecting PTW to D$ IF
.req_port_i ( dcache_req_ports_i [0] ),
.req_port_o ( dcache_req_ports_o [0] ),
// icache address translation requests
@ -270,9 +271,18 @@ module lsu #(
// 12 bit are the same anyway
// and we can always generate the byte enable from the address at hand
case (operator_i)
LD, SD: // double word
LD, SD,
AMO_LRD, AMO_SCD,
AMO_SWAPD, AMO_ADDD, AMO_ANDD, AMO_ORD,
AMO_XORD, AMO_MAXD, AMO_MAXDU, AMO_MIND,
AMO_MINDU: begin // double word
be_i = 8'b1111_1111;
LW, LWU, SW: // word
end
LW, LWU, SW,
AMO_LRW, AMO_SCW,
AMO_SWAPW, AMO_ADDW, AMO_ANDW, AMO_ORW,
AMO_XORW, AMO_MAXW, AMO_MAXWU, AMO_MINW,
AMO_MINWU: begin// word
case (vaddr_i[2:0])
3'b000: be_i = 8'b0000_1111;
3'b001: be_i = 8'b0001_1110;
@ -281,7 +291,8 @@ module lsu #(
3'b100: be_i = 8'b1111_0000;
default:;
endcase
LH, LHU, SH: // half word
end
LH, LHU, SH: begin // half word
case (vaddr_i[2:0])
3'b000: be_i = 8'b0000_0011;
3'b001: be_i = 8'b0000_0110;
@ -292,7 +303,8 @@ module lsu #(
3'b110: be_i = 8'b1100_0000;
default:;
endcase
LB, LBU, SB: // byte
end
LB, LBU, SB: begin // byte
case (vaddr_i[2:0])
3'b000: be_i = 8'b0000_0001;
3'b001: be_i = 8'b0000_0010;
@ -303,8 +315,10 @@ module lsu #(
3'b110: be_i = 8'b0100_0000;
3'b111: be_i = 8'b1000_0000;
endcase
default:
end
default: begin
be_i = 8'b0;
end
endcase
end
@ -327,21 +341,31 @@ module lsu #(
if (lsu_ctrl.valid) begin
case (lsu_ctrl.operator)
// double word
LD, SD: begin
if (lsu_ctrl.vaddr[2:0] != 3'b000)
LD, SD,
AMO_LRD, AMO_SCD,
AMO_SWAPD, AMO_ADDD, AMO_ANDD, AMO_ORD,
AMO_XORD, AMO_MAXD, AMO_MAXDU, AMO_MIND,
AMO_MINDU: begin
if (lsu_ctrl.vaddr[2:0] != 3'b000) begin
data_misaligned = 1'b1;
end
end
// word
LW, LWU, SW: begin
if (lsu_ctrl.vaddr[1:0] != 2'b00)
LW, LWU, SW,
AMO_LRW, AMO_SCW,
AMO_SWAPW, AMO_ADDW, AMO_ANDW, AMO_ORW,
AMO_XORW, AMO_MAXW, AMO_MAXWU, AMO_MINW,
AMO_MINWU: begin
if (lsu_ctrl.vaddr[1:0] != 2'b00) begin
data_misaligned = 1'b1;
end
end
// half word
LH, LHU, SH: begin
if (lsu_ctrl.vaddr[0] != 1'b0)
if (lsu_ctrl.vaddr[0] != 1'b0) begin
data_misaligned = 1'b1;
end
end
// byte -> is always aligned
default:;
endcase