diff --git a/CHANGELOG.md b/CHANGELOG.md index f131bc769..483e014ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +### Added + +- Commit log feature +- Support for A-Extension + ### 3.0.0 ### Added diff --git a/Makefile b/Makefile index 50e28a52b..e498b69ba 100755 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ test_case ?= core_test # QuestaSim Version questa_version ?= ${QUESTASIM_VERSION} # verilator version -verilator ?= ${VERILATOR_ROOT}/bin/verilator +verilator ?= verilator # traget option target-options ?= # additional definess @@ -134,10 +134,10 @@ $(dpi-library)/ariane_dpi.so: $(dpi) sim: build - vsim${questa_version} +permissive -64 -lib ${library} +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \ - +BASEDIR=$(riscv-test-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \ - $(QUESTASIM_FLAGS) \ - -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi -do " log -r /*; run -all; exit" \ + vsim${questa_version} +permissive -64 -lib ${library} +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \ + +BASEDIR=$(riscv-test-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \ + $(QUESTASIM_FLAGS) \ + -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi -do " log -r /*; run -all; exit" \ ${top_level}_optimized +permissive-off ++$(riscv-test-dir)/$(riscv-test) ++$(target-options) simc: build @@ -212,9 +212,9 @@ $(addsuffix -verilator,$(riscv-asm-tests)): verilate run-asm-tests-verilator: $(addsuffix -verilator, $(riscv-asm-tests)) # split into two halfs for travis jobs (otherwise they will time out) -run-asm-tests1-verilator: $(addsuffix -verilator, $(filter rv64ui-p-% ,$(riscv-asm-tests))) +run-asm-tests1-verilator: $(addsuffix -verilator, $(filter rv64ui-v-% ,$(riscv-asm-tests))) -run-asm-tests2-verilator: $(addsuffix -verilator, $(filter-out rv64ui-p-% 
,$(riscv-asm-tests))) +run-asm-tests2-verilator: $(addsuffix -verilator, $(filter-out rv64ui-v-% ,$(riscv-asm-tests))) $(addsuffix -verilator,$(riscv-benchmarks)): verilate diff --git a/README.md b/README.md index 41aa7d885..72ec74dc7 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ # Ariane RISC-V CPU -Ariane is a 6-stage, single issue, in-order CPU which implements the 64-bit RISC-V instruction set. It fully implements I, M and C extensions as specified in Volume I: User-Level ISA V 2.1 as well as the draft privilege extension 1.10. It implements three privilege levels M, S, U to fully support a Unix-like operating system. Furthermore it is compliant to the draft external debug spec 0.13. +Ariane is a 6-stage, single issue, in-order CPU which implements the 64-bit RISC-V instruction set. It fully implements I, M, A and C extensions as specified in Volume I: User-Level ISA V 2.3 as well as the draft privilege extension 1.10. It implements three privilege levels M, S, U to fully support a Unix-like operating system. Furthermore it is compliant to the draft external debug spec 0.13. It has configurable size, separate TLBs, a hardware PTW and branch-prediction (branch target buffer and branch history table). The primary design goal was on reducing critical path length. @@ -44,29 +44,39 @@ Both, the Verilator model as well as the Questa simulation will produce trace lo $ spike-dasm < trace_core_00_0.dasm > logfile.txt ``` -### Running Applications +### Running User-Space Applications -It is possible to run user-space binaries on Ariane with `riscv-pk` ([link](https://github.com/riscv/riscv-pk)). As Ariane currently does not support atomics and floating point extensions make sure that you configure `riscv-pk` with: -`--with-arch=rv64imc`. In particular inside the `riscv-pk` directory do: +It is possible to run user-space binaries on Ariane with `riscv-pk` ([link](https://github.com/riscv/riscv-pk)). 
``` $ mkdir build $ cd build -$ ../configure --prefix=$RISCV --host=riscv64-unknown-elf --with-arch=rv64imc +$ ../configure --prefix=$RISCV --host=riscv64-unknown-elf $ make $ make install ``` Then to run a RISC-V ELF using the Verilator model do: +``` +$ echo ' +#include <stdio.h> + +int main(int argc, char const *argv[]) { + printf("Hello Ariane!\\n"); + return 0; +}' > hello.c +$ riscv64-unknown-elf-gcc hello.c -o hello.elf +``` + ``` $ make verilate -$ work-ver/Variane_testharness /path/to/pk path/to/riscv.elf +$ work-ver/Variane_testharness $RISCV/riscv64-unknown-elf/bin/pk hello.elf ``` If you want to use QuestaSim to run it you can use the following command: ``` -$ make simc riscv-test=/path/to/pk target-options=path/to/riscv.elf +$ make simc riscv-test-dir=$RISCV/riscv64-unknown-elf/bin riscv-test=pk target-options=hello.elf ``` > Be patient! RTL simulation is way slower than Spike. If you think that you ran into problems you can inspect the trace files. @@ -77,7 +87,7 @@ Coming. ## Planned Improvements -While developing Ariane it has become evident that, in order to support Linux, the atomic extension is going to be mandatory. While the core is currently booting Linux by emulating Atomics in BBL (in a single core environment this is trivially met by disabling interrupts) this is not the behavior which is intended. For that reason we are going to fully support all atomic extensions in the very near future. +> Atomics are implemented for a single core environment. They will semantically fail in a multi-core setup. ## Going Beyond @@ -92,27 +102,44 @@ If you call `simc` instead of `sim` it will run without the GUI. QuestaSim uses ### CI Testsuites and Randomized Constrained Testing with Torture -We provide two CI configuration files for Travis CI and GitLab CI that run the RISCV assembly tests, the RISCV benchmarks and a randomized RISCV Torture test. 
The difference between the two is that Travis CI runs these tests only on Verilator, whereas GitLab CI runs the same tests on QuestaSim and Verilator. +We provide two CI configuration files for Travis CI and GitLab CI that run the RISCV assembly tests, the RISCV benchmarks and a randomized RISCV Torture test. The difference between the two is that Travis CI runs these tests only on Verilator, whereas GitLab CI runs the same tests on QuestaSim and Verilator. -If you would like to run the CI test suites locally on your machine, follow any of the two scripts `ci.travis-ci-emul.sh` and `ci.travis-ci-emul.sh` (depending on whether you have QuestaSim or not). In particular, you have to get the required packages for your system, the paths in `ci/path-setup.sh` to match your setup, and run the installation and build scripts prior to running any of the tests suites. +If you would like to run the CI test suites locally on your machine, follow any of the two scripts `ci/travis-ci-emul.sh` and `ci/travis-ci-emul.sh` (depending on whether you have QuestaSim or not). In particular, you have to get the required packages for your system, the paths in `ci/path-setup.sh` to match your setup, and run the installation and build scripts prior to running any of the tests suites. Once everything is set up and installed, you can run the tests suites as follows (using Verilator): ``` -$ make verilate -$ make run-asm-tests-verilator -$ make run-benchmarks-verilator +$ make verilate +$ make run-asm-tests-verilator +$ make run-benchmarks-verilator ``` In order to run randomized Torture tests, you first have to generate the randomized program prior to running the simulation: ``` +$ ./ci/get-torture.sh $ make torture-gen $ make torture-rtest-verilator +``` +This runs the randomized program on Spike and on the RTL target, and checks whether the two signatures match. The random instruction mix can be configured in the `./tmp/riscv-torture/config/default.config` file. 
+ +Ariane can dump a trace-log in Questa which can be easily diffed against Spike with commit log enabled. In `include/ariane_pkg.sv` set: + +```verilog +localparam bit ENABLE_SPIKE_COMMIT_LOG = 1'b1; +``` +This will dump a file called `trace_core_*_*_commit.log`. + +This can be helpful for debugging long traces (e.g.: torture traces). To compile Spike with the commit log feature do: ``` -This runs the randomized program on Spike and on the RTL target, and checks whether the two signatures match. The random instruction mix can be configured in the `./tmp/riscv-torture/config/default.config` file. - +$ apt-get install device-tree-compiler +$ mkdir build +$ cd build +$ ../configure --prefix=$RISCV --with-fesvr=$RISCV --enable-commitlog +$ make +$ [sudo] make install +``` # Contributing diff --git a/bootrom/bootrom.sv b/bootrom/bootrom.sv index 52efe8251..8add90eaf 100644 --- a/bootrom/bootrom.sv +++ b/bootrom/bootrom.sv @@ -20,15 +20,15 @@ module bootrom ( input logic [63:0] addr_i, output logic [63:0] rdata_o ); - localparam int RomSize = 141; + localparam int RomSize = 143; const logic [RomSize-1:0][63:0] mem = { - 64'h0064, - 64'h65646e65_7478652d, - 64'h73747075_72726574, - 64'h6e690073_65676e61, - 64'h7200656c_646e6168, - 64'h70007265_6c6c6f72, + 64'h00646564_6e657478, + 64'h652d7374_70757272, + 64'h65746e69_00736567, + 64'h6e617200_656c646e, + 64'h6168702c_78756e69, + 64'h6c007265_6c6c6f72, 64'h746e6f63_2d747075, 64'h72726574_6e690073, 64'h6c6c6563_2d747075, @@ -60,7 +60,7 @@ module bootrom ( 64'h4b000000_10000000, 64'h03000000_07000000, 64'h01000000_03000000, - 64'h01000000_ae000000, + 64'h01000000_b4000000, 64'h10000000_03000000, 64'h00000000_30746e69, 64'h6c632c76_63736972, @@ -68,7 +68,7 @@ module bootrom ( 64'h03000000_00000030, 64'h30303030_30324074, 64'h6e696c63_01000000, - 64'ha7000000_00000000, + 64'had000000_00000000, 64'h03000000_00007375, 64'h622d656c_706d6973, 64'h00636f73_2d657261, @@ -91,6 +91,8 @@ module bootrom ( 64'h6f6d656d_01000000, 
64'h02000000_02000000, 64'h02000000_01000000, + 64'ha5000000_04000000, + 64'h03000000_01000000, 64'h9f000000_04000000, 64'h03000000_00006374, 64'h6e692d75_70632c76, @@ -143,11 +145,11 @@ module bootrom ( 64'h00000000_01000000, 64'h00000000_00000000, 64'h00000000_00000000, - 64'he8020000_c2000000, + 64'hf8020000_c8000000, 64'h00000000_10000000, 64'h11000000_28000000, - 64'h20030000_38000000, - 64'he2030000_edfe0dd0, + 64'h30030000_38000000, + 64'hf8030000_edfe0dd0, 64'h00000000_00000000, 64'h00000000_00000000, 64'h00000000_00000000, diff --git a/ci/default.config b/ci/default.config index 71a0b3766..f405d4d8e 100644 --- a/ci/default.config +++ b/ci/default.config @@ -1,7 +1,7 @@ torture.generator.nseqs 1000 torture.generator.memsize 1024 torture.generator.fprnd 0 -torture.generator.amo false +torture.generator.amo true torture.generator.mul true torture.generator.divider true torture.generator.segment true diff --git a/ci/riscv-asm-tests.list b/ci/riscv-asm-tests.list index 3ecc8933c..01e12844b 100644 --- a/ci/riscv-asm-tests.list +++ b/ci/riscv-asm-tests.list @@ -127,3 +127,41 @@ rv64um-v-divw rv64um-v-divuw rv64um-v-remw rv64um-v-remuw +rv64ua-p-amoadd_d +rv64ua-p-amoadd_w +rv64ua-p-amoor_d +rv64ua-p-amoor_w +rv64ua-p-amoand_d +rv64ua-p-amoand_w +rv64ua-p-amoswap_d +rv64ua-p-amoswap_w +rv64ua-p-amoxor_d +rv64ua-p-amoxor_w +rv64ua-p-amomax_d +rv64ua-p-amomaxu_d +rv64ua-p-amomaxu_w +rv64ua-p-amomax_w +rv64ua-p-amomin_d +rv64ua-p-amomin_w +rv64ua-p-amominu_d +rv64ua-p-amominu_w +rv64ua-p-lrsc +rv64ua-v-amoadd_d +rv64ua-v-amoadd_w +rv64ua-v-amoor_d +rv64ua-v-amoor_w +rv64ua-v-amoand_d +rv64ua-v-amoand_w +rv64ua-v-amoswap_d +rv64ua-v-amoswap_w +rv64ua-v-amoxor_d +rv64ua-v-amoxor_w +rv64ua-v-amomax_d +rv64ua-v-amomaxu_d +rv64ua-v-amomaxu_w +rv64ua-v-amomax_w +rv64ua-v-amomin_d +rv64ua-v-amomin_w +rv64ua-v-amominu_d +rv64ua-v-amominu_w +rv64ua-v-lrsc \ No newline at end of file diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv index 6b0076dac..b920db551 100644 
--- a/include/ariane_pkg.sv +++ b/include/ariane_pkg.sv @@ -32,7 +32,9 @@ package ariane_pkg; localparam BITS_SATURATION_COUNTER = 2; localparam NR_COMMIT_PORTS = 2; - localparam logic [63:0] ISA_CODE = (1 << 2) // C - Compressed extension + localparam logic [63:0] ISA_CODE = + (1 << 0) // A - Atomic extension + | (1 << 2) // C - Compressed extension | (1 << 8) // I - RV32I/64I/128I base ISA | (1 << 12) // M - Integer Multiply/Divide extension | (0 << 13) // N - User level interrupts supported @@ -55,10 +57,16 @@ package ariane_pkg; dataaddr: dm::DataAddr }; + + // enables a commit log which matches spikes commit log format for easier trace comparison + localparam bit ENABLE_SPIKE_COMMIT_LOG = 1'b0; + + // ------------- Dangerous ------------- // if set to zero a flush will not invalidate the cache-lines, in a single core environment // where coherence is not necessary this can improve performance. This needs to be switched on // when more than one core is in a system - localparam logic INVALIDATE_ON_FLUSH = 1'b0; + localparam logic INVALIDATE_ON_FLUSH = 1'b1; + // --------------- // Fetch Stage // --------------- @@ -138,7 +146,13 @@ package ariane_pkg; } bht_prediction_t; typedef enum logic[3:0] { - NONE, LOAD, STORE, ALU, CTRL_FLOW, MULT, CSR + NONE, // 0 + LOAD, // 1 + STORE, // 2 + ALU, // 3 + CTRL_FLOW, // 4 + MULT, // 5 + CSR // 6 } fu_t; localparam EXC_OFF_RST = 8'h80; @@ -148,10 +162,10 @@ package ariane_pkg; // --------------- // I$ - parameter int unsigned ICACHE_INDEX_WIDTH = 12; // in bit - parameter int unsigned ICACHE_TAG_WIDTH = 44; // in bit - parameter int unsigned ICACHE_SET_ASSOC = 4; - parameter int unsigned ICACHE_LINE_WIDTH = 128; // in bit + localparam int unsigned ICACHE_INDEX_WIDTH = 12; // in bit + localparam int unsigned ICACHE_TAG_WIDTH = 44; // in bit + localparam int unsigned ICACHE_SET_ASSOC = 4; + localparam int unsigned ICACHE_LINE_WIDTH = 128; // in bit // D$ localparam int unsigned DCACHE_INDEX_WIDTH = 12; @@ -188,17 +202,12 
@@ package ariane_pkg; DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW } fu_op; - // ---------------------- - // Extract Bytes from Op - // ---------------------- - // TODO: Add atomics - function automatic logic [1:0] extract_transfer_size (fu_op op); - case (op) - LD, SD: return 2'b11; - LW, LWU, SW: return 2'b10; - LH, LHU, SH: return 2'b01; - LB, SB, LBU: return 2'b00; - default: return 2'b11; + function automatic logic is_amo (fu_op op); + case (op) inside + [AMO_LRW:AMO_MINDU]: begin + return 1'b1; + end + default: return 1'b0; endcase endfunction @@ -250,7 +259,8 @@ package ariane_pkg; // Atomics // -------------------- typedef enum logic [3:0] { - AMO_NONE, AMO_LR, AMO_SC, AMO_SWAP, AMO_ADD, AMO_AND, AMO_OR, AMO_XOR, AMO_MAX, AMO_MAXU, AMO_MIN, AMO_MINU + AMO_NONE, AMO_LR, AMO_SC, AMO_SWAP, AMO_ADD, AMO_AND, + AMO_OR, AMO_XOR, AMO_MAX, AMO_MAXU, AMO_MIN, AMO_MINU } amo_t; typedef struct packed { @@ -271,7 +281,6 @@ package ariane_pkg; // ---------------------- // cache request ports // ---------------------- - // I$ address translation requests typedef struct packed { logic fetch_valid; // address translation valid @@ -300,6 +309,24 @@ package ariane_pkg; exception_t ex; // we've encountered an exception } icache_dreq_o_t; + // AMO request going to cache. this request is unconditionally valid as soon + // as request goes high. + // Furthermore, those signals are kept stable until the response indicates + // completion by asserting ack. + typedef struct packed { + logic req; // this request is valid + amo_t amo_op; // atomic memory operation to perform + logic [1:0] size; // 2'b10 --> word operation, 2'b11 --> double word operation + logic [63:0] operand_a; // address + logic [63:0] operand_b; // data as laid out in the register + } amo_req_t; + + // AMO response coming from cache. 
+ typedef struct packed { + logic ack; // response is valid + logic [63:0] result; // sign-extended, result + } amo_resp_t; + // D$ data requests typedef struct packed { logic [DCACHE_INDEX_WIDTH-1:0] address_index; @@ -311,7 +338,6 @@ package ariane_pkg; logic [1:0] data_size; logic kill_req; logic tag_valid; - amo_t amo_op; } dcache_req_i_t; typedef struct packed { @@ -342,4 +368,92 @@ package ariane_pkg; return { {51 {instruction_i[31]}}, instruction_i[31], instruction_i[7], instruction_i[30:25], instruction_i[11:8], 1'b0 }; endfunction + // ---------------------- + // LSU Functions + // ---------------------- + // align data to address e.g.: shift data to be naturally 64 + function automatic logic [63:0] data_align (logic [2:0] addr, logic [63:0] data); + case (addr) + 3'b000: return data; + 3'b001: return {data[55:0], data[63:56]}; + 3'b010: return {data[47:0], data[63:48]}; + 3'b011: return {data[39:0], data[63:40]}; + 3'b100: return {data[31:0], data[63:32]}; + 3'b101: return {data[23:0], data[63:24]}; + 3'b110: return {data[15:0], data[63:16]}; + 3'b111: return {data[7:0], data[63:8]}; + endcase + return data; + endfunction + + // generate byte enable mask + function automatic logic [7:0] be_gen(logic [2:0] addr, logic [1:0] size); + case (size) + 2'b11: begin + return 8'b1111_1111; + end + 2'b10: begin + case (addr[2:0]) + 3'b000: return 8'b0000_1111; + 3'b001: return 8'b0001_1110; + 3'b010: return 8'b0011_1100; + 3'b011: return 8'b0111_1000; + 3'b100: return 8'b1111_0000; + endcase + end + 2'b01: begin + case (addr[2:0]) + 3'b000: return 8'b0000_0011; + 3'b001: return 8'b0000_0110; + 3'b010: return 8'b0000_1100; + 3'b011: return 8'b0001_1000; + 3'b100: return 8'b0011_0000; + 3'b101: return 8'b0110_0000; + 3'b110: return 8'b1100_0000; + endcase + end + 2'b00: begin + case (addr[2:0]) + 3'b000: return 8'b0000_0001; + 3'b001: return 8'b0000_0010; + 3'b010: return 8'b0000_0100; + 3'b011: return 8'b0000_1000; + 3'b100: return 8'b0001_0000; + 3'b101: return 
8'b0010_0000; + 3'b110: return 8'b0100_0000; + 3'b111: return 8'b1000_0000; + endcase + end + endcase + return 8'b0; + endfunction + + // ---------------------- + // Extract Bytes from Op + // ---------------------- + function automatic logic [1:0] extract_transfer_size(fu_op op); + case (op) + LD, SD, + AMO_LRD, AMO_SCD, + AMO_SWAPD, AMO_ADDD, + AMO_ANDD, AMO_ORD, + AMO_XORD, AMO_MAXD, + AMO_MAXDU, AMO_MIND, + AMO_MINDU: begin + return 2'b11; + end + LW, LWU, SW, + AMO_LRW, AMO_SCW, + AMO_SWAPW, AMO_ADDW, + AMO_ANDW, AMO_ORW, + AMO_XORW, AMO_MAXW, + AMO_MAXWU, AMO_MINW, + AMO_MINWU: begin + return 2'b10; + end + LH, LHU, SH: return 2'b01; + LB, SB, LBU: return 2'b00; + default: return 2'b11; + endcase + endfunction endpackage diff --git a/include/riscv_pkg.sv b/include/riscv_pkg.sv index 429b936ae..7a223e444 100644 --- a/include/riscv_pkg.sv +++ b/include/riscv_pkg.sv @@ -127,12 +127,25 @@ package riscv; logic [6:0] opcode; } utype_t; + // atomic instructions + typedef struct packed { + logic [31:27] funct5; + logic aq; + logic rl; + logic [24:20] rs2; + logic [19:15] rs1; + logic [14:12] funct3; + logic [11:7] rd; + logic [6:0] opcode; + } atype_t; + typedef union packed { logic [31:0] instr; rtype_t rtype; itype_t itype; stype_t stype; utype_t utype; + atype_t atype; } instruction_t; // -------------------- @@ -381,4 +394,23 @@ package riscv; function automatic logic [31:0] illegal (); return 32'h00000000; endfunction + + + // trace log compatible to spikes commit log feature + // pragma translate_off + function string spikeCommitLog(logic [63:0] pc, priv_lvl_t priv_lvl, logic [31:0] instr, logic [4:0] rd, logic [63:0] result); + string rd_s; + + if (rd < 10) rd_s = $sformatf("x %0d", rd); + else rd_s = $sformatf("x%0d", rd); + + if (rd != 0) begin + // 0 0x0000000080000118 (0xeecf8f93) x31 0x0000000080004000 + return $sformatf("%d 0x%h (0x%h) %s 0x%h\n", priv_lvl, pc, instr, rd_s, result); + end else begin + // 0 0x000000008000019c (0x0040006f) + return 
$sformatf("%d 0x%h (0x%h)\n", priv_lvl, pc, instr); + end + endfunction + // pragma translate_on endpackage diff --git a/src/amo_buffer.sv b/src/amo_buffer.sv new file mode 100644 index 000000000..89ed8484a --- /dev/null +++ b/src/amo_buffer.sv @@ -0,0 +1,83 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 20.09.2018 +// Description: Buffers AMO requests +// This unit buffers atomic memory operations for the cache subsystem. 
+// Furthermore it handles interfacing with the commit stage + +module amo_buffer ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // pipeline flush + + input logic valid_i, // AMO is valid + output logic ready_o, // AMO unit is ready + input ariane_pkg::amo_t amo_op_i, // AMO Operation + input logic [63:0] paddr_i, // physical address of store which needs to be placed in the queue + input logic [63:0] data_i, // data which is placed in the queue + input logic [1:0] data_size_i, // type of request we are making (e.g.: bytes to write) + // D$ + output ariane_pkg::amo_req_t amo_req_o, // request to cache subsystem + input ariane_pkg::amo_resp_t amo_resp_i, // response from cache subsystem + // Auxiliary signals + input logic amo_valid_commit_i, // We have a valid AMO in the commit stage + input logic no_st_pending_i // there is currently no store pending anymore +); + logic flush_amo_buffer; + logic amo_valid; + + typedef struct packed { + ariane_pkg::amo_t op; + logic [63:0] paddr; + logic [63:0] data; + logic [1:0] size; + } amo_op_t ; + + amo_op_t amo_data_in, amo_data_out; + + // validate this request as soon as all stores have drained and the AMO is in the commit stage + assign amo_req_o.req = no_st_pending_i & amo_valid_commit_i & amo_valid; + assign amo_req_o.amo_op = amo_data_out.op; + assign amo_req_o.size = amo_data_out.size; + assign amo_req_o.operand_a = amo_data_out.paddr; + assign amo_req_o.operand_b = amo_data_out.data; + + assign amo_data_in.op = amo_op_i; + assign amo_data_in.data = data_i; + assign amo_data_in.paddr = paddr_i; + assign amo_data_in.size = data_size_i; + + // only flush if we are currently not committing the AMO + // e.g.: it is not speculative anymore + assign flush_amo_buffer = flush_i & !amo_valid_commit_i; + + fifo_v2 #( + .DEPTH ( 1 ), + .ALM_EMPTY_TH ( 0 ), + .ALM_FULL_TH ( 0 ), + .dtype ( amo_op_t ) + ) i_amo_fifo ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i 
( flush_amo_buffer ), + .testmode_i ( 1'b0 ), + .full_o ( amo_valid ), + .empty_o ( ready_o ), + .alm_full_o ( ), // left open + .alm_empty_o ( ), // left open + .data_i ( amo_data_in ), + .push_i ( valid_i ), + .data_o ( amo_data_out ), + .pop_i ( amo_resp_i.ack ) + ); + +endmodule \ No newline at end of file diff --git a/src/ariane.sv b/src/ariane.sv index 4e989bf0e..0f8829049 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -129,6 +129,7 @@ module ariane #( logic lsu_commit_commit_ex; logic lsu_commit_ready_ex_commit; logic no_st_pending_ex_commit; + logic amo_valid_commit; // -------------- // ID <-> COMMIT // -------------- @@ -191,11 +192,20 @@ module ariane #( logic dcache_flush_ctrl_cache; logic dcache_flush_ack_cache_ctrl; logic set_debug_pc; + logic flush_commit; icache_areq_i_t icache_areq_ex_cache; icache_areq_o_t icache_areq_cache_ex; icache_dreq_i_t icache_dreq_if_cache; icache_dreq_o_t icache_dreq_cache_if; + + amo_req_t amo_req; + amo_resp_t amo_resp; + + logic debug_req; + // Disable debug during AMO commit + assign debug_req = debug_req_i & ~amo_valid_commit; + // ---------------- // DCache <-> * // ---------------- @@ -346,6 +356,9 @@ module ariane #( .lsu_commit_ready_o ( lsu_commit_ready_ex_commit ), // to commit .lsu_exception_o ( lsu_exception_ex_id ), .no_st_pending_o ( no_st_pending_ex_commit ), + .amo_valid_commit_i ( amo_valid_commit ), + .amo_req_o ( amo_req ), + .amo_resp_i ( amo_resp ), // CSR .csr_ready_o ( csr_ready_ex_id ), .csr_valid_i ( csr_valid_id_ex ), @@ -385,11 +398,13 @@ module ariane #( // Commit // --------- commit_stage commit_stage_i ( + .clk_i, + .rst_ni, .halt_i ( halt_ctrl ), .flush_dcache_i ( dcache_flush_ctrl_cache ), .exception_o ( ex_commit ), .debug_mode_i ( debug_mode ), - .debug_req_i ( debug_req_i ), + .debug_req_i ( debug_req ), .single_step_i ( single_step_csr_commit ), .commit_instr_i ( commit_instr_id_commit ), .commit_ack_o ( commit_ack ), @@ -399,6 +414,8 @@ module ariane #( .we_o ( we_commit_id ), 
.commit_lsu_o ( lsu_commit_commit_ex ), .commit_lsu_ready_i ( lsu_commit_ready_ex_commit ), + .amo_valid_commit_o ( amo_valid_commit ), + .amo_resp_i ( amo_resp ), .commit_csr_o ( csr_commit_commit_ex ), .pc_o ( pc_commit ), .csr_op_o ( csr_op_commit_csr ), @@ -408,6 +425,7 @@ module ariane #( .fence_i_o ( fence_i_commit_controller ), .fence_o ( fence_commit_controller ), .sfence_vma_o ( sfence_vma_commit_controller ), + .flush_commit_o ( flush_commit ), .* ); @@ -419,8 +437,8 @@ module ariane #( ) csr_regfile_i ( .flush_o ( flush_csr_ctrl ), .halt_csr_o ( halt_csr_ctrl ), - .commit_ack_i ( commit_ack ), .commit_instr_i ( commit_instr_id_commit ), + .commit_ack_i ( commit_ack ), .ex_i ( ex_commit ), .csr_op_i ( csr_op_commit_csr ), .csr_addr_i ( csr_addr_ex_csr ), @@ -451,6 +469,10 @@ module ariane #( .perf_data_o ( data_csr_perf ), .perf_data_i ( data_perf_csr ), .perf_we_o ( we_csr_perf ), + .debug_req_i ( debug_req ), + .ipi_i, + .irq_i, + .time_irq_i, .* ); @@ -487,8 +509,8 @@ module ariane #( .flush_id_o ( flush_ctrl_id ), .flush_ex_o ( flush_ctrl_ex ), .flush_tlb_o ( flush_tlb_ctrl_ex ), - .flush_dcache_o ( dcache_flush_ctrl_cache ), - .flush_dcache_ack_i ( dcache_flush_ack_cache_ctrl ), + .flush_dcache_o ( dcache_flush_ctrl_cache ), + .flush_dcache_ack_i ( dcache_flush_ack_cache_ctrl ), .halt_csr_i ( halt_csr_ctrl ), .halt_o ( halt_ctrl ), @@ -501,6 +523,7 @@ module ariane #( .fence_i_i ( fence_i_commit_controller ), .fence_i ( fence_commit_controller ), .sfence_vma_i ( sfence_vma_commit_controller ), + .flush_commit_i ( flush_commit ), .flush_icache_o ( icache_flush_ctrl_cache ), .* @@ -527,12 +550,11 @@ module ariane #( .dcache_enable_i ( dcache_en_csr_nbdcache ), .dcache_flush_i ( dcache_flush_ctrl_cache ), .dcache_flush_ack_o ( dcache_flush_ack_cache_ctrl ), - // from PTW, Load Unit and Store Unit - .dcache_amo_commit_i ( 1'b0 ), - .dcache_amo_valid_o ( ), - .dcache_amo_result_o ( ), - .dcache_amo_flush_i ( 1'b0 ), + // to commit stage + .amo_req_i ( 
amo_req ), + .amo_resp_o ( amo_resp ), .dcache_miss_o ( dcache_miss_cache_perf ), + // from PTW, Load Unit and Store Unit .dcache_req_ports_i ( dcache_req_ports_ex_cache ), .dcache_req_ports_o ( dcache_req_ports_cache_ex ), // memory side diff --git a/src/axi_adapter.sv b/src/axi_adapter.sv index 7b7766132..d253e42db 100644 --- a/src/axi_adapter.sv +++ b/src/axi_adapter.sv @@ -16,12 +16,11 @@ */ import std_cache_pkg::*; - module axi_adapter #( - parameter int unsigned DATA_WIDTH = 256, - parameter logic CRITICAL_WORD_FIRST = 0, // the AXI subsystem needs to support wrapping reads for this feature - parameter int unsigned AXI_ID_WIDTH = 10 - )( + parameter int unsigned DATA_WIDTH = 256, + parameter logic CRITICAL_WORD_FIRST = 0, // the AXI subsystem needs to support wrapping reads for this feature + parameter int unsigned AXI_ID_WIDTH = 10 +)( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low @@ -202,9 +201,13 @@ module axi_adapter #( axi.w_valid = 1'b1; axi.w_last = (cnt_q == '0) ? 1'b1 : 1'b0; - axi.w_data = wdata_i[BURST_SIZE-cnt_q]; - axi.w_strb = be_i[BURST_SIZE-cnt_q]; - + if (type_i == SINGLE_REQ) begin + axi.w_data = wdata_i[0]; + axi.w_strb = be_i[0]; + end else begin + axi.w_data = wdata_i[BURST_SIZE-cnt_q]; + axi.w_strb = be_i[BURST_SIZE-cnt_q]; + end axi.aw_valid = 1'b1; // we are here because we want to write a cache line axi.aw_len = BURST_SIZE; @@ -251,8 +254,13 @@ module axi_adapter #( // ~> from write, there is an outstanding write WAIT_LAST_W_READY: begin axi.w_valid = 1'b1; - axi.w_data = wdata_i[BURST_SIZE-cnt_q]; - axi.w_strb = be_i[BURST_SIZE-cnt_q]; + if (type_i == SINGLE_REQ) begin + axi.w_data = wdata_i[0]; + axi.w_strb = be_i[0]; + end else begin + axi.w_data = wdata_i[BURST_SIZE-cnt_q]; + axi.w_strb = be_i[BURST_SIZE-cnt_q]; + end // this is the last write axi.w_last = (cnt_q == '0) ? 
1'b1 : 1'b0; diff --git a/src/cache_subsystem/amo_alu.sv b/src/cache_subsystem/amo_alu.sv new file mode 100644 index 000000000..7e2537e85 --- /dev/null +++ b/src/cache_subsystem/amo_alu.sv @@ -0,0 +1,63 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 15.09.2018 +// Description: Combinatorial AMO unit +module amo_alu ( + // AMO interface + input ariane_pkg::amo_t amo_op_i, + input logic [63:0] amo_operand_a_i, + input logic [63:0] amo_operand_b_i, + output logic [63:0] amo_result_o // result of atomic memory operation +); + + logic [64:0] adder_sum; + logic [64:0] adder_operand_a, adder_operand_b; + + assign adder_sum = adder_operand_a + adder_operand_b; + + always_comb begin + + adder_operand_a = $signed(amo_operand_a_i); + adder_operand_b = $signed(amo_operand_b_i); + + amo_result_o = amo_operand_b_i; + + unique case (amo_op_i) + // the default is to output operand_b + ariane_pkg::AMO_SC:; + ariane_pkg::AMO_SWAP:; + ariane_pkg::AMO_ADD: amo_result_o = adder_sum[63:0]; + ariane_pkg::AMO_AND: amo_result_o = amo_operand_a_i & amo_operand_b_i; + ariane_pkg::AMO_OR: amo_result_o = amo_operand_a_i | amo_operand_b_i; + ariane_pkg::AMO_XOR: amo_result_o = amo_operand_a_i ^ amo_operand_b_i; + ariane_pkg::AMO_MAX: begin + adder_operand_b = -$signed(amo_operand_b_i); + amo_result_o = adder_sum[64] ? 
amo_operand_b_i : amo_operand_a_i; + end + ariane_pkg::AMO_MIN: begin + adder_operand_b = -$signed(amo_operand_b_i); + amo_result_o = adder_sum[64] ? amo_operand_a_i : amo_operand_b_i; + end + ariane_pkg::AMO_MAXU: begin + adder_operand_a = $unsigned(amo_operand_a_i); + adder_operand_b = -$unsigned(amo_operand_b_i); + amo_result_o = adder_sum[64] ? amo_operand_b_i : amo_operand_a_i; + end + ariane_pkg::AMO_MINU: begin + adder_operand_a = $unsigned(amo_operand_a_i); + adder_operand_b = -$unsigned(amo_operand_b_i); + amo_result_o = adder_sum[64] ? amo_operand_a_i : amo_operand_b_i; + end + default: amo_result_o = '0; + endcase + end +endmodule diff --git a/src/cache_subsystem/cache_ctrl.sv b/src/cache_subsystem/cache_ctrl.sv index 75ccd38e6..7e1813df1 100644 --- a/src/cache_subsystem/cache_ctrl.sv +++ b/src/cache_subsystem/cache_ctrl.sv @@ -21,57 +21,53 @@ import ariane_pkg::*; import std_cache_pkg::*; module cache_ctrl #( - parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000 - )( - input logic clk_i, // Clock - input logic rst_ni, // Asynchronous reset active low - input logic flush_i, - input logic bypass_i, // enable cache - output logic busy_o, - - // Core request ports - input dcache_req_i_t req_port_i, - output dcache_req_o_t req_port_o, - - // SRAM interface - output logic [DCACHE_SET_ASSOC-1:0] req_o, // req is valid - output logic [DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array - input logic gnt_i, - output cache_line_t data_o, - output cl_be_t be_o, - output logic [DCACHE_TAG_WIDTH-1:0] tag_o, //valid one cycle later - input cache_line_t [DCACHE_SET_ASSOC-1:0] data_i, - output logic we_o, - input logic [DCACHE_SET_ASSOC-1:0] hit_way_i, - // Miss handling - output miss_req_t miss_req_o, - // return - input logic miss_gnt_i, - input logic active_serving_i, // the miss unit is currently active for this unit, serving the miss - input logic [63:0] critical_word_i, - input logic critical_word_valid_i, - - input logic bypass_gnt_i, - input logic 
bypass_valid_i, - input logic [63:0] bypass_data_i, - // check MSHR for aliasing - output logic [55:0] mshr_addr_o, - input logic mshr_addr_matches_i, - input logic mshr_index_matches_i + parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000 +)( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, + input logic bypass_i, // enable cache + output logic busy_o, + // Core request ports + input dcache_req_i_t req_port_i, + output dcache_req_o_t req_port_o, + // SRAM interface + output logic [DCACHE_SET_ASSOC-1:0] req_o, // req is valid + output logic [DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array + input logic gnt_i, + output cache_line_t data_o, + output cl_be_t be_o, + output logic [DCACHE_TAG_WIDTH-1:0] tag_o, //valid one cycle later + input cache_line_t [DCACHE_SET_ASSOC-1:0] data_i, + output logic we_o, + input logic [DCACHE_SET_ASSOC-1:0] hit_way_i, + // Miss handling + output miss_req_t miss_req_o, + // return + input logic miss_gnt_i, + input logic active_serving_i, // the miss unit is currently active for this unit, serving the miss + input logic [63:0] critical_word_i, + input logic critical_word_valid_i, + // bypass ports + input logic bypass_gnt_i, + input logic bypass_valid_i, + input logic [63:0] bypass_data_i, + // check MSHR for aliasing + output logic [55:0] mshr_addr_o, + input logic mshr_addr_matches_i, + input logic mshr_index_matches_i ); - // 0 IDLE - // 1 WAIT_TAG - // 2 WAIT_TAG_BYPASSED - // 3 STORE_REQ - // 4 WAIT_REFILL_VALID - // 5 WAIT_REFILL_GNT - // 6 WAIT_TAG_SAVED - // 7 WAIT_MSHR - // 8 WAIT_CRITICAL_WORD - enum logic [3:0] { - IDLE, WAIT_TAG, WAIT_TAG_BYPASSED, STORE_REQ, WAIT_REFILL_VALID, WAIT_REFILL_GNT, WAIT_TAG_SAVED, WAIT_MSHR, WAIT_CRITICAL_WORD + IDLE, // 0 + WAIT_TAG, // 1 + WAIT_TAG_BYPASSED, // 2 + STORE_REQ, // 3 + WAIT_REFILL_VALID, // 4 + WAIT_REFILL_GNT, // 5 + WAIT_TAG_SAVED, // 6 + WAIT_MSHR, // 7 + WAIT_CRITICAL_WORD // 8 } state_d, state_q; 
typedef struct packed { @@ -109,12 +105,10 @@ module cache_ctrl #( // incoming cache-line -> this is needed as synthesis is not supporting +: indexing in a multi-dimensional array // cache-line offset -> multiple of 64 cl_offset = mem_req_q.index[DCACHE_BYTE_OFFSET-1:3] << 6; // shift by 6 to the left - // default assignments state_d = state_q; mem_req_d = mem_req_q; hit_way_d = hit_way_q; - // output assignments req_port_o.data_gnt = 1'b0; req_port_o.data_rvalid = 1'b0; @@ -135,7 +129,7 @@ module cache_ctrl #( IDLE: begin // a new request arrived if (req_port_i.data_req && !flush_i) begin - // request the cache line - we can do this specualtive + // request the cache line - we can do this speculatively req_o = '1; // save index, be and we @@ -172,10 +166,11 @@ module cache_ctrl #( WAIT_TAG, WAIT_TAG_SAVED: begin // depending on where we come from // For the store case the tag comes in the same cycle - tag_o = (state_q == WAIT_TAG_SAVED || mem_req_q.we) ? mem_req_q.tag : req_port_i.address_tag; + tag_o = (state_q == WAIT_TAG_SAVED || mem_req_q.we) ? 
mem_req_q.tag + : req_port_i.address_tag; // we speculatively request another transfer if (req_port_i.data_req && !flush_i) begin - req_o = '1; + req_o = '1; end // check that the client really wants to do the request @@ -185,7 +180,6 @@ module cache_ctrl #( // ------------ if (|hit_way_i) begin // we can request another cache-line if this was a load - // make another request if (req_port_i.data_req && !mem_req_q.we && !flush_i) begin state_d = WAIT_TAG; // switch back to WAIT_TAG mem_req_d.index = req_port_i.address_index; @@ -195,12 +189,12 @@ module cache_ctrl #( mem_req_d.wdata = req_port_i.data_wdata; mem_req_d.tag = req_port_i.address_tag; mem_req_d.bypass = 1'b0; + req_port_o.data_gnt = gnt_i; if (!gnt_i) begin state_d = IDLE; end - end else begin state_d = IDLE; end @@ -215,7 +209,6 @@ module cache_ctrl #( // report data for a read if (!mem_req_q.we) begin req_port_o.data_rvalid = 1'b1; - // else this was a store so we need an extra step to handle it end else begin state_d = STORE_REQ; @@ -273,7 +266,7 @@ module cache_ctrl #( // ~> we are here as we need a second round of memory access for a store STORE_REQ: begin // check if the MSHR still doesn't match - mshr_addr_o = {mem_req_d.tag, mem_req_q.index}; + mshr_addr_o = {mem_req_q.tag, mem_req_q.index}; // We need to re-check for MSHR aliasing here as the store requires at least // two memory look-ups on a single-ported SRAM and therefore is non-atomic @@ -284,7 +277,7 @@ module cache_ctrl #( we_o = 1'b1; be_o.vldrty = hit_way_q; - + // set the correct byte enable be_o.data[cl_offset>>3 +: 8] = mem_req_q.be; data_o.data[cl_offset +: 64] = mem_req_q.wdata; @@ -384,7 +377,6 @@ module cache_ctrl #( mem_req_d.wdata = req_port_i.data_wdata; mem_req_d.tag = req_port_i.address_tag; - state_d = IDLE; // Wait until we have access on the memory array @@ -393,7 +385,6 @@ module cache_ctrl #( mem_req_d.bypass = 1'b0; req_port_o.data_gnt = 1'b1; end - end else begin state_d = IDLE; end @@ -411,7 +402,7 @@ module 
cache_ctrl #( endcase if (req_port_i.kill_req) begin - state_d = IDLE; + state_d = IDLE; req_port_o.data_rvalid = 1'b1; end end @@ -421,9 +412,9 @@ module cache_ctrl #( // -------------- always_ff @(posedge clk_i or negedge rst_ni) begin if (~rst_ni) begin - state_q <= IDLE; - mem_req_q <= '0; - hit_way_q <= '0; + state_q <= IDLE; + mem_req_q <= '0; + hit_way_q <= '0; end else begin state_q <= state_d; mem_req_q <= mem_req_d; @@ -443,18 +434,3 @@ module cache_ctrl #( `endif `endif endmodule - - - - -module AMO_alu ( - input logic clk_i, - input logic rst_ni, - // AMO interface - input logic amo_commit_i, // commit atomic memory operation - output logic amo_valid_o, // we have a valid AMO result - output logic [63:0] amo_result_o, // result of atomic memory operation - input logic amo_flush_i // forget about AMO - ); - -endmodule diff --git a/src/cache_subsystem/miss_handler.sv b/src/cache_subsystem/miss_handler.sv index 4848335f5..b119f68a6 100644 --- a/src/cache_subsystem/miss_handler.sv +++ b/src/cache_subsystem/miss_handler.sv @@ -47,6 +47,9 @@ module miss_handler #( input logic [NR_PORTS-1:0][55:0] mshr_addr_i, output logic [NR_PORTS-1:0] mshr_addr_matches_o, output logic [NR_PORTS-1:0] mshr_index_matches_o, + // AMO + input amo_req_t amo_req_i, + output amo_resp_t amo_resp_o, // Port to SRAMs, for refill and eviction output logic [DCACHE_SET_ASSOC-1:0] req_o, output logic [DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array @@ -56,22 +59,25 @@ module miss_handler #( output logic we_o ); - // 0 IDLE - // 1 FLUSHING - // 2 FLUSH - // 3 WB_CACHELINE_FLUSH - // 4 FLUSH_REQ_STATUS - // 5 WB_CACHELINE_MISS - // 6 WAIT_GNT_SRAM - // 7 MISS - // 8 REQ_CACHELINE - // 9 MISS_REPL - // A SAVE_CACHELINE - // B INIT - // FSM states - enum logic [3:0] { IDLE, FLUSHING, FLUSH, WB_CACHELINE_FLUSH, FLUSH_REQ_STATUS, WB_CACHELINE_MISS, WAIT_GNT_SRAM, MISS, - REQ_CACHELINE, MISS_REPL, SAVE_CACHELINE, INIT } state_d, state_q; + enum logic [3:0] { + IDLE, // 0 + FLUSHING, 
// 1 + FLUSH, // 2 + WB_CACHELINE_FLUSH, // 3 + FLUSH_REQ_STATUS, // 4 + WB_CACHELINE_MISS, // 5 + WAIT_GNT_SRAM, // 6 + MISS, // 7 + REQ_CACHELINE, // 8 + MISS_REPL, // 9 + SAVE_CACHELINE, // A + INIT, // B + AMO_LOAD, // C + AMO_SAVE_LOAD, // D + AMO_STORE // E + } state_d, state_q; + // Registers mshr_t mshr_d, mshr_q; logic [DCACHE_INDEX_WIDTH-1:0] cnt_d, cnt_q; @@ -79,6 +85,7 @@ module miss_handler #( // cache line to evict cache_line_t evict_cl_d, evict_cl_q; + logic serve_amo_d, serve_amo_q; // Request from one FSM logic [NR_PORTS-1:0] miss_req_valid; logic [NR_PORTS-1:0] miss_req_bypass; @@ -90,11 +97,13 @@ module miss_handler #( // Cache Line Refill <-> AXI logic req_fsm_miss_valid; - logic req_fsm_miss_bypass; logic [63:0] req_fsm_miss_addr; logic [DCACHE_LINE_WIDTH-1:0] req_fsm_miss_wdata; logic req_fsm_miss_we; logic [(DCACHE_LINE_WIDTH/8)-1:0] req_fsm_miss_be; + req_t req_fsm_miss_req; + logic [1:0] req_fsm_miss_size; + logic gnt_miss_fsm; logic valid_miss_fsm; logic [(DCACHE_LINE_WIDTH/64)-1:0][63:0] data_miss_fsm; @@ -103,6 +112,14 @@ module miss_handler #( logic lfsr_enable; logic [DCACHE_SET_ASSOC-1:0] lfsr_oh; logic [$clog2(DCACHE_SET_ASSOC-1)-1:0] lfsr_bin; + // AMOs + ariane_pkg::amo_t amo_op; + logic [63:0] amo_operand_a, amo_operand_b, amo_result_o; + + struct packed { + logic [63:3] address; + logic valid; + } reservation_d, reservation_q; // ------------------------------ // Cache Management @@ -129,14 +146,16 @@ module miss_handler #( lfsr_enable = 1'b0; // to AXI refill req_fsm_miss_valid = 1'b0; - req_fsm_miss_bypass = 1'b0; req_fsm_miss_addr = '0; req_fsm_miss_wdata = '0; req_fsm_miss_we = 1'b0; req_fsm_miss_be = '0; + req_fsm_miss_req = CACHE_LINE_REQ; + req_fsm_miss_size = 2'b11; // core flush_ack_o = 1'b0; miss_o = 1'b0; // to performance counter + serve_amo_d = serve_amo_q; // -------------------------------- // Flush and Miss operation // -------------------------------- @@ -148,11 +167,30 @@ module miss_handler #( // communicate to 
the requester which unit we are currently serving active_serving_o = '0; active_serving_o[mshr_q.id] = mshr_q.valid; + // AMOs + amo_resp_o.ack = 1'b0; + amo_resp_o.result = '0; + // silence the unit when not used + amo_op = amo_req_i.amo_op; + amo_operand_a = '0; + amo_operand_b = '0; + reservation_d = reservation_q; case (state_q) IDLE: begin - + // lowest priority are AMOs, wait until everything else is served before going for the AMOs + if (amo_req_i.req) begin + // 1. Flush the cache + if (!serve_amo_q) begin + state_d = FLUSH_REQ_STATUS; + serve_amo_d = 1'b1; + // 2. Do the AMO + end else begin + state_d = AMO_LOAD; + serve_amo_d = 1'b0; + end + end // check if we want to flush and can flush e.g.: we are not busy anymore // TODO: Check that the busy flag is indeed needed if (flush_i && !busy_i) begin @@ -312,7 +350,8 @@ module miss_handler #( we_o = 1'b1; // finished with flushing operation, go back to idle if (cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == DCACHE_NUM_WORDS-1) begin - flush_ack_o = 1'b1; + // only acknowledge if the flush wasn't triggered by an atomic + flush_ack_o = ~serve_amo_q; state_d = IDLE; end end @@ -331,6 +370,82 @@ module miss_handler #( if (cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == DCACHE_NUM_WORDS-1) state_d = IDLE; end + // ---------------------- + // AMOs + // ---------------------- + // TODO(zarubaf) Move this closer to memory + // ~> we are here because we need to do the AMO, the cache is clean at this point + // start by executing the load + AMO_LOAD: begin + req_fsm_miss_valid = 1'b1; + // address is in operand a + req_fsm_miss_addr = amo_req_i.operand_a; + req_fsm_miss_req = SINGLE_REQ; + req_fsm_miss_size = amo_req_i.size; + // the request has been granted + if (gnt_miss_fsm) begin + state_d = AMO_SAVE_LOAD; + end + end + // save the load value + AMO_SAVE_LOAD: begin + if (valid_miss_fsm) begin + // we are only concerned about the lower 64-bit + mshr_d.wdata = data_miss_fsm[0]; + state_d = AMO_STORE; + end + end 
+ // and do the store + AMO_STORE: begin + automatic logic [63:0] load_data; + // re-align load data + load_data = data_align(amo_req_i.operand_a[2:0], mshr_q.wdata); + // Sign-extend for word operation + if (amo_req_i.size == 2'b10) begin + amo_operand_a = sext32(load_data[31:0]); + amo_operand_b = sext32(amo_req_i.operand_b[31:0]); + end else begin + amo_operand_a = load_data; + amo_operand_b = amo_req_i.operand_b; + end + + // we do not need a store request for load reserved + req_fsm_miss_valid = (amo_req_i.amo_op == AMO_LR) ? 1'b0 : 1'b1; + // for a load reserved we do not want to write + req_fsm_miss_we = (amo_req_i.amo_op == AMO_LR) ? 1'b0 : 1'b1; + req_fsm_miss_req = SINGLE_REQ; + req_fsm_miss_size = amo_req_i.size; + req_fsm_miss_addr = amo_req_i.operand_a; + + req_fsm_miss_wdata = data_align(amo_req_i.operand_a[2:0], amo_result_o); + req_fsm_miss_be = be_gen(amo_req_i.operand_a[2:0], amo_req_i.size); + + // place a reservation on the memory + if (amo_req_i.amo_op == AMO_LR) begin + reservation_d.address = amo_req_i.operand_a[63:3]; + reservation_d.valid = 1'b1; + end + + // the request is valid or we didn't need to go for another store + if (valid_miss_fsm || (amo_req_i.amo_op == AMO_LR)) begin + state_d = IDLE; + amo_resp_o.ack = 1'b1; + // write-back the result + amo_resp_o.result = amo_operand_a; + // in case we have a SC we need to look into the reservation table + if (amo_req_i.amo_op == AMO_SC) begin + if (reservation_q.address == amo_req_i.operand_a[63:3] && reservation_q.valid) begin + amo_resp_o.result = 1'b0; + end else begin + amo_resp_o.result = 1'b1; + end + // An SC must fail if there is another SC (to any address) between the LR and the SC in program + // order.
+ // in any case destroy the reservation + reservation_d.valid = 1'b0; + end + end + end endcase end @@ -357,17 +472,21 @@ module miss_handler #( // -------------------- always_ff @(posedge clk_i or negedge rst_ni) begin if (~rst_ni) begin - mshr_q <= '0; - state_q <= INIT; - cnt_q <= '0; - evict_way_q <= '0; - evict_cl_q <= '0; + mshr_q <= '0; + state_q <= INIT; + cnt_q <= '0; + evict_way_q <= '0; + evict_cl_q <= '0; + serve_amo_q <= 1'b0; + reservation_q <= '0; end else begin - mshr_q <= mshr_d; - state_q <= state_d; - cnt_q <= cnt_d; - evict_way_q <= evict_way_d; - evict_cl_q <= evict_cl_d; + mshr_q <= mshr_d; + state_q <= state_d; + cnt_q <= cnt_d; + evict_way_q <= evict_way_d; + evict_cl_q <= evict_cl_d; + serve_amo_q <= serve_amo_d; + reservation_q <= reservation_d; end end @@ -396,32 +515,32 @@ module miss_handler #( logic [AXI_ID_WIDTH-1:0] gnt_id_bypass_fsm; arbiter #( - .NR_PORTS ( NR_PORTS ), - .DATA_WIDTH ( 64 ) + .NR_PORTS ( NR_PORTS ), + .DATA_WIDTH ( 64 ) ) i_bypass_arbiter ( // Master Side - .data_req_i ( miss_req_valid & miss_req_bypass ), - .address_i ( miss_req_addr ), - .data_wdata_i ( miss_req_wdata ), - .data_we_i ( miss_req_we ), - .data_be_i ( miss_req_be ), - .data_size_i ( miss_req_size ), - .data_gnt_o ( bypass_gnt_o ), - .data_rvalid_o ( bypass_valid_o ), - .data_rdata_o ( bypass_data_o ), + .data_req_i ( miss_req_valid & miss_req_bypass ), + .address_i ( miss_req_addr ), + .data_wdata_i ( miss_req_wdata ), + .data_we_i ( miss_req_we ), + .data_be_i ( miss_req_be ), + .data_size_i ( miss_req_size ), + .data_gnt_o ( bypass_gnt_o ), + .data_rvalid_o ( bypass_valid_o ), + .data_rdata_o ( bypass_data_o ), // Slave Sid - .id_i ( id_bypass_fsm[$clog2(NR_PORTS)-1:0] ), - .id_o ( id_fsm_bypass ), - .gnt_id_i ( gnt_id_bypass_fsm[$clog2(NR_PORTS)-1:0] ), - .address_o ( req_fsm_bypass_addr ), - .data_wdata_o ( req_fsm_bypass_wdata ), - .data_req_o ( req_fsm_bypass_valid ), - .data_we_o ( req_fsm_bypass_we ), - .data_be_o ( req_fsm_bypass_be ), -
.data_size_o ( req_fsm_bypass_size ), - .data_gnt_i ( gnt_bypass_fsm ), - .data_rvalid_i ( valid_bypass_fsm ), - .data_rdata_i ( data_bypass_fsm ), + .id_i ( id_bypass_fsm[$clog2(NR_PORTS)-1:0] ), + .id_o ( id_fsm_bypass ), + .gnt_id_i ( gnt_id_bypass_fsm[$clog2(NR_PORTS)-1:0] ), + .address_o ( req_fsm_bypass_addr ), + .data_wdata_o ( req_fsm_bypass_wdata ), + .data_req_o ( req_fsm_bypass_valid ), + .data_we_o ( req_fsm_bypass_we ), + .data_be_o ( req_fsm_bypass_be ), + .data_size_o ( req_fsm_bypass_size ), + .data_gnt_i ( gnt_bypass_fsm ), + .data_rvalid_i ( valid_bypass_fsm ), + .data_rdata_i ( data_bypass_fsm ), .* ); @@ -449,20 +568,20 @@ module miss_handler #( ); // ---------------------- - // Cache Line Arbiter + // Cache Line AXI Refill // ---------------------- axi_adapter #( .DATA_WIDTH ( DCACHE_LINE_WIDTH ), .AXI_ID_WIDTH ( AXI_ID_WIDTH ) ) i_miss_axi_adapter ( .req_i ( req_fsm_miss_valid ), - .type_i ( CACHE_LINE_REQ ), + .type_i ( req_fsm_miss_req ), .gnt_o ( gnt_miss_fsm ), .addr_i ( req_fsm_miss_addr ), .we_i ( req_fsm_miss_we ), .wdata_i ( req_fsm_miss_wdata ), .be_i ( req_fsm_miss_be ), - .size_i ( 2'b11 ), + .size_i ( req_fsm_miss_size ), .id_i ( '0 ), .gnt_id_o ( ), // open .valid_o ( valid_miss_fsm ), @@ -482,6 +601,16 @@ module miss_handler #( .* ); + // ----------------- + // AMO ALU + // ----------------- + amo_alu i_amo_alu ( + .amo_op_i ( amo_op ), + .amo_operand_a_i ( amo_operand_a ), + .amo_operand_b_i ( amo_operand_b ), + .amo_result_o ( amo_result_o ) + ); + // ----------------- // Struct Split // ----------------- diff --git a/src/cache_subsystem/std_cache_subsystem.sv b/src/cache_subsystem/std_cache_subsystem.sv index a6739b7b4..e3cfeb6fa 100644 --- a/src/cache_subsystem/std_cache_subsystem.sv +++ b/src/cache_subsystem/std_cache_subsystem.sv @@ -24,58 +24,50 @@ import std_cache_pkg::*; module std_cache_subsystem #( parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000 )( - input logic clk_i, - input logic rst_ni, - - // I$ - input 
logic icache_en_i, // enable icache (or bypass e.g: in debug mode) - input logic icache_flush_i, // flush the icache, flush and kill have to be asserted together - output logic icache_miss_o, // to performance counter - - // address translation requests - input icache_areq_i_t icache_areq_i, // to/from frontend - output icache_areq_o_t icache_areq_o, - // data requests - input icache_dreq_i_t icache_dreq_i, // to/from frontend - output icache_dreq_o_t icache_dreq_o, - - // D$ - // Cache management - input logic dcache_enable_i, // from CSR - input logic dcache_flush_i, // high until acknowledged - output logic dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed - output logic dcache_miss_o, // we missed on a ld/st - // AMO interface (not functional yet) - input logic dcache_amo_commit_i, // commit atomic memory operation - output logic dcache_amo_valid_o, // we have a valid AMO result - output logic [63:0] dcache_amo_result_o, // result of atomic memory operation - input logic dcache_amo_flush_i, // forget about AMO - // Request ports - input dcache_req_i_t [2:0] dcache_req_ports_i, // to/from LSU - output dcache_req_o_t [2:0] dcache_req_ports_o, // to/from LSU - - // memory side - AXI_BUS.Master icache_data_if, // I$ refill port - AXI_BUS.Master dcache_data_if, // D$ refill port - AXI_BUS.Master dcache_bypass_if // bypass axi port (disabled D$ or uncacheable access) + input logic clk_i, + input logic rst_ni, + // I$ + input logic icache_en_i, // enable icache (or bypass e.g: in debug mode) + input logic icache_flush_i, // flush the icache, flush and kill have to be asserted together + output logic icache_miss_o, // to performance counter + // address translation requests + input icache_areq_i_t icache_areq_i, // to/from frontend + output icache_areq_o_t icache_areq_o, + // data requests + input icache_dreq_i_t icache_dreq_i, // to/from frontend + output icache_dreq_o_t icache_dreq_o, + // AMOs + input amo_req_t amo_req_i, + 
output amo_resp_t amo_resp_o, + // D$ + // Cache management + input logic dcache_enable_i, // from CSR + input logic dcache_flush_i, // high until acknowledged + output logic dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed + output logic dcache_miss_o, // we missed on a ld/st + // Request ports + input dcache_req_i_t [2:0] dcache_req_ports_i, // to/from LSU + output dcache_req_o_t [2:0] dcache_req_ports_o, // to/from LSU + // memory side + AXI_BUS.Master icache_data_if, // I$ refill port + AXI_BUS.Master dcache_data_if, // D$ refill port + AXI_BUS.Master dcache_bypass_if // bypass axi port (disabled D$ or uncacheable access) ); - std_icache #( ) i_icache ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .flush_i ( icache_flush_i ), - .en_i ( icache_en_i ), - .miss_o ( icache_miss_o ), - .areq_i ( icache_areq_i ), - .areq_o ( icache_areq_o ), - .dreq_i ( icache_dreq_i ), - .dreq_o ( icache_dreq_o ), - .axi ( icache_data_if ) + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i ( icache_flush_i ), + .en_i ( icache_en_i ), + .miss_o ( icache_miss_o ), + .areq_i ( icache_areq_i ), + .areq_o ( icache_areq_o ), + .dreq_i ( icache_dreq_i ), + .dreq_o ( icache_dreq_o ), + .axi ( icache_data_if ) ); - // decreasing priority // Port 0: PTW // Port 1: Load Unit @@ -83,21 +75,18 @@ module std_cache_subsystem #( std_nbdcache #( .CACHE_START_ADDR ( CACHE_START_ADDR ) ) i_nbdcache ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .enable_i ( dcache_enable_i ), - .flush_i ( dcache_flush_i ), - .flush_ack_o ( dcache_flush_ack_o ), - .miss_o ( dcache_miss_o ), - .data_if ( dcache_data_if ), - .bypass_if ( dcache_bypass_if ), - .amo_commit_i ( dcache_amo_commit_i ), - .amo_valid_o ( dcache_amo_valid_o ), - .amo_result_o ( dcache_amo_result_o ), - .amo_flush_i ( dcache_amo_flush_i ), - .req_ports_i ( dcache_req_ports_i ), - .req_ports_o ( dcache_req_ports_o ) + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .enable_i ( dcache_enable_i ), + .flush_i ( 
dcache_flush_i ), + .flush_ack_o ( dcache_flush_ack_o ), + .miss_o ( dcache_miss_o ), + .data_if ( dcache_data_if ), + .bypass_if ( dcache_bypass_if ), + .req_ports_i ( dcache_req_ports_i ), + .req_ports_o ( dcache_req_ports_o ), + .amo_req_i ( amo_req_i ), + .amo_resp_o ( amo_resp_o ) ); - endmodule // std_cache_subsystem diff --git a/src/cache_subsystem/std_nbdcache.sv b/src/cache_subsystem/std_nbdcache.sv index 6d89c7d68..fa499cf6d 100644 --- a/src/cache_subsystem/std_nbdcache.sv +++ b/src/cache_subsystem/std_nbdcache.sv @@ -16,7 +16,7 @@ import ariane_pkg::*; import std_cache_pkg::*; module std_nbdcache #( - parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000 + parameter logic [63:0] CACHE_START_ADDR = 64'h8000_0000 )( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low @@ -24,16 +24,13 @@ module std_nbdcache #( input logic enable_i, // from CSR input logic flush_i, // high until acknowledged output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed - output logic miss_o, // we missed on a ld/st - // AMO interface - input logic amo_commit_i, // commit atomic memory operation - output logic amo_valid_o, // we have a valid AMO result - output logic [63:0] amo_result_o, // result of atomic memory operation - input logic amo_flush_i, // forget about AMO + output logic miss_o, // we missed on a LD/ST + // AMOs + input amo_req_t amo_req_i, + output amo_resp_t amo_resp_o, // Request ports input dcache_req_i_t [2:0] req_ports_i, // request ports - output dcache_req_o_t [2:0] req_ports_o, // request ports - + output dcache_req_o_t [2:0] req_ports_o, // request ports // Cache AXI refill port AXI_BUS.Master data_if, AXI_BUS.Master bypass_if @@ -92,13 +89,11 @@ module std_nbdcache #( .CACHE_START_ADDR ( CACHE_START_ADDR ) ) i_cache_ctrl ( .bypass_i ( ~enable_i ), - .busy_o ( busy [i] ), - + // from core .req_port_i ( req_ports_i [i] ), .req_port_o ( req_ports_o [i] ), - - + // to SRAM array .req_o ( req 
[i+1] ), .addr_o ( addr [i+1] ), .gnt_i ( gnt [i+1] ), @@ -118,9 +113,9 @@ module std_nbdcache #( .bypass_valid_i ( bypass_valid [i] ), .bypass_data_i ( bypass_data [i] ), - .mshr_addr_o ( mshr_addr [i] ), // TODO - .mshr_addr_matches_i ( mshr_addr_matches [i] ), // TODO - .mshr_index_matches_i ( mshr_index_matches[i] ), // TODO + .mshr_addr_o ( mshr_addr [i] ), + .mshr_addr_matches_i ( mshr_addr_matches [i] ), + .mshr_index_matches_i ( mshr_index_matches[i] ), .* ); end @@ -132,7 +127,11 @@ module std_nbdcache #( miss_handler #( .NR_PORTS ( 3 ) ) i_miss_handler ( + .flush_i ( flush_i ), .busy_i ( |busy ), + // AMOs + .amo_req_i ( amo_req_i ), + .amo_resp_o ( amo_resp_o ), .miss_req_i ( miss_req ), .miss_gnt_o ( miss_gnt ), .bypass_gnt_o ( bypass_gnt ), @@ -150,6 +149,8 @@ module std_nbdcache #( .be_o ( be [0] ), .data_o ( wdata [0] ), .we_o ( we [0] ), + .bypass_if, + .data_if, .* ); @@ -194,7 +195,7 @@ module std_nbdcache #( // ---------------- // align each valid/dirty bit pair to a byte boundary in order to leverage byte enable signals. - // note: if you have an SRAM that supports flat bit enables for your target technology, + // note: if you have an SRAM that supports flat bit enables for your target technology, // you can use it here to save the extra 4x overhead introduced by this workaround. 
logic [4*DCACHE_DIRTY_WIDTH-1:0] dirty_wdata, dirty_rdata; diff --git a/src/clint/axi_lite_interface.sv b/src/clint/axi_lite_interface.sv index 77a0b1cac..982ce2fbc 100644 --- a/src/clint/axi_lite_interface.sv +++ b/src/clint/axi_lite_interface.sv @@ -141,7 +141,7 @@ module axi_lite_interface #( // Registers // ------------------------ always_ff @(posedge clk_i or negedge rst_ni) begin - if(~rst_ni) begin + if (~rst_ni) begin CS <= IDLE; address_q <= '0; trans_id_q <= '0; @@ -159,10 +159,10 @@ module axi_lite_interface #( `ifndef SYNTHESIS `ifndef VERILATOR // check that burst length is just one - assert property (@(posedge clk_i) slave.ar_valid |-> ((slave.ar_len == 8'b0) && (slave.ar_size == $clog2(AXI_ADDR_WIDTH/8)))) + assert property (@(posedge clk_i) slave.ar_valid |-> ((slave.ar_len == 8'b0))) else begin $error("AXI Lite does not support bursts larger than 1 or byte length unequal to the native bus size"); $stop(); end // do the same for the write channel - assert property (@(posedge clk_i) slave.aw_valid |-> ((slave.aw_len == 8'b0) && (slave.aw_size == $clog2(AXI_ADDR_WIDTH/8)))) + assert property (@(posedge clk_i) slave.aw_valid |-> ((slave.aw_len == 8'b0))) else begin $error("AXI Lite does not support bursts larger than 1 or byte length unequal to the native bus size"); $stop(); end `endif `endif diff --git a/src/clint/clint.sv b/src/clint/clint.sv index 659d6cefd..a53766497 100644 --- a/src/clint/clint.sv +++ b/src/clint/clint.sv @@ -132,10 +132,11 @@ module clint #( always_comb begin : irq_gen // check that the mtime cmp register is set to a meaningful value for (int unsigned i = 0; i < NR_CORES; i++) begin - if (mtimecmp_q[i] != 0 && mtime_q >= mtimecmp_q[i]) + if (mtimecmp_q[i] != 0 && mtime_q >= mtimecmp_q[i]) begin timer_irq_o[i] = 1'b1; - else + end else begin timer_irq_o[i] = 1'b0; + end end end @@ -155,7 +156,7 @@ module clint #( // Registers always_ff @(posedge clk_i or negedge rst_ni) begin - if(~rst_ni) begin + if (~rst_ni) begin mtime_q <= 
64'b0; mtimecmp_q <= 'b0; msip_q <= '0; diff --git a/src/commit_stage.sv b/src/commit_stage.sv index 2dd207236..06c1beeb9 100644 --- a/src/commit_stage.sv +++ b/src/commit_stage.sv @@ -18,6 +18,7 @@ module commit_stage #( parameter int unsigned NR_COMMIT_PORTS = 2 )( input logic clk_i, + input logic rst_ni, input logic halt_i, // request to halt the core input logic flush_dcache_i, // request to flush dcache -> also flush the pipeline output exception_t exception_o, // take exception to controller @@ -27,12 +28,12 @@ module commit_stage #( // from scoreboard input scoreboard_entry_t [NR_COMMIT_PORTS-1:0] commit_instr_i, // the instruction we want to commit output logic [NR_COMMIT_PORTS-1:0] commit_ack_o, // acknowledge that we are indeed committing - // to register file output logic [NR_COMMIT_PORTS-1:0][4:0] waddr_o, // register file write address output logic [NR_COMMIT_PORTS-1:0][63:0] wdata_o, // register file write data output logic [NR_COMMIT_PORTS-1:0] we_o, // register file write enable - + // Atomic memory operations + input amo_resp_t amo_resp_i, // result of AMO operation // to CSR file and PC Gen (because on certain CSR instructions we'll need to flush the whole pipeline) output logic [63:0] pc_o, // to/from CSR file @@ -43,18 +44,22 @@ module commit_stage #( // commit signals to ex output logic commit_lsu_o, // commit the pending store input logic commit_lsu_ready_i, // commit buffer of LSU is ready + output logic amo_valid_commit_o, // valid AMO in commit stage input logic no_st_pending_i, // there is no store pending output logic commit_csr_o, // commit the pending CSR instruction output logic fence_i_o, // flush I$ and pipeline output logic fence_o, // flush D$ and pipeline + output logic flush_commit_o, // request a pipeline flush output logic sfence_vma_o // flush TLBs and pipeline ); assign waddr_o[0] = commit_instr_i[0].rd[4:0]; assign waddr_o[1] = commit_instr_i[1].rd[4:0]; - assign pc_o = commit_instr_i[0].pc; + assign pc_o = 
commit_instr_i[0].pc; + logic instr_0_is_amo; + assign instr_0_is_amo = is_amo(commit_instr_i[0].op); // ------------------- // Commit Instruction // ------------------- @@ -64,22 +69,28 @@ module commit_stage #( commit_ack_o[0] = 1'b0; commit_ack_o[1] = 1'b0; + amo_valid_commit_o = 1'b0; + we_o[0] = 1'b0; we_o[1] = 1'b0; commit_lsu_o = 1'b0; commit_csr_o = 1'b0; - wdata_o[0] = commit_instr_i[0].result; + // amos will commit on port 0 + wdata_o[0] = (amo_resp_i.ack) ? amo_resp_i.result : commit_instr_i[0].result; wdata_o[1] = commit_instr_i[1].result; csr_op_o = ADD; // this corresponds to a CSR NOP csr_wdata_o = 64'b0; fence_i_o = 1'b0; fence_o = 1'b0; sfence_vma_o = 1'b0; + flush_commit_o = 1'b0; // we will not commit the instruction if we took an exception // and we do not commit the instruction if we requested a halt // furthermore if the debugger is requesting to debug do not commit this instruction if we are not yet in debug mode + // also check that there is no atomic memory operation committing, right now this is the only operation + // which will take longer than one cycle to commit if (commit_instr_i[0].valid && !halt_i && (!debug_req_i || debug_mode_i)) begin commit_ack_o[0] = 1'b1; @@ -95,7 +106,7 @@ module commit_stage #( // check whether the instruction we retire was a store // do not commit the instruction if we got an exception since the store buffer will be cleared // by the subsequent flush triggered by an exception - if (commit_instr_i[0].fu == STORE) begin + if (commit_instr_i[0].fu == STORE && !instr_0_is_amo) begin // check if the LSU is ready to accept another commit entry (e.g.: a non-speculative store) if (commit_lsu_ready_i) commit_lsu_o = 1'b1; @@ -143,14 +154,34 @@ module commit_stage #( // tell the controller to flush the D$ fence_o = no_st_pending_i; end + // ------------------ + // AMO + // ------------------ + if (instr_0_is_amo && !exception_o.valid) begin + // AMO finished + commit_ack_o[0] = amo_resp_i.ack; + // flush the pipeline 
+ flush_commit_o = amo_resp_i.ack; + amo_valid_commit_o = 1'b1; + we_o[0] = amo_resp_i.ack; + end end + // ----------------- + // Commit Port 2 + // ----------------- // check if the second instruction can be committed as well and the first wasn't a CSR instruction // also if we are in single step mode don't retire the second instruction - if (commit_ack_o[0] && commit_instr_i[1].valid && !halt_i && !(commit_instr_i[0].fu inside {CSR}) && !flush_dcache_i && !single_step_i) begin + if (commit_ack_o[0] && commit_instr_i[1].valid + && !halt_i + && !(commit_instr_i[0].fu inside {CSR}) + && !flush_dcache_i + && !instr_0_is_amo + && !single_step_i) begin // only if the first instruction didn't throw an exception and this instruction won't throw an exception // and the operator is of type ALU, LOAD, CTRL_FLOW, MULT - if (!exception_o.valid && !commit_instr_i[1].ex.valid && (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT})) begin + if (!exception_o.valid && !commit_instr_i[1].ex.valid + && (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT})) begin we_o[1] = 1'b1; commit_ack_o[1] = 1'b1; end @@ -195,14 +226,17 @@ module commit_stage #( // ------------------------ // check for CSR interrupts (e.g.: normal interrupts which get triggered here) // by putting interrupts here we give them precedence over any other exception - if (csr_exception_i.valid && csr_exception_i.cause[63]) begin + // Don't take the interrupt if we are committing an AMO. 
+ if (csr_exception_i.valid && csr_exception_i.cause[63] && !amo_valid_commit_o) begin exception_o = csr_exception_i; exception_o.tval = commit_instr_i[0].ex.tval; end end - // If we halted the processor don't take any exceptions + // Don't take any exceptions iff: + // - If we halted the processor if (halt_i) begin exception_o.valid = 1'b0; end end + endmodule diff --git a/src/controller.sv b/src/controller.sv index e88a7ba28..51916ffbe 100644 --- a/src/controller.sv +++ b/src/controller.sv @@ -36,7 +36,8 @@ module controller ( input logic flush_csr_i, // We got an instruction which altered the CSR, flush the pipeline input logic fence_i_i, // fence.i in input logic fence_i, // fence in - input logic sfence_vma_i // We got an instruction to flush the TLBs and pipeline + input logic sfence_vma_i, // We got an instruction to flush the TLBs and pipeline + input logic flush_commit_i // Flush request from commit stage ); // active fence - high if we are currently flushing the dcache @@ -114,13 +115,12 @@ module controller ( flush_unissued_instr_o = 1'b1; flush_id_o = 1'b1; flush_ex_o = 1'b1; + flush_tlb_o = 1'b1; end - // --------------------------------- - // CSR instruction with side-effect - // --------------------------------- - if (flush_csr_i) begin + // Set PC to commit stage and flush pipeline + if (flush_csr_i || flush_commit_i) begin set_pc_commit_o = 1'b1; flush_if_o = 1'b1; flush_unissued_instr_o = 1'b1; diff --git a/src/csr_regfile.sv b/src/csr_regfile.sv index 4f6c5a8b1..b95100bc5 100644 --- a/src/csr_regfile.sv +++ b/src/csr_regfile.sv @@ -524,6 +524,7 @@ module csr_regfile #( end // we've got a debug request (and we have an instruction which we can associate it to) + // don't interrupt the AMO if (debug_req_i && commit_instr_i[0].valid) begin // save the PC dpc_d = pc_i; diff --git a/src/decoder.sv b/src/decoder.sv index 815d10d1b..efb31c5ad 100644 --- a/src/decoder.sv +++ b/src/decoder.sv @@ -390,18 +390,22 @@ module decoder ( endcase end - `ifdef
ENABLE_ATOMICS riscv::OpcodeAmo: begin // we are going to use the load unit for AMOs - instruction_o.fu = LOAD; - instruction_o.rd[4:0] = instr.stype.imm0; - instruction_o.rs1[4:0] = instr.itype.rs1; + instruction_o.fu = STORE; + instruction_o.rs1[4:0] = instr.atype.rs1; + instruction_o.rs2[4:0] = instr.atype.rs2; + instruction_o.rd[4:0] = instr.atype.rd; + // TODO(zarubaf): Ordering // words if (instr.stype.funct3 == 3'h2) begin unique case (instr.instr[31:27]) 5'h0: instruction_o.op = AMO_ADDW; 5'h1: instruction_o.op = AMO_SWAPW; - 5'h2: instruction_o.op = AMO_LRW; + 5'h2: begin + instruction_o.op = AMO_LRW; + if (instr.atype.rs2 != 0) illegal_instr = 1'b1; + end 5'h3: instruction_o.op = AMO_SCW; 5'h4: instruction_o.op = AMO_XORW; 5'h8: instruction_o.op = AMO_ORW; @@ -417,7 +421,10 @@ module decoder ( unique case (instr.instr[31:27]) 5'h0: instruction_o.op = AMO_ADDD; 5'h1: instruction_o.op = AMO_SWAPD; - 5'h2: instruction_o.op = AMO_LRD; + 5'h2: begin + instruction_o.op = AMO_LRD; + if (instr.atype.rs2 != 0) illegal_instr = 1'b1; + end 5'h3: instruction_o.op = AMO_SCD; 5'h4: instruction_o.op = AMO_XORD; 5'h8: instruction_o.op = AMO_ORD; @@ -432,7 +439,6 @@ module decoder ( illegal_instr = 1'b1; end end - `endif // -------------------------------- // Control Flow Instructions diff --git a/src/ex_stage.sv b/src/ex_stage.sv index 34ed57fa2..ac14f98ca 100644 --- a/src/ex_stage.sv +++ b/src/ex_stage.sv @@ -58,6 +58,7 @@ module ex_stage #( output logic lsu_commit_ready_o, // commit queue is ready to accept another commit request output exception_t lsu_exception_o, output logic no_st_pending_o, + input logic amo_valid_commit_i, // CSR output logic csr_ready_o, input logic csr_valid_i, @@ -91,7 +92,8 @@ module ex_stage #( // interface to dcache input dcache_req_o_t [2:0] dcache_req_ports_i, output dcache_req_i_t [2:0] dcache_req_ports_o, - + output amo_req_t amo_req_o, // request to cache subsytem + input amo_resp_t amo_resp_i, // response from cache subsystem // 
Performance counters output logic itlb_miss_o, output logic dtlb_miss_o @@ -131,8 +133,10 @@ module ex_stage #( lsu lsu_i ( .commit_i ( lsu_commit_i ), .commit_ready_o ( lsu_commit_ready_o ), - .dcache_req_ports_i ( dcache_req_ports_i ), - .dcache_req_ports_o ( dcache_req_ports_o ), + .dcache_req_ports_i, + .dcache_req_ports_o, + .amo_req_o, + .amo_resp_i, .* ); diff --git a/src/frontend/frontend.sv b/src/frontend/frontend.sv index 5e89cbce2..c8877ef91 100644 --- a/src/frontend/frontend.sv +++ b/src/frontend/frontend.sv @@ -355,8 +355,8 @@ module frontend ( // On a pipeline flush start fetching from the next address // of the instruction in the commit stage if (set_pc_commit_i) begin - // we came here from a flush request of a CSR instruction, - // as CSR instructions do not exist in a compressed form + // we came here from a flush request of a CSR instruction or AMO, + // as CSR or AMO instructions do not exist in a compressed form // we can unconditionally do PC + 4 here // TODO(zarubaf) This adder can at least be merged with the one in the csr_regfile stage npc_d = pc_commit_i + 64'h4; diff --git a/src/issue_stage.sv b/src/issue_stage.sv index 5f1ff0e4e..793aedaa5 100644 --- a/src/issue_stage.sv +++ b/src/issue_stage.sv @@ -19,7 +19,7 @@ module issue_stage #( parameter int unsigned NR_ENTRIES = 8, parameter int unsigned NR_WB_PORTS = 4, parameter int unsigned NR_COMMIT_PORTS = 2 - )( +)( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low @@ -98,45 +98,54 @@ module issue_stage #( // 1. 
Re-name // --------------------------------------------------------- re_name i_re_name ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .flush_i ( flush_i ), - .issue_instr_i ( decoded_instr_i ), - .issue_instr_valid_i ( decoded_instr_valid_i ), - .issue_ack_o ( decoded_instr_ack_o ), - .issue_instr_o ( issue_instr_rename_sb ), - .issue_instr_valid_o ( issue_instr_valid_rename_sb ), - .issue_ack_i ( issue_ack_sb_rename ) + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i ( flush_i ), + .flush_unissied_instr_i ( flush_unissued_instr_i ), + .issue_instr_i ( decoded_instr_i ), + .issue_instr_valid_i ( decoded_instr_valid_i ), + .issue_ack_o ( decoded_instr_ack_o ), + .issue_instr_o ( issue_instr_rename_sb ), + .issue_instr_valid_o ( issue_instr_valid_rename_sb ), + .issue_ack_i ( issue_ack_sb_rename ) ); // --------------------------------------------------------- // 2. Manage instructions in a scoreboard // --------------------------------------------------------- - scoreboard #( - .NR_ENTRIES ( NR_ENTRIES ), - .NR_WB_PORTS ( NR_WB_PORTS ) + scoreboard #( + .NR_ENTRIES (NR_ENTRIES ), + .NR_WB_PORTS(NR_WB_PORTS) ) i_scoreboard ( - .unresolved_branch_i ( 1'b0 ), - .rd_clobber_o ( rd_clobber_sb_iro ), - .rs1_i ( rs1_iro_sb ), - .rs1_o ( rs1_sb_iro ), - .rs1_valid_o ( rs1_valid_sb_iro ), - .rs2_i ( rs2_iro_sb ), - .rs2_o ( rs2_sb_iro ), - .rs2_valid_o ( rs2_valid_iro_sb ), + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_unissued_instr_i ( flush_unissued_instr_i ), + .flush_i ( flush_i ), + .unresolved_branch_i ( 1'b0 ), - .decoded_instr_i ( issue_instr_rename_sb ), - .decoded_instr_valid_i ( issue_instr_valid_rename_sb ), - .decoded_instr_ack_o ( issue_ack_sb_rename ), - .issue_instr_o ( issue_instr_sb_iro ), - .issue_instr_valid_o ( issue_instr_valid_sb_iro ), - .issue_ack_i ( issue_ack_iro_sb ), + .rd_clobber_o ( rd_clobber_sb_iro ), + .rs1_i ( rs1_iro_sb ), + .rs1_o ( rs1_sb_iro ), + .rs1_valid_o ( rs1_valid_sb_iro ), + .rs2_i ( rs2_iro_sb ), + .rs2_o ( rs2_sb_iro 
), + .rs2_valid_o ( rs2_valid_iro_sb ), - .resolved_branch_i ( resolved_branch_i ), - .trans_id_i ( trans_id_i ), - .wbdata_i ( wbdata_i ), - .ex_i ( ex_ex_i ), - .* + .commit_instr_o ( commit_instr_o ), + .commit_ack_i ( commit_ack_i ), + + .decoded_instr_i ( issue_instr_rename_sb ), + .decoded_instr_valid_i ( issue_instr_valid_rename_sb ), + .decoded_instr_ack_o ( issue_ack_sb_rename ), + + .issue_instr_o ( issue_instr_sb_iro ), + .issue_instr_valid_o ( issue_instr_valid_sb_iro ), + .issue_ack_i ( issue_ack_iro_sb ), + .resolved_branch_i ( resolved_branch_i ), + .trans_id_i ( trans_id_i ), + .wbdata_i ( wbdata_i ), + .ex_i ( ex_ex_i ), + .wb_valid_i ( wb_valid_i ) ); // --------------------------------------------------------- diff --git a/src/load_unit.sv b/src/load_unit.sv index d4fdd5599..5d299d279 100644 --- a/src/load_unit.sv +++ b/src/load_unit.sv @@ -41,7 +41,9 @@ module load_unit ( input dcache_req_o_t req_port_i, output dcache_req_i_t req_port_o ); - enum logic [2:0] {IDLE, WAIT_GNT, SEND_TAG, WAIT_PAGE_OFFSET, ABORT_TRANSACTION, WAIT_TRANSLATION, WAIT_FLUSH} NS, CS; + enum logic [2:0] { IDLE, WAIT_GNT, SEND_TAG, WAIT_PAGE_OFFSET, + ABORT_TRANSACTION, WAIT_TRANSLATION, WAIT_FLUSH + } state_d, state_q; // in order to decouple the response interface from the request interface we need a // a queue which can hold all outstanding memory requests struct packed { @@ -72,7 +74,7 @@ module load_unit ( // --------------- always_comb begin : load_control // default assignments - NS = CS; + state_d = state_q; load_data_d = load_data_q; translation_req_o = 1'b0; req_port_o.data_req = 1'b0; @@ -83,7 +85,7 @@ module load_unit ( req_port_o.data_size = extract_transfer_size(lsu_ctrl_i.operator); pop_ld_o = 1'b0; - case (CS) + case (state_q) IDLE: begin // we've got a new load request if (valid_i) begin @@ -96,18 +98,18 @@ module load_unit ( req_port_o.data_req = 1'b1; // we got no data grant so wait for the grant before sending the tag if (!req_port_i.data_gnt) begin - 
NS = WAIT_GNT; + state_d = WAIT_GNT; end else begin if (dtlb_hit_i) begin // we got a grant and a hit on the DTLB so we can send the tag in the next cycle - NS = SEND_TAG; + state_d = SEND_TAG; pop_ld_o = 1'b1; end else - NS = ABORT_TRANSACTION; + state_d = ABORT_TRANSACTION; end end else begin // wait for the store buffer to train and the page offset to not match anymore - NS = WAIT_PAGE_OFFSET; + state_d = WAIT_PAGE_OFFSET; end end end @@ -116,7 +118,7 @@ module load_unit ( WAIT_PAGE_OFFSET: begin // we make a new request as soon as the page offset does not match anymore if (!page_offset_matches_i) begin - NS = WAIT_GNT; + state_d = WAIT_GNT; end end @@ -127,14 +129,14 @@ module load_unit ( req_port_o.kill_req = 1'b1; req_port_o.tag_valid = 1'b1; // redo the request by going back to the wait gnt state - NS = WAIT_TRANSLATION; + state_d = WAIT_TRANSLATION; end WAIT_TRANSLATION: begin translation_req_o = 1'b1; // we've got a hit and we can continue with the request process if (dtlb_hit_i) - NS = WAIT_GNT; + state_d = WAIT_GNT; end WAIT_GNT: begin @@ -146,17 +148,17 @@ module load_unit ( if (req_port_i.data_gnt) begin // so we send the tag in the next cycle if (dtlb_hit_i) begin - NS = SEND_TAG; + state_d = SEND_TAG; pop_ld_o = 1'b1; end else // should we not have hit on the TLB abort this transaction an retry later - NS = ABORT_TRANSACTION; + state_d = ABORT_TRANSACTION; end // otherwise we keep waiting on our grant end // we know for sure that the tag we want to send is valid SEND_TAG: begin req_port_o.tag_valid = 1'b1; - NS = IDLE; + state_d = IDLE; // we can make a new request here if we got one if (valid_i) begin // start the translation process even though we do not know if the addresses match @@ -168,19 +170,19 @@ module load_unit ( req_port_o.data_req = 1'b1; // we got no data grant so wait for the grant before sending the tag if (!req_port_i.data_gnt) begin - NS = WAIT_GNT; + state_d = WAIT_GNT; end else begin // we got a grant so we can send the tag in the 
next cycle if (dtlb_hit_i) begin // we got a grant and a hit on the DTLB so we can send the tag in the next cycle - NS = SEND_TAG; + state_d = SEND_TAG; pop_ld_o = 1'b1; end else // we missed on the TLB -> wait for the translation - NS = ABORT_TRANSACTION; + state_d = ABORT_TRANSACTION; end end else begin // wait for the store buffer to train and the page offset to not match anymore - NS = WAIT_PAGE_OFFSET; + state_d = WAIT_PAGE_OFFSET; end end // ---------- @@ -198,7 +200,7 @@ module load_unit ( req_port_o.kill_req = 1'b1; req_port_o.tag_valid = 1'b1; // we've killed the current request so we can go back to idle - NS = IDLE; + state_d = IDLE; end endcase @@ -206,8 +208,8 @@ module load_unit ( // we got an exception if (ex_i.valid && valid_i) begin // the next state will be the idle state - NS = IDLE; - // pop load - but only if we are not getting an rvalid in here - otherwise we will over-wright an incoming transaction + state_d = IDLE; + // pop load - but only if we are not getting an rvalid in here - otherwise we will over-write an incoming transaction if (!req_port_i.data_rvalid) pop_ld_o = 1'b1; end @@ -219,7 +221,7 @@ module load_unit ( // if we just flushed and the queue is not empty or we are getting an rvalid this cycle wait in a extra stage if (flush_i) begin - NS = WAIT_FLUSH; + state_d = WAIT_FLUSH; end end @@ -232,7 +234,7 @@ module load_unit ( // output the queue data directly, the valid signal is set corresponding to the process above trans_id_o = load_data_q.trans_id; // we got an rvalid and are currently not flushing and not aborting the request - if (req_port_i.data_rvalid && CS != WAIT_FLUSH) begin + if (req_port_i.data_rvalid && state_q != WAIT_FLUSH) begin // we killed the request if(!req_port_o.kill_req) valid_o = 1'b1; @@ -249,7 +251,7 @@ module load_unit ( valid_o = 1'b1; trans_id_o = lsu_ctrl_i.trans_id; // if we are waiting for the translation to finish do not give a valid signal yet - end else if (CS == WAIT_TRANSLATION) begin + end else 
if (state_q == WAIT_TRANSLATION) begin valid_o = 1'b0; end @@ -259,53 +261,17 @@ module load_unit ( // latch physical address for the tag cycle (one cycle after applying the index) always_ff @(posedge clk_i or negedge rst_ni) begin if (~rst_ni) begin - CS <= IDLE; + state_q <= IDLE; load_data_q <= '0; end else begin - CS <= NS; + state_q <= state_d; load_data_q <= load_data_d; end end - // --------------- - // AMO Operation - // --------------- - always_comb begin : amo_op_select - req_port_o.amo_op = AMO_NONE; - - if (lsu_ctrl_i.valid) begin - case (lsu_ctrl_i.operator) - AMO_LRW: req_port_o.amo_op = AMO_LR; - AMO_LRD: req_port_o.amo_op = AMO_LR; - AMO_SCW: req_port_o.amo_op = AMO_SC; - AMO_SCD: req_port_o.amo_op = AMO_SC; - AMO_SWAPW: req_port_o.amo_op = AMO_SWAP; - AMO_ADDW: req_port_o.amo_op = AMO_ADD; - AMO_ANDW: req_port_o.amo_op = AMO_AND; - AMO_ORW: req_port_o.amo_op = AMO_OR; - AMO_XORW: req_port_o.amo_op = AMO_XOR; - AMO_MAXW: req_port_o.amo_op = AMO_MAX; - AMO_MAXWU: req_port_o.amo_op = AMO_MAXU; - AMO_MINW: req_port_o.amo_op = AMO_MIN; - AMO_MINWU: req_port_o.amo_op = AMO_MINU; - AMO_SWAPD: req_port_o.amo_op = AMO_SWAP; - AMO_ADDD: req_port_o.amo_op = AMO_ADD; - AMO_ANDD: req_port_o.amo_op = AMO_AND; - AMO_ORD: req_port_o.amo_op = AMO_OR; - AMO_XORD: req_port_o.amo_op = AMO_XOR; - AMO_MAXD: req_port_o.amo_op = AMO_MAX; - AMO_MAXDU: req_port_o.amo_op = AMO_MAXU; - AMO_MIND: req_port_o.amo_op = AMO_MIN; - AMO_MINDU: req_port_o.amo_op = AMO_MINU; - default: req_port_o.amo_op = AMO_NONE; - endcase - end - end - // --------------- // Sign Extend // --------------- - logic [63:0] shifted_data; // realign as needed @@ -362,7 +328,9 @@ module load_unit ( // result mux always_comb begin unique case (load_data_q.operator) - LW, LWU: result_o = {{32{sign_bit}}, shifted_data[31:0]}; + LW, LWU: begin + result_o = {{32{sign_bit}}, shifted_data[31:0]}; + end LH, LHU: result_o = {{48{sign_bit}}, shifted_data[15:0]}; LB, LBU: result_o = {{56{sign_bit}}, 
shifted_data[7:0]}; default: result_o = shifted_data; @@ -380,7 +348,7 @@ module load_unit ( // end always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs - if(~rst_ni) begin + if (~rst_ni) begin idx_q <= 0; signed_q <= 0; fp_sign_q <= 0; diff --git a/src/lsu.sv b/src/lsu.sv index 2250c6b41..ef2d8fbfe 100644 --- a/src/lsu.sv +++ b/src/lsu.sv @@ -15,12 +15,13 @@ import ariane_pkg::*; module lsu #( - parameter int unsigned ASID_WIDTH = 1 + parameter int unsigned ASID_WIDTH = 1 )( input logic clk_i, input logic rst_ni, input logic flush_i, output logic no_st_pending_o, + input logic amo_valid_commit_i, input fu_t fu_i, input fu_op operator_i, @@ -57,7 +58,9 @@ module lsu #( // interface to dcache input dcache_req_o_t [2:0] dcache_req_ports_i, output dcache_req_i_t [2:0] dcache_req_ports_o, - + // AMO interface + output amo_req_t amo_req_o, + input amo_resp_t amo_resp_i, output exception_t lsu_exception_o // to WB, signal exception status LD/ST exception ); @@ -73,6 +76,7 @@ module lsu #( logic pop_st; logic pop_ld; + // ------------------------------ // Address Generation Unit (AGU) // ------------------------------ // virtual address as calculated by the AGU in the first cycle @@ -108,28 +112,6 @@ module lsu #( exception_t ld_ex; exception_t st_ex; - // ------------ - // NB Dcache - // ------------ - logic [2:0][11:0] address_index_i; - logic [2:0][43:0] address_tag_i; - logic [2:0][63:0] data_wdata_i; - logic [2:0] data_req_i; - logic [2:0] data_we_i; - logic [2:0][1:0] data_size_i; - - logic [2:0] kill_req_i; - logic [2:0] tag_valid_i; - logic [2:0][7:0] data_be_i; - logic [2:0] data_gnt_o; - logic [2:0] data_rvalid_o; - logic [2:0][63:0] data_rdata_o; - amo_t [2:0] amo_op_i; - - // AMO operations always go through the load unit - assign amo_op_i[0] = AMO_NONE; - assign amo_op_i[2] = AMO_NONE; - // ------------------- // MMU e.g.: TLBs/PTW // ------------------- @@ -147,7 +129,7 @@ module lsu #( .lsu_paddr_o ( mmu_paddr ), .lsu_exception_o ( mmu_exception ), 
.lsu_dtlb_hit_o ( dtlb_hit ), // send in the same cycle as the request - // connecting PTW to D$ IF (aka mem arbiter + // connecting PTW to D$ IF .req_port_i ( dcache_req_ports_i [0] ), .req_port_o ( dcache_req_ports_o [0] ), // icache address translation requests @@ -159,9 +141,17 @@ module lsu #( // Store Unit // ------------------ store_unit i_store_unit ( + .clk_i, + .rst_ni, + .flush_i, + .no_st_pending_o, + .valid_i ( st_valid_i ), .lsu_ctrl_i ( lsu_ctrl ), .pop_st_o ( pop_st ), + .commit_i, + .commit_ready_o, + .amo_valid_commit_i, .valid_o ( st_valid ), .trans_id_o ( st_trans_id ), @@ -176,10 +166,12 @@ module lsu #( // Load Unit .page_offset_i ( page_offset ), .page_offset_matches_o ( page_offset_matches ), + // AMOs + .amo_req_o, + .amo_resp_i, // to memory arbiter .req_port_i ( dcache_req_ports_i [2] ), - .req_port_o ( dcache_req_ports_o [2] ), - .* + .req_port_o ( dcache_req_ports_o [2] ) ); // ------------------ @@ -264,49 +256,10 @@ module lsu #( // --------------- // Byte Enable // --------------- - always_comb begin : byte_enable - be_i = 8'b0; - // we can generate the byte enable from the virtual address since the last - // 12 bit are the same anyway - // and we can always generate the byte enable from the address at hand - case (operator_i) - LD, SD: // double word - be_i = 8'b1111_1111; - LW, LWU, SW: // word - case (vaddr_i[2:0]) - 3'b000: be_i = 8'b0000_1111; - 3'b001: be_i = 8'b0001_1110; - 3'b010: be_i = 8'b0011_1100; - 3'b011: be_i = 8'b0111_1000; - 3'b100: be_i = 8'b1111_0000; - default:; - endcase - LH, LHU, SH: // half word - case (vaddr_i[2:0]) - 3'b000: be_i = 8'b0000_0011; - 3'b001: be_i = 8'b0000_0110; - 3'b010: be_i = 8'b0000_1100; - 3'b011: be_i = 8'b0001_1000; - 3'b100: be_i = 8'b0011_0000; - 3'b101: be_i = 8'b0110_0000; - 3'b110: be_i = 8'b1100_0000; - default:; - endcase - LB, LBU, SB: // byte - case (vaddr_i[2:0]) - 3'b000: be_i = 8'b0000_0001; - 3'b001: be_i = 8'b0000_0010; - 3'b010: be_i = 8'b0000_0100; - 3'b011: be_i = 
8'b0000_1000; - 3'b100: be_i = 8'b0001_0000; - 3'b101: be_i = 8'b0010_0000; - 3'b110: be_i = 8'b0100_0000; - 3'b111: be_i = 8'b1000_0000; - endcase - default: - be_i = 8'b0; - endcase - end + // we can generate the byte enable from the virtual address since the last + // 12 bit are the same anyway + // and we can always generate the byte enable from the address at hand + assign be_i = be_gen(vaddr_i[2:0], extract_transfer_size(operator_i)); // ------------------------ // Misaligned Exception @@ -324,23 +277,33 @@ module lsu #( data_misaligned = 1'b0; - if(lsu_ctrl.valid) begin + if (lsu_ctrl.valid) begin case (lsu_ctrl.operator) // double word - LD, SD: begin - if (lsu_ctrl.vaddr[2:0] != 3'b000) + LD, SD, + AMO_LRD, AMO_SCD, + AMO_SWAPD, AMO_ADDD, AMO_ANDD, AMO_ORD, + AMO_XORD, AMO_MAXD, AMO_MAXDU, AMO_MIND, + AMO_MINDU: begin + if (lsu_ctrl.vaddr[2:0] != 3'b000) begin data_misaligned = 1'b1; + end end // word - LW, LWU, SW: begin - if (lsu_ctrl.vaddr[1:0] != 2'b00) + LW, LWU, SW, + AMO_LRW, AMO_SCW, + AMO_SWAPW, AMO_ADDW, AMO_ANDW, AMO_ORW, + AMO_XORW, AMO_MAXW, AMO_MAXWU, AMO_MINW, + AMO_MINWU: begin + if (lsu_ctrl.vaddr[1:0] != 2'b00) begin data_misaligned = 1'b1; + end end - // half word LH, LHU, SH: begin - if (lsu_ctrl.vaddr[0] != 1'b0) + if (lsu_ctrl.vaddr[0] != 1'b0) begin data_misaligned = 1'b1; + end end // byte -> is always aligned default:; @@ -403,15 +366,6 @@ module lsu #( .ready_o ( lsu_ready_o ), .* ); - // ------------ - // Assertions - // ------------ - - `ifndef SYNTHESIS - `ifndef VERILATOR - // TODO - `endif - `endif endmodule // ------------------ @@ -504,7 +458,7 @@ module lsu_bypass ( // registers always_ff @(posedge clk_i or negedge rst_ni) begin - if(~rst_ni) begin + if (~rst_ni) begin mem_q <= '{default: 0}; status_cnt_q <= '0; write_pointer_q <= '0; diff --git a/src/ptw.sv b/src/ptw.sv index d552f4092..89a04e2ca 100644 --- a/src/ptw.sv +++ b/src/ptw.sv @@ -31,7 +31,7 @@ module ptw #( input logic en_ld_st_translation_i, // enable virtual 
memory translation for load/stores input logic lsu_is_store_i, // this translation was triggered by a store - // PTW memory interface + // PTW memory interface input dcache_req_o_t req_port_i, output dcache_req_i_t req_port_o, @@ -61,8 +61,6 @@ module ptw #( ); - assign req_port_o.amo_op = AMO_NONE; - // input registers logic data_rvalid_q; logic [63:0] data_rdata_q; @@ -165,10 +163,10 @@ module ptw #( ptw_pptr_n = ptw_pptr_q; state_d = state_q; global_mapping_n = global_mapping_q; - // input registers + // input registers tlb_update_asid_n = tlb_update_asid_q; vaddr_n = vaddr_q; - + itlb_miss_o = 1'b0; dtlb_miss_o = 1'b0; diff --git a/src/re_name.sv b/src/re_name.sv index f61dfa8ef..93def90f9 100644 --- a/src/re_name.sv +++ b/src/re_name.sv @@ -24,6 +24,7 @@ module re_name ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low input logic flush_i, // Flush renaming state + input logic flush_unissied_instr_i, // from/to scoreboard input scoreboard_entry_t issue_instr_i, input logic issue_instr_valid_i, @@ -52,7 +53,7 @@ module re_name ( re_name_table_gpr_n = re_name_table_gpr_q; issue_instr_o = issue_instr_i; - if (issue_ack_i) begin + if (issue_ack_i && !flush_unissied_instr_i) begin // if we acknowledge the instruction tic the corresponding destination register re_name_table_gpr_n[issue_instr_i.rd] = re_name_table_gpr_q[issue_instr_i.rd] ^ 1'b1; end diff --git a/src/store_buffer.sv b/src/store_buffer.sv index 26f4005cb..6b07536f8 100644 --- a/src/store_buffer.sv +++ b/src/store_buffer.sv @@ -40,7 +40,7 @@ module store_buffer ( // D$ interface input dcache_req_o_t req_port_i, - output dcache_req_i_t req_port_o + output dcache_req_i_t req_port_o ); // depth of store-buffers localparam int unsigned DEPTH_SPEC = 4; @@ -71,10 +71,6 @@ module store_buffer ( logic [$clog2(DEPTH_COMMIT)-1:0] commit_read_pointer_n, commit_read_pointer_q; logic [$clog2(DEPTH_COMMIT)-1:0] commit_write_pointer_n, commit_write_pointer_q; - - - assign 
req_port_o.amo_op = AMO_NONE; - // ---------------------------------------- // Speculative Queue - Core Interface // ---------------------------------------- @@ -257,6 +253,10 @@ module store_buffer ( @(posedge clk_i) rst_ni && (speculative_status_cnt_q == DEPTH_SPEC) |-> !valid_i) else $error ("[Speculative Queue] You are trying to push new data although the buffer is not ready"); + speculative_buffer_underflow: assert property ( + @(posedge clk_i) rst_ni && (speculative_status_cnt_q == 0) |-> !commit_i) + else $error ("[Speculative Queue] You are committing although there are no stores to commit"); + commit_buffer_overflow: assert property ( @(posedge clk_i) rst_ni && (commit_status_cnt_q == DEPTH_SPEC) |-> !commit_i) else $error("[Commit Queue] You are trying to commit a store although the buffer is full"); diff --git a/src/store_unit.sv b/src/store_unit.sv index a7a3ea590..7c2c8a363 100644 --- a/src/store_unit.sv +++ b/src/store_unit.sv @@ -10,7 +10,7 @@ // // Author: Florian Zaruba, ETH Zurich // Date: 22.05.2017 -// Description: Store Unit, takes care of all store requests +// Description: Store Unit, takes care of all store requests and atomic memory operations (AMOs) import ariane_pkg::*; @@ -25,7 +25,7 @@ module store_unit ( output logic pop_st_o, input logic commit_i, output logic commit_ready_o, - + input logic amo_valid_commit_i, // store unit output port output logic valid_o, output logic [TRANS_ID_BITS-1:0] trans_id_o, @@ -41,22 +41,33 @@ module store_unit ( input logic [11:0] page_offset_i, output logic page_offset_matches_o, // D$ interface + output amo_req_t amo_req_o, + input amo_resp_t amo_resp_i, input dcache_req_o_t req_port_i, - output dcache_req_i_t req_port_o + output dcache_req_i_t req_port_o ); + // it doesn't matter what we are writing back as stores don't return anything assign result_o = 64'b0; - enum logic [1:0] {IDLE, VALID_STORE, WAIT_TRANSLATION, WAIT_STORE_READY} NS, CS; + enum logic [1:0] { + IDLE, + VALID_STORE, + 
WAIT_TRANSLATION, + WAIT_STORE_READY + } state_d, state_q; // store buffer control signals - logic st_ready; - logic st_valid; - logic st_valid_without_flush; - + logic st_ready; + logic st_valid; + logic st_valid_without_flush; + logic instr_is_amo; + assign instr_is_amo = is_amo(lsu_ctrl_i.operator); // keep the data and the byte enable for the second cycle (after address translation) - logic [63:0] st_data_n, st_data_q; - logic [7:0] st_be_n, st_be_q; - logic [1:0] st_data_size_n, st_data_size_q; + logic [63:0] st_data_n, st_data_q; + logic [7:0] st_be_n, st_be_q; + logic [1:0] st_data_size_n, st_data_size_q; + amo_t amo_op_d, amo_op_q; + logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q; // output assignments @@ -71,25 +82,24 @@ module store_unit ( pop_st_o = 1'b0; ex_o = ex_i; trans_id_n = lsu_ctrl_i.trans_id; - NS = CS; + state_d = state_q; - case (CS) + case (state_q) // we got a valid store IDLE: begin if (valid_i) begin - - NS = VALID_STORE; + state_d = VALID_STORE; translation_req_o = 1'b1; pop_st_o = 1'b1; // check if translation was valid and we have space in the store buffer // otherwise simply stall if (!dtlb_hit_i) begin - NS = WAIT_TRANSLATION; + state_d = WAIT_TRANSLATION; pop_st_o = 1'b0; end if (!st_ready) begin - NS = WAIT_STORE_READY; + state_d = WAIT_STORE_READY; pop_st_o = 1'b0; end end @@ -103,25 +113,25 @@ module store_unit ( st_valid_without_flush = 1'b1; - // we have another request - if (valid_i) begin + // we have another request and its not an AMO (the AMO buffer only has depth 1) + if (valid_i && !instr_is_amo) begin translation_req_o = 1'b1; - NS = VALID_STORE; - pop_st_o = 1'b1; + state_d = VALID_STORE; + pop_st_o = 1'b1; if (!dtlb_hit_i) begin - NS = WAIT_TRANSLATION; + state_d = WAIT_TRANSLATION; pop_st_o = 1'b0; end if (!st_ready) begin + state_d = WAIT_STORE_READY; pop_st_o = 1'b0; - NS = WAIT_STORE_READY; end // if we do not have another request go back to idle end else begin - NS = IDLE; + state_d = IDLE; end end @@ -131,7 
+141,7 @@ module store_unit ( translation_req_o = 1'b1; if (st_ready && dtlb_hit_i) begin - NS = IDLE; + state_d = IDLE; end end @@ -142,7 +152,7 @@ module store_unit ( translation_req_o = 1'b1; if (dtlb_hit_i) begin - NS = IDLE; + state_d = IDLE; end end endcase @@ -151,16 +161,16 @@ module store_unit ( // Access Exception // ----------------- // we got an address translation exception (access rights, misaligned or page fault) - if (ex_i.valid && (CS != IDLE)) begin + if (ex_i.valid && (state_q != IDLE)) begin // the only difference is that we do not want to store this request pop_st_o = 1'b1; st_valid = 1'b0; - NS = IDLE; + state_d = IDLE; valid_o = 1'b1; end if (flush_i) - NS = IDLE; + state_d = IDLE; end // ----------- @@ -168,57 +178,99 @@ module store_unit ( // ----------- // re-align the write data to comply with the address offset always_comb begin - st_be_n = lsu_ctrl_i.be; - st_data_n = lsu_ctrl_i.data; + st_be_n = lsu_ctrl_i.be; + // don't shift the data if we are going to perform an AMO as we still need to operate on this data + st_data_n = instr_is_amo ? 
lsu_ctrl_i.data + : data_align(lsu_ctrl_i.vaddr[2:0], lsu_ctrl_i.data); st_data_size_n = extract_transfer_size(lsu_ctrl_i.operator); - - case (lsu_ctrl_i.vaddr[2:0]) - 3'b000: st_data_n = lsu_ctrl_i.data; - 3'b001: st_data_n = {lsu_ctrl_i.data[55:0], lsu_ctrl_i.data[63:56]}; - 3'b010: st_data_n = {lsu_ctrl_i.data[47:0], lsu_ctrl_i.data[63:48]}; - 3'b011: st_data_n = {lsu_ctrl_i.data[39:0], lsu_ctrl_i.data[63:40]}; - 3'b100: st_data_n = {lsu_ctrl_i.data[31:0], lsu_ctrl_i.data[63:32]}; - 3'b101: st_data_n = {lsu_ctrl_i.data[23:0], lsu_ctrl_i.data[63:24]}; - 3'b110: st_data_n = {lsu_ctrl_i.data[15:0], lsu_ctrl_i.data[63:16]}; - 3'b111: st_data_n = {lsu_ctrl_i.data[7:0], lsu_ctrl_i.data[63:8]}; + // save AMO op for next cycle + case (lsu_ctrl_i.operator) + AMO_LRW, AMO_LRD: amo_op_d = AMO_LR; + AMO_SCW, AMO_SCD: amo_op_d = AMO_SC; + AMO_SWAPW, AMO_SWAPD: amo_op_d = AMO_SWAP; + AMO_ADDW, AMO_ADDD: amo_op_d = AMO_ADD; + AMO_ANDW, AMO_ANDD: amo_op_d = AMO_AND; + AMO_ORW, AMO_ORD: amo_op_d = AMO_OR; + AMO_XORW, AMO_XORD: amo_op_d = AMO_XOR; + AMO_MAXW, AMO_MAXD: amo_op_d = AMO_MAX; + AMO_MAXWU, AMO_MAXDU: amo_op_d = AMO_MAXU; + AMO_MINW, AMO_MIND: amo_op_d = AMO_MIN; + AMO_MINWU, AMO_MINDU: amo_op_d = AMO_MINU; + default: amo_op_d = AMO_NONE; endcase end + + logic store_buffer_valid, amo_buffer_valid; + logic store_buffer_ready, amo_buffer_ready; + + // multiplex between store unit and amo buffer + assign store_buffer_valid = st_valid & (amo_op_q == AMO_NONE); + assign amo_buffer_valid = st_valid & (amo_op_q != AMO_NONE); + + assign st_ready = store_buffer_ready & amo_buffer_ready; + // --------------- // Store Queue // --------------- store_buffer store_buffer_i ( - // store queue write port - .valid_i ( st_valid ), - .valid_without_flush_i ( st_valid_without_flush ), // the flush signal can be critical and we need this valid - // signal to check whether the page_offset matches or not, functionaly it doesn't - // make a difference whether we use the correct valid signal 
or not as we are flushing the whole pipeline anyway + .clk_i, + .rst_ni, + .flush_i, + .no_st_pending_o, + .page_offset_i, + .page_offset_matches_o, + .commit_i, + .commit_ready_o, + .ready_o ( store_buffer_ready ), + .valid_i ( store_buffer_valid ), + // the flush signal can be critical and we need this valid + // signal to check whether the page_offset matches or not, + // functionaly it doesn't make a difference whether we use + // the correct valid signal or not as we are flushing + // the whole pipeline anyway + .valid_without_flush_i ( st_valid_without_flush ), + .paddr_i, .data_i ( st_data_q ), .be_i ( st_be_q ), .data_size_i ( st_data_size_q ), - // store buffer out - .ready_o ( st_ready ), - .req_port_i ( req_port_i ), - .req_port_o ( req_port_o ), - - .* + .req_port_o ( req_port_o ) ); + + amo_buffer i_amo_buffer ( + .clk_i, + .rst_ni, + .flush_i, + .valid_i ( amo_buffer_valid ), + .ready_o ( amo_buffer_ready ), + .paddr_i ( paddr_i ), + .amo_op_i ( amo_op_q ), + .data_i ( st_data_q ), + .data_size_i ( st_data_size_q ), + .amo_req_o ( amo_req_o ), + .amo_resp_i ( amo_resp_i ), + .amo_valid_commit_i ( amo_valid_commit_i ), + .no_st_pending_i ( no_st_pending_o ) + ); + // --------------- // Registers // --------------- always_ff @(posedge clk_i or negedge rst_ni) begin - if(~rst_ni) begin - CS <= IDLE; + if (~rst_ni) begin + state_q <= IDLE; st_be_q <= '0; st_data_q <= '0; st_data_size_q <= '0; trans_id_q <= '0; + amo_op_q <= AMO_NONE; end else begin - CS <= NS; + state_q <= state_d; st_be_q <= st_be_n; st_data_q <= st_data_n; trans_id_q <= trans_id_n; st_data_size_q <= st_data_size_n; + amo_op_q <= amo_op_d; end end diff --git a/src/util/instruction_trace_item.svh b/src/util/instruction_trace_item.svh index 1a31d900c..144f275dd 100644 --- a/src/util/instruction_trace_item.svh +++ b/src/util/instruction_trace_item.svh @@ -203,6 +203,7 @@ class instruction_trace_item; // loads and stores INSTR_LOAD: s = this.printLoadInstr(); INSTR_STORE: s = 
this.printStoreInstr(); + INSTR_AMO: s = this.printAMOInstr(); default: s = this.printMnemonic("INVALID"); endcase @@ -226,11 +227,11 @@ class instruction_trace_item; s = $sformatf("%s %-4s:%16x", s, regAddrToStr(result_regs[i]), this.result); end - foreach (read_regs[i]) begin if (read_regs[i] != 0) s = $sformatf("%s %-4s:%16x", s, regAddrToStr(read_regs[i]), reg_file[read_regs[i]]); end + casex (instr) // check of the instrction was a load or store INSTR_STORE: begin @@ -363,29 +364,27 @@ class instruction_trace_item; function string printLoadInstr(); string mnemonic; + case (instr[14:12]) + 3'b000: mnemonic = "lb"; + 3'b001: mnemonic = "lh"; + 3'b010: mnemonic = "lw"; + 3'b100: mnemonic = "lbu"; + 3'b101: mnemonic = "lhu"; + 3'b110: mnemonic = "lwu"; + 3'b011: mnemonic = "ld"; + default: return printMnemonic("INVALID"); + endcase - case (instr[14:12]) - 3'b000: mnemonic = "lb"; - 3'b001: mnemonic = "lh"; - 3'b010: mnemonic = "lw"; - 3'b100: mnemonic = "lbu"; - 3'b101: mnemonic = "lhu"; - 3'b110: mnemonic = "lwu"; - 3'b011: mnemonic = "ld"; - default: return printMnemonic("INVALID"); - endcase + result_regs.push_back(sbe.rd); + read_regs.push_back(sbe.rs1); + // save the immediate for calculating the virtual address + this.imm = sbe.result; - result_regs.push_back(sbe.rd); - read_regs.push_back(sbe.rs1); - // save the immediate for calculating the virtual address - this.imm = sbe.result; - - return $sformatf("%-16s %s, %0d(%s)", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result), regAddrToStr(sbe.rs1)); + return $sformatf("%-16s %s, %0d(%s)", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result), regAddrToStr(sbe.rs1)); endfunction function string printStoreInstr(); - string mnemonic; - + string mnemonic; case (instr[14:12]) 3'b000: mnemonic = "sb"; 3'b001: mnemonic = "sh"; @@ -402,6 +401,51 @@ class instruction_trace_item; return $sformatf("%-16s %s, %0d(%s)", mnemonic, regAddrToStr(sbe.rs2), $signed(sbe.result), regAddrToStr(sbe.rs1)); endfunction // printSInstr 
+ function string printAMOInstr(); /* Builds the trace string for an atomic instruction. instr[14:12] selects the operand width (3'h2: word, 3'h3: double) and instr[31:27] selects the operation; every other encoding returns printMnemonic("INVALID"). Side effects: pushes sbe.rd onto result_regs, pushes sbe.rs2 and sbe.rs1 onto read_regs, and sets this.imm to 0. */ + string mnemonic; + // words + if (instr[14:12] == 3'h2) begin + case (instr[31:27]) + 5'h0: mnemonic = "amoadd.w"; + 5'h1: mnemonic = "amoswap.w"; + 5'h2: mnemonic = "lr.w"; + 5'h3: mnemonic = "sc.w"; + 5'h4: mnemonic = "amoxor.w"; + 5'h8: mnemonic = "amoor.w"; + 5'hC: mnemonic = "amoand.w"; + 5'h10: mnemonic = "amomin.w"; + 5'h14: mnemonic = "amomax.w"; + 5'h18: mnemonic = "amominu.w"; + 5'h1C: mnemonic = "amomaxu.w"; /* 5'h1C is the unsigned maximum; 5'h14 above is the signed one */ + default: return printMnemonic("INVALID"); + endcase + // doubles + end else if (instr[14:12] == 3'h3) begin + case (instr[31:27]) + 5'h0: mnemonic = "amoadd.d"; + 5'h1: mnemonic = "amoswap.d"; + 5'h2: mnemonic = "lr.d"; + 5'h3: mnemonic = "sc.d"; + 5'h4: mnemonic = "amoxor.d"; + 5'h8: mnemonic = "amoor.d"; + 5'hC: mnemonic = "amoand.d"; + 5'h10: mnemonic = "amomin.d"; + 5'h14: mnemonic = "amomax.d"; + 5'h18: mnemonic = "amominu.d"; + 5'h1C: mnemonic = "amomaxu.d"; /* 5'h1C is the unsigned maximum; 5'h14 above is the signed one */ + default: return printMnemonic("INVALID"); + endcase + end else return printMnemonic("INVALID"); + + result_regs.push_back(sbe.rd); + read_regs.push_back(sbe.rs2); + read_regs.push_back(sbe.rs1); + // no immediate is printed for these instructions, so the saved immediate is zero + this.imm = 0; + + return $sformatf("%-16s %s, %s,(%s)", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs2), regAddrToStr(sbe.rs1)); + endfunction + function string printMulInstr(logic is_op32); string s = ""; diff --git a/src/util/instruction_tracer.svh b/src/util/instruction_tracer.svh index 0c66770be..2b96043c4 100644 --- a/src/util/instruction_tracer.svh +++ b/src/util/instruction_tracer.svh @@ -29,7 +29,7 @@ class instruction_tracer; logic [63:0] reg_file [32]; // 64 bit clock tick count longint unsigned clk_ticks; - int f; + int f, commit_log; // address mapping // contains mappings of the form vaddr <-> paddr // should it print the instructions to the console @@ -47,11 +47,13 @@ class instruction_tracer; endfunction : new function void create_file(logic [5:0] cluster_id, logic [3:0] 
core_id); - string fn; + string fn, fn_commit_log; $sformat(fn, "trace_core_%h_%h.log", cluster_id, core_id); + $sformat(fn_commit_log, "trace_core_%h_%h_commit.log", cluster_id, core_id); /* commit log file name, built from the same cluster and core ids as the trace file name */ $display("[TRACER] Output filename is: %s", fn); this.f = $fopen(fn,"w"); + if (ENABLE_SPIKE_COMMIT_LOG) this.commit_log = $fopen(fn_commit_log, "w"); /* the commit log is opened only when ENABLE_SPIKE_COMMIT_LOG is set */ endfunction : create_file task trace(); @@ -125,8 +127,9 @@ class instruction_tracer; // as the most recent version of this register will be there. if (tracer_if.pck.we[i]) begin printInstr(issue_sbe, issue_commit_instruction, tracer_if.pck.wdata[i], address_mapping, tracer_if.pck.priv_lvl, tracer_if.pck.debug_mode, bp_instruction); - end else + end else begin printInstr(issue_sbe, issue_commit_instruction, reg_file[commit_instruction.rd], address_mapping, tracer_if.pck.priv_lvl, tracer_if.pck.debug_mode, bp_instruction); + end end end // -------------- @@ -140,10 +143,11 @@ class instruction_tracer; // Commit Registers // ---------------------- // update shadow reg file here - for (int i = 0; i < 2; i++) + for (int i = 0; i < 2; i++) begin if (tracer_if.pck.we[i] && tracer_if.pck.waddr[i] != 5'b0) begin reg_file[tracer_if.pck.waddr[i]] = tracer_if.pck.wdata[i]; end + end // -------------- // Flush Signals @@ -181,6 +185,9 @@ class instruction_tracer; instruction_trace_item iti = new ($time, clk_ticks, sbe, instr, this.reg_file, result, paddr, priv_lvl, debug_mode, bp); // print instruction to console string print_instr = iti.printInstr(); + if (ENABLE_SPIKE_COMMIT_LOG && !debug_mode) begin /* no commit log entry is written while debug_mode is set */ + $fwrite(this.commit_log, riscv::spikeCommitLog(sbe.pc, priv_lvl, instr, sbe.rd, result)); + end uvm_report_info( "Tracer", print_instr, UVM_HIGH); $fwrite(this.f, {print_instr, "\n"}); endfunction @@ -193,8 +200,8 @@ class instruction_tracer; endfunction function void close(); - if (f) - $fclose(this.f); + if (f) $fclose(this.f); + if (ENABLE_SPIKE_COMMIT_LOG && this.commit_log) $fclose(this.commit_log); /* closed only when the commit log is enabled and its handle is non-zero */ endfunction endclass : 
instruction_tracer diff --git a/src/util/instruction_tracer_defines.svh b/src/util/instruction_tracer_defines.svh index e79d34e85..4b9756e45 100644 --- a/src/util/instruction_tracer_defines.svh +++ b/src/util/instruction_tracer_defines.svh @@ -101,6 +101,9 @@ parameter INSTR_DIVU = { 7'b0000001, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp } parameter INSTR_REM = { 7'b0000001, 10'b?, 3'b110, 5'b?, riscv::OpcodeOp }; parameter INSTR_REMU = { 7'b0000001, 10'b?, 3'b111, 5'b?, riscv::OpcodeOp }; +// A (atomics): matches any instruction whose opcode field is riscv::OpcodeAmo, the upper 25 bits are wildcards +parameter INSTR_AMO = {25'b?, riscv::OpcodeAmo }; + // Load/Stores parameter INSTR_LOAD = {25'b?, riscv::OpcodeLoad }; parameter INSTR_STORE = {25'b?, riscv::OpcodeStore }; diff --git a/src/util/instruction_tracer_if.sv b/src/util/instruction_tracer_if.sv index 01f6ff07a..86e8007e2 100644 --- a/src/util/instruction_tracer_if.sv +++ b/src/util/instruction_tracer_if.sv @@ -36,7 +36,6 @@ interface instruction_tracer_if ( // commit stage scoreboard_entry_t [1:0] commit_instr; // commit instruction logic [1:0] commit_ack; - // address translation // stores logic st_valid; diff --git a/tb/ariane_testharness.sv b/tb/ariane_testharness.sv index d8fd5698a..3b451ca10 100644 --- a/tb/ariane_testharness.sv +++ b/tb/ariane_testharness.sv @@ -14,17 +14,17 @@ // Instantiates an AXI-Bus and memories module ariane_testharness #( - parameter logic [63:0] CACHE_START_ADDR = 64'h8000_0000, // address on which to decide whether the request is cache-able or not - parameter int unsigned AXI_ID_WIDTH = 10, - parameter int unsigned AXI_USER_WIDTH = 1, - parameter int unsigned AXI_ADDRESS_WIDTH = 64, - parameter int unsigned AXI_DATA_WIDTH = 64, - parameter int unsigned NUM_WORDS = 2**24 // memory size - )( - input logic clk_i, - input logic rst_ni, - output logic [31:0] exit_o - ); + parameter logic [63:0] CACHE_START_ADDR = 64'h8000_0000, // address on which to decide whether the request is cache-able or not + parameter int unsigned AXI_ID_WIDTH = 10, + parameter int unsigned AXI_USER_WIDTH = 1, + 
parameter int unsigned AXI_ADDRESS_WIDTH = 64, + parameter int unsigned AXI_DATA_WIDTH = 64, + parameter int unsigned NUM_WORDS = 2**24 // memory size +)( + input logic clk_i, + input logic rst_ni, + output logic [31:0] exit_o +); // disable test-enable logic test_en; @@ -64,6 +64,9 @@ module ariane_testharness #( logic dmi_resp_ready; logic dmi_resp_valid; + logic rtc_i; + assign rtc_i = 1'b0; + assign test_en = 1'b0; assign ndmreset_n = ~ndmreset ; @@ -153,7 +156,7 @@ module ariane_testharness #( .debug_req_valid ( dmi_req_valid ), .debug_req_ready ( debug_req_ready ), .debug_req_bits_addr ( dmi_req.addr ), - .debug_req_bits_op ( debug_req_bits_op ), + .debug_req_bits_op ( debug_req_bits_op ), .debug_req_bits_data ( dmi_req.data ), .debug_resp_valid ( dmi_resp_valid ), .debug_resp_ready ( dmi_resp_ready ), @@ -298,7 +301,7 @@ module ariane_testharness #( .clk_i ( clk_i ), .rst_ni ( rst_ni ), .slave ( master[1] ), - .rtc_i ( 1'b0 ), + .rtc_i ( rtc_i ), .timer_irq_o ( timer_irq ), .ipi_o ( ipi ) );