Finished Cache Integration

This commit is contained in:
felsabbagh3 2019-10-22 06:02:08 -04:00
parent b7af8c3f34
commit 9d8273afe4
21 changed files with 2317 additions and 16 deletions

View file

@ -3,7 +3,7 @@ all: RUNFILE
# /rf2_256x128_wm1/
BaseMEM=../models/memory/cln28hpm
INCLUDE=-I. -Ishared_memory -I$(BaseMEM)/rf2_128x128_wm1/ -I$(BaseMEM)/rf2_256x128_wm1/ -I$(BaseMEM)/rf2_256x19_wm1/ -I$(BaseMEM)/rf2_32x128_wm1/ -Iinterfaces/ -Ipipe_regs/ -Isimulate
INCLUDE=-I. -Ishared_memory -Icache -I$(BaseMEM)/rf2_128x128_wm1/ -I$(BaseMEM)/rf2_256x128_wm1/ -I$(BaseMEM)/rf2_256x19_wm0/ -I$(BaseMEM)/rf2_32x128_wm1/ -Iinterfaces/ -Ipipe_regs/ -Isimulate
FILE=Vortex.v

View file

@ -10,6 +10,11 @@
// `define ONLY
`define NUMBER_BANKS 8
`define NUM_WORDS_PER_BLOCK 4
`define NUM_BARRIERS 4
`define R_INST 7'd51

View file

@ -3,7 +3,9 @@
module VX_dmem_controller (
input wire clk,
input wire reset,
// MEM-RAM
VX_dram_req_rsp_inter VX_dram_req_rsp,
// MEM-Processor
VX_dcache_request_inter VX_dcache_req,
VX_dcache_response_inter VX_dcache_rsp
@ -14,7 +16,10 @@ module VX_dmem_controller (
wire[`NT_M1:0] sm_driver_in_valid = VX_dcache_req.out_cache_driver_in_valid & {`NT{to_shm}};
wire[`NT_M1:0] cache_driver_in_valid = VX_dcache_req.out_cache_driver_in_valid & {`NT{~to_shm}};
// Cache don't understand
wire initial_request = (|cache_driver_in_valid);
wire read_or_write = (VX_dcache_req.out_cache_driver_in_mem_write != `NO_MEM_WRITE);
@ -25,8 +30,10 @@ module VX_dmem_controller (
wire[`NT_M1:0][31:0] cache_driver_out_data;
wire[`NT_M1:0][31:0] sm_driver_out_data;
wire[`NT_M1:0] cache_driver_out_valid; // Not used for now
wire delay;
wire sm_delay;
wire cache_delay;
VX_shared_memory #(.NB(7), .BITS_PER_BANK(3)) shared_memory (
@ -37,15 +44,34 @@ module VX_dmem_controller (
.mem_read (cache_driver_in_mem_read),
.mem_write (cache_driver_in_mem_write),
.out_valid (cache_driver_out_valid),
.out_data (cache_driver_out_data),
.stall (delay)
.out_data (sm_driver_out_data),
.stall (sm_delay)
);
VX_d_cache dcache(
.clk (clk),
.rst (reset),
.i_p_valid (cache_driver_in_valid),
.i_p_addr (cache_driver_in_address),
.i_p_initial_request(),
.i_p_writedata (cache_driver_in_data),
.i_p_read_or_write (read_or_write),
.o_p_readdata (cache_driver_out_data),
.o_p_readdata_valid (),
.o_p_waitrequest (cache_delay),
.o_m_addr (VX_dram_req_rsp.o_m_addr),
.o_m_valid (VX_dram_req_rsp.o_m_valid),
.o_m_writedata (VX_dram_req_rsp.o_m_writedata),
.o_m_read_or_write (VX_dram_req_rsp.o_m_read_or_write),
.i_m_readdata (VX_dram_req_rsp.i_m_readdata),
.i_m_ready (VX_dram_req_rsp.i_m_ready)
);
assign VX_dcache_rsp.in_cache_driver_out_data = cache_driver_out_data;
assign VX_dcache_rsp.delay = delay;
assign VX_dcache_rsp.in_cache_driver_out_data = to_shm ? sm_driver_out_data : cache_driver_out_data;
assign VX_dcache_rsp.delay = sm_delay || cache_delay;
endmodule

View file

@ -1,17 +1,29 @@
`include "VX_define.v"
module Vortex(
input wire clk,
input wire reset,
input wire[31:0] icache_response_instruction,
output wire[31:0] icache_request_pc_address,
// Req
output reg [31:0] o_m_addr,
output reg o_m_valid,
output reg [31:0] o_m_writedata[`NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0],
output reg o_m_read_or_write,
// Rsp
input wire [31:0] i_m_readdata[`NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0],
input wire i_m_ready,
// Remove Start
input wire[31:0] in_cache_driver_out_data[`NT_M1:0],
output wire[31:0] out_cache_driver_in_address[`NT_M1:0],
output wire[2:0] out_cache_driver_in_mem_read,
output wire[2:0] out_cache_driver_in_mem_write,
output wire out_cache_driver_in_valid[`NT_M1:0],
output wire[31:0] out_cache_driver_in_data[`NT_M1:0],
// Remove end
output wire out_ebreak
);
@ -35,6 +47,27 @@ assign out_cache_driver_in_mem_write = `NO_MEM_WRITE;
VX_dcache_response_inter VX_dcache_rsp();
VX_dcache_request_inter VX_dcache_req();
VX_dram_req_rsp_inter VX_dram_req_rsp();
assign o_m_addr = VX_dram_req_rsp.o_m_addr;
assign o_m_valid = VX_dram_req_rsp.o_m_valid;
assign o_m_read_or_write = VX_dram_req_rsp.o_m_read_or_write;
assign VX_dram_req_rsp.i_m_ready = i_m_ready;
genvar curr_bank;
genvar curr_word;
for (curr_bank = 0; curr_bank < `NUMBER_BANKS; curr_bank = curr_bank + 1) begin
for (curr_word = 0; curr_word < `NUM_WORDS_PER_BLOCK; curr_word = curr_word + 1) begin
assign o_m_writedata[curr_bank][curr_word] = VX_dram_req_rsp.o_m_writedata[curr_bank][curr_word];
assign VX_dram_req_rsp.i_m_readdata[curr_bank][curr_word] = i_m_readdata[curr_bank][curr_word];
end
end
// Icache Interface
VX_icache_response_inter icache_response_fe();
@ -108,9 +141,11 @@ VX_back_end vx_back_end(
VX_dmem_controller VX_dmem_controller(
.clk (clk),
.VX_dcache_req(VX_dcache_req),
.VX_dcache_rsp(VX_dcache_rsp)
.clk (clk),
.reset (reset),
.VX_dram_req_rsp(VX_dram_req_rsp),
.VX_dcache_req (VX_dcache_req),
.VX_dcache_rsp (VX_dcache_rsp)
);
// VX_csr_handler vx_csr_handler(
// .clk (clk),

12
rtl/cache/Makefile vendored Normal file
View file

@ -0,0 +1,12 @@
# Builds the Verilator simulation of the data cache test bench.
#
# None of these targets produce a file with the target's name, so they are
# declared phony; otherwise a stray file called "all" or "clean" would
# silently disable the rule.
.PHONY: all VERILATOR RUNFILE clean

all: RUNFILE

# Translate the Verilog design plus the C++ test bench into obj_dir/.
VERILATOR:
	verilator --compiler gcc --Wno-UNOPTFLAT -Wall --trace -cc VX_d_cache_encapsulate.v -Iinterfaces/ --exe d_cache_test_bench.cpp -CFLAGS -std=c++11

# Compile the generated model. $(MAKE) is used instead of a literal "make" so
# that command-line flags and variables propagate to the sub-make.
RUNFILE: VERILATOR
	$(MAKE) -C obj_dir -j -f VVX_d_cache_encapsulate.mk

# -rf keeps "make clean" from failing when obj_dir is missing or empty, and
# also removes any sub-directories; Verilator recreates obj_dir on the next run.
clean:
	rm -rf ./obj_dir

46
rtl/cache/Notes vendored Normal file
View file

@ -0,0 +1,46 @@
Notes
8 kB L1 Data Cache | 16 kB L1 I cache (maybe)
[tag index offset_remaining_block bank wordOffset], use a blocksize of 128 bytes between memory and cache. So each bank gets 16 bytes.
total offset is 8 bits
4 bits new offset, 2 bits block, 2 bits word offset
xxxxxxxIIIIIIIIoobbbyy
9876543210
bbbyyyyy
o = index into block offset
b = bank
y = word offset
I = index into cache
6 index bits (64 indices) with no ways || 16 indices with 4 ways
Rest of the bits are tag bits
blocks / banks = 16 bytes, 8 banks. 128 bytes. 256 indexes (height). width is 16 bytes. 4 words per block (per bank). 17 bit tag
gtkwave ___.vcd
// Splitting it up
// word byte
wire[127:0][3:0] data_from_ram;
// word byte bank
wire[15:0][3:0] bank_data_n[3:0]
integer i;
for (i = 0; i < something; i+=8)
{
bank_data_n[0][i/8] = data_from_ram[i+0]
bank_data_n[1][i/8] = data_from_ram[i+1]
bank_data_n[2][i/8] = data_from_ram[i+2]
bank_data_n[3][i/8] = data_from_ram[i+3]
bank_data_n[4][i/8] = data_from_ram[i+4]
bank_data_n[5][i/8] = data_from_ram[i+5]
bank_data_n[6][i/8] = data_from_ram[i+6]
bank_data_n[7][i/8] = data_from_ram[i+7]
}
With the cache: on a miss, go to memory, grab the whole block, and replace that block in the cache. Then generate a new request and feed it into the cache (this one will hit), and return that result.

143
rtl/cache/VX_Cache_Bank.v vendored Normal file
View file

@ -0,0 +1,143 @@
// To Do: Change way_id_out to an internal register which holds when in between access and finished.
// Also add a bit about whether the "Way ID" is valid / being held or if it is just default
// Also make sure all possible output states are transmitted back to the bank correctly
`define NUM_WORDS_PER_BLOCK 4
`include "VX_define.v"
`include "VX_cache_data.v"
// VX_Cache_Bank
//
// One bank of the data cache. The bank wraps a VX_cache_data storage array
// (tag / valid / dirty / data words per index) and, from the controller
// `state`, derives the per-word write enables, the hit flag and the
// eviction (write-back) information.
//
// Parameters:
//   NUMBER_INDEXES - number of lines held by the bank. It sizes actual_index
//                    and is forwarded to the VX_cache_data instance so both
//                    always agree on the index width.
//                    NOTE(review): the tag width (17) and the 7 zero bits in
//                    eviction_addr are hard-coded, so eviction_addr is exactly
//                    32 bits only for NUMBER_INDEXES = 256.
//
// Inputs:
//   clk               - clock, forwarded to the storage array.
//   state             - controller state; only INITIAL_ACCESS, RE_ACCESS and
//                       UPDATE_CACHE are decoded here.
//   read_or_write     - Read = 0 | Write = 1.
//   valid_in          - qualifies every action of this bank.
//   actual_index      - line index inside the bank.
//   o_tag             - tag of the request; written as the new tag during
//                       UPDATE_CACHE.
//   block_offset      - word select inside the line.
//   writedata         - word written on a processor write.
//   fetched_writedata - full line used as write data during UPDATE_CACHE.
//
// Outputs:
//   readdata      - selected word while accessing, otherwise 0.
//   hit           - access with matching tag on a valid line.
//   eviction_wb   - valid line with a different tag that is dirty.
//   eviction_addr - {stored tag, index, 7'b0} of the line to write back.
//   data_evicted  - current contents of the indexed line.
module VX_Cache_Bank
#(
    parameter NUMBER_INDEXES = 256
)
(
    clk,
    state,
    read_or_write, // Read = 0 | Write = 1
    valid_in,
    actual_index,
    o_tag,
    block_offset,
    writedata,
    fetched_writedata,

    readdata,
    hit,

    eviction_wb,   // Need to evict
    eviction_addr, // Address of the line being evicted
    data_evicted
);

    // Not referenced inside this module; kept so the declared parameter set
    // of the module is unchanged.
    parameter cache_entry   = 14;
    parameter ways_per_set  = 4;
    parameter Number_Blocks = 32;

    // Controller state encoding. These values mirror the localparams of the
    // same names declared in VX_d_cache and must be kept in sync with them.
    localparam CACHE_IDLE             = 0;  // Idle
    localparam SORT_BY_BANK           = 1;  // Determines the bank each thread will access
    localparam INITIAL_ACCESS         = 2;  // Accesses the bank and checks if it is a hit or miss
    localparam INITIAL_PROCESSING     = 3;  // Check to see if there were misses
    localparam CONTINUED_PROCESSING   = 4;  // Keep checking status of banks that need to be written back or fetched
    localparam DIRTY_EVICT_GRAB_BLOCK = 5;  // Grab the full block of dirty data
    localparam DIRTY_EVICT_WB         = 6;  // Write back this block into memory
    localparam FETCH_FROM_MEM         = 7;  // Send a request to mem looking for read data
    localparam FETCH2                 = 8;  // Stall until memory gets back with the data
    localparam UPDATE_CACHE           = 9;  // Update the cache with the data read from mem
    localparam RE_ACCESS              = 10; // Access the cache after the block has been fetched from memory
    localparam RE_ACCESS_PROCESSING   = 11; // Process the result of the re-access

    // Inputs
    input wire       clk;
    input wire [3:0] state;

    // Request
    input wire[$clog2(NUMBER_INDEXES)-1:0]          actual_index;
    input wire[16:0]                                o_tag; // During UPDATE_CACHE, o_tag is the new tag
    input wire[1:0]                                 block_offset;
    input wire[31:0]                                writedata;
    input wire                                      valid_in;
    input wire                                      read_or_write; // Specifies if it is a read or write operation
    input wire[`NUM_WORDS_PER_BLOCK-1:0][31:0]      fetched_writedata;

    // Outputs
    output wire[31:0] readdata;
    output wire       hit;

    // Eviction Data (Notice)
    output wire       eviction_wb;   // Need to evict
    output wire[31:0] eviction_addr; // Address of the line being evicted

    // Eviction Data (Extraction)
    output wire[`NUM_WORDS_PER_BLOCK-1:0][31:0] data_evicted;

    // Values read from the storage array for the indexed line.
    wire[`NUM_WORDS_PER_BLOCK-1:0][31:0] data_use;
    wire[16:0]                           tag_use;
    wire                                 valid_use;
    wire                                 dirty_use;

    wire[16:0] eviction_tag;
    wire       access;
    wire       write_from_mem;
    wire       miss;

    // A miss is only reported for a *valid* line holding a different tag;
    // it is used solely to decide whether a write-back is required.
    assign miss         = (tag_use != o_tag) && valid_use && valid_in;
    assign data_evicted = data_use;
    assign eviction_wb  = miss && (dirty_use != 1'b0);
    assign eviction_tag = tag_use;

    assign access         = (state == INITIAL_ACCESS || state == RE_ACCESS) && valid_in;
    assign write_from_mem = (state == UPDATE_CACHE) && valid_in;

    assign readdata = (access) ? data_use[block_offset] : 32'b0;
    assign hit      = (access && (tag_use == o_tag) && valid_use);

    assign eviction_addr = {eviction_tag, actual_index, 7'b0};

    wire[`NUM_WORDS_PER_BLOCK-1:0]       we;
    wire[`NUM_WORDS_PER_BLOCK-1:0][31:0] data_write;

    genvar g;
    generate
        for (g = 0; g < `NUM_WORDS_PER_BLOCK; g = g + 1) begin : word_write
            wire correct_block = (block_offset == g);
            // NOTE(review): during UPDATE_CACHE this enables every word EXCEPT
            // the one selected by block_offset, and an access write is not
            // qualified by `hit`. Logic is kept exactly as written; confirm
            // against the controller that this is intended.
            assign we[g]         = (read_or_write && ((access && correct_block) || (write_from_mem && !correct_block)) ) ? 1'b1 : 1'b0;
            assign data_write[g] = write_from_mem ? fetched_writedata[g] : writedata;
        end
    endgenerate

    // Forward NUMBER_INDEXES so the storage depth and its address width
    // track this bank's parameter instead of silently staying at 256.
    VX_cache_data #(
        .NUMBER_INDEXES(NUMBER_INDEXES)
    ) data_structures (
        .clk       (clk),
        // Inputs
        .addr      (actual_index),
        .we        (we),
        .evict     (write_from_mem),
        .data_write(data_write),
        .tag_write (o_tag),
        // Outputs
        .tag_use   (tag_use),
        .data_use  (data_use),
        .valid_use (valid_use),
        .dirty_use (dirty_use)
    );

endmodule

196
rtl/cache/VX_Cache_Block_DM.v vendored Normal file
View file

@ -0,0 +1,196 @@
// To Do: Change way_id_out to an internal register which holds when in between access and finished.
// Also add a bit about whether the "Way ID" is valid / being held or if it is just default
// Also make sure all possible output states are transmitted back to the bank correctly
`include "VX_define.v"
// VX_Cache_Block_DM
//
// A single direct-mapped cache block: one tag, one valid bit, one clean bit
// and 32 data words. The four mode inputs (access, find_evict,
// write_from_mem, idle) select what happens on each rising clock edge.
//
// Inputs:
//   clk, rst          - clock and synchronous reset. Reset invalidates the
//                       block and clears `miss`.
//   access            - look up / write the block (ignored while find_evict
//                       is asserted).
//   find_evict        - clears `miss`; no data movement is implemented yet.
//   write_from_mem    - install o_tag and mark the block valid.
//   idle              - clears `miss`.
//   o_tag             - tag of the request / tag to install.
//   block_offset      - word select inside the block.
//   writedata         - word written on a processor write.
//   write             - 0 = read, 1 = write.
//   fetched_writedata - full block written on write_from_mem when write == 1.
//
// Outputs:
//   readdata     - selected word on a read hit, otherwise 0.
//   hit          - read access with matching tag on a valid block.
//   eviction_wb  - miss on a valid block that is not clean.
//   eviction_tag - currently stored tag.
//   evicted_data - block contents while eviction_wb is asserted, otherwise 0.
//   miss         - registered miss flag.
module VX_Cache_Block_DM(clk,
    rst,
    // These next 4 are possible modes that the Set could be in, I am making them 4 different variables for indexing purposes
    access, // First
    find_evict,
    write_from_mem,
    idle,
    o_tag,
    block_offset,
    writedata,
    write,
    fetched_writedata,

    readdata,
    hit,
    eviction_wb,
    eviction_tag,
    evicted_data,
    miss
);

    parameter cache_entry   = 14; // Not referenced inside this module
    parameter ways_per_set  = 4;  // Not referenced inside this module
    // Loop bound for the per-word loops below.
    // NOTE(review): the data array and the block-wide ports are fixed at 32
    // words, so values other than 32 are not actually supported.
    parameter Number_Blocks = 32;

    input wire clk, rst;
    input wire access;
    input wire find_evict;
    input wire write_from_mem;
    input wire idle;
    input wire [21:0]       o_tag;
    input wire [4:0]        block_offset;
    input wire [31:0]       writedata;
    input wire              write; // 0 == read, 1 == write
    input wire [31:0][31:0] fetched_writedata;

    output wire [31:0]       readdata;
    output wire              hit;
    output reg               miss;
    output wire              eviction_wb;
    output wire [21:0]       eviction_tag;
    output wire [31:0][31:0] evicted_data;

    // State of the single block
    reg        valid;
    reg [21:0] tag;
    reg        clean;
    reg [31:0] data[31:0];

    integer j;

    // Both outputs are driven by the same condition: a read access that
    // matches the stored tag of a valid block.
    wire read_hit = access && !write && tag == o_tag && valid;

    assign eviction_wb  = miss && clean != 1'b1 && valid == 1'b1;
    assign eviction_tag = tag;
    // Return the word selected by block_offset (previously always word 0).
    assign readdata     = read_hit ? data[block_offset] : 32'b0;
    assign hit          = read_hit ? 1'b1 : 1'b0;

    genvar k;
    generate
        for (k = 0; k < Number_Blocks; k = k + 1) begin : evict_word
            assign evicted_data[k] = (eviction_wb) ? data[k] : 32'b0;
        end
    endgenerate

    always @(posedge clk) begin
        if (rst) begin
            // Invalidate the block so no stale tag can produce a hit or a
            // write-back after reset. The data words are left untouched
            // because every use of them is gated by `valid`.
            valid <= 1'b0;
            clean <= 1'b1;
            miss  <= 1'b0;
            tag   <= 22'b0;
        end else begin
            // find_evict takes priority over access: an access is ignored
            // while find_evict is asserted.
            if (!find_evict && access) begin
                if (tag == o_tag && valid) begin
                    // Tag match on a valid block
                    if (write == 1'b0) begin // Read
                        // NOTE(review): a read of a matching but not-clean
                        // block is flagged as a miss; kept as written.
                        if (clean == 1'b1) begin
                            miss <= 1'b0;
                        end else begin
                            miss <= 1'b1;
                        end
                    end else if (write == 1'b1) begin // Write
                        data[block_offset] <= writedata;
                        clean              <= 1'b0;
                    end
                end else begin
                    // Miss
                    miss <= 1'b1;
                    if (write == 1'b0) begin // Read Miss
                        clean <= 1'b1;
                        // Placeholder: FIX WITH ACTUAL MEMORY ACCESS
                        for (j = 0; j < Number_Blocks; j = j + 1) begin
                            data[j] <= 32'b0;
                        end
                    end else if (write == 1'b1) begin // Write Miss
                        // NOTE(review): the block is marked clean although a
                        // word is written; kept as written.
                        clean              <= 1'b1;
                        data[block_offset] <= writedata;
                    end
                end
            end

            // Installing a block from memory. Placed after the access
            // handling so that its assignments win when both are asserted.
            if (write_from_mem) begin
                tag   <= o_tag;
                valid <= 1'b1;
                if (write == 1'b0) begin // Read Miss
                    clean <= 1'b1;
                    // Placeholder: FIX WITH ACTUAL MEMORY ACCESS
                    for (j = 0; j < Number_Blocks; j = j + 1) begin
                        data[j] <= 32'b0;
                    end
                end else if (write == 1'b1) begin // Write Miss
                    clean <= 1'b0;
                    for (j = 0; j < Number_Blocks; j = j + 1) begin
                        data[j] <= fetched_writedata[j];
                    end
                end
            end

            // idle and find_evict both clear the miss flag; being last, they
            // override any miss value assigned above in the same cycle.
            if (idle) begin
                miss <= 1'b0;
            end

            // TODO: find_evict still needs to send back the data with the
            // matching tag and ensure evicted data does not get lost.
            if (find_evict) begin
                miss <= 1'b0;
            end
        end
    end

endmodule

193
rtl/cache/VX_cache_data.v vendored Normal file
View file

@ -0,0 +1,193 @@
`define NUM_WORDS_PER_BLOCK 4
// VX_cache_data
//
// Storage for one cache bank: per index it keeps a line of
// `NUM_WORDS_PER_BLOCK 32-bit words, a 17-bit tag, a valid bit and a dirty
// bit. Reads are combinational on `addr`; writes happen on the rising edge
// of `clk`.
//
// Parameters:
//   NUMBER_INDEXES - number of lines. Declared in the parameter port list so
//                    it is defined before the port declarations that use it.
//
// Inputs:
//   clk        - clock for all write ports.
//   addr       - line index used for both reading and writing.
//   we         - per-word write enable for the data line.
//   evict      - replaces the tag with tag_write, sets valid and clears dirty.
//   data_write - per-word write data.
//   tag_write  - tag stored when evict is asserted.
//
// Outputs:
//   tag_use, data_use, valid_use, dirty_use - contents of the indexed line.
//
// NOTE(review): there is no reset; valid/dirty are never initialised by this
// module, so their power-up contents depend on the simulator / memory.
module VX_cache_data
#(
    parameter NUMBER_INDEXES = 256
)
(
    input wire clk, // Clock

    // Addr
    input wire[$clog2(NUMBER_INDEXES)-1:0] addr,
    // WE
    input wire[`NUM_WORDS_PER_BLOCK-1:0] we,
    input wire evict,
    // Data
    input wire[`NUM_WORDS_PER_BLOCK-1:0][31:0] data_write, // Update Data
    input wire[16:0] tag_write,

    output wire[16:0] tag_use,
    output wire[`NUM_WORDS_PER_BLOCK-1:0][31:0] data_use,
    output wire valid_use,
    output wire dirty_use
);

    wire currently_writing = (|we);
    // The dirty bit is rewritten when a clean line is written, or on evict.
    wire update_dirty      = ((!dirty_use) && currently_writing) || (evict);
    // An evict always leaves the line clean, even if words are written in
    // the same cycle; otherwise any word write marks it dirty.
    wire dirt_new          = evict ? 1'b0 : currently_writing;

    /////////////////

    // (3:0)  4 bytes
    reg[`NUM_WORDS_PER_BLOCK-1:0][31:0] data[NUMBER_INDEXES-1:0]; // Actual Data
    reg[16:0]                           tag[NUMBER_INDEXES-1:0];
    reg                                 valid[NUMBER_INDEXES-1:0];
    reg                                 dirty[NUMBER_INDEXES-1:0];

    // 16 bytes
    assign data_use  = data[addr]; // Read Port
    assign tag_use   = tag[addr];
    assign valid_use = valid[addr];
    assign dirty_use = dirty[addr];

    integer f;

    always @(posedge clk) begin : dirty_update
        if (update_dirty) dirty[addr] <= dirt_new; // Write Port
    end

    always @(posedge clk) begin : data_update
        for (f = 0; f < `NUM_WORDS_PER_BLOCK; f = f + 1) begin
            if (we[f]) data[addr][f] <= data_write[f];
        end
    end

    always @(posedge clk) begin : tag_update
        if (evict) tag[addr] <= tag_write;
    end

    always @(posedge clk) begin : valid_update
        if (evict) valid[addr] <= 1'b1;
    end

//////////////////////////////
// Alternative implementation using ASIC register-file macros (disabled).
// wire cena = 1;
// wire cenb_d = (|we);
// wire[`NUM_WORDS_PER_BLOCK-1:0][31:0] wdata_d = data_write;
// wire[`NUM_WORDS_PER_BLOCK-1:0][31:0] write_bit_mask_d;
// wire[`NUM_WORDS_PER_BLOCK-1:0][31:0] data_out_d;
// genvar cur_b;
// for (cur_b = 0; cur_b < `NUM_WORDS_PER_BLOCK; cur_b=cur_b+1) begin
// assign write_bit_mask_d[cur_b] = {32{~we[cur_b]}};
// end
// assign data_use = data_out_d;
// // Using ASIC MEM
// /* verilator lint_off PINCONNECTEMPTY */
// rf2_256x128_wm1 data (
// .CENYA(),
// .AYA(),
// .CENYB(),
// .WENYB(),
// .AYB(),
// .QA(data_out_d),
// .SOA(),
// .SOB(),
// .CLKA(clk),
// .CENA(cena),
// .AA(addr),
// .CLKB(clk),
// .CENB(cenb_d),
// .WENB(write_bit_mask_d),
// .AB(addr),
// .DB(wdata_d),
// .EMAA(3'b011),
// .EMASA(1'b0),
// .EMAB(3'b011),
// .TENA(1'b1),
// .TCENA(1'b0),
// .TAA(5'b0),
// .TENB(1'b1),
// .TCENB(1'b0),
// .TWENB(128'b0),
// .TAB(5'b0),
// .TDB(128'b0),
// .RET1N(1'b1),
// .SIA(2'b0),
// .SEA(1'b0),
// .DFTRAMBYP(1'b0),
// .SIB(2'b0),
// .SEB(1'b0),
// .COLLDISN(1'b1)
// );
// /* verilator lint_on PINCONNECTEMPTY */
// wire[16:0] old_tag;
// wire old_valid;
// wire old_dirty;
// wire[16:0] new_tag = evict ? tag_write : old_tag;
// wire new_valid = evict ? 1 : old_valid;
// wire new_dirty = update_dirty ? new_dirty : old_dirty;
// wire cenb_m = (evict || update_dirty);
// wire[19-1:0][31:0] write_bit_mask_m = cenb_m ? 19'b0 : 19'b1;
// wire[`NUM_WORDS_PER_BLOCK-1:0][31:0] wdata_m = {new_tag, new_dirty, new_valid};
// wire[`NUM_WORDS_PER_BLOCK-1:0][31:0] data_out_m;
// assign {old_tag, old_dirty, old_valid} = data_out_m;
// assign dirty_use = old_dirty;
// assign valid_use = old_valid;
// assign tag_use = old_tag;
// /* verilator lint_off PINCONNECTEMPTY */
// rf2_256x19_wm0 meta (
// .CENYA(),
// .AYA(),
// .CENYB(),
// // .WENYB(),
// .AYB(),
// .QA(data_out_m),
// .SOA(),
// .SOB(),
// .CLKA(clk),
// .CENA(cena),
// .AA(addr),
// .CLKB(clk),
// .CENB(cenb_m),
// // .WENB(write_bit_mask_m),
// .AB(addr),
// .DB(wdata_m),
// .EMAA(3'b011),
// .EMASA(1'b0),
// .EMAB(3'b011),
// .TENA(1'b1),
// .TCENA(1'b0),
// .TAA(5'b0),
// .TENB(1'b1),
// .TCENB(1'b0),
// // .TWENB(128'b0),
// .TAB(5'b0),
// .TDB(128'b0),
// .RET1N(1'b1),
// .SIA(2'b0),
// .SEA(1'b0),
// .DFTRAMBYP(1'b0),
// .SIB(2'b0),
// .SEB(1'b0),
// .COLLDISN(1'b1)
// );
// /* verilator lint_on PINCONNECTEMPTY */
endmodule

602
rtl/cache/VX_d_cache.v vendored Normal file
View file

@ -0,0 +1,602 @@
// Cache Memory (8way 4word) //
// i_ means input port //
// o_ means output port //
// _p_ means data exchange with processor //
// _m_ means data exchange with memory //
// TO DO:
// - Send in a response from memory of what the data is from the test bench
`include "VX_define.v"
//`include "VX_priority_encoder.v"
`include "VX_Cache_Bank.v"
//`include "cache_set.v"
module VX_d_cache(clk,
rst,
i_p_initial_request,
i_p_addr,
//i_p_byte_en,
i_p_writedata,
i_p_read_or_write, // 0 = Read | 1 = Write
i_p_valid,
//i_p_write,
o_p_readdata,
o_p_readdata_valid,
o_p_waitrequest, // 0 = all threads done | 1 = Still threads that need to
o_m_addr,
//o_m_byte_en,
o_m_writedata,
o_m_read_or_write, // 0 = Read | 1 = Write
o_m_valid,
//o_m_write,
i_m_readdata,
//i_m_readdata_ready,
//i_m_waitrequest,
i_m_ready
//cnt_r,
//cnt_w,
//cnt_hit_r,
//cnt_hit_w
//cnt_wb_r,
//cnt_wb_w
);
parameter NUMBER_BANKS = 8;
localparam CACHE_IDLE = 0; // Idle
localparam SORT_BY_BANK = 1; // Determines the bank each thread will access
localparam INITIAL_ACCESS = 2; // Accesses the bank and checks if it is a hit or miss
localparam INITIAL_PROCESSING = 3; // Check to see if there were misses
localparam CONTINUED_PROCESSING = 4; // Keep checking status of banks that need to be written back or fetched
localparam DIRTY_EVICT_GRAB_BLOCK = 5; // Grab the full block of dirty data
localparam DIRTY_EVICT_WB = 6; // Write back this block into memory
localparam FETCH_FROM_MEM = 7; // Send a request to mem looking for read data
localparam FETCH2 = 8; // Stall until memory gets back with the data
localparam UPDATE_CACHE = 9; // Update the cache with the data read from mem
localparam RE_ACCESS = 10; // Access the cache after the block has been fetched from memory
localparam RE_ACCESS_PROCESSING = 11; // Access the cache after the block has been fetched from memory
//parameter cache_entry = 9;
input wire clk, rst;
input wire [`NT_M1:0] i_p_valid;
//input wire [`NT_M1:0][24:0] i_p_addr; // FIXME
input wire [`NT_M1:0][31:0] i_p_addr; // FIXME
input wire i_p_initial_request;
//input wire [3:0] i_p_byte_en;
input wire [`NT_M1:0][31:0] i_p_writedata;
input wire i_p_read_or_write; //, i_p_write;
output reg [`NT_M1:0][31:0] o_p_readdata;
output reg [`NT_M1:0] o_p_readdata_valid;
output wire o_p_waitrequest;
//output reg [24:0] o_m_addr; // Only one address is sent out at a time to memory -- FIXME
output reg [31:0] o_m_addr; // Address is xxxxxxxxxxoooobbbyy
output reg o_m_valid;
//output wire [255:0][31:0] evicted_data;
//output wire [3:0] o_m_byte_en;
//output reg [(NUMBER_BANKS * 32) - 1:0] o_m_writedata;
output reg[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] o_m_writedata;
output reg o_m_read_or_write; //, o_m_write;
//input wire [(NUMBER_BANKS * 32) - 1:0] i_m_readdata; // Read Data that is passed from the memory module back to the controller
input wire[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] i_m_readdata;
//input wire i_m_readdata_ready;
//input wire i_m_waitrequest;
input wire i_m_ready;
//output reg [31:0] cnt_r;
//output reg [31:0] cnt_w;
//output reg [31:0] cnt_hit_r;
//output reg [31:0] cnt_hit_w;
//output reg [31:0] cnt_wb_r;
//output reg [31:0] cnt_wb_w;
//wire [1:0] tag [`NT_M1:0];
//wire [3:0] index [`NT_M1:0];
//wire [2:0] bank [`NT_M1:0];
//wire all_done;
//integer i;
reg [`NT_M1:0] thread_done; // Maybe should have "thread_serviced" and "thread_done", serviced==checked cache
//reg [`NT_M1:0] thread_serviced; // Maybe should have "thread_serviced" and "thread_done", serviced==checked cache
reg [NUMBER_BANKS - 1:0] banks_ready;
//reg [NUMBER_BANKS - 1:0] banks_missed;
reg [NUMBER_BANKS - 1:0] banks_to_service;
reg [NUMBER_BANKS - 1:0] banks_wb_needed;
reg [NUMBER_BANKS - 1:0][31:0] banks_wb_addr;
//reg [NUMBER_BANKS - 1:0] bank_states;
//reg [NUMBER_BANKS - 1:0][31:0] banks_wb_data;
//reg [NUMBER_BANKS - 1:0][13:0] banks_in_addr;
reg [3:0] state;
reg [NUMBER_BANKS - 1:0][31:0] data_from_bank;
//reg got_valid_data;
//reg [31:0] data_to_write;
//reg [`NT_M1:0] thread_track_bank_0;
//reg [`NT_M1:0] thread_track_bank_1;
//reg [`NT_M1:0] thread_track_bank_2;
//reg [`NT_M1:0] thread_track_bank_3;
//reg [`NT_M1:0] thread_track_bank_4;
//reg [`NT_M1:0] thread_track_bank_5;
//reg [`NT_M1:0] thread_track_bank_6;
//reg [`NT_M1:0] thread_track_bank_7;
reg [NUMBER_BANKS - 1 : 0][`NT_M1:0] thread_track_banks;
reg [NUMBER_BANKS - 1 : 0] bank_has_access; // Will track if a bank has been accessed in this cycle
reg [NUMBER_BANKS - 1 : 0][31:0] bank_access_addr;
reg [NUMBER_BANKS - 1 : 0][31:0] bank_access_data;
reg [NUMBER_BANKS - 1 : 0][1:0] threads_in_banks;
//reg [1:0] thread_in_memory; // keeps track of threadID which is in memory
reg rd_or_wr;
//reg did_miss, needs_service; Commented out Oct 21
integer bnk;
integer found;
integer t_id;
//integer num_misses;
//integer num_evictions_to_wb;
integer i; //reg [1:0] correct_tag;
integer index;
//reg [3:0] correct_index;
//assign tag = i_p_addr[13:12];
assign o_p_waitrequest = (thread_done == 4'hF) ? 1'b0 : 1'b1; // change thread_done to be generic
//assign did_miss = (banks_missed != 8'h0) ? 1'b1 : 1'b0;
//assign needs_service = ((banks_to_service != 8'b0 || banks_to_service_temp != 8'b0)) ? 1'b1 : 1'b0; // added banks_to_service temp
//assign w_Test1 = r_Check ? 1'b1 : 1'b0;
//for ( i = 0;i < `NT_M1;i = i + 1) begin
// assign tag[i] = i_p_addr[i][13:12];
// Fares
// wire no_bank_misses;
// assign no_bank_misses = banks_to_service != 8'b0;
reg[NUMBER_BANKS - 1:0] banks_to_service_temp;
reg[NUMBER_BANKS - 1:0] banks_to_wb;
reg[NUMBER_BANKS - 1:0] banks_to_wb_temp;
reg[NUMBER_BANKS - 1:0] banks_all_help;
always @(posedge clk) begin
if (rst) begin
state <= 0;
//banks_ready <= 8'b0;
//cnt_r <= 0;
//cnt_w <= 0;
//cnt_hit_r <= 0;
//cnt_hit_w <= 0;
//cnt_wb_r <= 0;
//cnt_wb_w <= 0;
end else begin
// Change Logic of which state the cache is in
case (state)
CACHE_IDLE:begin
if (i_p_initial_request == 1'b1) begin
state <= SORT_BY_BANK;
end else begin
state <= CACHE_IDLE;
end
end
SORT_BY_BANK:begin
state <= INITIAL_ACCESS;
end
INITIAL_ACCESS:begin
if (thread_done == 4'hF) begin
state <= CACHE_IDLE;
end else begin
state <= INITIAL_PROCESSING;
end
end
INITIAL_PROCESSING:begin
if (bank_has_access == banks_ready ) begin // if all hits
state <= INITIAL_ACCESS;
end else begin
state <= CONTINUED_PROCESSING;
end
end
CONTINUED_PROCESSING:begin
if (banks_to_wb == 8'b0 && banks_to_service == 8'b0) begin // If all threads are done, then the cache can go back into idle state (not currently fetching any requests)
state <= INITIAL_ACCESS;
//end else if (num_misses > 0) begin
end else if ((banks_to_wb != 8'b0)) begin // change 1pm
state <= DIRTY_EVICT_GRAB_BLOCK;
//end else if (did_miss == 1'b1 || needs_service == 1'b1) begin
end else if(banks_to_service != 8'b0) begin
state <= FETCH_FROM_MEM;
// end else if (did_miss == 1'b0 && num_evictions_to_wb > 0) begin
//end else if (needs_service == 1'b0 && did_miss == 1'b0 && (banks_to_wb != 8'b0)) begin
//end else if (did_miss == 1'b0 && needs_service == 1'b0) begin
//state <= INITIAL_ACCESS;
end
end
FETCH_FROM_MEM:begin
state <= FETCH2;
end
FETCH2:begin
if (i_m_ready == 1'b1) begin
state <= UPDATE_CACHE; // Not sure about this one !!!!!! Check
end else begin
state <= FETCH2;
end
end
UPDATE_CACHE:begin
state <= RE_ACCESS;
end
RE_ACCESS:begin
state <= RE_ACCESS_PROCESSING;
end
RE_ACCESS_PROCESSING: begin
state <= CONTINUED_PROCESSING;
end
DIRTY_EVICT_GRAB_BLOCK:begin
state <= DIRTY_EVICT_WB;
end
DIRTY_EVICT_WB:begin
state <= CONTINUED_PROCESSING;
end
endcase
end
//tag[`NT_M1:0] <= i_p_addr[`NT_M1:0][13:12];
end
// Change values which will be fed into the cache
always @(*) begin
case (state)
CACHE_IDLE:begin
thread_done = 0;
o_m_read_or_write = 0;
o_m_valid = 0;
o_m_writedata = 0;
o_p_readdata = 0;
o_p_readdata_valid = 0;
bank_has_access = 8'b0;
//bank_states = CACHE_IDLE;
//thread_track_bank_0 = 4'b0;
//thread_track_bank_1 = 4'b0;
//thread_track_bank_2 = 4'b0;
//thread_track_bank_3 = 4'b0;
//thread_track_bank_4 = 4'b0;
//thread_track_bank_5 = 4'b0;
//thread_track_bank_6 = 4'b0;
//thread_track_bank_7 = 4'b0;
for (bnk = 0; bnk < NUMBER_BANKS; bnk = bnk + 1) begin
thread_track_banks[bnk] = 4'b0;
end
end
SORT_BY_BANK:begin
//bank_states = SORT_BY_BANK;
rd_or_wr = i_p_read_or_write;
for (t_id = 0; t_id <= `NT_M1; t_id = t_id + 1) begin
//t_id = {1'b0,t_id};
if (i_p_valid[t_id] == 1'b0) begin
thread_done[t_id] = 1'b1;
end
//if (i_p_valid[t_id] == 1'b1 && thread_done[t_id] == 1'b0) begin // Need logic for thread done
else if (i_p_addr[t_id][4:2] == 3'b000) begin
//banks_in_addr[0] = i_p_addr[t_id]; // WIll need to do this later
//thread_track_bank_0[t_id] = 1'b1;
thread_track_banks[0][t_id] = 1'b1;
end
else if (i_p_addr[t_id][4:2] == 3'b001) begin // !!!!!!!
//banks_in_addr[0] = i_p_addr[t_id]; // WIll need to do this later
//thread_track_bank_1[t_id] = 1'b1;
thread_track_banks[1][t_id] = 1'b1;
end
else if (i_p_addr[t_id][4:2] == 3'b010) begin
//banks_in_addr[0] = i_p_addr[t_id]; // WIll need to do this later
//thread_track_bank_2[t_id] = 1'b1;
thread_track_banks[2][t_id] = 1'b1;
end
else if (i_p_addr[t_id][4:2] == 3'b011) begin
//banks_in_addr[0] = i_p_addr[t_id]; // WIll need to do this later
//thread_track_bank_3[t_id] = 1'b1;
thread_track_banks[3][t_id] = 1'b1;
end
else if (i_p_addr[t_id][4:2] == 3'b100) begin
//banks_in_addr[0] = i_p_addr[t_id]; // WIll need to do this later
//thread_track_bank_4[t_id] = 1'b1;
thread_track_banks[4][t_id] = 1'b1;
end
else if (i_p_addr[t_id][4:2] == 3'b101) begin
//banks_in_addr[0] = i_p_addr[t_id]; // WIll need to do this later
//thread_track_bank_5[t_id] = 1'b1;
thread_track_banks[5][t_id] = 1'b1;
end
else if (i_p_addr[t_id][4:2] == 3'b110) begin
//banks_in_addr[0] = i_p_addr[t_id]; // WIll need to do this later
//thread_track_bank_6[t_id] = 1'b1;
thread_track_banks[6][t_id] = 1'b1;
end
else if (i_p_addr[t_id][4:2] == 3'b111) begin
//banks_in_addr[0] = i_p_addr[t_id]; // WIll need to do this later
//thread_track_bank_7[t_id] = 1'b1;
thread_track_banks[7][t_id] = 1'b1;
end
end
end
INITIAL_ACCESS:begin
//bank_states = INITIAL_ACCESS;
o_m_valid = 1'b0;
// Before Access
// if (no_bank_misses) begin
// Dont do anything, next clock cycle it will switch back to (Fetch from mem)
// end else begin // Do logic to send requests to each bank (look through thread_track_bank regs)
bank_has_access = 8'b0;
for (t_id = 0; t_id <= `NT_M1; t_id = t_id + 1) begin
for (bnk = 0; bnk < NUMBER_BANKS; bnk = bnk + 1) begin
if(thread_track_banks[bnk][t_id] == 1'b1 && bank_has_access[bnk] == 1'b0) begin
bank_has_access[bnk] = 1'b1;
bank_access_data[bnk] = i_p_writedata[t_id];
bank_access_addr[bnk] = i_p_addr[t_id];
threads_in_banks[bnk] = t_id[1:0];
end
end
//if (banks_wb_needed[bnk]) begin // need to fix this for multiple misses
//o_m_read_or_write = 1'b0;
//o_m_addr = banks_wb_addr[bnk];
//o_m_valid = 1'b1;
//o_m_writedata = {banks_wb_data[bnk], 96'b0};
//end
//if(thread_track_bank_0[t_id] == 1'b1 && bank_has_access[0] == 1'b0) begin
//bank_has_access[0] = 1'b1;
//bank_access_data[0] = i_p_writedata[t_id];
//bank_access_addr[0] = i_p_addr[t_id];
//threads_in_banks[0] = t_id;
//end
// NEED TO UPDATE HITS (STORE IN THREADS_DONE)
end
//num_misses = {28'b0, $countones(banks_missed)};
//did_miss = (banks_missed == 4'hF);
// end
end
INITIAL_PROCESSING:begin
//bank_has_access = 8'b0;
for (bnk = 0; bnk < NUMBER_BANKS; bnk = bnk + 1) begin
if(banks_ready[bnk]) begin // FIX to handle hits
thread_done[threads_in_banks[bnk]] = 1'b1;
o_p_readdata[threads_in_banks[bnk]] = data_from_bank[bnk];
if(i_p_read_or_write == 1'b0) begin
o_p_readdata_valid[threads_in_banks[bnk]] = 1'b1;
end
thread_track_banks[bnk][threads_in_banks[bnk]] = 1'b0; // Update that this thread does not need to be serviced again
end
end
//banks_to_service_temp = !banks_ready; // These are clean misses
for (bnk = 0; bnk < NUMBER_BANKS; bnk = bnk + 1) begin
assign banks_to_service_temp[bnk] = (banks_ready[bnk] || (bank_has_access[bnk] == 0)) ? 1'b0 : 1'b1;
assign banks_to_wb_temp[bnk] = (banks_wb_needed[bnk]);
assign banks_all_help[bnk] = banks_to_service_temp[bnk] || banks_to_wb_temp[bnk];
end
end
CONTINUED_PROCESSING:begin
//for (i = `NW-1; i >= 0; i = i - 1) begin
// if (thread_done[threads_in_banks[bnk]] == 1'b1) begin // Not sure about this logic
// //index = i[`NW_M1:0];
// banks_to_service_temp[i] = 1'b0;
// banks_to_wb_temp[i] = 1'b0;
// end
//end
end
FETCH_FROM_MEM:begin
// NEED TO ADD LOGIC TO SEE IF MISSES GO TO SAME BLOCK
index = 0;
found = 0;
for (i = `NW-1; i >= 0; i = i - 1) begin
if (banks_to_service[i]) begin // Not sure about this logic
//index = i[`NW_M1:0];
index = i;
found = 1;
end
end
if (found == 1) begin
//banks_missed[index] = 0;
//thread_done
//thread_in_memory = threads_in_banks[index];
//o_m_writedata = bank_access_data[index];
banks_to_service_temp[index] = 0;
o_m_addr = bank_access_addr[index];
o_m_valid = 1'b1;
o_m_read_or_write = 1'b0;
end
//bank_states = FETCH_FROM_MEM;
end
FETCH2:begin
o_m_valid = 1'b0;
end
UPDATE_CACHE:begin
for (bnk = 0; bnk < NUMBER_BANKS; bnk = bnk + 1) begin
//if(thread_track_banks[bnk][t_id] == 1'b1 && bank_has_access[bnk] == 1'b0) begin
bank_has_access[bnk] = 1'b1;
//bank_access_data[bnk] = i_m_readdata[(bnk+1)*32 - 1:bnk*32];
bank_access_addr[bnk] = o_m_addr;
threads_in_banks[bnk] = t_id[1:0];
//end
end
//bank_access_data = i_m_readdata;
rd_or_wr = 1'b1;
//thread_done[thread_in_memory] = 1'b1; // Removed, new cache style - Oct 21
//o_p_readdata[thread_in_memory] = i_m_readdata[i_p_addr[thread_in_memory][9:5]]; // Removed, new cache style
end
DIRTY_EVICT_WB:begin // this begininng logic should be added to dirty evict grab block
//thread_done[thread_in_memory] = 1'b1;
o_m_valid = 1'b1;
end
DIRTY_EVICT_GRAB_BLOCK:begin
index = 0;
found = 0;
for (i = `NW-1; i >= 0; i = i - 1) begin
if (banks_to_wb_temp[i]) begin
//index = i[`NW_M1:0];
index = i;
found = 1;
end
end
if (found == 1) begin
banks_to_wb_temp[index] = 0;
for (i = `NW-1; i >= 0; i = i - 1) begin
if (banks_to_wb_temp[i] && banks_wb_addr[index][31:7] == banks_wb_addr[i][31:7]) begin
//index = i[`NW_M1:0];
banks_to_wb_temp[i] = 0;
end
end
//thread_done
//thread_in_memory = threads_in_banks[index];
//o_m_writedata[(bnk+1)*32 - 1:bnk*32] = banks_wb_data[index];
o_m_addr = banks_wb_addr[index];
o_m_read_or_write = 1'b1;
end
//for (bnk = 0; bnk < NUMBER_BANKS; bnk = bnk + 1) begin
//o_m_writedata[(bnk+1)*32 - 1:bnk*32] = banks_wb_data[index];
//end
// NEXT LINE CONTAINS DATA TO WB !!!! Think need to just change this to be read data and can remove banks_wb_data
//o_m_writedata = {banks_wb_data[7],banks_wb_data[6],banks_wb_data[5],banks_wb_data[4],banks_wb_data[3],banks_wb_data[2],banks_wb_data[1],banks_wb_data[0]};
//num_evictions_to_wb = {28'b0, $countones(banks_wb_needed)};
rd_or_wr = 1'b0;
for (bnk = 0; bnk < NUMBER_BANKS; bnk = bnk + 1) begin
//if(thread_track_banks[bnk][t_id] == 1'b1 && bank_has_access[bnk] == 1'b0) begin
bank_has_access[bnk] = 1'b1;
bank_access_addr[bnk] = o_m_addr;
//end
end
end
RE_ACCESS:begin
//bank_states = INITIAL_ACCESS;
o_m_valid = 1'b0;
// Before Access
// if (no_bank_misses) begin
// Dont do anything, next clock cycle it will switch back to (Fetch from mem)
// end else begin // Do logic to send requests to each bank (look through thread_track_bank regs)
//bank_has_access = banks_all_help & !(banks_to_wb) & !(banks_to_service);
for (t_id = 0; t_id <= `NT_M1; t_id = t_id + 1) begin
for (bnk = 0; bnk < NUMBER_BANKS; bnk = bnk + 1) begin
//bank_has_access[bnk] = banks_all_help[bnk] && !thread_done[threads_in_banks[bnk]]; // Not sure
bank_has_access[bnk] = banks_all_help[bnk] && !thread_done[t_id]; // Not sure
if(thread_track_banks[bnk][t_id] == 1'b1 && bank_has_access[bnk] == 1'b1) begin
//bank_has_access[bnk] = 1'b1;
bank_access_data[bnk] = i_p_writedata[t_id];
bank_access_addr[bnk] = i_p_addr[t_id];
threads_in_banks[bnk] = t_id[1:0];
end
end
end
end
RE_ACCESS_PROCESSING:begin
// After Access
for (bnk = 0; bnk < NUMBER_BANKS; bnk = bnk + 1) begin
if(banks_ready[bnk]) begin // FIX to handle hits
thread_done[threads_in_banks[bnk]] = 1'b1;
o_p_readdata[threads_in_banks[bnk]] = data_from_bank[bnk];
if(i_p_read_or_write == 1'b0) begin
o_p_readdata_valid[threads_in_banks[bnk]] = 1'b1;
end
thread_track_banks[bnk][threads_in_banks[bnk]] = 1'b0; // Update that this thread does not need to be serviced again
// Added Oct 21
banks_to_service_temp[bnk] = 1'b0;
banks_to_wb_temp[bnk] = 1'b0;
end
end
end
endcase
end
always @(posedge clk) begin
banks_to_service <= banks_to_service_temp;
banks_to_wb <= banks_to_wb_temp;
end
genvar bank_id;
generate
for (bank_id = 0; bank_id < NUMBER_BANKS; bank_id = bank_id + 1)
begin
//VX_alu vx_alu(
// .in_reg_data (in_reg_data[1:0]),
// .in_1 (in_a_reg_data[index_out_reg]),
// .in_2 (in_b_reg_data[index_out_reg]),
// .in_rs2_src (in_rs2_src),
// .in_itype_immed(in_itype_immed),
// .in_upper_immed(in_upper_immed),
// .in_alu_op (in_alu_op),
// .in_csr_data (in_csr_data),
// .in_curr_PC (in_curr_PC),
// .out_alu_result(VX_exe_mem_req.alu_result[index_out_reg])
//);
// bank VX_banks(
// .clk (clk),
// .rst (rst),
// //.state (bank_states[bank_id]),
// .state (state),
// .read_or_write (rd_or_wr),
// //.index (correct_index),
// //.tag (correct_tag),
// .addr (bank_access_addr[bank_id]),
// .writedata (bank_access_data[bank_id]),
// .fetched_write_data(i_m_readdata[(bank_id+1)*32-1 -: 32]),
// .valid (bank_has_access[bank_id]),
// .readdata (data_from_bank[bank_id]),
// .miss_cache (banks_missed[bank_id]),
// .w2m_needed (banks_wb_needed[bank_id]),
// .w2m_addr (banks_wb_addr[bank_id]),
// .e_data (o_m_writedata[(bank_id+1)*32-1 -: 32]),
// //.w2m_data (banks_wb_data[bank_id]),
// .ready (banks_ready[bank_id])
// //.valid_data (valid_in_set)
// //.read_miss (read_miss)
// );
VX_Cache_Bank bank_structure (
.clk (clk),
.state (state),
.read_or_write (rd_or_wr),
.valid_in (bank_has_access[bank_id]),
.actual_index (bank_access_addr[bank_id][14:7]), // fix when size changes
.o_tag (bank_access_addr[bank_id][31:15]), // fix when size changes
.block_offset (bank_access_addr[bank_id][6:5]),
.writedata (bank_access_data[bank_id]),
//.fetched_writedata (i_m_readdata[(bank_id+1)*32-1 -: 32]),
.fetched_writedata (i_m_readdata[bank_id[3:0]]),
.readdata (data_from_bank[bank_id]),
.hit (banks_ready[bank_id]),
//.miss (banks_missed[bank_id]),
.eviction_wb (banks_wb_needed[bank_id]),
.eviction_addr (banks_wb_addr[bank_id]),
//.data_evicted (o_m_writedata[(bank_id+1)*32-1 -: 32])
.data_evicted (o_m_writedata[bank_id[3:0]])
);
end
endgenerate
//end
endmodule

118
rtl/cache/VX_d_cache_encapsulate.v vendored Normal file
View file

@ -0,0 +1,118 @@
`include "VX_define.v"
// `define NUM_WORDS_PER_BLOCK 4
// Wrapper around VX_d_cache that exposes unpacked-array ports and converts
// them, element by element, to/from the packed vectors that the inner
// VX_d_cache instance is connected with.
// NOTE(review): presumably this exists so the Verilator C++ bench can index
// the ports as plain arrays -- confirm against the generated header.
module VX_d_cache_encapsulate (
    clk,
    rst,
    i_p_initial_request,
    i_p_addr,
    i_p_writedata,
    i_p_read_or_write,
    i_p_valid,
    o_p_readdata,
    o_p_readdata_valid,
    o_p_waitrequest,
    o_m_addr,
    o_m_writedata,
    o_m_read_or_write,
    o_m_valid,
    i_m_readdata,
    i_m_ready
);

    parameter NUMBER_BANKS = 8;

    // ---- Clock / reset ----
    input wire clk;
    input wire rst;

    // ---- Processor-side request (one entry per thread) ----
    input wire        i_p_initial_request;
    input wire        i_p_read_or_write;
    input wire        i_p_valid[`NT_M1:0];
    input wire [31:0] i_p_addr[`NT_M1:0];
    input wire [31:0] i_p_writedata[`NT_M1:0];

    // ---- Processor-side response ----
    output reg [31:0] o_p_readdata[`NT_M1:0];
    output reg        o_p_readdata_valid[`NT_M1:0];
    output reg        o_p_waitrequest;

    // ---- Memory-side request ----
    output reg [31:0] o_m_addr;
    output reg        o_m_valid;
    output reg        o_m_read_or_write;
    output reg [31:0] o_m_writedata[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0];

    // ---- Memory-side response ----
    input wire [31:0] i_m_readdata[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0];
    input wire        i_m_ready;

    // Packed mirrors of the unpacked ports above; these are what the inner
    // cache instance is actually wired to.
    wire [`NT_M1:0]       i_p_valid_inter;
    wire [`NT_M1:0][31:0] i_p_addr_inter;
    wire [`NT_M1:0][31:0] i_p_writedata_inter;
    reg  [`NT_M1:0][31:0] o_p_readdata_inter;
    reg  [`NT_M1:0]       o_p_readdata_valid_inter;

    wire [NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] i_m_readdata_inter;
    reg  [NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] o_m_writedata_inter;

    genvar thread_idx;
    genvar bank_idx;
    genvar word_idx;

    generate
        // Per-thread copy between unpacked ports and packed mirrors.
        for (thread_idx = 0; thread_idx < `NT; thread_idx = thread_idx + 1) begin
            assign i_p_valid_inter[thread_idx]     = i_p_valid[thread_idx];
            assign i_p_addr_inter[thread_idx]      = i_p_addr[thread_idx];
            assign i_p_writedata_inter[thread_idx] = i_p_writedata[thread_idx];

            assign o_p_readdata[thread_idx]        = o_p_readdata_inter[thread_idx];
            assign o_p_readdata_valid[thread_idx]  = o_p_readdata_valid_inter[thread_idx];
        end

        // Per-bank, per-word copy of the memory-side data buses.
        for (bank_idx = 0; bank_idx < NUMBER_BANKS; bank_idx = bank_idx + 1) begin
            for (word_idx = 0; word_idx < `NUM_WORDS_PER_BLOCK; word_idx = word_idx + 1) begin
                assign i_m_readdata_inter[bank_idx][word_idx] = i_m_readdata[bank_idx][word_idx];
                assign o_m_writedata[bank_idx][word_idx]      = o_m_writedata_inter[bank_idx][word_idx];
            end
        end
    endgenerate

    // The wrapped cache; scalar ports pass straight through.
    VX_d_cache dcache(
        .clk                (clk),
        .rst                (rst),

        .i_p_initial_request(i_p_initial_request),
        .i_p_read_or_write  (i_p_read_or_write),
        .i_p_valid          (i_p_valid_inter),
        .i_p_addr           (i_p_addr_inter),
        .i_p_writedata      (i_p_writedata_inter),

        .o_p_readdata       (o_p_readdata_inter),
        .o_p_readdata_valid (o_p_readdata_valid_inter),
        .o_p_waitrequest    (o_p_waitrequest),

        .o_m_addr           (o_m_addr),
        .o_m_valid          (o_m_valid),
        .o_m_read_or_write  (o_m_read_or_write),
        .o_m_writedata      (o_m_writedata_inter),

        .i_m_readdata       (i_m_readdata_inter),
        .i_m_ready          (i_m_ready)
    );

endmodule

58
rtl/cache/VX_d_cache_tb.v vendored Normal file
View file

@ -0,0 +1,58 @@
`include "VX_define.v"
`include "VX_d_cache.v"
// Skeleton test bench for VX_d_cache.
// It instantiates the cache, drives clk and reset to 0 at time zero and then
// toggles clk every 5 time units. No other stimulus is applied and nothing is
// checked: the request inputs declared below are never driven in this module.
module VX_d_cache_tb;
parameter NUMBER_BANKS = 8;
// clk/reset are initialised below; im_ready is never assigned here.
reg clk, reset, im_ready;
// Processor-side request signals (one bit / one word per thread).
reg [`NT_M1:0] i_p_valid;
// NOTE(review): only 14 address bits here, while VX_d_cache_encapsulate uses
// 32-bit addresses -- the original FIXME suggests this is stale; confirm.
reg [`NT_M1:0][13:0] i_p_addr; // FIXME
reg i_p_initial_request;
reg [`NT_M1:0][31:0] i_p_writedata;
reg i_p_read_or_write; //, i_p_write;
// Processor-side response signals, connected to the DUT's o_p_* ports.
reg [`NT_M1:0][31:0] o_p_readdata;
reg [`NT_M1:0] o_p_readdata_valid;
reg o_p_waitrequest;
// Memory-side request signals, connected to the DUT's o_m_* ports.
reg [13:0] o_m_addr; // Only one address is sent out at a time to memory
reg o_m_valid;
// NOTE(review): one 32-bit word per bank here; the encapsulating wrapper uses
// NUMBER_BANKS x NUM_WORDS_PER_BLOCK words -- widths may not match; confirm.
reg [(NUMBER_BANKS * 32) - 1:0] o_m_writedata;
reg o_m_read_or_write; //, o_m_write;
reg [(NUMBER_BANKS * 32) - 1:0] i_m_readdata; // Read Data that is passed from the memory module back to the controller
// Device under test; every port is connected by name.
VX_d_cache d_cache(.clk(clk),
.rst(reset),
.i_p_initial_request(i_p_initial_request),
.i_p_addr(i_p_addr),
.i_p_writedata(i_p_writedata),
.i_p_read_or_write(i_p_read_or_write), // 0 = Read | 1 = Write
.i_p_valid(i_p_valid),
.o_p_readdata(o_p_readdata),
.o_p_readdata_valid(o_p_readdata_valid),
.o_p_waitrequest(o_p_waitrequest), // 0 = all threads done | 1 = Still threads that need to
.o_m_addr(o_m_addr),
.o_m_writedata(o_m_writedata),
.o_m_read_or_write(o_m_read_or_write), // 0 = Read | 1 = Write
.o_m_valid(o_m_valid),
.i_m_readdata(i_m_readdata),
.i_m_ready(im_ready)
//cnt_r,
//cnt_w,
//cnt_hit_r,
//cnt_hit_w
);
// Time-zero initialisation. reset is set to 0 and never raised afterwards,
// so the DUT's reset input is never asserted by this bench.
initial
begin
clk = 0;
reset = 0;
end
// Free-running clock: inverts every 5 time units (period of 10).
always
#5 clk = ! clk;
endmodule

24
rtl/cache/VX_generic_pe.v vendored Normal file
View file

@ -0,0 +1,24 @@
// Combinational priority encoder.
//   valids : request vector, one bit per candidate
//   index  : position of the lowest-numbered set bit in valids (0 when none)
//   found  : 1 when at least one bit of valids is set, else 0
module VX_generic_pe
#(
    parameter N = 8
)
(
    input  wire[N-1:0]          valids,
    output reg[$clog2(N)-1:0]   index,
    output reg                  found
);

    parameter my_secret = 0;

    integer pos;

    always @(*) begin
        // Defaults cover the "no bit set" case.
        found = 1'b0;
        index = 0;
        // Scan upward and latch only the first hit, so the lowest set
        // position wins.
        for (pos = 0; pos < N; pos = pos + 1) begin
            if (!found && valids[pos]) begin
                index = pos[$clog2(N)-1:0];
                found = 1'b1;
            end
        end
    end

endmodule

201
rtl/cache/bank.v vendored Normal file
View file

@ -0,0 +1,201 @@
`include "VX_define.v"
//`include "cache_set.v"
`include "VX_Cache_Block_DM.v"
// Cache bank built from NUMBER_INDEXES instances of VX_Cache_Block_DM.
//
// For the incoming address the bank decodes which set is addressed and, from
// the controller `state`, which of four mutually exclusive per-set mode
// strobes (access / find_evict / write_from_mem / idle) to raise. It also
// reports whether any set missed and, when a set requests a dirty
// write-back, the reconstructed write-back address.
//
// Ports
//   clk, rst            clock and reset, forwarded to every set
//   state               controller state (compared against the localparams)
//   read_or_write       forwarded to each set's `write` input
//   addr                request address (tag = addr[31:10],
//                       block offset = addr[9:5], set select = addr[11:8])
//   writedata           word to write on a processor write
//   fetched_write_data  block fetched from memory, forwarded to every set
//   valid               request valid; gates all mode strobes
//   readdata            per-set read data
//   miss_cache          1 when any set reports a miss
//   w2m_needed          1 when any set requests an eviction write-back
//   w2m_addr            {tag of first evicting set, addr[9:0]}
//   e_data              evicted block data
//   ready               hit && no set reporting a miss
//
// Changes versus the previous revision:
//   * added the comma that was missing after `.actual_index (actual_index)`
//     in the set instantiation (the module did not parse);
//   * `genvar indeces` is now declared before its first use;
//   * removed the second, duplicated generate loop that drove
//     `set_to_access` a second time;
//   * nets that are driven by continuous assignments are declared `wire`
//     instead of `reg`.
//
// NOTE(review): addr[11:8] is only 4 bits, so with NUMBER_INDEXES = 64 only
// sets 0..15 can ever be selected, and the set-select bits overlap both the
// tag (addr[31:10]) and the block offset (addr[9:5]). Left as-is; confirm the
// intended address split.
// NOTE(review): every set instance drives the same `hit` and `e_data` nets.
// Whether that is safe depends on VX_Cache_Block_DM, which is not visible
// here -- confirm.
module bank(clk,
            rst,
            state,
            read_or_write,
            addr,
            writedata,
            fetched_write_data,
            valid,
            readdata,
            miss_cache,
            w2m_needed,
            w2m_addr,
            e_data,
            ready
           );

    parameter NUMBER_INDEXES = 64;

    // Controller state encodings (must match the values driven on `state`).
    localparam CACHE_IDLE             = 0; // Idle
    localparam SORT_BY_BANK           = 1; // Determines the bank each thread will access
    localparam CACHE_ACCESS           = 2; // Accesses the bank and checks if it is a hit or miss
    localparam FETCH_FROM_MEM         = 3; // Send a request to mem looking for read data
    localparam FETCH2                 = 4; // Stall until memory gets back with the data
    localparam UPDATE_CACHE           = 5; // Update the cache with the data read from mem
    localparam DIRTY_EVICT_GRAB_BLOCK = 6; // Grab the full block of dirty data
    localparam DIRTY_EVICT_WB         = 7; // Write back this block into memory
    localparam WB_FROM_MEM            = 8; // Currently unused

    input  wire                              clk, rst;
    input  wire                              read_or_write;
    input  wire [31:0]                       writedata;
    input  wire [31:0][31:0]                 fetched_write_data;
    input  wire [3:0]                        state;
    input  wire [31:0]                       addr;
    input  wire                              valid;

    output wire [NUMBER_INDEXES-1:0][31:0]   readdata;
    output wire                              ready;
    output wire                              miss_cache;
    output wire [31:0][31:0]                 e_data;
    output wire                              w2m_needed;
    output wire [31:0]                       w2m_addr;

    // Per-set status returned by the set instances.
    wire [NUMBER_INDEXES-1:0]                miss;
    wire [NUMBER_INDEXES-1:0]                e_wb;
    wire [NUMBER_INDEXES-1:0][21:0]          e_tag;
    wire                                     hit;

    // Per-set mode strobes; for each set exactly one of the four is high.
    wire [NUMBER_INDEXES-1:0]                set_to_access;
    wire [NUMBER_INDEXES-1:0]                set_find_evict;
    wire [NUMBER_INDEXES-1:0]                set_wfm;
    wire [NUMBER_INDEXES-1:0]                set_idle;

    wire [$clog2(NUMBER_INDEXES)-1:0]        actual_index;
    wire [$clog2(NUMBER_INDEXES)-1:0]        index_w2m_addr;
    wire                                     found_w2m_addr;

    genvar indeces;

    assign actual_index = addr[11:8];

    // Bank-level status.
    assign miss_cache = (miss != 0);
    assign ready      = hit && (miss == 0);

    generate
        for (indeces = 0; indeces < NUMBER_INDEXES; indeces = indeces + 1)
        begin
            // Mode decode: the addressed set follows the controller state,
            // every other set (and every set when !valid) is idle.
            assign set_to_access[indeces]  = ({28'b0,addr[11:8]} == indeces && state == CACHE_ACCESS           && valid) ? 1'b1 : 1'b0;
            assign set_find_evict[indeces] = ({28'b0,addr[11:8]} == indeces && state == DIRTY_EVICT_GRAB_BLOCK && valid) ? 1'b1 : 1'b0;
            assign set_wfm[indeces]        = ({28'b0,addr[11:8]} == indeces && state == UPDATE_CACHE           && valid) ? 1'b1 : 1'b0;
            assign set_idle[indeces]       = (!set_to_access[indeces] && !set_wfm[indeces] && !set_find_evict[indeces]) ? 1'b1 : 1'b0;

            VX_Cache_Block_DM set(
                .clk               (clk),
                .rst               (rst),
                .actual_index      (actual_index),
                .access            (set_to_access[indeces]),
                .find_evict        (set_find_evict[indeces]),
                .write_from_mem    (set_wfm[indeces]),
                .idle              (set_idle[indeces]),
                .o_tag             (addr[31:10]),
                .block_offset      (addr[9:5]),
                .writedata         (writedata),
                .write             (read_or_write),
                .fetched_writedata (fetched_write_data),
                .readdata          (readdata[indeces]),
                .hit               (hit),
                .eviction_wb       (e_wb[indeces]),
                .eviction_tag      (e_tag[indeces]),
                .evicted_data      (e_data),
                .miss              (miss[indeces])
            );
        end
    endgenerate

    // Pick the lowest-numbered set that is requesting a write-back and
    // rebuild its address from the evicted tag plus the low request bits.
    // FIXME (carried over from the original): the address reassembly still
    // needs to be confirmed against the final tag/index split.
    VX_generic_pe #(.N(NUMBER_INDEXES)) find_evicted(
        .valids (e_wb),
        .index  (index_w2m_addr),
        .found  (found_w2m_addr)
    );

    assign w2m_addr   = {e_tag[index_w2m_addr], addr[9:0]};
    assign w2m_needed = (e_wb != 0) ? 1'b1 : 1'b0;

endmodule

233
rtl/cache/cache_set.v vendored Normal file
View file

@ -0,0 +1,233 @@
// To Do: Change way_id_out to an internal register which holds when in between access and finished.
// Also add a bit about wheter the "Way ID" is valid / being held or if it is just default
// Also make sure all possible output states are transmitted back to the bank correctly
`include "VX_define.v"
// One 4-way cache set holding a single 32-bit word per way.
//
// Each way has a valid bit, a 2-bit tag, a "clean" bit and a data word.
// Exactly how the set behaves on a clock edge is selected by four mode
// inputs (access / find_evict / write_from_mem / idle) that the bank is
// expected to drive; this module does not itself check that they are
// mutually exclusive.
//
// Outputs hit, miss and readdata are registered (updated on posedge clk).
// eviction_wb and eviction_tag are combinational views of the way currently
// selected by counter[1:0].
//
// NOTE(review): the reset branch is empty, so valid/tag/clean/data/counter
// are never initialised by this module -- presumably they start as X in
// simulation; confirm how the set is expected to be cleared.
module cache_set(clk,
rst,
// These next 4 are possible modes that the Set could be in, I am making them 4 different variables for indexing purposes
access, // First
find_evict,
write_from_mem,
idle,
// entry,
o_tag,
writedata,
//byte_en,
write,
//word_en,
//way_id_in,
//way_id_out,
readdata,
//wb_addr,
hit,
eviction_wb,
eviction_tag,
//eviction_data,
//modify,
miss
//valid_data
//read_miss
);
// cache_entry is not referenced anywhere in this module.
parameter cache_entry = 14;
// Sizes the per-way arrays; the logic below still hard-codes ways 0..3.
parameter ways_per_set = 4;
input wire clk, rst;
// Mode strobes (see module header).
input wire access;
input wire find_evict;
input wire write_from_mem;
input wire idle;
//input wire [cache_entry-1:0] entry;
// Tag of the incoming request, compared against each way's stored tag.
input wire [1:0] o_tag;
input wire [31:0] writedata;
//input wire [3:0] byte_en;
input wire write; // 0 == False
//input wire [3:0] word_en;
//input wire read_miss;
//input wire [1:0] way_id_in;
//output reg [1:0] way_id_out;
output reg [31:0] readdata;
//output reg [3:0] hit;
output reg hit;
output reg miss;
output wire eviction_wb;
output wire [1:0] eviction_tag;
// Registered copy of the data in the way selected by counter[1:0];
// internal only (not a port).
reg [31:0] eviction_data;
//output wire [22:0] wb_addr;
//output wire modify, valid_data;
//wire [2:0] i_tag;
//dirty;
//wire [24-cache_entry:0] write_tag_data;
// Table for one set
reg [2:0] counter; // Determines which to evict
reg valid [ways_per_set-1:0];
reg [1:0] tag [ways_per_set-1:0];
reg clean [ways_per_set-1:0];
reg [31:0] data [ways_per_set-1:0];
// A write-back is requested when the set reports a miss and the way selected
// by counter[1:0] is valid and not clean.
assign eviction_wb = miss && clean[counter[1:0]] != 1'b1 && valid[counter[1:0]] == 1'b1;
assign eviction_tag = tag[counter[1:0]];
//assign eviction_data = data[counter[1:0]];
//assign hit = valid_data && (o_tag == i_tag);
//assign modify = valid_data && (o_tag != i_tag) && dirty;
//assign miss = !valid_data || ((o_tag != i_tag) && !dirty);
//assign wb_addr = {i_tag, entry};
// All state updates use nonblocking assignments in a single clocked block.
// When several statements below assign the same register in one cycle, the
// one that appears LAST in the block takes effect.
always @(posedge clk) begin
// Reset is intentionally (or accidentally) a no-op: nothing is cleared.
if (rst) begin
end
// find_evict takes priority over access (access is the else-branch).
// NOTE(review): the readdata chosen here is always overwritten by the second
// find_evict block near the end of this always block, which assigns readdata
// on every path -- this first lookup is effectively dead.
if (find_evict) begin
if (tag[0] == o_tag && valid[0]) begin
readdata <= data[0];
end else if (tag[1] == o_tag && valid[1]) begin
readdata <= data[1];
end else if (tag[2] == o_tag && valid[2]) begin
readdata <= data[2];
end else if (tag[3] == o_tag && valid[3]) begin
readdata <= data[3];
end
end else if (access) begin
//tag[`NT_M1:0] <= i_p_addr[`NT_M1:0][13:12];
// Same expression is assigned again unconditionally at the bottom of this
// block, so the counter advances on every clock, not only on access.
counter <= ((counter + 1) ^ 3'b100); // Counter determining which to evict in the event of miss only increment when miss !!! NEED TO FIX LOGIC
// The four branches below are identical apart from the way number.
// Tag match + read:  clean way  -> hit with data; non-clean way -> reported
//                    as a miss with readdata forced to 0.
// Tag match + write: data is overwritten and the way is marked not clean;
//                    hit is set, `miss` is left at its previous value.
// Hit in First Column
if (tag[0] == o_tag && valid[0]) begin
if (write == 1'b0) begin // if it is a read
if (clean[0] == 1'b1 ) begin
//hit <= 4'b0001;
hit <= 1'b1;
readdata <= data[0];
miss <= 1'b0;
end else begin
//hit <= 4'b0000; // SHOULD PROBABLY TRACK WHERE THIS MISS IS IN A DIFFERENT VARIABLE
hit <= 1'b0;
readdata <= 32'b0;
miss <= 1'b1;
end
end else if (write == 1'b1) begin
data[0] <= writedata;
clean[0] <= 1'b0;
//hit <= 4'b0001;
hit <= 1'b1;
end
end
// Hit in Second Column
else if (tag[1] == o_tag && valid[1]) begin
if (write == 1'b0) begin // if it is a read
if (clean[1] == 1'b1 ) begin
//hit <= 4'b0010;
hit <= 1'b1;
readdata <= data[1];
miss <= 1'b0;
end else begin
//hit <= 4'b0000;
hit <= 1'b0;
readdata <= 32'b0;
miss <= 1'b1;
end
end else if (write == 1'b1) begin
data[1] <= writedata;
clean[1] <= 1'b0;
//hit <= 4'b0010;
hit <= 1'b1;
end
end
// Hit in Third Column
else if (tag[2] == o_tag && valid[2]) begin
if (write == 1'b0) begin // if it is a read
if (clean[2] == 1'b1 ) begin
//hit <= 4'b0100;
hit <= 1'b1;
readdata <= data[2];
miss <= 1'b0;
end else begin
//hit <= 4'b0000;
hit <= 1'b0;
readdata <= 32'b0;
miss <= 1'b1;
end
end else if (write == 1'b1) begin
data[2] <= writedata;
clean[2] <= 1'b0;
//hit <= 4'b0100;
hit <= 1'b1;
end
end
// Hit in Fourth Column
else if (tag[3] == o_tag && valid[3]) begin
if (write == 1'b0) begin // if it is a read
if (clean[3] == 1'b1 ) begin
//hit <= 4'b1000;
hit <= 1'b1;
readdata <= data[3];
miss <= 1'b0;
end else begin
//hit <= 4'b0000;
hit <= 1'b0;
readdata <= 32'b0;
miss <= 1'b1;
end
end else if (write == 1'b1) begin
data[3] <= writedata;
clean[3] <= 1'b0;
//hit <= 4'b1000;
hit <= 1'b1;
end
end
// Miss
// No valid way matched the tag. The victim way (counter[1:0]) has its data
// and clean bit updated here, but its tag and valid bit are only written in
// the write_from_mem branch below. `hit` is not assigned on this path.
else begin
//way_id_out <= counter;
miss <= 1'b1;
if (write == 1'b0) begin // Read Miss
clean[counter[1:0]] <= 1'b1;
data[counter[1:0]] <= 32'h7FF; // FIX WITH ACTUAL MEMORY ACCESS
end else if (write == 1'b1) begin // Write Miss
// NOTE(review): marks the way clean although it now holds writedata; the
// write_from_mem branch below marks the same case not clean -- confirm which
// is intended.
clean[counter[1:0]] <= 1'b1;
data[counter[1:0]] <= writedata;
end
end
end
// Fill: install the request's tag in the victim way and mark it valid.
// Reads store the placeholder 32'h7FF; writes store writedata and clear
// the clean bit.
if (write_from_mem) begin
tag[counter[1:0]] <= o_tag;
valid[counter[1:0]] <= 1'b1;
hit <= 1'b1;
if (write == 1'b0) begin // Read Miss
clean[counter[1:0]] <= 1'b1;
data[counter[1:0]] <= 32'h7FF; // FIX WITH ACTUAL MEMORY ACCESS
end else if (write == 1'b1) begin // Write Miss
clean[counter[1:0]] <= 1'b0;
data[counter[1:0]] <= writedata;
end
end
// Idle: report "ready" (hit=1, miss=0) and drive readdata to all ones.
if (idle) begin // Set "way" register equal to invalid value
hit <= 1'b1; // set to know it is ready
miss <= 1'b0;
readdata <= 32'hFFFFFFFF;
end
// Second find_evict handler. Because it comes later in the block and assigns
// readdata on every path, it determines readdata whenever find_evict is high:
// the data of a valid way whose tag matches (checked in the order 3, 1, 2, 0),
// otherwise the registered eviction_data. Also forces hit=1, miss=0.
if (find_evict) begin // Keep "way" value the same !!!! Fix. Need to send back data with matching tag. Also need to ensure evicted data doesnt get lost
if (tag[3] == o_tag && valid[3]) begin
readdata <= data[3];
end else if (tag[1] == o_tag && valid[1]) begin
readdata <= data[1];
end else if (tag[2] == o_tag && valid[2]) begin
readdata <= data[2];
end else if (tag[0] == o_tag && valid[0]) begin
readdata <= data[0];
end else begin
readdata <= eviction_data;
end
hit <= 1'b1;
miss <= 1'b0;
end
// Unconditional per-clock updates: advance the victim counter and capture the
// data of the way that counter[1:0] pointed at before this edge.
counter <= ((counter + 1) ^ 3'b100); // Counter determining which to evict in the event of miss only increment when miss !!! NEED TO FIX LOGIC
eviction_data <= data[counter[1:0]];
end
endmodule

29
rtl/cache/d_cache_test_bench.cpp vendored Normal file
View file

@ -0,0 +1,29 @@
#include "d_cache_test_bench.h"
//#define NUM_TESTS 46
// Entry point of the data-cache test bench.
//
// Hands the command line to Verilator, enables tracing, runs the VX_d_cache
// harness once and prints the outcome on stderr.
//
// Returns 0 when simulate() reports success and 1 otherwise, so that scripts
// and Makefiles invoking the bench can detect a failed run (previously the
// exit status was always 0).
int main(int argc, char **argv)
{
    Verilated::commandArgs(argc, argv);
    Verilated::traceEverOn(true);

    VX_d_cache v;
    const bool passed = v.simulate();

    if (passed) {
        std::cerr << GREEN << "Test Passed: " << std::endl;
    } else {
        std::cerr << RED << "Test Failed: " << std::endl;
    }

    return passed ? 0 : 1;
}

355
rtl/cache/d_cache_test_bench.h vendored Normal file
View file

@ -0,0 +1,355 @@
// C++ libraries
#include <utility>
#include <iostream>
#include <map>
#include <iterator>
#include <iomanip>
#include <fstream>
#include <unistd.h>
#include <vector>
#include <math.h>
#include <algorithm>
#include "VX_define.h"
#include "VVX_d_cache_encapsulate.h"
#include "verilated.h"
#include "d_cache_test_bench_debug.h"
#ifdef VCD_OUTPUT
#include <verilated_vcd_c.h>
#endif
// void set_Index (auto & var, int index, int size, auto val)
// {
// int real_shift
// }
// Verilator harness around the VVX_d_cache_encapsulate model.
//
// The object owns the model (and, when VCD_OUTPUT is defined, the VCD trace
// writer) through raw pointers that are allocated in the constructor and
// released in the destructor. Copying is therefore disabled: an implicit copy
// would leave two objects deleting the same model.
class VX_d_cache
{
public:
    VX_d_cache();
    ~VX_d_cache();

    // Owning raw pointers -> non-copyable.
    VX_d_cache(const VX_d_cache &) = delete;
    VX_d_cache &operator=(const VX_d_cache &) = delete;

    // Runs the whole stimulus sequence; see the definition for the result.
    bool simulate();

    // Applies the stimulus for step `counter_value` when `do_op` is true,
    // services a pending memory request, and returns true when the cache
    // is not asserting o_p_waitrequest.
    bool operation(int counter_value, bool do_op);

    VVX_d_cache_encapsulate * vx_d_cache_;  // model under test (owned)

    long int curr_cycle = 0;
    int stats_total_cycles = 0;     // clock cycles simulated so far
    int stats_dram_accesses = 0;    // number of memory requests serviced

#ifdef VCD_OUTPUT
    VerilatedVcdC *m_trace;         // VCD writer (owned)
#endif
};
// Builds the harness: zeroes the counters, allocates the Verilated model and,
// when VCD_OUTPUT is defined, attaches a VCD writer (trace depth 99) that
// dumps to "trace.vcd" in the current directory.
VX_d_cache::VX_d_cache()
    : curr_cycle(0),
      stats_total_cycles(0),
      stats_dram_accesses(0)
{
    vx_d_cache_ = new VVX_d_cache_encapsulate;

#ifdef VCD_OUTPUT
    m_trace = new VerilatedVcdC;
    vx_d_cache_->trace(m_trace, 99);
    m_trace->open("trace.vcd");
#endif
}
// Releases everything the constructor allocated.
//
// The trace writer is closed (flushing the VCD file) and freed BEFORE the
// model it is attached to is destroyed; previously the model was deleted
// first and the VerilatedVcdC object was never deleted at all (leak).
VX_d_cache::~VX_d_cache()
{
#ifdef VCD_OUTPUT
    if (m_trace) {
        m_trace->close();
        delete m_trace;
        m_trace = nullptr;
    }
#endif

    delete this->vx_d_cache_;
    this->vx_d_cache_ = nullptr;
}
// Drives one step of the test sequence and services the memory interface.
//
//   counter_value  index of the stimulus step (0..4); other values apply no
//                  new request
//   do_op          when true the step's request is (re)applied and
//                  i_p_initial_request is raised; when false the request
//                  inputs are left untouched and i_p_initial_request is 0
//
// Returns true when the model's o_p_waitrequest is 0 after this call.
//
// Side effects: when the model asserts o_m_valid, one extra clock cycle is
// simulated here (stats_total_cycles and stats_dram_accesses are bumped) and
// a synthetic block is placed on i_m_readdata with i_m_ready = 1.
bool VX_d_cache::operation(int counter_value, bool do_op) {
    // One request per step: direction plus per-thread address / write data.
    // Column k is used for thread k; threads >= 3 all use column 3.
    struct Stimulus {
        int          is_write;      // value driven on i_p_read_or_write
        unsigned int addr[4];
        unsigned int data[4];
    };

    static const Stimulus kSteps[5] = {
        // step 0: write (original annotation: banks 1-4, index 64; the
        //         thread-3 access is serviced first, then the other three)
        { 1, { 0x30001004, 0x30001008, 0x3000100c, 0x30010010 },
             { 0x7f6f8f6f, 0x7f6f8f6f, 0x7f6f8f6f, 0x7f6f8f6f } },
        // step 1: write (original annotation: banks 4-7, index 108)
        { 1, { 0x30001c14, 0x30001c18, 0x30001c1c, 0x30001c10 },
             { 0xd1d2d2d3, 0xd1d2d2d3, 0xd1d2d2d3, 0xd1d2d2d3 } },
        // step 2: read back a mix of the addresses written in steps 0 and 1
        { 0, { 0x30001004, 0x30001c18, 0x3000100c, 0x30001c1c },
             { 0x23232332, 0x23232332, 0x23232332, 0x23232332 } },
        // step 3: write with a different upper address (original annotation:
        //         evictions will need to take place)
        { 1, { 0x20001004, 0x20001008, 0x2000100c, 0x20001c14 },
             { 0xaaaabbb0, 0xaaaabbb1, 0xaaaabbb2, 0xaaaabbb3 } },
        // step 4: read the addresses written in step 3
        { 0, { 0x20001004, 0x20001008, 0x2000100c, 0x20001c14 },
             { 0x23232332, 0x23232332, 0x23232332, 0x23232332 } },
    };

    /* Alternative steps 3/4 kept from the original (disabled); they check
       multiple threads writing to the same block:
         step 3: write, addr { 0x30001f00, 0x30001c00, 0x30001a00, 0x30001904 },
                        data { 0xaaaabbb0, 0xaaaabbb1, 0xaaaabbb2, 0xaaaabbb3 }
         step 4: read,  addr { 0x30001f00, 0x30001c00, 0x30001a00, 0x30001904 },
                        data 0x23232332 for every thread
    */

    vx_d_cache_->i_p_initial_request = do_op ? 1 : 0;

    if (do_op && counter_value >= 0 && counter_value < 5) {
        const Stimulus &step = kSteps[counter_value];

        vx_d_cache_->i_p_initial_request = 1;
        vx_d_cache_->i_p_read_or_write   = step.is_write;
        vx_d_cache_->i_m_ready           = 0;

        for (int tid = 0; tid < NT; tid++) {
            const int col = (tid < 3) ? tid : 3;

            vx_d_cache_->i_p_valid[tid]       = 1;
            vx_d_cache_->i_p_writedata[tid]   = step.data[col];
            vx_d_cache_->i_m_readdata[tid][0] = 1;
            vx_d_cache_->i_p_addr[col]        = step.addr[col];
        }
    }

    // Handle Memory Accesses
    // The value is computed before stats_total_cycles is advanced below.
    unsigned int read_data_from_mem = 0x1111 + counter_value + this->stats_total_cycles;

    if (!vx_d_cache_->o_m_valid) {
        vx_d_cache_->i_m_ready = 0;
    } else {
        // (assuming memory access takes 20 cycles)
        this->stats_dram_accesses = this->stats_dram_accesses + 1;
        this->stats_total_cycles += 1;

        // Spend one full clock cycle before answering the request.
        vx_d_cache_->clk = 0;
        vx_d_cache_->eval();
        #ifdef VCD_OUTPUT
        m_trace->dump(2*this->stats_total_cycles);
        #endif

        vx_d_cache_->clk = 1;
        vx_d_cache_->eval();
        #ifdef VCD_OUTPUT
        m_trace->dump((2*this->stats_total_cycles)+1);
        #endif

        // Answer with the same synthetic word in every bank/word slot.
        vx_d_cache_->i_m_ready = 1;
        for (int bank = 0; bank < 8; bank++) {
            for (int word = 0; word < 4; word++) {
                vx_d_cache_->i_m_readdata[bank][word] = read_data_from_mem;
            }
        }
    }

    return vx_d_cache_->o_p_waitrequest == 0;
}
// Runs the five-step stimulus sequence against the model.
//
// Each loop iteration is one clock cycle: the model is evaluated with clk
// high, operation() applies/continues the current step and services memory,
// then the model is evaluated with clk low. A step is considered finished
// when operation() returns true; the next step is then issued.
//
// Returns true when all steps completed, false when the 5000-cycle watchdog
// stopped the run first (previously the function returned true in both
// cases, so a hung cache was reported as a pass).
//
// Always prints the cycle total on stderr, charging 20 extra cycles per
// memory access.
bool VX_d_cache::simulate()
{
    const int kNumOperations = 5;
    const int kMaxCycles     = 5000;

    vx_d_cache_->clk = 0;
    // NOTE(review): rst is raised and lowered with no eval() in between, so
    // the model never observes an asserted reset. Left unchanged; confirm
    // whether a real reset pulse is wanted.
    vx_d_cache_->rst = 1;
    vx_d_cache_->rst = 0;

    bool out_operation = false;
    bool do_operation  = true;
    int  other_counter = 0;

    while (other_counter < kNumOperations)
    {
        vx_d_cache_->clk = 1;
        vx_d_cache_->eval();
        #ifdef VCD_OUTPUT
        m_trace->dump(2*this->stats_total_cycles);
        #endif

        out_operation = operation(other_counter, do_operation);

        vx_d_cache_->clk = 0;
        vx_d_cache_->eval();
        #ifdef VCD_OUTPUT
        m_trace->dump((2*this->stats_total_cycles)+1);
        #endif

        if (out_operation) {
            // Step finished: move on and issue the next request.
            other_counter++;
            do_operation = true;
        } else {
            // Step still in flight: do not re-issue the request.
            do_operation = false;
        }

        ++(this->stats_total_cycles);

        // Watchdog against a cache that never deasserts waitrequest.
        if (this->stats_total_cycles > kMaxCycles) {
            break;
        }
    }

    std::cerr << "New Total Cycles: " << (this->stats_total_cycles + (this->stats_dram_accesses * 20)) << "\n";

    return other_counter >= kNumOperations;
}

1
rtl/cache/d_cache_test_bench_debug.h vendored Normal file
View file

@ -0,0 +1 @@
#define VCD_OUTPUT

View file

@ -0,0 +1,24 @@
`include "../VX_define.v"
`ifndef VX_DRAM_REQ_RSP_INTER
`define VX_DRAM_REQ_RSP_INTER
// Signal bundle between the data-memory controller and DRAM.
// Data buses carry NUMBER_BANKS x NUM_WORDS_PER_BLOCK 32-bit words.
interface VX_dram_req_rsp_inter ();

    // ---- Request ----
    wire        o_m_valid;
    wire        o_m_read_or_write;
    wire [31:0] o_m_addr;
    wire [`NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] o_m_writedata;

    // ---- Response ----
    wire        i_m_ready;
    wire [`NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] i_m_readdata;

endinterface
`endif

View file

@ -33,10 +33,10 @@ module VX_shared_memory_block (
// wire[3:0][31:0] write_bit_mask;
// assign write_bit_mask[0] = (we == 2'b00) ? 0 : {32{1'b1}};
// assign write_bit_mask[1] = (we == 2'b01) ? 0 : {32{1'b1}};
// assign write_bit_mask[2] = (we == 2'b10) ? 0 : {32{1'b1}};
// assign write_bit_mask[3] = (we == 2'b11) ? 0 : {32{1'b1}};
// assign write_bit_mask[0] = (we == 2'b00) ? 1 : {32{1'b0}};
// assign write_bit_mask[1] = (we == 2'b01) ? 1 : {32{1'b0}};
// assign write_bit_mask[2] = (we == 2'b10) ? 1 : {32{1'b0}};
// assign write_bit_mask[3] = (we == 2'b11) ? 1 : {32{1'b0}};
// // Using ASIC MEM
// /* verilator lint_off PINCONNECTEMPTY */

View file

@ -3,7 +3,7 @@ set link_library [concat * sc12mc_cln28hpm_base_ulvt_c35_ssg_typical_max_0p81v_
set symbol_library {}
set target_library [concat sc12mc_cln28hpm_base_ulvt_c35_ssg_typical_max_0p81v_m40c.db]
set verilog_files [ list VX_bank_valids.v VX_priority_encoder_sm.v VX_set_bit.v VX_shared_memory.v VX_shared_memory_block.v VX_dmem_controller.v VX_generic_priority_encoder.v VX_generic_stack.v VX_join_inter.v VX_csr_wrapper.v VX_csr_req_inter.v VX_csr_wb_inter.v VX_gpgpu_inst.v VX_gpu_inst_req_inter.v VX_wstall_inter.v VX_inst_exec_wb_inter.v VX_lsu.v VX_execute_unit.v VX_lsu_addr_gen.v VX_inst_multiplex.v VX_exec_unit_req_inter.v VX_lsu_req_inter.v VX_alu.v VX_back_end.v VX_gpr_stage.v VX_gpr_data_inter.v VX_csr_handler.v VX_decode.v VX_define.v VX_scheduler.v VX_fetch.v VX_front_end.v VX_generic_register.v VX_gpr.v VX_gpr_wrapper.v VX_one_counter.v VX_priority_encoder.v VX_warp.v VX_warp_scheduler.v VX_writeback.v Vortex.v byte_enabled_simple_dual_port_ram.v VX_branch_response_inter.v VX_dcache_request_inter.v VX_dcache_response_inter.v VX_frE_to_bckE_req_inter.v VX_gpr_clone_inter.v VX_gpr_jal_inter.v VX_gpr_read_inter.v VX_gpr_wspawn_inter.v VX_icache_request_inter.v VX_icache_response_inter.v VX_inst_mem_wb_inter.v VX_inst_meta_inter.v VX_jal_response_inter.v VX_mem_req_inter.v VX_mw_wb_inter.v VX_warp_ctl_inter.v VX_wb_inter.v VX_d_e_reg.v VX_f_d_reg.v \
set verilog_files [ list VX_dram_req_rsp_inter.v bank.v cache_set.v VX_Cache_Bank.v VX_Cache_Block_DM.v VX_cache_data.v VX_d_cache.v VX_generic_pc.v VX_bank_valids.v VX_priority_encoder_sm.v VX_set_bit.v VX_shared_memory.v VX_shared_memory_block.v VX_dmem_controller.v VX_generic_priority_encoder.v VX_generic_stack.v VX_join_inter.v VX_csr_wrapper.v VX_csr_req_inter.v VX_csr_wb_inter.v VX_gpgpu_inst.v VX_gpu_inst_req_inter.v VX_wstall_inter.v VX_inst_exec_wb_inter.v VX_lsu.v VX_execute_unit.v VX_lsu_addr_gen.v VX_inst_multiplex.v VX_exec_unit_req_inter.v VX_lsu_req_inter.v VX_alu.v VX_back_end.v VX_gpr_stage.v VX_gpr_data_inter.v VX_csr_handler.v VX_decode.v VX_define.v VX_scheduler.v VX_fetch.v VX_front_end.v VX_generic_register.v VX_gpr.v VX_gpr_wrapper.v VX_one_counter.v VX_priority_encoder.v VX_warp.v VX_warp_scheduler.v VX_writeback.v Vortex.v byte_enabled_simple_dual_port_ram.v VX_branch_response_inter.v VX_dcache_request_inter.v VX_dcache_response_inter.v VX_frE_to_bckE_req_inter.v VX_gpr_clone_inter.v VX_gpr_jal_inter.v VX_gpr_read_inter.v VX_gpr_wspawn_inter.v VX_icache_request_inter.v VX_icache_response_inter.v VX_inst_mem_wb_inter.v VX_inst_meta_inter.v VX_jal_response_inter.v VX_mem_req_inter.v VX_mw_wb_inter.v VX_warp_ctl_inter.v VX_wb_inter.v VX_d_e_reg.v VX_f_d_reg.v \
]
analyze -format sverilog $verilog_files