mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 13:27:29 -04:00
multicore fix
This commit is contained in:
parent
359601cfd3
commit
cc84e0691c
40 changed files with 27930 additions and 28148 deletions
|
@ -7,7 +7,7 @@ CFLAGS += -I../../include -I../../../hw/simulate -I../../../runtime
|
|||
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2
|
||||
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
||||
|
||||
DEBUG = 1
|
||||
#DEBUG = 1
|
||||
|
||||
CFLAGS += -fPIC
|
||||
|
||||
|
@ -21,6 +21,10 @@ RTL_INCLUDE = -I../../hw/rtl -I../../hw/rtl/libs -I../../hw/rtl/interfaces -I../
|
|||
|
||||
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic $(MULTICORE)
|
||||
|
||||
# Use 64 bytes DRAM blocks
|
||||
CFLAGS += -DGLOBAL_BLOCK_SIZE=64
|
||||
VL_FLAGS += -DGLOBAL_BLOCK_SIZE=64
|
||||
|
||||
# Enable Verilator multithreaded simulation
|
||||
#THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
|
||||
#VL_FLAGS += --threads $(THREADS)
|
||||
|
|
|
@ -60,9 +60,7 @@ private:
|
|||
class vx_device {
|
||||
public:
|
||||
vx_device()
|
||||
: is_done_(false)
|
||||
, simulator_(&ram_) {
|
||||
simulator_.reset();
|
||||
: is_done_(false) {
|
||||
thread_ = new std::thread(__thread_proc__, this);
|
||||
mem_allocation_ = vx_dev_caps(VX_CAPS_ALLOC_BASE_ADDR);
|
||||
}
|
||||
|
@ -95,7 +93,7 @@ public:
|
|||
|
||||
/*printf("VXDRV: upload %d bytes to 0x%x\n", size, dest_addr);
|
||||
for (int i = 0; i < size; i += 4) {
|
||||
printf("mem-write: 0x%x <- 0x%x\n", dest_addr + i, *(uint32_t*)((uint8_t*)src + src_offset + i));
|
||||
printf("mem-write: 0x%x <- 0x%x\n", uint32_t(dest_addr + i), *(uint32_t*)((uint8_t*)src + src_offset + i));
|
||||
}*/
|
||||
|
||||
ram_.write(dest_addr, asize, (uint8_t*)src + src_offset);
|
||||
|
@ -111,7 +109,7 @@ public:
|
|||
|
||||
/*printf("VXDRV: download %d bytes from 0x%x\n", size, src_addr);
|
||||
for (int i = 0; i < size; i += 4) {
|
||||
printf("mem-read: 0x%x -> 0x%x\n", src_addr + i, *(uint32_t*)((uint8_t*)dest + dest_offset + i));
|
||||
printf("mem-read: 0x%x -> 0x%x\n", uint32_t(src_addr + i), *(uint32_t*)((uint8_t*)dest + dest_offset + i));
|
||||
}*/
|
||||
|
||||
return 0;
|
||||
|
@ -120,7 +118,9 @@ public:
|
|||
int flush_caches(size_t dev_maddr, size_t size) {
|
||||
|
||||
mutex_.lock();
|
||||
simulator_.attach_ram(&ram_);
|
||||
simulator_.flush_caches(dev_maddr, size);
|
||||
simulator_.attach_ram(nullptr);
|
||||
mutex_.unlock();
|
||||
|
||||
return 0;
|
||||
|
@ -130,7 +130,8 @@ public:
|
|||
|
||||
mutex_.lock();
|
||||
simulator_.reset();
|
||||
mutex_.unlock();
|
||||
simulator_.attach_ram(&ram_);
|
||||
mutex_.unlock();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -142,8 +143,14 @@ public:
|
|||
bool is_busy = simulator_.is_busy();
|
||||
mutex_.unlock();
|
||||
|
||||
if (!is_busy || 0 == timeout_sec--)
|
||||
if (!is_busy || 0 == timeout_sec--) {
|
||||
if (!is_busy) {
|
||||
mutex_.lock();
|
||||
simulator_.attach_ram(nullptr);
|
||||
mutex_.unlock();
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::seconds(1));
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#include <iostream>
|
||||
#include <unistd.h>
|
||||
#include <vortex.h>
|
||||
#include "common.h"
|
||||
|
||||
int test = -1;
|
||||
|
||||
|
@ -61,7 +62,7 @@ int run_memcopy_test(vx_buffer_h sbuf,
|
|||
int errors = 0;
|
||||
|
||||
// write sbuf data
|
||||
for (int i = 0; i < 8 * num_blocks; ++i) {
|
||||
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
|
||||
((uint64_t*)vx_host_ptr(sbuf))[i] = shuffle(i, value);
|
||||
}
|
||||
|
||||
|
@ -75,12 +76,12 @@ int run_memcopy_test(vx_buffer_h sbuf,
|
|||
|
||||
// verify result
|
||||
std::cout << "verify result" << std::endl;
|
||||
for (int i = 0; i < 8 * num_blocks; ++i) {
|
||||
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
|
||||
auto curr = ((uint64_t*)vx_host_ptr(dbuf))[i];
|
||||
auto ref = shuffle(i, value);
|
||||
if (curr != ref) {
|
||||
std::cout << "error @ " << std::hex << (address + 64 * i)
|
||||
<< ": actual " << curr << ", expected " << ref << std::endl;
|
||||
std::cout << "error at 0x" << std::hex << (address + 8 * i)
|
||||
<< ": actual 0x" << curr << ", expected 0x" << ref << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -101,13 +102,13 @@ int run_kernel_test(vx_device_h device,
|
|||
int errors = 0;
|
||||
|
||||
uint64_t seed = 0x0badf00d40ff40ff;
|
||||
int num_blocks = 4;
|
||||
|
||||
unsigned src_dev_addr = 0x10000000;
|
||||
unsigned dest_dev_addr = 0x20000000;
|
||||
|
||||
int src_dev_addr = DEV_MEM_SRC_ADDR;
|
||||
int dest_dev_addr = DEV_MEM_DST_ADDR;
|
||||
int num_blocks = NUM_BLOCKS;
|
||||
|
||||
// write sbuf data
|
||||
for (int i = 0; i < 8 * num_blocks; ++i) {
|
||||
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
|
||||
((uint64_t*)vx_host_ptr(sbuf))[i] = shuffle(i, seed);
|
||||
}
|
||||
|
||||
|
@ -137,12 +138,12 @@ int run_kernel_test(vx_device_h device,
|
|||
|
||||
// verify result
|
||||
std::cout << "verify result" << std::endl;
|
||||
for (int i = 0; i < 8 * num_blocks; ++i) {
|
||||
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
|
||||
auto curr = ((uint64_t*)vx_host_ptr(dbuf))[i];
|
||||
auto ref = shuffle(i, seed);
|
||||
if (curr != ref) {
|
||||
std::cout << "error @ " << std::hex << (dest_dev_addr + 64 * i)
|
||||
<< ": actual " << curr << ", expected " << ref << std::endl;
|
||||
std::cout << "error at 0x" << std::hex << (dest_dev_addr + 8 * i)
|
||||
<< ": actual 0x" << curr << ", expected 0x" << ref << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -174,10 +175,10 @@ int main(int argc, char *argv[]) {
|
|||
RT_CHECK(vx_alloc_shared_mem(device, 4096, &dbuf));
|
||||
|
||||
// run tests
|
||||
/*9if (0 == test || -1 == test) {
|
||||
/*if (0 == test || -1 == test) {
|
||||
std::cout << "run memcopy test" << std::endl;
|
||||
RT_CHECK(run_memcopy_test(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 1));
|
||||
RT_CHECK(run_memcopy_test(sbuf, dbuf, 0x20000000, 0x0badf00d40ff40ff, 8));
|
||||
RT_CHECK(run_memcopy_test(sbuf, dbuf, DEV_MEM_SRC_ADDR, 0x0badf00d00ff00ff, 1));
|
||||
RT_CHECK(run_memcopy_test(sbuf, dbuf, DEV_MEM_DST_ADDR, 0x0badf00d40ff40ff, 8));
|
||||
}*/
|
||||
|
||||
if (1 == test || -1 == test) {
|
||||
|
|
8
driver/tests/basic/common.h
Normal file
8
driver/tests/basic/common.h
Normal file
|
@ -0,0 +1,8 @@
|
|||
#ifndef _COMMON_H_
|
||||
#define _COMMON_H_
|
||||
|
||||
#define DEV_MEM_SRC_ADDR 0x10000000
|
||||
#define DEV_MEM_DST_ADDR 0x20000000
|
||||
#define NUM_BLOCKS 16
|
||||
|
||||
#endif
|
Binary file not shown.
|
@ -1,9 +1,20 @@
|
|||
#include <stdint.h>
|
||||
#include "config.h"
|
||||
#include "intrinsics/vx_intrinsics.h"
|
||||
#include "common.h"
|
||||
|
||||
void main() {
|
||||
int64_t* x = (int64_t*)0x10000000;
|
||||
int64_t* y = (int64_t*)0x20000000;
|
||||
for (int i = 0; i < 8 * 4; ++i) {
|
||||
y[i] = x[i];
|
||||
int64_t* x = (int64_t*)DEV_MEM_SRC_ADDR;
|
||||
int64_t* y = (int64_t*)DEV_MEM_DST_ADDR;
|
||||
int num_words = (NUM_BLOCKS * 64) / 8;
|
||||
|
||||
int core_id = vx_core_id();
|
||||
int num_cores = vx_num_cores();
|
||||
int num_words_per_core = num_words / num_cores;
|
||||
|
||||
int offset = core_id * num_words_per_core;
|
||||
|
||||
for (int i = 0; i < num_words_per_core; ++i) {
|
||||
y[offset + i] = x[offset + i];
|
||||
}
|
||||
}
|
|
@ -49,7 +49,7 @@ run-ase: $(PROJECT)
|
|||
ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
|
||||
|
||||
run-rtlsim: $(PROJECT)
|
||||
LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
|
||||
LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 1
|
||||
|
||||
run-simx: $(PROJECT)
|
||||
LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
|
||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -6,6 +6,10 @@ VF += --language 1800-2009 --assert -Wall -Wpedantic
|
|||
|
||||
VF += -exe $(SRCS) $(INCLUDE)
|
||||
|
||||
# Use 64 bytes DRAM blocks
|
||||
CF += -DGLOBAL_BLOCK_SIZE=64
|
||||
VF += -DGLOBAL_BLOCK_SIZE=64
|
||||
|
||||
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2
|
||||
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
||||
|
||||
|
|
|
@ -73,7 +73,9 @@ module VX_back_end #(
|
|||
|
||||
assign ebreak = exec_unit_req_if.is_etype && (| exec_unit_req_if.valid);
|
||||
|
||||
VX_lsu_unit lsu_unit (
|
||||
VX_lsu_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) lsu_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
|
|
|
@ -57,8 +57,12 @@
|
|||
|
||||
`define CSR_LTID 12'h020
|
||||
`define CSR_LWID 12'h021
|
||||
`define CSR_GWID 12'h022
|
||||
`define CSR_GTID 12'h023
|
||||
`define CSR_GTID 12'h022
|
||||
`define CSR_GWID 12'h023
|
||||
`define CSR_GCID 12'h024
|
||||
`define CSR_NT 12'h025
|
||||
`define CSR_NW 12'h026
|
||||
`define CSR_NC 12'h027
|
||||
|
||||
`define CSR_CYCLL 12'hC00
|
||||
`define CSR_CYCLH 12'hC80
|
||||
|
|
|
@ -46,6 +46,10 @@ module VX_csr_data #(
|
|||
`CSR_LWID : read_data = 32'(warp_num);
|
||||
`CSR_GTID ,
|
||||
`CSR_GWID : read_data = CORE_ID * `NUM_WARPS + 32'(warp_num);
|
||||
`CSR_GCID : read_data = CORE_ID;
|
||||
`CSR_NT : read_data = `NUM_THREADS;
|
||||
`CSR_NW : read_data = `NUM_WARPS;
|
||||
`CSR_NC : read_data = `NUM_CORES * `NUM_CLUSTERS;
|
||||
`CSR_CYCLL : read_data = num_cycles[31:0];
|
||||
`CSR_CYCLH : read_data = num_cycles[63:32];
|
||||
`CSR_INSTL : read_data = num_instrs[31:0];
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_front_end (
|
||||
module VX_front_end #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
@ -58,7 +60,9 @@ module VX_front_end (
|
|||
.fd_inst_meta_de (fe_inst_meta_fi2)
|
||||
);
|
||||
|
||||
VX_icache_stage icache_stage (
|
||||
VX_icache_stage #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) icache_stage (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.total_freeze (total_freeze),
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_icache_stage (
|
||||
module VX_icache_stage #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire total_freeze,
|
||||
|
@ -26,7 +28,7 @@ module VX_icache_stage (
|
|||
// Icache Request
|
||||
assign icache_req_if.core_req_valid = valid_inst;
|
||||
assign icache_req_if.core_req_addr = fe_inst_meta_fi.inst_pc;
|
||||
assign icache_req_if.core_req_data = 'z;
|
||||
assign icache_req_if.core_req_data = 0;
|
||||
assign icache_req_if.core_req_read = `BYTE_EN_LW;
|
||||
assign icache_req_if.core_req_write = `BYTE_EN_NO;
|
||||
assign icache_req_if.core_req_tag = {fe_inst_meta_fi.inst_pc, 2'b1, 5'b0, fe_inst_meta_fi.warp_num};
|
||||
|
@ -50,8 +52,6 @@ module VX_icache_stage (
|
|||
// Core can't accept response
|
||||
assign icache_rsp_if.core_rsp_ready = ~total_freeze;
|
||||
|
||||
integer i;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
//--
|
||||
|
@ -64,10 +64,10 @@ module VX_icache_stage (
|
|||
|
||||
/*always_comb begin
|
||||
if (1'($time & 1) && icache_req_if.core_req_ready && icache_req_if.core_req_valid) begin
|
||||
$display("*** %t: I$ req: pc=%0h, warp=%d", $time, fe_inst_meta_fi.inst_pc, fe_inst_meta_fi.warp_num);
|
||||
$display("*** %t: I%01d$ req: pc=%0h, warp=%0d", $time, CORE_ID, fe_inst_meta_fi.inst_pc, fe_inst_meta_fi.warp_num);
|
||||
end
|
||||
if (1'($time & 1) && icache_rsp_if.core_rsp_ready && icache_rsp_if.core_rsp_valid) begin
|
||||
$display("*** %t: I$ rsp: pc=%0h, warp=%d, instr=%0h", $time, fe_inst_meta_id.inst_pc, fe_inst_meta_id.warp_num, fe_inst_meta_id.instruction);
|
||||
$display("*** %t: I%01d$ rsp: pc=%0h, warp=%0d, instr=%0h", $time, CORE_ID, fe_inst_meta_id.inst_pc, fe_inst_meta_id.warp_num, fe_inst_meta_id.instruction);
|
||||
end
|
||||
end*/
|
||||
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_lsu_unit (
|
||||
module VX_lsu_unit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
@ -62,10 +64,10 @@ module VX_lsu_unit (
|
|||
|
||||
/*always_comb begin
|
||||
if (1'($time & 1) && dcache_req_if.core_req_ready && (| dcache_req_if.core_req_valid)) begin
|
||||
$display("*** %t: D$ req: valid=%b, addr=%0h, r=%d, w=%d, pc=%0h, rd=%d, warp=%d, data=%0h", $time, use_valid, use_address, use_mem_read, use_mem_write, use_pc, use_rd, use_warp_num, use_store_data);
|
||||
$display("*** %t: D%01d$ req: valid=%b, addr=%0h, r=%0d, w=%0d, pc=%0h, rd=%0d, warp=%0d, data=%0h", $time, CORE_ID, use_valid, use_address, use_mem_read, use_mem_write, use_pc, use_rd, use_warp_num, use_store_data);
|
||||
end
|
||||
if (1'($time & 1) && dcache_rsp_if.core_rsp_ready && (| dcache_rsp_if.core_rsp_valid)) begin
|
||||
$display("*** %t: D$ rsp: valid=%b, pc=%0h, rd=%d, warp=%d, data=%0h", $time, mem_wb_if.valid, mem_wb_if.pc, mem_wb_if.rd, mem_wb_if.warp_num, mem_wb_if.data);
|
||||
$display("*** %t: D%01d$ rsp: valid=%b, pc=%0h, rd=%0d, warp=%0d, data=%0h", $time, CORE_ID, mem_wb_if.valid, mem_wb_if.pc, mem_wb_if.rd, mem_wb_if.warp_num, mem_wb_if.data);
|
||||
end
|
||||
end*/
|
||||
|
||||
|
|
|
@ -178,7 +178,9 @@ module Vortex #(
|
|||
assign dcache_snp_req_if.snp_req_addr = snp_req_addr;
|
||||
assign snp_req_ready = dcache_snp_req_if.snp_req_ready;
|
||||
|
||||
VX_front_end front_end (
|
||||
VX_front_end #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) front_end (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
|
|
|
@ -281,4 +281,13 @@ module Vortex_Socket (
|
|||
);
|
||||
end
|
||||
|
||||
/*always_comb begin
|
||||
if (1'($time & 1) && (dram_req_read || dram_req_write) && dram_req_ready) begin
|
||||
$display("*** %t: DRAM req: w=%b addr=%0h, tag=%0h, data=%0h", $time, dram_req_write, dram_req_addr, dram_req_tag, dram_req_data);
|
||||
end
|
||||
if (1'($time & 1) && dram_rsp_valid && dram_rsp_ready) begin
|
||||
$display("*** %t: DRAM rsp: tag=%0h, data=%0h", $time, dram_rsp_tag, dram_rsp_data);
|
||||
end
|
||||
end*/
|
||||
|
||||
endmodule
|
82
hw/rtl/cache/VX_bank.v
vendored
82
hw/rtl/cache/VX_bank.v
vendored
|
@ -2,54 +2,54 @@
|
|||
`include "VX_define.vh"
|
||||
module VX_bank #(
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
parameter CACHE_SIZE = 0,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter BANK_LINE_SIZE = 16,
|
||||
parameter BANK_LINE_SIZE = 0,
|
||||
// Number of banks {1, 2, 4, 8,...}
|
||||
parameter NUM_BANKS = 8,
|
||||
parameter NUM_BANKS = 0,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
parameter WORD_SIZE = 0,
|
||||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||
parameter NUM_REQUESTS = 2,
|
||||
parameter NUM_REQUESTS = 0,
|
||||
// Number of cycles to complete i 1 (read from memory)
|
||||
parameter STAGE_1_CYCLES = 2,
|
||||
parameter STAGE_1_CYCLES = 0,
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Request Queue Size
|
||||
parameter REQQ_SIZE = 8,
|
||||
parameter REQQ_SIZE = 0,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MRVQ_SIZE = 8,
|
||||
parameter MRVQ_SIZE = 0,
|
||||
// Dram Fill Rsp Queue Size
|
||||
parameter DFPQ_SIZE = 2,
|
||||
parameter DFPQ_SIZE = 0,
|
||||
// Snoop Req Queue
|
||||
parameter SNRQ_SIZE = 8,
|
||||
parameter SNRQ_SIZE = 0,
|
||||
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Writeback Queue Size
|
||||
parameter CWBQ_SIZE = 8,
|
||||
parameter CWBQ_SIZE = 0,
|
||||
// Dram Writeback Queue Size
|
||||
parameter DWBQ_SIZE = 4,
|
||||
parameter DWBQ_SIZE = 0,
|
||||
// Dram Fill Req Queue Size
|
||||
parameter DFQQ_SIZE = 8,
|
||||
parameter DFQQ_SIZE = 0,
|
||||
// Lower Level Cache Hit Queue Size
|
||||
parameter LLVQ_SIZE = 16,
|
||||
parameter LLVQ_SIZE = 0,
|
||||
// Fill Forward SNP Queue
|
||||
parameter FFSQ_SIZE = 8,
|
||||
parameter FFSQ_SIZE = 0,
|
||||
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
parameter FILL_INVALIDAOR_SIZE = 16,
|
||||
parameter FILL_INVALIDAOR_SIZE = 0,
|
||||
|
||||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
parameter WRITE_ENABLE = 0,
|
||||
|
||||
// Enable dram update
|
||||
parameter DRAM_ENABLE = 1,
|
||||
parameter DRAM_ENABLE = 0,
|
||||
|
||||
// Enable snoop forwarding
|
||||
parameter SNOOP_FORWARDING = 0,
|
||||
|
||||
// core request tag size
|
||||
parameter CORE_TAG_WIDTH = 1,
|
||||
parameter CORE_TAG_WIDTH = 0,
|
||||
|
||||
// size of tag id in core request tag
|
||||
parameter CORE_TAG_ID_BITS = 0
|
||||
|
@ -172,21 +172,9 @@ module VX_bank #(
|
|||
assign reqq_push = core_req_ready && (| core_req_valids);
|
||||
|
||||
VX_cache_req_queue #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQUESTS (NUM_REQUESTS),
|
||||
.STAGE_1_CYCLES (STAGE_1_CYCLES),
|
||||
.REQQ_SIZE (REQQ_SIZE),
|
||||
.MRVQ_SIZE (MRVQ_SIZE),
|
||||
.DFPQ_SIZE (DFPQ_SIZE),
|
||||
.SNRQ_SIZE (SNRQ_SIZE),
|
||||
.CWBQ_SIZE (CWBQ_SIZE),
|
||||
.DWBQ_SIZE (DWBQ_SIZE),
|
||||
.DFQQ_SIZE (DFQQ_SIZE),
|
||||
.LLVQ_SIZE (LLVQ_SIZE),
|
||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
|
||||
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
|
||||
) req_queue (
|
||||
|
@ -363,17 +351,7 @@ module VX_bank #(
|
|||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQUESTS (NUM_REQUESTS),
|
||||
.STAGE_1_CYCLES (STAGE_1_CYCLES),
|
||||
.REQQ_SIZE (REQQ_SIZE),
|
||||
.MRVQ_SIZE (MRVQ_SIZE),
|
||||
.DFPQ_SIZE (DFPQ_SIZE),
|
||||
.SNRQ_SIZE (SNRQ_SIZE),
|
||||
.CWBQ_SIZE (CWBQ_SIZE),
|
||||
.DWBQ_SIZE (DWBQ_SIZE),
|
||||
.DFQQ_SIZE (DFQQ_SIZE),
|
||||
.LLVQ_SIZE (LLVQ_SIZE),
|
||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
|
||||
.DRAM_ENABLE (DRAM_ENABLE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE)
|
||||
) tag_data_access (
|
||||
|
@ -458,21 +436,11 @@ module VX_bank #(
|
|||
assign {miss_add_tag, miss_add_mem_read, miss_add_mem_write, miss_add_tid} = inst_meta_st2;
|
||||
|
||||
VX_cache_miss_resrv #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQUESTS (NUM_REQUESTS),
|
||||
.STAGE_1_CYCLES (STAGE_1_CYCLES),
|
||||
.REQQ_SIZE (REQQ_SIZE),
|
||||
.MRVQ_SIZE (MRVQ_SIZE),
|
||||
.DFPQ_SIZE (DFPQ_SIZE),
|
||||
.SNRQ_SIZE (SNRQ_SIZE),
|
||||
.CWBQ_SIZE (CWBQ_SIZE),
|
||||
.DWBQ_SIZE (DWBQ_SIZE),
|
||||
.DFQQ_SIZE (DFQQ_SIZE),
|
||||
.LLVQ_SIZE (LLVQ_SIZE),
|
||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
|
||||
.CORE_TAG_WIDTH (CORE_TAG_WIDTH)
|
||||
) cache_miss_resrv (
|
||||
.clk (clk),
|
||||
|
@ -568,20 +536,8 @@ module VX_bank #(
|
|||
wire [`LINE_ADDR_WIDTH-1:0] fill_invalidator_addr = addr_st2;
|
||||
|
||||
VX_fill_invalidator #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQUESTS (NUM_REQUESTS),
|
||||
.STAGE_1_CYCLES (STAGE_1_CYCLES),
|
||||
.REQQ_SIZE (REQQ_SIZE),
|
||||
.MRVQ_SIZE (MRVQ_SIZE),
|
||||
.DFPQ_SIZE (DFPQ_SIZE),
|
||||
.SNRQ_SIZE (SNRQ_SIZE),
|
||||
.CWBQ_SIZE (CWBQ_SIZE),
|
||||
.DWBQ_SIZE (DWBQ_SIZE),
|
||||
.DFQQ_SIZE (DFQQ_SIZE),
|
||||
.LLVQ_SIZE (LLVQ_SIZE),
|
||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE)
|
||||
) fill_invalidator (
|
||||
.clk (clk),
|
||||
|
|
36
hw/rtl/cache/VX_cache.v
vendored
36
hw/rtl/cache/VX_cache.v
vendored
|
@ -140,21 +140,10 @@ module VX_cache #(
|
|||
assign dram_rsp_ready = (| per_bank_dram_fill_rsp_ready);
|
||||
|
||||
VX_cache_core_req_bank_sel #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQUESTS (NUM_REQUESTS),
|
||||
.STAGE_1_CYCLES (STAGE_1_CYCLES),
|
||||
.REQQ_SIZE (REQQ_SIZE),
|
||||
.MRVQ_SIZE (MRVQ_SIZE),
|
||||
.DFPQ_SIZE (DFPQ_SIZE),
|
||||
.SNRQ_SIZE (SNRQ_SIZE),
|
||||
.CWBQ_SIZE (CWBQ_SIZE),
|
||||
.DWBQ_SIZE (DWBQ_SIZE),
|
||||
.DFQQ_SIZE (DFQQ_SIZE),
|
||||
.LLVQ_SIZE (LLVQ_SIZE),
|
||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE)
|
||||
.NUM_REQUESTS (NUM_REQUESTS)
|
||||
) cache_core_req_bank_sell (
|
||||
.core_req_valid (core_req_valid),
|
||||
.core_req_addr (core_req_addr),
|
||||
|
@ -320,21 +309,9 @@ module VX_cache #(
|
|||
endgenerate
|
||||
|
||||
VX_cache_core_rsp_merge #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQUESTS (NUM_REQUESTS),
|
||||
.STAGE_1_CYCLES (STAGE_1_CYCLES),
|
||||
.REQQ_SIZE (REQQ_SIZE),
|
||||
.MRVQ_SIZE (MRVQ_SIZE),
|
||||
.DFPQ_SIZE (DFPQ_SIZE),
|
||||
.SNRQ_SIZE (SNRQ_SIZE),
|
||||
.CWBQ_SIZE (CWBQ_SIZE),
|
||||
.DWBQ_SIZE (DWBQ_SIZE),
|
||||
.DFQQ_SIZE (DFQQ_SIZE),
|
||||
.LLVQ_SIZE (LLVQ_SIZE),
|
||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
|
||||
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
|
||||
) cache_core_rsp_merge (
|
||||
|
@ -351,21 +328,10 @@ module VX_cache #(
|
|||
);
|
||||
|
||||
VX_cache_dram_req_arb #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQUESTS (NUM_REQUESTS),
|
||||
.STAGE_1_CYCLES (STAGE_1_CYCLES),
|
||||
.REQQ_SIZE (REQQ_SIZE),
|
||||
.MRVQ_SIZE (MRVQ_SIZE),
|
||||
.DFPQ_SIZE (DFPQ_SIZE),
|
||||
.SNRQ_SIZE (SNRQ_SIZE),
|
||||
.CWBQ_SIZE (CWBQ_SIZE),
|
||||
.DWBQ_SIZE (DWBQ_SIZE),
|
||||
.DFQQ_SIZE (DFQQ_SIZE),
|
||||
.LLVQ_SIZE (LLVQ_SIZE),
|
||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
|
||||
.PRFQ_SIZE (PRFQ_SIZE),
|
||||
.PRFQ_STRIDE (PRFQ_STRIDE)
|
||||
) cache_dram_req_arb (
|
||||
|
|
39
hw/rtl/cache/VX_cache_core_req_bank_sel.v
vendored
39
hw/rtl/cache/VX_cache_core_req_bank_sel.v
vendored
|
@ -2,43 +2,14 @@
|
|||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_cache_core_req_bank_sel #(
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter BANK_LINE_SIZE = 16,
|
||||
// Number of banks {1, 2, 4, 8,...}
|
||||
parameter NUM_BANKS = 8,
|
||||
parameter BANK_LINE_SIZE = 0,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
parameter WORD_SIZE = 0,
|
||||
// Number of banks {1, 2, 4, 8,...}
|
||||
parameter NUM_BANKS = 0,
|
||||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||
parameter NUM_REQUESTS = 2,
|
||||
// Number of cycles to complete stage 1 (read from memory)
|
||||
parameter STAGE_1_CYCLES = 2,
|
||||
// Function ID, {Dcache=0, Icache=1, Sharedmemory=2}
|
||||
parameter FUNC_ID = 0,
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Request Queue Size
|
||||
parameter REQQ_SIZE = 8,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MRVQ_SIZE = 8,
|
||||
// Dram Fill Rsp Queue Size
|
||||
parameter DFPQ_SIZE = 2,
|
||||
// Snoop Req Queue
|
||||
parameter SNRQ_SIZE = 8,
|
||||
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Writeback Queue Size
|
||||
parameter CWBQ_SIZE = 8,
|
||||
// Dram Writeback Queue Size
|
||||
parameter DWBQ_SIZE = 4,
|
||||
// Dram Fill Req Queue Size
|
||||
parameter DFQQ_SIZE = 8,
|
||||
// Lower Level Cache Hit Queue Size
|
||||
parameter LLVQ_SIZE = 16,
|
||||
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
parameter FILL_INVALIDAOR_SIZE = 16
|
||||
parameter NUM_REQUESTS = 0
|
||||
) (
|
||||
input wire [NUM_REQUESTS-1:0] core_req_valid,
|
||||
input wire [NUM_REQUESTS-1:0][31:0] core_req_addr,
|
||||
|
|
43
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
43
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
|
@ -1,49 +1,16 @@
|
|||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_cache_core_rsp_merge #(
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter BANK_LINE_SIZE = 16,
|
||||
// Number of banks {1, 2, 4, 8,...}
|
||||
parameter NUM_BANKS = 8,
|
||||
parameter NUM_BANKS = 0,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
parameter WORD_SIZE = 0,
|
||||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||
parameter NUM_REQUESTS = 2,
|
||||
// Number of cycles to complete stage 1 (read from memory)
|
||||
parameter STAGE_1_CYCLES = 2,
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Request Queue Size
|
||||
parameter REQQ_SIZE = 8,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MRVQ_SIZE = 8,
|
||||
// Dram Fill Rsp Queue Size
|
||||
parameter DFPQ_SIZE = 2,
|
||||
// Snoop Req Queue
|
||||
parameter SNRQ_SIZE = 8,
|
||||
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Writeback Queue Size
|
||||
parameter CWBQ_SIZE = 8,
|
||||
// Dram Writeback Queue Size
|
||||
parameter DWBQ_SIZE = 4,
|
||||
// Dram Fill Req Queue Size
|
||||
parameter DFQQ_SIZE = 8,
|
||||
// Lower Level Cache Hit Queue Size
|
||||
parameter LLVQ_SIZE = 16,
|
||||
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
parameter FILL_INVALIDAOR_SIZE = 16,
|
||||
|
||||
parameter NUM_REQUESTS = 0,
|
||||
// core request tag size
|
||||
parameter CORE_TAG_WIDTH = 1,
|
||||
parameter CORE_TAG_WIDTH = 0,
|
||||
// size of tag id in core request tag
|
||||
parameter CORE_TAG_ID_BITS = 0,
|
||||
|
||||
// dram request tag size
|
||||
parameter DRAM_TAG_WIDTH = 1
|
||||
parameter CORE_TAG_ID_BITS = 0
|
||||
) (
|
||||
// Per Bank WB
|
||||
input wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid,
|
||||
|
|
35
hw/rtl/cache/VX_cache_dfq_queue.v
vendored
35
hw/rtl/cache/VX_cache_dfq_queue.v
vendored
|
@ -1,41 +1,12 @@
|
|||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_cache_dfq_queue #(
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter BANK_LINE_SIZE = 16,
|
||||
parameter BANK_LINE_SIZE = 0,
|
||||
// Number of banks {1, 2, 4, 8,...}
|
||||
parameter NUM_BANKS = 8,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||
parameter NUM_REQUESTS = 2,
|
||||
// Number of cycles to complete stage 1 (read from memory)
|
||||
parameter STAGE_1_CYCLES = 2,
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Request Queue Size
|
||||
parameter REQQ_SIZE = 8,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MRVQ_SIZE = 8,
|
||||
// Dram Fill Rsp Queue Size
|
||||
parameter DFPQ_SIZE = 2,
|
||||
// Snoop Req Queue
|
||||
parameter SNRQ_SIZE = 8,
|
||||
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Writeback Queue Size
|
||||
parameter CWBQ_SIZE = 8,
|
||||
// Dram Writeback Queue Size
|
||||
parameter DWBQ_SIZE = 4,
|
||||
parameter NUM_BANKS = 0,
|
||||
// Dram Fill Req Queue Size
|
||||
parameter DFQQ_SIZE = 8,
|
||||
// Lower Level Cache Hit Queue Size
|
||||
parameter LLVQ_SIZE = 16,
|
||||
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
parameter FILL_INVALIDAOR_SIZE = 16
|
||||
parameter DFQQ_SIZE = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
|
47
hw/rtl/cache/VX_cache_dram_req_arb.v
vendored
47
hw/rtl/cache/VX_cache_dram_req_arb.v
vendored
|
@ -1,46 +1,17 @@
|
|||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_cache_dram_req_arb #(
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter BANK_LINE_SIZE = 16,
|
||||
parameter BANK_LINE_SIZE = 0,
|
||||
// Number of banks {1, 2, 4, 8,...}
|
||||
parameter NUM_BANKS = 8,
|
||||
parameter NUM_BANKS = 0,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||
parameter NUM_REQUESTS = 2,
|
||||
// Number of cycles to complete stage 1 (read from memory)
|
||||
parameter STAGE_1_CYCLES = 2,
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
|
||||
// Core Request Queue Size
|
||||
parameter REQQ_SIZE = 8,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MRVQ_SIZE = 8,
|
||||
// Dram Fill Rsp Queue Size
|
||||
parameter DFPQ_SIZE = 2,
|
||||
// Snoop Req Queue
|
||||
parameter SNRQ_SIZE = 8,
|
||||
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Writeback Queue Size
|
||||
parameter CWBQ_SIZE = 8,
|
||||
// Dram Writeback Queue Size
|
||||
parameter DWBQ_SIZE = 4,
|
||||
parameter WORD_SIZE = 0,
|
||||
// Dram Fill Req Queue Size
|
||||
parameter DFQQ_SIZE = 8,
|
||||
// Lower Level Cache Hit Queue Size
|
||||
parameter LLVQ_SIZE = 16,
|
||||
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
parameter FILL_INVALIDAOR_SIZE = 16,
|
||||
|
||||
// Prefetcher
|
||||
parameter PRFQ_SIZE = 64,
|
||||
parameter PRFQ_STRIDE = 2
|
||||
parameter DFQQ_SIZE = 0,
|
||||
// Prefetcher
|
||||
parameter PRFQ_SIZE = 0,
|
||||
parameter PRFQ_STRIDE = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -101,7 +72,9 @@ module VX_cache_dram_req_arb #(
|
|||
wire dfqq_push = (| per_bank_dram_fill_req_valid);
|
||||
|
||||
VX_cache_dfq_queue #(
|
||||
|
||||
.BANK_LINE_SIZE(BANK_LINE_SIZE),
|
||||
.NUM_BANKS(NUM_BANKS),
|
||||
.DFQQ_SIZE(DFQQ_SIZE)
|
||||
) cache_dfq_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
38
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
38
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
|
@ -1,44 +1,18 @@
|
|||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_cache_miss_resrv #(
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter BANK_LINE_SIZE = 16,
|
||||
parameter BANK_LINE_SIZE = 0,
|
||||
// Number of banks {1, 2, 4, 8,...}
|
||||
parameter NUM_BANKS = 8,
|
||||
parameter NUM_BANKS = 0,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
parameter WORD_SIZE = 0,
|
||||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||
parameter NUM_REQUESTS = 2,
|
||||
// Number of cycles to complete stage 1 (read from memory)
|
||||
parameter STAGE_1_CYCLES = 2,
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Request Queue Size
|
||||
parameter REQQ_SIZE = 8,
|
||||
parameter NUM_REQUESTS = 0,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MRVQ_SIZE = 8,
|
||||
// Dram Fill Rsp Queue Size
|
||||
parameter DFPQ_SIZE = 2,
|
||||
// Snoop Req Queue
|
||||
parameter SNRQ_SIZE = 8,
|
||||
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Writeback Queue Size
|
||||
parameter CWBQ_SIZE = 8,
|
||||
// Dram Writeback Queue Size
|
||||
parameter DWBQ_SIZE = 4,
|
||||
// Dram Fill Req Queue Size
|
||||
parameter DFQQ_SIZE = 8,
|
||||
// Lower Level Cache Hit Queue Size
|
||||
parameter LLVQ_SIZE = 16,
|
||||
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
parameter FILL_INVALIDAOR_SIZE = 16,
|
||||
|
||||
parameter MRVQ_SIZE = 0,
|
||||
// caceh requests tag size
|
||||
parameter CORE_TAG_WIDTH = 1
|
||||
parameter CORE_TAG_WIDTH = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
|
39
hw/rtl/cache/VX_cache_req_queue.v
vendored
39
hw/rtl/cache/VX_cache_req_queue.v
vendored
|
@ -1,45 +1,14 @@
|
|||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_cache_req_queue #(
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter BANK_LINE_SIZE = 16,
|
||||
// Number of banks {1, 2, 4, 8,...}
|
||||
parameter NUM_BANKS = 8,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
parameter WORD_SIZE = 0,
|
||||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||
parameter NUM_REQUESTS = 2,
|
||||
// Number of cycles to complete stage 1 (read from memory)
|
||||
parameter STAGE_1_CYCLES = 2,
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
parameter NUM_REQUESTS = 0,
|
||||
// Core Request Queue Size
|
||||
parameter REQQ_SIZE = 8,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MRVQ_SIZE = 8,
|
||||
// Dram Fill Rsp Queue Size
|
||||
parameter DFPQ_SIZE = 2,
|
||||
// Snoop Req Queue
|
||||
parameter SNRQ_SIZE = 8,
|
||||
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Writeback Queue Size
|
||||
parameter CWBQ_SIZE = 8,
|
||||
// Dram Writeback Queue Size
|
||||
parameter DWBQ_SIZE = 4,
|
||||
// Dram Fill Req Queue Size
|
||||
parameter DFQQ_SIZE = 8,
|
||||
// Lower Level Cache Hit Queue Size
|
||||
parameter LLVQ_SIZE = 16,
|
||||
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
parameter FILL_INVALIDAOR_SIZE = 16,
|
||||
|
||||
parameter REQQ_SIZE = 0,
|
||||
// core request tag size
|
||||
parameter CORE_TAG_WIDTH = 1,
|
||||
|
||||
parameter CORE_TAG_WIDTH = 0,
|
||||
// size of tag id in core request tag
|
||||
parameter CORE_TAG_ID_BITS = 0
|
||||
) (
|
||||
|
|
35
hw/rtl/cache/VX_fill_invalidator.v
vendored
35
hw/rtl/cache/VX_fill_invalidator.v
vendored
|
@ -1,41 +1,12 @@
|
|||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_fill_invalidator #(
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter BANK_LINE_SIZE = 16,
|
||||
parameter BANK_LINE_SIZE = 0,
|
||||
// Number of banks {1, 2, 4, 8,...}
|
||||
parameter NUM_BANKS = 8,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||
parameter NUM_REQUESTS = 2,
|
||||
// Number of cycles to complete stage 1 (read from memory)
|
||||
parameter STAGE_1_CYCLES = 2,
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Request Queue Size
|
||||
parameter REQQ_SIZE = 8,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MRVQ_SIZE = 8,
|
||||
// Dram Fill Rsp Queue Size
|
||||
parameter DFPQ_SIZE = 2,
|
||||
// Snoop Req Queue
|
||||
parameter SNRQ_SIZE = 8,
|
||||
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Writeback Queue Size
|
||||
parameter CWBQ_SIZE = 8,
|
||||
// Dram Writeback Queue Size
|
||||
parameter DWBQ_SIZE = 4,
|
||||
// Dram Fill Req Queue Size
|
||||
parameter DFQQ_SIZE = 8,
|
||||
// Lower Level Cache Hit Queue Size
|
||||
parameter LLVQ_SIZE = 16,
|
||||
|
||||
parameter NUM_BANKS = 0,
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
parameter FILL_INVALIDAOR_SIZE = 16
|
||||
parameter FILL_INVALIDAOR_SIZE = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
|
8
hw/rtl/cache/VX_prefetcher.v
vendored
8
hw/rtl/cache/VX_prefetcher.v
vendored
|
@ -1,12 +1,12 @@
|
|||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_prefetcher #(
|
||||
parameter PRFQ_SIZE = 64,
|
||||
parameter PRFQ_STRIDE = 2,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter BANK_LINE_SIZE = 16,
|
||||
parameter BANK_LINE_SIZE = 0,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4
|
||||
parameter WORD_SIZE = 0,
|
||||
parameter PRFQ_SIZE = 0,
|
||||
parameter PRFQ_STRIDE = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
|
2
hw/rtl/cache/VX_snp_fwd_arb.v
vendored
2
hw/rtl/cache/VX_snp_fwd_arb.v
vendored
|
@ -1,7 +1,7 @@
|
|||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_snp_fwd_arb #(
|
||||
parameter NUM_BANKS = 1,
|
||||
parameter NUM_BANKS = 1,
|
||||
parameter BANK_LINE_SIZE = 1
|
||||
) (
|
||||
input wire [NUM_BANKS-1:0] per_bank_snp_fwd_valid,
|
||||
|
|
53
hw/rtl/cache/VX_tag_data_access.v
vendored
53
hw/rtl/cache/VX_tag_data_access.v
vendored
|
@ -2,46 +2,22 @@
|
|||
|
||||
module VX_tag_data_access #(
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
parameter CACHE_SIZE = 0,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter BANK_LINE_SIZE = 16,
|
||||
parameter BANK_LINE_SIZE = 0,
|
||||
// Number of banks {1, 2, 4, 8,...}
|
||||
parameter NUM_BANKS = 8,
|
||||
parameter NUM_BANKS = 0,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||
parameter NUM_REQUESTS = 2,
|
||||
parameter WORD_SIZE = 0,
|
||||
|
||||
// Number of cycles to complete stage 1 (read from memory)
|
||||
parameter STAGE_1_CYCLES = 2,
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Request Queue Size
|
||||
parameter REQQ_SIZE = 8,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MRVQ_SIZE = 8,
|
||||
// Dram Fill Rsp Queue Size
|
||||
parameter DFPQ_SIZE = 2,
|
||||
// Snoop Req Queue
|
||||
parameter SNRQ_SIZE = 8,
|
||||
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Writeback Queue Size
|
||||
parameter CWBQ_SIZE = 8,
|
||||
// Dram Writeback Queue Size
|
||||
parameter DWBQ_SIZE = 4,
|
||||
// Dram Fill Req Queue Size
|
||||
parameter DFQQ_SIZE = 8,
|
||||
// Lower Level Cache Hit Queue Size
|
||||
parameter LLVQ_SIZE = 16,
|
||||
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
parameter FILL_INVALIDAOR_SIZE = 16,
|
||||
parameter STAGE_1_CYCLES = 0,
|
||||
|
||||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
parameter WRITE_ENABLE = 0,
|
||||
|
||||
// Enable dram update
|
||||
parameter DRAM_ENABLE = 1
|
||||
parameter DRAM_ENABLE = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -100,18 +76,7 @@ module VX_tag_data_access #(
|
|||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQUESTS (NUM_REQUESTS),
|
||||
.STAGE_1_CYCLES (STAGE_1_CYCLES),
|
||||
.REQQ_SIZE (REQQ_SIZE),
|
||||
.MRVQ_SIZE (MRVQ_SIZE),
|
||||
.DFPQ_SIZE (DFPQ_SIZE),
|
||||
.SNRQ_SIZE (SNRQ_SIZE),
|
||||
.CWBQ_SIZE (CWBQ_SIZE),
|
||||
.DWBQ_SIZE (DWBQ_SIZE),
|
||||
.DFQQ_SIZE (DFQQ_SIZE),
|
||||
.LLVQ_SIZE (LLVQ_SIZE),
|
||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE)
|
||||
.WORD_SIZE (WORD_SIZE)
|
||||
) tag_data_structure (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
37
hw/rtl/cache/VX_tag_data_structure.v
vendored
37
hw/rtl/cache/VX_tag_data_structure.v
vendored
|
@ -2,42 +2,13 @@
|
|||
|
||||
module VX_tag_data_structure #(
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
parameter CACHE_SIZE = 0,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter BANK_LINE_SIZE = 16,
|
||||
parameter BANK_LINE_SIZE = 0,
|
||||
// Number of banks {1, 2, 4, 8,...}
|
||||
parameter NUM_BANKS = 8,
|
||||
parameter NUM_BANKS = 0,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||
parameter NUM_REQUESTS = 2,
|
||||
// Number of cycles to complete stage 1 (read from memory)
|
||||
parameter STAGE_1_CYCLES = 2,
|
||||
// Function ID, {Dcache=0, Icache=1, Sharedmemory=2}
|
||||
parameter FUNC_ID = 0,
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Request Queue Size
|
||||
parameter REQQ_SIZE = 8,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MRVQ_SIZE = 8,
|
||||
// Dram Fill Rsp Queue Size
|
||||
parameter DFPQ_SIZE = 2,
|
||||
// Snoop Req Queue
|
||||
parameter SNRQ_SIZE = 8,
|
||||
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Writeback Queue Size
|
||||
parameter CWBQ_SIZE = 8,
|
||||
// Dram Writeback Queue Size
|
||||
parameter DWBQ_SIZE = 4,
|
||||
// Dram Fill Req Queue Size
|
||||
parameter DFQQ_SIZE = 8,
|
||||
// Lower Level Cache Hit Queue Size
|
||||
parameter LLVQ_SIZE = 16,
|
||||
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
parameter FILL_INVALIDAOR_SIZE = 16
|
||||
parameter WORD_SIZE = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
|
|
@ -8,8 +8,8 @@ double sc_time_stamp() {
|
|||
return time_stamp;
|
||||
}
|
||||
|
||||
Simulator::Simulator(RAM *ram) {
|
||||
ram_ = ram;
|
||||
Simulator::Simulator() {
|
||||
ram_ = nullptr;
|
||||
vortex_ = new VVortex_Socket();
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
|
@ -27,12 +27,20 @@ Simulator::~Simulator() {
|
|||
delete vortex_;
|
||||
}
|
||||
|
||||
void Simulator::attach_ram(RAM* ram) {
|
||||
ram_ = ram;
|
||||
dram_rsp_vec_.clear();
|
||||
}
|
||||
|
||||
void Simulator::print_stats(std::ostream& out) {
|
||||
out << std::left;
|
||||
out << std::setw(24) << "# of total cycles:" << std::dec << time_stamp/2 << std::endl;
|
||||
}
|
||||
|
||||
void Simulator::dbus_driver() {
|
||||
if (ram_ == nullptr)
|
||||
return;
|
||||
|
||||
// handle DRAM response cycle
|
||||
int dequeue_index = -1;
|
||||
for (int i = 0; i < dram_rsp_vec_.size(); i++) {
|
||||
|
@ -149,9 +157,6 @@ bool Simulator::is_busy() {
|
|||
}
|
||||
|
||||
void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
|
||||
// send snoop requests to the caches
|
||||
printf("[sim] total cycles: %ld\n", time_stamp/2);
|
||||
|
||||
// align address to LLC block boundaries
|
||||
auto aligned_addr_start = mem_addr / GLOBAL_BLOCK_SIZE;
|
||||
auto aligned_addr_end = (mem_addr + size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE;
|
||||
|
|
|
@ -29,7 +29,7 @@ typedef struct {
|
|||
class Simulator {
|
||||
public:
|
||||
|
||||
Simulator(RAM *ram);
|
||||
Simulator();
|
||||
virtual ~Simulator();
|
||||
|
||||
bool is_busy();
|
||||
|
@ -37,6 +37,9 @@ public:
|
|||
void step();
|
||||
void wait(uint32_t cycles);
|
||||
void flush_caches(uint32_t mem_addr, uint32_t size);
|
||||
|
||||
void attach_ram(RAM* ram);
|
||||
|
||||
bool run();
|
||||
void print_stats(std::ostream& out);
|
||||
|
||||
|
|
|
@ -71,7 +71,8 @@ int main(int argc, char **argv)
|
|||
RAM ram;
|
||||
loadHexImpl(s.c_str(), &ram);
|
||||
|
||||
Simulator simulator(&ram);
|
||||
Simulator simulator;
|
||||
simulator.attach_ram(&ram);
|
||||
bool curr = simulator.run();
|
||||
|
||||
if (curr) std::cerr << GREEN << "Test Passed: " << s << std::endl;
|
||||
|
@ -106,7 +107,8 @@ int main(int argc, char **argv)
|
|||
RAM ram;
|
||||
loadHexImpl(testing, &ram);
|
||||
|
||||
Simulator simulator(&ram);
|
||||
Simulator simulator;
|
||||
simulator.attach_ram(&ram);
|
||||
bool curr = simulator.run();
|
||||
|
||||
if (curr) std::cerr << GREEN << "Test Passed: " << testing << std::endl;
|
||||
|
|
|
@ -35,26 +35,50 @@ vx_join:
|
|||
.type vx_warp_id, @function
|
||||
.global vx_warp_id
|
||||
vx_warp_id:
|
||||
csrr a0, CSR_LWID # read warp index
|
||||
csrr a0, CSR_LWID
|
||||
ret
|
||||
|
||||
.type vx_warp_gid, @function
|
||||
.global vx_warp_gid
|
||||
vx_warp_gid:
|
||||
csrr a0, CSR_GWID # read warp index
|
||||
csrr a0, CSR_GWID
|
||||
ret
|
||||
|
||||
.type vx_thread_id, @function
|
||||
.global vx_thread_id
|
||||
vx_thread_id:
|
||||
csrr a0, CSR_LTID # read thread index
|
||||
csrr a0, CSR_LTID
|
||||
ret
|
||||
|
||||
.type vx_thread_gid, @function
|
||||
.global vx_thread_gid
|
||||
vx_thread_gid:
|
||||
csrr a0, CSR_GTID # read thread index
|
||||
csrr a0, CSR_GTID
|
||||
ret
|
||||
|
||||
.type vx_core_id, @function
|
||||
.global vx_core_id
|
||||
vx_core_id:
|
||||
csrr a0, CSR_GCID
|
||||
ret
|
||||
|
||||
.type vx_num_threads, @function
|
||||
.global vx_num_threads
|
||||
vx_num_threads:
|
||||
csrr a0, CSR_NT
|
||||
ret
|
||||
|
||||
.type vx_num_warps, @function
|
||||
.global vx_num_warps
|
||||
vx_num_warps:
|
||||
csrr a0, CSR_NW
|
||||
ret
|
||||
|
||||
.type vx_num_cores, @function
|
||||
.global vx_num_cores
|
||||
vx_num_cores:
|
||||
csrr a0, CSR_NC
|
||||
ret
|
||||
|
||||
.type vx_num_cycles, @function
|
||||
.global vx_num_cycles
|
||||
|
|
|
@ -8,37 +8,49 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
// Spawn warps
|
||||
void vx_wspawn(unsigned numWarps, unsigned PC_spawn);
|
||||
void vx_wspawn(int numWarps, int PC_spawn);
|
||||
|
||||
// Set thread mask
|
||||
void vx_tmc(unsigned numThreads);
|
||||
void vx_tmc(int numThreads);
|
||||
|
||||
// Warp Barrier
|
||||
void vx_barrier(unsigned barriedID, unsigned numWarps);
|
||||
void vx_barrier(int barriedID, int numWarps);
|
||||
|
||||
// Split on a predicate
|
||||
void vx_split(unsigned predicate);
|
||||
void vx_split(int predicate);
|
||||
|
||||
// Join
|
||||
void vx_join(void);
|
||||
void vx_join();
|
||||
|
||||
// Return the warp thread index
|
||||
unsigned vx_thread_id(void);
|
||||
// Return the warp's unique thread id
|
||||
int vx_thread_id();
|
||||
|
||||
// Return the core warp index
|
||||
unsigned vx_warp_id(void);
|
||||
// Return the core's unique warp id
|
||||
int vx_warp_id();
|
||||
|
||||
// Return processsor unique thread id
|
||||
unsigned vx_thread_gid(void);
|
||||
// Return processsor unique core id
|
||||
int vx_core_id();
|
||||
|
||||
// Return processsor unique warp id
|
||||
unsigned vx_warp_gid(void);
|
||||
// Return processsor global thread id
|
||||
int vx_thread_gid();
|
||||
|
||||
// Return number cycles
|
||||
unsigned vx_num_cycles(void);
|
||||
// Return processsor global warp id
|
||||
int vx_warp_gid();
|
||||
|
||||
// Return number instructions
|
||||
unsigned vx_num_instrs(void);
|
||||
// Return the number of threads in a warp
|
||||
int vx_num_threads();
|
||||
|
||||
// Return the number of warps in a core
|
||||
int vx_num_warps();
|
||||
|
||||
// Return the number of cores in the processsor
|
||||
int vx_num_cores();
|
||||
|
||||
// Return the number of cycles
|
||||
int vx_num_cycles();
|
||||
|
||||
// Return the number of instructions
|
||||
int vx_num_instrs();
|
||||
|
||||
#define __if(b) vx_split(b); \
|
||||
if (b)
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
.type _start, @function
|
||||
_start:
|
||||
la a1, vx_set_sp
|
||||
li a0, NUM_WARPS # activate all warps
|
||||
csrr a0, CSR_NW # get num warps
|
||||
.word 0x00b5106b # wspawn a0(numWarps), a1(PC SPAWN)
|
||||
jal vx_set_sp
|
||||
li a0, 1
|
||||
|
@ -32,12 +32,11 @@ _exit:
|
|||
li a0, 0
|
||||
.word 0x0005006b # disable all threads
|
||||
|
||||
|
||||
.section .text
|
||||
.type vx_set_sp, @function
|
||||
.global vx_set_sp
|
||||
vx_set_sp:
|
||||
li a0, NUM_THREADS
|
||||
csrr a0, CSR_NT # get num threads
|
||||
.word 0x0005006b # activate all threads
|
||||
|
||||
.option push
|
||||
|
@ -46,10 +45,10 @@ vx_set_sp:
|
|||
addi gp, gp, %pcrel_lo(1b)
|
||||
.option pop
|
||||
|
||||
csrr a1, CSR_GTID # get gtid
|
||||
slli a1, a1, 10 # multiply tid by 1024
|
||||
csrr a2, CSR_LTID # get tid
|
||||
slli a2, a2, 2 # multiply tid by 4
|
||||
csrr a1, CSR_GTID # get global thread id
|
||||
slli a1, a1, 10 # multiply by 1024
|
||||
csrr a2, CSR_LTID # get local thread id
|
||||
slli a2, a2, 2 # multiply by 4
|
||||
lui sp, STACK_BASE_ADDR # load base sp
|
||||
sub sp, sp, a1 # sub thread block
|
||||
add sp, sp, a2 # reduce addr collision for perf
|
||||
|
|
File diff suppressed because it is too large
Load diff
Binary file not shown.
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue