rtl refactoring

This commit is contained in:
Blaise Tine 2020-05-04 20:12:05 -04:00
parent 69f607b73e
commit f142afac80
39 changed files with 31067 additions and 31607 deletions

View file

@ -1,9 +1,9 @@
CFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -Wfatal-errors #CFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -Wfatal-errors
# CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
CFLAGS += -I../../include -I../../../hw/simulate -I../../../runtime CFLAGS += -I../../include -I../../../hw/simulate -I../../../runtime
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=2
#DEBUG = 1 #DEBUG = 1
CFLAGS += -fPIC CFLAGS += -fPIC

View file

@ -1,4 +1,4 @@
all: singlecore all: build-s
CF += -std=c++11 -fms-extensions CF += -std=c++11 -fms-extensions

View file

@ -10,11 +10,9 @@
import local_mem_cfg_pkg::*; import local_mem_cfg_pkg::*;
module ccip_std_afu module ccip_std_afu #(
#(
parameter NUM_LOCAL_MEM_BANKS = 2 parameter NUM_LOCAL_MEM_BANKS = 2
) ) (
(
// CCI-P Clocks and Resets // CCI-P Clocks and Resets
input logic pClk, // Primary CCI-P interface clock. input logic pClk, // Primary CCI-P interface clock.
input logic pClkDiv2, // Aligned, pClk divided by 2. input logic pClkDiv2, // Aligned, pClk divided by 2.
@ -104,12 +102,9 @@ module ccip_std_afu
// choose which memory bank to test // choose which memory bank to test
logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select; logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select;
vortex_afu vortex_afu #(
#(
.NUM_LOCAL_MEM_BANKS(NUM_LOCAL_MEM_BANKS) .NUM_LOCAL_MEM_BANKS(NUM_LOCAL_MEM_BANKS)
) ) vortex_afu_inst (
vortex_afu_inst
(
.clk (clk), .clk (clk),
.SoftReset (reset_T1), .SoftReset (reset_T1),

View file

@ -1,11 +1,12 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_back_end #( module VX_back_end #(
parameter CORE_ID = 0 parameter CORE_ID = 0
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire schedule_delay,
input wire schedule_delay,
VX_cache_core_rsp_if dcache_rsp_if, VX_cache_core_rsp_if dcache_rsp_if,
VX_cache_core_req_if dcache_req_if, VX_cache_core_req_if dcache_req_if,
@ -22,104 +23,102 @@ module VX_back_end #(
VX_warp_ctl_if warp_ctl_if VX_warp_ctl_if warp_ctl_if
); );
VX_wb_if writeback_temp_if(); VX_wb_if wb_temp_if();
assign writeback_if.wb = writeback_temp_if.wb; assign writeback_if.wb = wb_temp_if.wb;
assign writeback_if.rd = writeback_temp_if.rd; assign writeback_if.rd = wb_temp_if.rd;
assign writeback_if.data = writeback_temp_if.data; assign writeback_if.data = wb_temp_if.data;
assign writeback_if.valid = writeback_temp_if.valid; assign writeback_if.valid = wb_temp_if.valid;
assign writeback_if.warp_num = writeback_temp_if.warp_num; assign writeback_if.warp_num = wb_temp_if.warp_num;
assign writeback_if.pc = writeback_temp_if.pc; assign writeback_if.pc = wb_temp_if.pc;
// assign VX_writeback_if(writeback_temp_if); wire no_slot_mem;
wire no_slot_exec;
wire no_slot_mem; // LSU input + output
wire no_slot_exec; VX_lsu_req_if lsu_req_if();
VX_wb_if mem_wb_if();
// LSU input + output // Exec unit input + output
VX_lsu_req_if lsu_req_if(); VX_exec_unit_req_if exec_unit_req_if();
VX_wb_if mem_wb_if(); VX_wb_if inst_exec_wb_if();
// Exec unit input + output // GPU unit input
VX_exec_unit_req_if exec_unit_req_if(); VX_gpu_inst_req_if gpu_inst_req_if();
VX_wb_if inst_exec_wb_if();
// GPU unit input // CSR unit inputs
VX_gpu_inst_req_if gpu_inst_req_if(); VX_csr_req_if csr_req_if();
VX_wb_if csr_wb_if();
wire no_slot_csr;
wire stall_gpr_csr;
// CSR unit inputs VX_gpr_stage gpr_stage (
VX_csr_req_if csr_req_if(); .clk (clk),
VX_wb_if csr_wb_if(); .reset (reset),
wire no_slot_csr; .schedule_delay (schedule_delay),
wire stall_gpr_csr; .writeback_if (wb_temp_if),
.bckE_req_if (bckE_req_if),
// New
.exec_unit_req_if (exec_unit_req_if),
.lsu_req_if (lsu_req_if),
.gpu_inst_req_if (gpu_inst_req_if),
.csr_req_if (csr_req_if),
.stall_gpr_csr (stall_gpr_csr),
// End new
.memory_delay (mem_delay),
.exec_delay (exec_delay),
.gpr_stage_delay (gpr_stage_delay)
);
VX_gpr_stage gpr_stage ( VX_lsu_unit lsu_unit (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.schedule_delay (schedule_delay), .lsu_req_if (lsu_req_if),
.writeback_if (writeback_temp_if), .mem_wb_if (mem_wb_if),
.bckE_req_if (bckE_req_if), .dcache_rsp_if (dcache_rsp_if),
// New .dcache_req_if (dcache_req_if),
.exec_unit_req_if (exec_unit_req_if), .delay (mem_delay),
.lsu_req_if (lsu_req_if), .no_slot_mem (no_slot_mem)
.gpu_inst_req_if (gpu_inst_req_if), );
.csr_req_if (csr_req_if),
.stall_gpr_csr (stall_gpr_csr),
// End new
.memory_delay (mem_delay),
.exec_delay (exec_delay),
.gpr_stage_delay (gpr_stage_delay)
);
VX_lsu_unit lsu_unit ( VX_exec_unit exec_unit (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.lsu_req_if (lsu_req_if), .exec_unit_req_if(exec_unit_req_if),
.mem_wb_if (mem_wb_if), .inst_exec_wb_if (inst_exec_wb_if),
.dcache_rsp_if (dcache_rsp_if), .jal_rsp_if (jal_rsp_if),
.dcache_req_if (dcache_req_if), .branch_rsp_if (branch_rsp_if),
.delay (mem_delay), .delay (exec_delay),
.no_slot_mem (no_slot_mem) .no_slot_exec (no_slot_exec)
); );
VX_exec_unit exec_unit ( VX_gpu_inst gpu_inst (
.clk (clk), .gpu_inst_req_if(gpu_inst_req_if),
.reset (reset), .warp_ctl_if (warp_ctl_if)
.exec_unit_req_if(exec_unit_req_if), );
.inst_exec_wb_if (inst_exec_wb_if),
.jal_rsp_if (jal_rsp_if),
.branch_rsp_if (branch_rsp_if),
.delay (exec_delay),
.no_slot_exec (no_slot_exec)
);
VX_gpu_inst gpu_inst ( VX_csr_pipe #(
.gpu_inst_req_if(gpu_inst_req_if), .CORE_ID(CORE_ID)
.warp_ctl_if (warp_ctl_if) ) csr_pipe (
); .clk (clk),
.reset (reset),
.no_slot_csr (no_slot_csr),
.csr_req_if (csr_req_if),
.writeback_if (wb_temp_if),
.csr_wb_if (csr_wb_if),
.stall_gpr_csr (stall_gpr_csr)
);
VX_csr_pipe #( VX_writeback writeback (
.CORE_ID(CORE_ID) .clk (clk),
) csr_pipe ( .reset (reset),
.clk (clk), .mem_wb_if (mem_wb_if),
.reset (reset), .inst_exec_wb_if(inst_exec_wb_if),
.no_slot_csr (no_slot_csr), .csr_wb_if (csr_wb_if),
.csr_req_if (csr_req_if),
.writeback_if(writeback_temp_if),
.csr_wb_if (csr_wb_if),
.stall_gpr_csr(stall_gpr_csr)
);
VX_writeback writeback ( .writeback_if (wb_temp_if),
.clk (clk), .no_slot_mem (no_slot_mem),
.reset (reset), .no_slot_exec (no_slot_exec),
.mem_wb_if (mem_wb_if), .no_slot_csr (no_slot_csr)
.inst_exec_wb_if (inst_exec_wb_if), );
.csr_wb_if (csr_wb_if),
.writeback_if (writeback_temp_if),
.no_slot_mem (no_slot_mem),
.no_slot_exec (no_slot_exec),
.no_slot_csr (no_slot_csr)
);
endmodule endmodule

View file

@ -36,15 +36,19 @@
`endif `endif
`ifndef SHARED_MEM_TOP_ADDR `ifndef SHARED_MEM_TOP_ADDR
`define SHARED_MEM_TOP_ADDR 8'hFF `define SHARED_MEM_TOP_ADDR 8'hFE
`endif
`ifndef IO_BUS_ADDR
`define IO_BUS_ADDR 32'h00010000
`endif `endif
`ifndef STACK_BASE_ADDR `ifndef STACK_BASE_ADDR
`define STACK_BASE_ADDR 20'h6ffff `define STACK_BASE_ADDR 20'h6FFFF
`endif
`ifndef IO_BUS_BASE_ADDR
`define IO_BUS_BASE_ADDR 32'hFFFFFF00
`endif
`ifndef IO_BUS_ADDR_COUT
`define IO_BUS_ADDR_COUT 32'hFFFFFFFC
`endif `endif
`ifndef L2_ENABLE `ifndef L2_ENABLE

View file

@ -1,6 +1,6 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_csr_data #( module VX_csr_data #(
parameter CORE_ID = 0 parameter CORE_ID = 0
) ( ) (
input wire clk, // Clock input wire clk, // Clock

48
hw/rtl/VX_dcache_io_arb.v Normal file
View file

@ -0,0 +1,48 @@
`include "VX_define.vh"
// Steers one core-side request/response port pair between a data-cache
// port pair and an I/O port pair.
//
// Requests:  the request payload (read, write, addr, data, tag) is driven
//            to both targets unchanged; only the per-thread valid bits are
//            gated, so exactly one target sees valid requests depending on
//            io_select. The ready seen by the core comes from the selected
//            target.
// Responses: the data-cache response is forwarded whenever any of its
//            valid bits is set; otherwise the I/O response is forwarded.
//            The I/O response ready is held low while a data-cache
//            response is being forwarded.
module VX_dcache_io_arb (
    // 1: route core requests to the I/O port, 0: route them to the dcache port
    input wire io_select,

    // Request coming from the core
    VX_cache_core_req_if core_req_if,

    // Request going to the dcache
    VX_cache_core_req_if dcache_core_req_if,

    // Request going to the I/O side
    VX_cache_core_req_if io_core_req_if,

    // Response coming from the dcache
    VX_cache_core_rsp_if dcache_core_rsp_if,

    // Response coming from the I/O side
    VX_cache_core_rsp_if io_core_rsp_if,

    // Response going back to the core
    VX_cache_core_rsp_if core_rsp_if
);

    // ------------------------------------------------------------------
    // Request path
    // ------------------------------------------------------------------

    // Per-thread valid masks: all ones for the selected target, all zeros
    // for the other one.
    wire [`NUM_THREADS-1:0] io_lane_mask     = {`NUM_THREADS{io_select}};
    wire [`NUM_THREADS-1:0] dcache_lane_mask = {`NUM_THREADS{~io_select}};

    assign io_core_req_if.core_req_valid     = core_req_if.core_req_valid & io_lane_mask;
    assign dcache_core_req_if.core_req_valid = core_req_if.core_req_valid & dcache_lane_mask;

    // The payload fans out to both targets; the gated valid bits above
    // decide which target actually sees a request.
    assign io_core_req_if.core_req_read      = core_req_if.core_req_read;
    assign dcache_core_req_if.core_req_read  = core_req_if.core_req_read;

    assign io_core_req_if.core_req_write     = core_req_if.core_req_write;
    assign dcache_core_req_if.core_req_write = core_req_if.core_req_write;

    assign io_core_req_if.core_req_addr      = core_req_if.core_req_addr;
    assign dcache_core_req_if.core_req_addr  = core_req_if.core_req_addr;

    assign io_core_req_if.core_req_data      = core_req_if.core_req_data;
    assign dcache_core_req_if.core_req_data  = core_req_if.core_req_data;

    assign io_core_req_if.core_req_tag       = core_req_if.core_req_tag;
    assign dcache_core_req_if.core_req_tag   = core_req_if.core_req_tag;

    // Back-pressure towards the core follows the selected target.
    assign core_req_if.core_req_ready = io_select ? io_core_req_if.core_req_ready
                                                  : dcache_core_req_if.core_req_ready;

    // ------------------------------------------------------------------
    // Response path (dcache has fixed priority over I/O)
    // ------------------------------------------------------------------

    // High when at least one dcache response lane is valid.
    wire dcache_has_rsp = (|dcache_core_rsp_if.core_rsp_valid);

    // I/O responses are only forwarded when the dcache has nothing to return.
    wire io_rsp_grant = ~dcache_has_rsp;

    assign core_rsp_if.core_rsp_valid = io_rsp_grant ? io_core_rsp_if.core_rsp_valid
                                                     : dcache_core_rsp_if.core_rsp_valid;

    assign core_rsp_if.core_rsp_data  = io_rsp_grant ? io_core_rsp_if.core_rsp_data
                                                     : dcache_core_rsp_if.core_rsp_data;

    assign core_rsp_if.core_rsp_tag   = io_rsp_grant ? io_core_rsp_if.core_rsp_tag
                                                     : dcache_core_rsp_if.core_rsp_tag;

    // The dcache sees the core's ready directly; the I/O side is stalled
    // while a dcache response occupies the shared response port.
    assign dcache_core_rsp_if.core_rsp_ready = core_rsp_if.core_rsp_ready;
    assign io_core_rsp_if.core_rsp_ready     = core_rsp_if.core_rsp_ready && io_rsp_grant;

endmodule

View file

@ -140,8 +140,8 @@ module VX_decode(
assign frE_to_bckE_req_if.rs2_src = (is_itype || is_stype) ? `RS2_IMMED : `RS2_REG; assign frE_to_bckE_req_if.rs2_src = (is_itype || is_stype) ? `RS2_IMMED : `RS2_REG;
// MEM signals // MEM signals
assign frE_to_bckE_req_if.mem_read = (is_linst) ? func3 : `WORD_SEL_NO; assign frE_to_bckE_req_if.mem_read = (is_linst) ? func3 : `BYTE_EN_NO;
assign frE_to_bckE_req_if.mem_write = (is_stype) ? func3 : `WORD_SEL_NO; assign frE_to_bckE_req_if.mem_write = (is_stype) ? func3 : `BYTE_EN_NO;
// UPPER IMMEDIATE // UPPER IMMEDIATE
always @(*) begin always @(*) begin

View file

@ -127,75 +127,68 @@
`define ZERO_REG 5'h0 `define ZERO_REG 5'h0
///////////////////////////////////////////////////////////////////////////////
// Core request tag width pc, wb, rd, warp_num
`define CORE_REQ_TAG_WIDTH (32 + 2 + 5 + `NW_BITS)
// TAG sharing enable rd, warp_num
`define CORE_TAG_ID_BITS (5 + `NW_BITS)
////////////////////////// Dcache Configurable Knobs ////////////////////////// ////////////////////////// Dcache Configurable Knobs //////////////////////////
// Function ID
`define DFUNC_ID 0
// DRAM request data bits // DRAM request data bits
`define DDRAM_LINE_WIDTH (`DBANK_LINE_SIZE * 8) `define DDRAM_LINE_WIDTH (`DBANK_LINE_SIZE * 8)
// DRAM request address bits // DRAM request address bits
`define DDRAM_ADDR_WIDTH (32 - `CLOG2(`DBANK_LINE_SIZE)) `define DDRAM_ADDR_WIDTH (32 - `CLOG2(`DBANK_LINE_SIZE))
// DRAM request tag bits // DRAM request tag bits
`define DDRAM_TAG_WIDTH `DDRAM_ADDR_WIDTH `define DDRAM_TAG_WIDTH `DDRAM_ADDR_WIDTH
////////////////////////// Icache Configurable Knobs ////////////////////////// ////////////////////////// Icache Configurable Knobs //////////////////////////
// Function ID
`define IFUNC_ID 1
// DRAM request data bits // DRAM request data bits
`define IDRAM_LINE_WIDTH (`IBANK_LINE_SIZE * 8) `define IDRAM_LINE_WIDTH (`IBANK_LINE_SIZE * 8)
// DRAM request address bits // DRAM request address bits
`define IDRAM_ADDR_WIDTH (32 - `CLOG2(`IBANK_LINE_SIZE)) `define IDRAM_ADDR_WIDTH (32 - `CLOG2(`IBANK_LINE_SIZE))
// DRAM request tag bits // DRAM request tag bits
`define IDRAM_TAG_WIDTH `IDRAM_ADDR_WIDTH `define IDRAM_TAG_WIDTH `IDRAM_ADDR_WIDTH
////////////////////////// SM Configurable Knobs ////////////////////////////// ////////////////////////// SM Configurable Knobs //////////////////////////////
// Function ID
`define SFUNC_ID 2
// DRAM request data bits // DRAM request data bits
`define SDRAM_LINE_WIDTH (`SBANK_LINE_SIZE * 8) `define SDRAM_LINE_WIDTH (`SBANK_LINE_SIZE * 8)
// DRAM request address bits // DRAM request address bits
`define SDRAM_ADDR_WIDTH (32 - `CLOG2(`SBANK_LINE_SIZE)) `define SDRAM_ADDR_WIDTH (32 - `CLOG2(`SBANK_LINE_SIZE))
// DRAM request tag bits // DRAM request tag bits
`define SDRAM_TAG_WIDTH `SDRAM_ADDR_WIDTH `define SDRAM_TAG_WIDTH `SDRAM_ADDR_WIDTH
////////////////////////// L2cache Configurable Knobs ///////////////////////// ////////////////////////// L2cache Configurable Knobs /////////////////////////
// Function ID
`define L2FUNC_ID 3
// DRAM request data bits // DRAM request data bits
`define L2DRAM_LINE_WIDTH (`L2BANK_LINE_SIZE * 8) `define L2DRAM_LINE_WIDTH (`L2BANK_LINE_SIZE * 8)
// DRAM request address bits // DRAM request address bits
`define L2DRAM_ADDR_WIDTH (32 - `CLOG2(`L2BANK_LINE_SIZE)) `define L2DRAM_ADDR_WIDTH (32 - `CLOG2(`L2BANK_LINE_SIZE))
// DRAM request tag bits // DRAM request tag bits
`define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`L2DRAM_ADDR_WIDTH+`CLOG2(`NUM_CORES*2))) `define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`L2DRAM_ADDR_WIDTH+`CLOG2(`NUM_CORES*2)))
////////////////////////// L3cache Configurable Knobs ///////////////////////// ////////////////////////// L3cache Configurable Knobs /////////////////////////
// Function ID
`define L3FUNC_ID 3
// DRAM request data bits // DRAM request data bits
`define L3DRAM_LINE_WIDTH (`L3BANK_LINE_SIZE * 8) `define L3DRAM_LINE_WIDTH (`L3BANK_LINE_SIZE * 8)
// DRAM request address bits // DRAM request address bits
`define L3DRAM_ADDR_WIDTH (32 - `CLOG2(`L3BANK_LINE_SIZE)) `define L3DRAM_ADDR_WIDTH (32 - `CLOG2(`L3BANK_LINE_SIZE))
// DRAM request tag bits // DRAM request tag bits
`define L3DRAM_TAG_WIDTH ((`NUM_CLUSTERS > 1) ? `L3DRAM_ADDR_WIDTH : `L2DRAM_TAG_WIDTH) `define L3DRAM_TAG_WIDTH ((`NUM_CLUSTERS > 1) ? `L3DRAM_ADDR_WIDTH : `L2DRAM_TAG_WIDTH)
// VX_DEFINE // VX_DEFINE
`endif `endif

View file

@ -4,86 +4,46 @@ module VX_dmem_ctrl (
input wire clk, input wire clk,
input wire reset, input wire reset,
// Core <-> Dcache
VX_cache_core_req_if dcache_core_req_if,
VX_cache_core_rsp_if dcache_core_rsp_if,
// Dram <-> Dcache // Dram <-> Dcache
VX_cache_dram_req_if dcache_dram_req_if, VX_cache_dram_req_if dcache_dram_req_if,
VX_cache_dram_rsp_if dcache_dram_rsp_if, VX_cache_dram_rsp_if dcache_dram_rsp_if,
VX_cache_snp_req_if dcache_snp_req_if, VX_cache_snp_req_if dcache_snp_req_if,
// Core <-> Icache
VX_cache_core_req_if icache_core_req_if,
VX_cache_core_rsp_if icache_core_rsp_if,
// Dram <-> Icache // Dram <-> Icache
VX_cache_dram_req_if icache_dram_req_if, VX_cache_dram_req_if icache_dram_req_if,
VX_cache_dram_rsp_if icache_dram_rsp_if, VX_cache_dram_rsp_if icache_dram_rsp_if
VX_cache_snp_req_if icache_snp_req_if,
// Core <-> Dcache
VX_cache_core_rsp_if dcache_core_rsp_if,
VX_cache_core_req_if dcache_core_req_if,
// Core <-> Icache
VX_cache_core_rsp_if icache_core_rsp_if,
VX_cache_core_req_if icache_core_req_if
); );
VX_cache_core_req_if #( VX_cache_core_req_if #(
.NUM_REQUESTS(`DNUM_REQUESTS), .NUM_REQUESTS(`DNUM_REQUESTS),
.WORD_SIZE(`DWORD_SIZE), .WORD_SIZE(`DWORD_SIZE),
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH) .CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH),
) dcache_req_smem_if(); .CORE_TAG_ID_BITS(`CORE_TAG_ID_BITS)
) dcache_core_req_qual_if(), smem_core_req_if();
VX_cache_core_rsp_if #( VX_cache_core_rsp_if #(
.NUM_REQUESTS(`DNUM_REQUESTS), .NUM_REQUESTS(`DNUM_REQUESTS),
.WORD_SIZE(`DWORD_SIZE), .WORD_SIZE(`DWORD_SIZE),
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH) .CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH),
) dcache_rsp_smem_if(); .CORE_TAG_ID_BITS(`CORE_TAG_ID_BITS)
) dcache_core_rsp_qual_if(), smem_core_rsp_if();
VX_cache_core_req_if #(
.NUM_REQUESTS(`DNUM_REQUESTS),
.WORD_SIZE(`DWORD_SIZE),
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH)
) dcache_req_dcache_if();
VX_cache_core_rsp_if #( VX_dcache_io_arb dcache_io_arb (
.NUM_REQUESTS(`DNUM_REQUESTS), .io_select (dcache_core_req_if.core_req_addr[0][31:24] == `SHARED_MEM_TOP_ADDR),
.WORD_SIZE(`DWORD_SIZE), .core_req_if (dcache_core_req_if),
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH) .dcache_core_req_if (dcache_core_req_qual_if),
) dcache_rsp_dcache_if(); .io_core_req_if (smem_core_req_if),
.dcache_core_rsp_if (dcache_core_rsp_qual_if),
wire to_shm = (dcache_core_req_if.core_req_addr[0][31:24] == `SHARED_MEM_TOP_ADDR); .io_core_rsp_if (smem_core_rsp_if),
wire dcache_wants_wb = (|dcache_rsp_dcache_if.core_rsp_valid); .core_rsp_if (dcache_core_rsp_if)
);
// Dcache Request
assign dcache_req_dcache_if.core_req_valid = dcache_core_req_if.core_req_valid & {`NUM_THREADS{~to_shm}};
assign dcache_req_dcache_if.core_req_read = dcache_core_req_if.core_req_read;
assign dcache_req_dcache_if.core_req_write = dcache_core_req_if.core_req_write;
assign dcache_req_dcache_if.core_req_addr = dcache_core_req_if.core_req_addr;
assign dcache_req_dcache_if.core_req_data = dcache_core_req_if.core_req_data;
assign dcache_req_dcache_if.core_req_tag = dcache_core_req_if.core_req_tag;
assign dcache_rsp_dcache_if.core_rsp_ready = dcache_core_rsp_if.core_rsp_ready;
// Shared Memory Request
assign dcache_req_smem_if.core_req_valid = dcache_core_req_if.core_req_valid & {`NUM_THREADS{to_shm}};
assign dcache_req_smem_if.core_req_addr = dcache_core_req_if.core_req_addr;
assign dcache_req_smem_if.core_req_data = dcache_core_req_if.core_req_data;
assign dcache_req_smem_if.core_req_read = dcache_core_req_if.core_req_read;
assign dcache_req_smem_if.core_req_write = dcache_core_req_if.core_req_write;
assign dcache_req_smem_if.core_req_tag = dcache_core_req_if.core_req_tag;
assign dcache_core_req_if.core_req_ready = to_shm ? dcache_req_smem_if.core_req_ready : dcache_req_dcache_if.core_req_ready;
// Dcache Response
assign dcache_core_rsp_if.core_rsp_valid = dcache_wants_wb ? dcache_rsp_dcache_if.core_rsp_valid : dcache_rsp_smem_if.core_rsp_valid;
assign dcache_core_rsp_if.core_rsp_data = dcache_wants_wb ? dcache_rsp_dcache_if.core_rsp_data : dcache_rsp_smem_if.core_rsp_data;
assign dcache_core_rsp_if.core_rsp_tag = dcache_wants_wb ? dcache_rsp_dcache_if.core_rsp_tag : dcache_rsp_smem_if.core_rsp_tag;
assign dcache_rsp_smem_if.core_rsp_ready = dcache_core_rsp_if.core_rsp_ready && ~dcache_wants_wb;
VX_cache_dram_req_if #(
.DRAM_LINE_WIDTH(`DDRAM_LINE_WIDTH),
.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH),
.DRAM_TAG_WIDTH(`DDRAM_TAG_WIDTH)
) smem_dram_req_if();
VX_cache_dram_rsp_if #(
.DRAM_LINE_WIDTH(`DDRAM_LINE_WIDTH),
.DRAM_TAG_WIDTH(`DDRAM_TAG_WIDTH)
) smem_dram_rsp_if();
VX_cache #( VX_cache #(
.CACHE_SIZE (`SCACHE_SIZE), .CACHE_SIZE (`SCACHE_SIZE),
@ -92,7 +52,6 @@ module VX_dmem_ctrl (
.WORD_SIZE (`SWORD_SIZE), .WORD_SIZE (`SWORD_SIZE),
.NUM_REQUESTS (`SNUM_REQUESTS), .NUM_REQUESTS (`SNUM_REQUESTS),
.STAGE_1_CYCLES (`SSTAGE_1_CYCLES), .STAGE_1_CYCLES (`SSTAGE_1_CYCLES),
.FUNC_ID (`SFUNC_ID),
.REQQ_SIZE (`SREQQ_SIZE), .REQQ_SIZE (`SREQQ_SIZE),
.MRVQ_SIZE (`SMRVQ_SIZE), .MRVQ_SIZE (`SMRVQ_SIZE),
.DFPQ_SIZE (`SDFPQ_SIZE), .DFPQ_SIZE (`SDFPQ_SIZE),
@ -105,54 +64,56 @@ module VX_dmem_ctrl (
.PRFQ_SIZE (`SPRFQ_SIZE), .PRFQ_SIZE (`SPRFQ_SIZE),
.PRFQ_STRIDE (`SPRFQ_STRIDE), .PRFQ_STRIDE (`SPRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`SFILL_INVALIDAOR_SIZE), .FILL_INVALIDAOR_SIZE (`SFILL_INVALIDAOR_SIZE),
.SNOOP_FORWARDING_ENABLE(0),
.DRAM_ENABLE (0),
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH), .CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH),
.CORE_TAG_ID_BITS (`CORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`SDRAM_TAG_WIDTH) .DRAM_TAG_WIDTH (`SDRAM_TAG_WIDTH)
) gpu_smem ( ) gpu_smem (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
// Core request // Core request
.core_req_valid (dcache_req_smem_if.core_req_valid), .core_req_valid (smem_core_req_if.core_req_valid),
.core_req_read (dcache_req_smem_if.core_req_read), .core_req_read (smem_core_req_if.core_req_read),
.core_req_write (dcache_req_smem_if.core_req_write), .core_req_write (smem_core_req_if.core_req_write),
.core_req_addr (dcache_req_smem_if.core_req_addr), .core_req_addr (smem_core_req_if.core_req_addr),
.core_req_data (dcache_req_smem_if.core_req_data), .core_req_data (smem_core_req_if.core_req_data),
.core_req_tag (dcache_req_smem_if.core_req_tag), .core_req_tag (smem_core_req_if.core_req_tag),
.core_req_ready (dcache_req_smem_if.core_req_ready), .core_req_ready (smem_core_req_if.core_req_ready),
// Core response // Core response
.core_rsp_valid (dcache_rsp_smem_if.core_rsp_valid), .core_rsp_valid (smem_core_rsp_if.core_rsp_valid),
.core_rsp_data (dcache_rsp_smem_if.core_rsp_data), .core_rsp_data (smem_core_rsp_if.core_rsp_data),
.core_rsp_tag (dcache_rsp_smem_if.core_rsp_tag), .core_rsp_tag (smem_core_rsp_if.core_rsp_tag),
.core_rsp_ready (dcache_rsp_smem_if.core_rsp_ready), .core_rsp_ready (smem_core_rsp_if.core_rsp_ready),
`IGNORE_WARNINGS_BEGIN
// DRAM request // DRAM request
.dram_req_read (smem_dram_req_if.dram_req_read), .dram_req_read (),
.dram_req_write (smem_dram_req_if.dram_req_write), .dram_req_write (),
.dram_req_addr (smem_dram_req_if.dram_req_addr), .dram_req_addr (),
.dram_req_data (smem_dram_req_if.dram_req_data), .dram_req_data (),
.dram_req_tag (smem_dram_req_if.dram_req_tag), .dram_req_tag (),
.dram_req_ready (smem_dram_req_if.dram_req_ready), .dram_req_ready (0),
// DRAM response // DRAM response
.dram_rsp_valid (smem_dram_rsp_if.dram_rsp_valid), .dram_rsp_valid (0),
.dram_rsp_data (smem_dram_rsp_if.dram_rsp_data), .dram_rsp_data (0),
.dram_rsp_tag (smem_dram_rsp_if.dram_rsp_tag), .dram_rsp_tag (0),
.dram_rsp_ready (smem_dram_rsp_if.dram_rsp_ready), .dram_rsp_ready (),
// Snoop Request // Snoop request
.snp_req_valid (0), .snp_req_valid (0),
.snp_req_addr (0), .snp_req_addr (0),
`IGNORE_WARNINGS_BEGIN
.snp_req_ready (), .snp_req_ready (),
`IGNORE_WARNINGS_END
// Snoop Forward // Snoop forwarding
`IGNORE_WARNINGS_BEGIN
.snp_fwd_valid (), .snp_fwd_valid (),
.snp_fwd_addr (), .snp_fwd_addr (),
`IGNORE_WARNINGS_END
.snp_fwd_ready (0) .snp_fwd_ready (0)
`IGNORE_WARNINGS_END
); );
VX_cache #( VX_cache #(
@ -162,7 +123,6 @@ module VX_dmem_ctrl (
.WORD_SIZE (`DWORD_SIZE), .WORD_SIZE (`DWORD_SIZE),
.NUM_REQUESTS (`DNUM_REQUESTS), .NUM_REQUESTS (`DNUM_REQUESTS),
.STAGE_1_CYCLES (`DSTAGE_1_CYCLES), .STAGE_1_CYCLES (`DSTAGE_1_CYCLES),
.FUNC_ID (`DFUNC_ID),
.REQQ_SIZE (`DREQQ_SIZE), .REQQ_SIZE (`DREQQ_SIZE),
.MRVQ_SIZE (`DMRVQ_SIZE), .MRVQ_SIZE (`DMRVQ_SIZE),
.DFPQ_SIZE (`DDFPQ_SIZE), .DFPQ_SIZE (`DDFPQ_SIZE),
@ -175,26 +135,30 @@ module VX_dmem_ctrl (
.PRFQ_SIZE (`DPRFQ_SIZE), .PRFQ_SIZE (`DPRFQ_SIZE),
.PRFQ_STRIDE (`DPRFQ_STRIDE), .PRFQ_STRIDE (`DPRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`DFILL_INVALIDAOR_SIZE), .FILL_INVALIDAOR_SIZE (`DFILL_INVALIDAOR_SIZE),
.SNOOP_FORWARDING_ENABLE(0),
.DRAM_ENABLE (1),
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH), .CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH),
.CORE_TAG_ID_BITS (`CORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH) .DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH)
) gpu_dcache ( ) gpu_dcache (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
// Core req // Core req
.core_req_valid (dcache_req_dcache_if.core_req_valid), .core_req_valid (dcache_core_req_qual_if.core_req_valid),
.core_req_read (dcache_req_dcache_if.core_req_read), .core_req_read (dcache_core_req_qual_if.core_req_read),
.core_req_write (dcache_req_dcache_if.core_req_write), .core_req_write (dcache_core_req_qual_if.core_req_write),
.core_req_addr (dcache_req_dcache_if.core_req_addr), .core_req_addr (dcache_core_req_qual_if.core_req_addr),
.core_req_data (dcache_req_dcache_if.core_req_data), .core_req_data (dcache_core_req_qual_if.core_req_data),
.core_req_tag (dcache_req_dcache_if.core_req_tag), .core_req_tag (dcache_core_req_qual_if.core_req_tag),
.core_req_ready (dcache_req_dcache_if.core_req_ready), .core_req_ready (dcache_core_req_qual_if.core_req_ready),
// Core response // Core response
.core_rsp_valid (dcache_rsp_dcache_if.core_rsp_valid), .core_rsp_valid (dcache_core_rsp_qual_if.core_rsp_valid),
.core_rsp_data (dcache_rsp_dcache_if.core_rsp_data), .core_rsp_data (dcache_core_rsp_qual_if.core_rsp_data),
.core_rsp_tag (dcache_rsp_dcache_if.core_rsp_tag), .core_rsp_tag (dcache_core_rsp_qual_if.core_rsp_tag),
.core_rsp_ready (dcache_rsp_dcache_if.core_rsp_ready), .core_rsp_ready (dcache_core_rsp_qual_if.core_rsp_ready),
// DRAM request // DRAM request
.dram_req_read (dcache_dram_req_if.dram_req_read), .dram_req_read (dcache_dram_req_if.dram_req_read),
@ -214,13 +178,13 @@ module VX_dmem_ctrl (
.snp_req_valid (dcache_snp_req_if.snp_req_valid), .snp_req_valid (dcache_snp_req_if.snp_req_valid),
.snp_req_addr (dcache_snp_req_if.snp_req_addr), .snp_req_addr (dcache_snp_req_if.snp_req_addr),
.snp_req_ready (dcache_snp_req_if.snp_req_ready), .snp_req_ready (dcache_snp_req_if.snp_req_ready),
// Snoop Forward
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
// Snoop Forward
.snp_fwd_valid (), .snp_fwd_valid (),
.snp_fwd_addr (), .snp_fwd_addr (),
`IGNORE_WARNINGS_END
.snp_fwd_ready (0) .snp_fwd_ready (0)
`IGNORE_WARNINGS_END
); );
VX_cache #( VX_cache #(
@ -230,7 +194,6 @@ module VX_dmem_ctrl (
.WORD_SIZE (`IWORD_SIZE), .WORD_SIZE (`IWORD_SIZE),
.NUM_REQUESTS (`INUM_REQUESTS), .NUM_REQUESTS (`INUM_REQUESTS),
.STAGE_1_CYCLES (`ISTAGE_1_CYCLES), .STAGE_1_CYCLES (`ISTAGE_1_CYCLES),
.FUNC_ID (`IFUNC_ID),
.REQQ_SIZE (`IREQQ_SIZE), .REQQ_SIZE (`IREQQ_SIZE),
.MRVQ_SIZE (`IMRVQ_SIZE), .MRVQ_SIZE (`IMRVQ_SIZE),
.DFPQ_SIZE (`IDFPQ_SIZE), .DFPQ_SIZE (`IDFPQ_SIZE),
@ -243,7 +206,11 @@ module VX_dmem_ctrl (
.PRFQ_SIZE (`IPRFQ_SIZE), .PRFQ_SIZE (`IPRFQ_SIZE),
.PRFQ_STRIDE (`IPRFQ_STRIDE), .PRFQ_STRIDE (`IPRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`IFILL_INVALIDAOR_SIZE), .FILL_INVALIDAOR_SIZE (`IFILL_INVALIDAOR_SIZE),
.SNOOP_FORWARDING_ENABLE(0),
.DRAM_ENABLE (1),
.WRITE_ENABLE (0),
.CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH), .CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH),
.CORE_TAG_ID_BITS (`CORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH) .DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH)
) gpu_icache ( ) gpu_icache (
.clk (clk), .clk (clk),
@ -278,17 +245,17 @@ module VX_dmem_ctrl (
.dram_rsp_tag (icache_dram_rsp_if.dram_rsp_tag), .dram_rsp_tag (icache_dram_rsp_if.dram_rsp_tag),
.dram_rsp_ready (icache_dram_rsp_if.dram_rsp_ready), .dram_rsp_ready (icache_dram_rsp_if.dram_rsp_ready),
`IGNORE_WARNINGS_BEGIN
// Snoop Request // Snoop Request
.snp_req_valid (icache_snp_req_if.snp_req_valid), .snp_req_valid (0),
.snp_req_addr (icache_snp_req_if.snp_req_addr), .snp_req_addr (0),
.snp_req_ready (icache_snp_req_if.snp_req_ready), .snp_req_ready (),
// Snoop Forward // Snoop Forward
`IGNORE_WARNINGS_BEGIN
.snp_fwd_valid (), .snp_fwd_valid (),
.snp_fwd_addr (), .snp_fwd_addr (),
`IGNORE_WARNINGS_END
.snp_fwd_ready (0) .snp_fwd_ready (0)
`IGNORE_WARNINGS_END
); );
endmodule endmodule

View file

@ -26,8 +26,8 @@ module VX_gpr_stage (
`DEBUG_BEGIN `DEBUG_BEGIN
wire[31:0] curr_PC = bckE_req_if.curr_PC; wire[31:0] curr_PC = bckE_req_if.curr_PC;
wire[2:0] branchType = bckE_req_if.branch_type; wire[2:0] branchType = bckE_req_if.branch_type;
wire is_store = (bckE_req_if.mem_write != `WORD_SEL_NO); wire is_store = (bckE_req_if.mem_write != `BYTE_EN_NO);
wire is_load = (bckE_req_if.mem_read != `WORD_SEL_NO); wire is_load = (bckE_req_if.mem_read != `BYTE_EN_NO);
wire jalQual = bckE_req_if.jalQual; wire jalQual = bckE_req_if.jalQual;
`DEBUG_END `DEBUG_END

View file

@ -16,14 +16,14 @@ module VX_icache_stage (
reg[`NUM_THREADS-1:0] threads_active[`NUM_WARPS-1:0]; reg[`NUM_THREADS-1:0] threads_active[`NUM_WARPS-1:0];
wire valid_inst = (|fe_inst_meta_fi.valid); wire valid_inst = (|fe_inst_meta_fi.valid);
// Icache Request // Icache Request
assign icache_req_if.core_req_valid = valid_inst && !total_freeze; assign icache_req_if.core_req_valid = valid_inst && !total_freeze;
assign icache_req_if.core_req_addr = fe_inst_meta_fi.inst_pc; assign icache_req_if.core_req_addr = fe_inst_meta_fi.inst_pc;
assign icache_req_if.core_req_data = 32'b0; assign icache_req_if.core_req_data = 32'b0;
assign icache_req_if.core_req_read = `WORD_SEL_LW; assign icache_req_if.core_req_read = `BYTE_EN_LW;
assign icache_req_if.core_req_write = `WORD_SEL_NO; assign icache_req_if.core_req_write = `BYTE_EN_NO;
assign icache_req_if.core_req_tag = {fe_inst_meta_fi.inst_pc, 2'b1, 5'b0, fe_inst_meta_fi.warp_num}; assign icache_req_if.core_req_tag = {fe_inst_meta_fi.inst_pc, 2'b1, 5'b0, fe_inst_meta_fi.warp_num};
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN

View file

@ -16,7 +16,7 @@ module VX_inst_multiplex (
wire[`NUM_THREADS-1:0] is_gpu_mask; wire[`NUM_THREADS-1:0] is_gpu_mask;
wire[`NUM_THREADS-1:0] is_csr_mask; wire[`NUM_THREADS-1:0] is_csr_mask;
wire is_mem = (bckE_req_if.mem_write != `WORD_SEL_NO) || (bckE_req_if.mem_read != `WORD_SEL_NO); wire is_mem = (bckE_req_if.mem_write != `BYTE_EN_NO) || (bckE_req_if.mem_read != `BYTE_EN_NO);
wire is_gpu = (bckE_req_if.is_wspawn || bckE_req_if.is_tmc || bckE_req_if.is_barrier || bckE_req_if.is_split); wire is_gpu = (bckE_req_if.is_wspawn || bckE_req_if.is_tmc || bckE_req_if.is_barrier || bckE_req_if.is_split);
wire is_csr = bckE_req_if.is_csr; wire is_csr = bckE_req_if.is_csr;
// wire is_gpu = 0; // wire is_gpu = 0;

View file

@ -24,18 +24,15 @@ module VX_lsu_unit (
.address (address) .address (address)
); );
wire[`NUM_THREADS-1:0][31:0] use_address; wire[`NUM_THREADS-1:0][31:0] use_address;
wire[`NUM_THREADS-1:0][31:0] use_store_data; wire[`NUM_THREADS-1:0][31:0] use_store_data;
wire[`NUM_THREADS-1:0] use_valid; wire[`NUM_THREADS-1:0] use_valid;
wire[`WORD_SEL_BITS-1:0] use_mem_read; wire[`BYTE_EN_BITS-1:0] use_mem_read;
wire[`WORD_SEL_BITS-1:0] use_mem_write; wire[`BYTE_EN_BITS-1:0] use_mem_write;
wire[4:0] use_rd; wire[4:0] use_rd;
wire[`NW_BITS-1:0] use_warp_num; wire[`NW_BITS-1:0] use_warp_num;
wire[1:0] use_wb; wire[1:0] use_wb;
wire[31:0] use_pc; wire[31:0] use_pc;
wire[(`LOG2UP(`NUM_THREADS))-1:0] tag_index;
wire zero = 0;
VX_generic_register #( VX_generic_register #(
.N(45 + `NW_BITS-1 + 1 + `NUM_THREADS*65) .N(45 + `NW_BITS-1 + 1 + `NUM_THREADS*65)
@ -43,7 +40,7 @@ module VX_lsu_unit (
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(delay), .stall(delay),
.flush(zero), .flush(0),
.in ({address , lsu_req_if.store_data, lsu_req_if.valid, lsu_req_if.mem_read, lsu_req_if.mem_write, lsu_req_if.rd, lsu_req_if.warp_num, lsu_req_if.wb, lsu_req_if.lsu_pc}), .in ({address , lsu_req_if.store_data, lsu_req_if.valid, lsu_req_if.mem_read, lsu_req_if.mem_write, lsu_req_if.rd, lsu_req_if.warp_num, lsu_req_if.wb, lsu_req_if.lsu_pc}),
.out ({use_address, use_store_data , use_valid , use_mem_read , use_mem_write , use_rd , use_warp_num , use_wb , use_pc }) .out ({use_address, use_store_data , use_valid , use_mem_read , use_mem_write , use_rd , use_warp_num , use_wb , use_pc })
); );
@ -54,36 +51,25 @@ module VX_lsu_unit (
assign dcache_req_if.core_req_write = {`NUM_THREADS{use_mem_write}}; assign dcache_req_if.core_req_write = {`NUM_THREADS{use_mem_write}};
assign dcache_req_if.core_req_addr = use_address; assign dcache_req_if.core_req_addr = use_address;
assign dcache_req_if.core_req_data = use_store_data; assign dcache_req_if.core_req_data = use_store_data;
assign dcache_req_if.core_req_tag = {`NUM_THREADS{use_pc, use_wb, use_rd, use_warp_num}}; assign dcache_req_if.core_req_tag = {use_pc, use_wb, use_rd, use_warp_num};
assign delay = ~dcache_req_if.core_req_ready; assign delay = ~dcache_req_if.core_req_ready;
// Core Response // Core Response
assign mem_wb_if.valid = dcache_rsp_if.core_rsp_valid; assign mem_wb_if.valid = dcache_rsp_if.core_rsp_valid;
assign mem_wb_if.data = dcache_rsp_if.core_rsp_data; assign mem_wb_if.data = dcache_rsp_if.core_rsp_data;
assign dcache_rsp_if.core_rsp_ready = ~no_slot_mem; assign dcache_rsp_if.core_rsp_ready = ~no_slot_mem;
assign {mem_wb_if.pc, mem_wb_if.wb, mem_wb_if.rd, mem_wb_if.warp_num} = dcache_rsp_if.core_rsp_tag[tag_index]; assign {mem_wb_if.pc, mem_wb_if.wb, mem_wb_if.rd, mem_wb_if.warp_num} = dcache_rsp_if.core_rsp_tag;
// select first valid entry in tag array
VX_generic_priority_encoder #(
.N(`NUM_THREADS)
) tag_select (
.valids(dcache_rsp_if.core_rsp_valid),
.index (tag_index),
`IGNORE_WARNINGS_BEGIN
.found ()
`IGNORE_WARNINGS_END
);
/*always_comb begin /*always_comb begin
if (1'($time & 1) && dcache_req_if.core_req_ready && |dcache_req_if.core_req_valid) begin if (1'($time & 1) && dcache_req_if.core_req_ready && |dcache_req_if.core_req_valid) begin
$display("*** %t: D$ req: valid=%b, addr=%0h, r=%d, w=%d, rd=%d, warp=%d, data=%0h", $time, use_valid, use_address, use_mem_read, use_mem_write, use_rd, use_warp_num, use_store_data); $display("*** %t: D$ req: valid=%b, addr=%0h, r=%d, w=%d, pc=%0h, rd=%d, warp=%d, data=%0h", $time, use_valid, use_address, use_mem_read, use_mem_write, use_pc, use_rd, use_warp_num, use_store_data);
end end
if (1'($time & 1) && dcache_rsp_if.core_rsp_ready && |dcache_rsp_if.core_rsp_valid) begin if (1'($time & 1) && dcache_rsp_if.core_rsp_ready && |dcache_rsp_if.core_rsp_valid) begin
$display("*** %t: D$ rsp: valid=%b, rd=%d, warp=%d, data=%0h", $time, dcache_rsp_if.core_rsp_valid, mem_wb_if.rd, mem_wb_if.warp_num, mem_wb_if.data); $display("*** %t: D$ rsp: valid=%b, pc=%0h, rd=%d, warp=%d, data=%0h", $time, mem_wb_if.valid, mem_wb_if.pc, mem_wb_if.rd, mem_wb_if.warp_num, mem_wb_if.data);
end end
end*/ end*/
endmodule // Memory endmodule

View file

@ -25,8 +25,8 @@ module VX_scheduler (
wire rs2_rename = rename_table[bckE_req_if.warp_num][bckE_req_if.rs2] != 0; wire rs2_rename = rename_table[bckE_req_if.warp_num][bckE_req_if.rs2] != 0;
wire rd_rename = rename_table[bckE_req_if.warp_num][bckE_req_if.rd ] != 0; wire rd_rename = rename_table[bckE_req_if.warp_num][bckE_req_if.rd ] != 0;
wire is_store = (bckE_req_if.mem_write != `WORD_SEL_NO); wire is_store = (bckE_req_if.mem_write != `BYTE_EN_NO);
wire is_load = (bckE_req_if.mem_read != `WORD_SEL_NO); wire is_load = (bckE_req_if.mem_read != `BYTE_EN_NO);
// classify our next instruction. // classify our next instruction.
wire is_mem = is_store || is_load; wire is_mem = is_store || is_load;

View file

@ -148,7 +148,7 @@ module VX_warp_sched (
end end
if (is_barrier) begin if (is_barrier) begin
warp_stalled[barrier_warp_num] <= 0; warp_stalled[barrier_warp_num] <= 0;
if (reached_barrier_limit) begin if (reached_barrier_limit) begin
barrier_stall_mask[barrier_id] <= 0; barrier_stall_mask[barrier_id] <= 0;
end else begin end else begin

View file

@ -41,10 +41,20 @@ module Vortex #(
input wire [`DDRAM_ADDR_WIDTH-1:0] llc_snp_req_addr, input wire [`DDRAM_ADDR_WIDTH-1:0] llc_snp_req_addr,
output wire llc_snp_req_ready, output wire llc_snp_req_ready,
// I/O // I/O request
output wire io_valid, output wire io_req_read,
output wire [31:0] io_data, output wire io_req_write,
input wire io_ready, output wire[31:0] io_req_addr,
output wire[31:0] io_req_data,
output wire[`BYTE_EN_BITS-1:0] io_req_byteen,
output wire[`CORE_REQ_TAG_WIDTH-1:0] io_req_tag,
input wire io_req_ready,
// I/O response
input wire io_rsp_valid,
input wire[31:0] io_rsp_data,
input wire[`CORE_REQ_TAG_WIDTH-1:0] io_rsp_tag,
output wire io_rsp_ready,
// Debug // Debug
output wire ebreak output wire ebreak
@ -62,20 +72,16 @@ module Vortex #(
VX_cache_core_req_if #( VX_cache_core_req_if #(
.NUM_REQUESTS(`DNUM_REQUESTS), .NUM_REQUESTS(`DNUM_REQUESTS),
.WORD_SIZE(`DWORD_SIZE), .WORD_SIZE(`DWORD_SIZE),
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH) .CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH),
) dcache_core_req_if(); .CORE_TAG_ID_BITS(`CORE_TAG_ID_BITS)
) dcache_core_req_if(), io_core_req_if(), dcache_io_core_req_if();
VX_cache_core_req_if #(
.NUM_REQUESTS(`DNUM_REQUESTS),
.WORD_SIZE(`DWORD_SIZE),
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH)
) dcache_core_req_qual_if();
VX_cache_core_rsp_if #( VX_cache_core_rsp_if #(
.NUM_REQUESTS(`DNUM_REQUESTS), .NUM_REQUESTS(`DNUM_REQUESTS),
.WORD_SIZE(`DWORD_SIZE), .WORD_SIZE(`DWORD_SIZE),
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH) .CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH),
) dcache_core_rsp_if(); .CORE_TAG_ID_BITS(`CORE_TAG_ID_BITS)
) dcache_core_rsp_if(), io_core_rsp_if(), dcache_io_core_rsp_if();
VX_cache_dram_req_if #( VX_cache_dram_req_if #(
.DRAM_LINE_WIDTH(`DDRAM_LINE_WIDTH), .DRAM_LINE_WIDTH(`DDRAM_LINE_WIDTH),
@ -100,29 +106,32 @@ module Vortex #(
assign dcache_dram_rsp_if.dram_rsp_tag = D_dram_rsp_tag; assign dcache_dram_rsp_if.dram_rsp_tag = D_dram_rsp_tag;
assign D_dram_rsp_ready = dcache_dram_rsp_if.dram_rsp_ready; assign D_dram_rsp_ready = dcache_dram_rsp_if.dram_rsp_ready;
wire to_io_bus = (dcache_core_req_if.core_req_addr[0] == `IO_BUS_ADDR); assign io_req_read = (io_core_req_if.core_req_read[0] != `BYTE_EN_NO);
assign io_valid = |dcache_core_req_if.core_req_valid && to_io_bus; assign io_req_write = (io_core_req_if.core_req_write[0] != `BYTE_EN_NO);
assign io_data = dcache_core_req_if.core_req_data[0]; assign io_req_addr = io_core_req_if.core_req_addr[0];
assign io_req_data = io_core_req_if.core_req_data[0];
assign io_req_byteen = io_req_read ? io_core_req_if.core_req_read[0] : io_core_req_if.core_req_write[0];
assign io_req_tag = io_core_req_if.core_req_tag[0];
assign io_core_req_if.core_req_ready = io_req_ready;
assign dcache_core_req_qual_if.core_req_valid = dcache_core_req_if.core_req_valid & {`NUM_THREADS{~to_io_bus}}; assign io_core_rsp_if.core_rsp_valid[0] = io_rsp_valid;
assign dcache_core_req_qual_if.core_req_read = dcache_core_req_if.core_req_read; assign io_core_rsp_if.core_rsp_data[0] = io_rsp_data;
assign dcache_core_req_qual_if.core_req_write = dcache_core_req_if.core_req_write; assign io_core_rsp_if.core_rsp_tag = io_rsp_tag;
assign dcache_core_req_qual_if.core_req_addr = dcache_core_req_if.core_req_addr; assign io_rsp_ready = io_core_rsp_if.core_rsp_ready;
assign dcache_core_req_qual_if.core_req_data = dcache_core_req_if.core_req_data;
assign dcache_core_req_qual_if.core_req_tag = dcache_core_req_if.core_req_tag;
assign dcache_core_req_if.core_req_ready = to_io_bus ? io_ready : dcache_core_req_qual_if.core_req_ready;
// Icache interfaces // Icache interfaces
VX_cache_core_req_if #( VX_cache_core_req_if #(
.NUM_REQUESTS(`INUM_REQUESTS), .NUM_REQUESTS(`INUM_REQUESTS),
.WORD_SIZE(`IWORD_SIZE), .WORD_SIZE(`IWORD_SIZE),
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH) .CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH),
.CORE_TAG_ID_BITS(`CORE_TAG_ID_BITS)
) icache_core_req_if(); ) icache_core_req_if();
VX_cache_core_rsp_if #( VX_cache_core_rsp_if #(
.NUM_REQUESTS(`INUM_REQUESTS), .NUM_REQUESTS(`INUM_REQUESTS),
.WORD_SIZE(`IWORD_SIZE), .WORD_SIZE(`IWORD_SIZE),
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH) .CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH),
.CORE_TAG_ID_BITS(`CORE_TAG_ID_BITS)
) icache_core_rsp_if(); ) icache_core_rsp_if();
VX_cache_dram_req_if #( VX_cache_dram_req_if #(
@ -162,7 +171,6 @@ VX_jal_rsp_if jal_rsp_if(); // Jump resolution to Fetch
VX_warp_ctl_if warp_ctl_if(); VX_warp_ctl_if warp_ctl_if();
// Cache snooping // Cache snooping
VX_cache_snp_req_if #(.DRAM_ADDR_WIDTH(`IDRAM_ADDR_WIDTH)) icache_snp_req_if();
VX_cache_snp_req_if #(.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH)) dcache_snp_req_if(); VX_cache_snp_req_if #(.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH)) dcache_snp_req_if();
assign dcache_snp_req_if.snp_req_valid = llc_snp_req_valid; assign dcache_snp_req_if.snp_req_valid = llc_snp_req_valid;
@ -203,9 +211,9 @@ VX_back_end #(
.warp_ctl_if (warp_ctl_if), .warp_ctl_if (warp_ctl_if),
.bckE_req_if (bckE_req_if), .bckE_req_if (bckE_req_if),
.jal_rsp_if (jal_rsp_if), .jal_rsp_if (jal_rsp_if),
.branch_rsp_if (branch_rsp_if), .branch_rsp_if (branch_rsp_if),
.dcache_rsp_if (dcache_core_rsp_if), .dcache_req_if (dcache_io_core_req_if),
.dcache_req_if (dcache_core_req_if), .dcache_rsp_if (dcache_io_core_rsp_if),
.writeback_if (writeback_if), .writeback_if (writeback_if),
.mem_delay (memory_delay), .mem_delay (memory_delay),
.exec_delay (exec_delay), .exec_delay (exec_delay),
@ -216,23 +224,32 @@ VX_dmem_ctrl dmem_ctrl (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
// Core <-> Dcache
.dcache_core_req_if (dcache_core_req_if),
.dcache_core_rsp_if (dcache_core_rsp_if),
// Dram <-> Dcache // Dram <-> Dcache
.dcache_dram_req_if (dcache_dram_req_if), .dcache_dram_req_if (dcache_dram_req_if),
.dcache_dram_rsp_if (dcache_dram_rsp_if), .dcache_dram_rsp_if (dcache_dram_rsp_if),
.dcache_snp_req_if (dcache_snp_req_if), .dcache_snp_req_if (dcache_snp_req_if),
// Dram <-> Icache
.icache_dram_req_if (icache_dram_req_if),
.icache_dram_rsp_if (icache_dram_rsp_if),
.icache_snp_req_if (icache_snp_req_if),
// Core <-> Icache // Core <-> Icache
.icache_core_req_if (icache_core_req_if), .icache_core_req_if (icache_core_req_if),
.icache_core_rsp_if (icache_core_rsp_if), .icache_core_rsp_if (icache_core_rsp_if),
// Core <-> Dcache // Dram <-> Icache
.dcache_core_req_if (dcache_core_req_qual_if), .icache_dram_req_if (icache_dram_req_if),
.dcache_core_rsp_if (dcache_core_rsp_if) .icache_dram_rsp_if (icache_dram_rsp_if)
);
VX_dcache_io_arb dcache_io_arb (
.io_select (dcache_io_core_req_if.core_req_addr[0] >= `IO_BUS_BASE_ADDR),
.core_req_if (dcache_io_core_req_if),
.dcache_core_req_if (dcache_core_req_if),
.io_core_req_if (io_core_req_if),
.dcache_core_rsp_if (dcache_core_rsp_if),
.io_core_rsp_if (io_core_rsp_if),
.core_rsp_if (dcache_io_core_rsp_if)
); );
endmodule // Vortex endmodule // Vortex

View file

@ -27,15 +27,24 @@ module Vortex_Cluster #(
input wire[`L2DRAM_ADDR_WIDTH-1:0] llc_snp_req_addr, input wire[`L2DRAM_ADDR_WIDTH-1:0] llc_snp_req_addr,
output wire llc_snp_req_ready, output wire llc_snp_req_ready,
// IO // I/O request
output wire io_valid, output wire io_req_read,
output wire [31:0] io_data, output wire io_req_write,
input wire io_ready, output wire[31:0] io_req_addr,
output wire[31:0] io_req_data,
output wire[`BYTE_EN_BITS-1:0] io_req_byteen,
output wire[`CORE_REQ_TAG_WIDTH-1:0] io_req_tag,
input wire io_req_ready,
// I/O response
input wire io_rsp_valid,
input wire[31:0] io_rsp_data,
input wire[`CORE_REQ_TAG_WIDTH-1:0] io_rsp_tag,
output wire io_rsp_ready,
// Debug // Debug
output wire ebreak output wire ebreak
); );
// DRAM Dcache Req
wire[`NUM_CORES-1:0] per_core_D_dram_req_read; wire[`NUM_CORES-1:0] per_core_D_dram_req_read;
wire[`NUM_CORES-1:0] per_core_D_dram_req_write; wire[`NUM_CORES-1:0] per_core_D_dram_req_write;
wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_D_dram_req_addr; wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_D_dram_req_addr;
@ -43,42 +52,39 @@ module Vortex_Cluster #(
wire[`NUM_CORES-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_D_dram_req_tag; wire[`NUM_CORES-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_D_dram_req_tag;
wire[`NUM_CORES-1:0] per_core_D_dram_req_ready; wire[`NUM_CORES-1:0] per_core_D_dram_req_ready;
// DRAM Dcache Rsp
wire[`NUM_CORES-1:0] per_core_D_dram_rsp_valid; wire[`NUM_CORES-1:0] per_core_D_dram_rsp_valid;
wire[`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_D_dram_rsp_data; wire[`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_D_dram_rsp_data;
wire[`NUM_CORES-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_D_dram_rsp_tag; wire[`NUM_CORES-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_D_dram_rsp_tag;
wire[`NUM_CORES-1:0] per_core_D_dram_rsp_ready; wire[`NUM_CORES-1:0] per_core_D_dram_rsp_ready;
// DRAM Icache Req
wire[`NUM_CORES-1:0] per_core_I_dram_req_read; wire[`NUM_CORES-1:0] per_core_I_dram_req_read;
wire[`NUM_CORES-1:0][`IDRAM_ADDR_WIDTH-1:0] per_core_I_dram_req_addr; wire[`NUM_CORES-1:0][`IDRAM_ADDR_WIDTH-1:0] per_core_I_dram_req_addr;
wire[`NUM_CORES-1:0][`IDRAM_LINE_WIDTH-1:0] per_core_I_dram_req_data; wire[`NUM_CORES-1:0][`IDRAM_LINE_WIDTH-1:0] per_core_I_dram_req_data;
wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_req_tag; wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_req_tag;
wire[`NUM_CORES-1:0] per_core_I_dram_req_ready; wire[`NUM_CORES-1:0] per_core_I_dram_req_ready;
// DRAM Icache Rsp
wire[`NUM_CORES-1:0] per_core_I_dram_rsp_valid; wire[`NUM_CORES-1:0] per_core_I_dram_rsp_valid;
wire[`NUM_CORES-1:0][`IDRAM_LINE_WIDTH-1:0] per_core_I_dram_rsp_data; wire[`NUM_CORES-1:0][`IDRAM_LINE_WIDTH-1:0] per_core_I_dram_rsp_data;
wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_rsp_tag; wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_rsp_tag;
wire[`NUM_CORES-1:0] per_core_I_dram_rsp_ready; wire[`NUM_CORES-1:0] per_core_I_dram_rsp_ready;
// Snooping
wire snp_fwd_valid; wire snp_fwd_valid;
wire[`DDRAM_ADDR_WIDTH-1:0] snp_fwd_addr; wire[`DDRAM_ADDR_WIDTH-1:0] snp_fwd_addr;
wire[`NUM_CORES-1:0] per_core_snp_fwd_ready; wire[`NUM_CORES-1:0] per_core_snp_fwd_ready;
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
wire[`NUM_CORES-1:0] per_core_io_valid; wire[`NUM_CORES-1:0] per_core_io_req_read;
wire[`NUM_CORES-1:0][31:0] per_core_io_data; wire[`NUM_CORES-1:0] per_core_io_req_write;
wire[`NUM_CORES-1:0][31:0] per_core_io_req_addr;
wire[`NUM_CORES-1:0][31:0] per_core_io_req_data;
wire[`NUM_CORES-1:0][`BYTE_EN_BITS-1:0] per_core_io_req_byteen;
wire[`NUM_CORES-1:0][`CORE_REQ_TAG_WIDTH-1:0] per_core_io_req_tag;
wire[`NUM_CORES-1:0] per_core_io_rsp_ready;
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
// ebreak
wire[`NUM_CORES-1:0] per_core_ebreak; wire[`NUM_CORES-1:0] per_core_ebreak;
assign io_valid = per_core_io_valid[0];
assign io_data = per_core_io_data[0];
assign ebreak = (& per_core_ebreak);
genvar i; genvar i;
for (i = 0; i < `NUM_CORES; i = i + 1) begin for (i = 0; i < `NUM_CORES; i = i + 1) begin
Vortex #( Vortex #(
@ -97,6 +103,7 @@ module Vortex_Cluster #(
.D_dram_rsp_tag (per_core_D_dram_rsp_tag [i]), .D_dram_rsp_tag (per_core_D_dram_rsp_tag [i]),
.D_dram_rsp_ready (per_core_D_dram_rsp_ready [i]), .D_dram_rsp_ready (per_core_D_dram_rsp_ready [i]),
.I_dram_req_read (per_core_I_dram_req_read [i]), .I_dram_req_read (per_core_I_dram_req_read [i]),
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
.I_dram_req_write (), .I_dram_req_write (),
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
@ -107,24 +114,47 @@ module Vortex_Cluster #(
.I_dram_rsp_valid (per_core_I_dram_rsp_valid [i]), .I_dram_rsp_valid (per_core_I_dram_rsp_valid [i]),
.I_dram_rsp_tag (per_core_I_dram_rsp_tag [i]), .I_dram_rsp_tag (per_core_I_dram_rsp_tag [i]),
.I_dram_rsp_data (per_core_I_dram_rsp_data [i]), .I_dram_rsp_data (per_core_I_dram_rsp_data [i]),
.I_dram_rsp_ready (per_core_I_dram_rsp_ready [i]), .I_dram_rsp_ready (per_core_I_dram_rsp_ready [i]),
.llc_snp_req_valid (snp_fwd_valid), .llc_snp_req_valid (snp_fwd_valid),
.llc_snp_req_addr (snp_fwd_addr), .llc_snp_req_addr (snp_fwd_addr),
.llc_snp_req_ready (per_core_snp_fwd_ready [i]), .llc_snp_req_ready (per_core_snp_fwd_ready [i]),
.io_valid (per_core_io_valid [i]),
.io_data (per_core_io_data [i]), .io_req_read (per_core_io_req_read [i]),
.io_ready (io_ready), .io_req_write (per_core_io_req_write [i]),
.io_req_addr (per_core_io_req_addr [i]),
.io_req_data (per_core_io_req_data [i]),
.io_req_byteen (per_core_io_req_byteen [i]),
.io_req_tag (per_core_io_req_tag [i]),
.io_req_ready (io_req_ready),
.io_rsp_valid (io_rsp_valid),
.io_rsp_data (io_rsp_data),
.io_rsp_tag (io_rsp_tag),
.io_rsp_ready (per_core_io_rsp_ready [i]),
.ebreak (per_core_ebreak [i]) .ebreak (per_core_ebreak [i])
); );
end end
assign io_req_read = per_core_io_req_read[0];
assign io_req_write = per_core_io_req_write[0];
assign io_req_addr = per_core_io_req_addr[0];
assign io_req_data = per_core_io_req_data[0];
assign io_req_byteen = per_core_io_req_byteen[0];
assign io_req_tag = per_core_io_req_tag[0];
assign io_rsp_ready = per_core_io_rsp_ready[0];
assign ebreak = (& per_core_ebreak);
if (`L2_ENABLE) begin if (`L2_ENABLE) begin
// L2 Cache /////////////////////////////////////////////////////////// // L2 Cache ///////////////////////////////////////////////////////////
wire[`L2NUM_REQUESTS-1:0] l2_core_req_valid; wire[`L2NUM_REQUESTS-1:0] l2_core_req_valid;
wire[`L2NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l2_core_req_write; wire[`L2NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] l2_core_req_write;
wire[`L2NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l2_core_req_read; wire[`L2NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] l2_core_req_read;
wire[`L2NUM_REQUESTS-1:0][31:0] l2_core_req_addr; wire[`L2NUM_REQUESTS-1:0][31:0] l2_core_req_addr;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] l2_core_req_tag; wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] l2_core_req_tag;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] l2_core_req_data; wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] l2_core_req_data;
@ -139,11 +169,11 @@ module Vortex_Cluster #(
assign l2_core_req_valid [i] = (per_core_D_dram_req_read[(i/2)] | per_core_D_dram_req_write[(i/2)]); assign l2_core_req_valid [i] = (per_core_D_dram_req_read[(i/2)] | per_core_D_dram_req_write[(i/2)]);
assign l2_core_req_valid [i+1] = per_core_I_dram_req_read[(i/2)]; assign l2_core_req_valid [i+1] = per_core_I_dram_req_read[(i/2)];
assign l2_core_req_read [i] = per_core_D_dram_req_read[(i/2)] ? `WORD_SEL_LW : `WORD_SEL_NO; assign l2_core_req_read [i] = per_core_D_dram_req_read[(i/2)] ? `BYTE_EN_LW : `BYTE_EN_NO;
assign l2_core_req_read [i+1] = per_core_I_dram_req_read[(i/2)] ? `WORD_SEL_LW : `WORD_SEL_NO; assign l2_core_req_read [i+1] = per_core_I_dram_req_read[(i/2)] ? `BYTE_EN_LW : `BYTE_EN_NO;
assign l2_core_req_write [i] = per_core_D_dram_req_write[(i/2)] ? `WORD_SEL_LW : `WORD_SEL_NO; assign l2_core_req_write [i] = per_core_D_dram_req_write[(i/2)] ? `BYTE_EN_LW : `BYTE_EN_NO;
assign l2_core_req_write [i+1] = `WORD_SEL_NO; assign l2_core_req_write [i+1] = `BYTE_EN_NO;
assign l2_core_req_addr [i] = {per_core_D_dram_req_addr[(i/2)], {`LOG2UP(`DBANK_LINE_SIZE){1'b0}}}; assign l2_core_req_addr [i] = {per_core_D_dram_req_addr[(i/2)], {`LOG2UP(`DBANK_LINE_SIZE){1'b0}}};
assign l2_core_req_addr [i+1] = {per_core_I_dram_req_addr[(i/2)], {`LOG2UP(`IBANK_LINE_SIZE){1'b0}}}; assign l2_core_req_addr [i+1] = {per_core_I_dram_req_addr[(i/2)], {`LOG2UP(`IBANK_LINE_SIZE){1'b0}}};
@ -177,7 +207,6 @@ module Vortex_Cluster #(
.WORD_SIZE (`L2WORD_SIZE), .WORD_SIZE (`L2WORD_SIZE),
.NUM_REQUESTS (`L2NUM_REQUESTS), .NUM_REQUESTS (`L2NUM_REQUESTS),
.STAGE_1_CYCLES (`L2STAGE_1_CYCLES), .STAGE_1_CYCLES (`L2STAGE_1_CYCLES),
.FUNC_ID (`L2FUNC_ID),
.REQQ_SIZE (`L2REQQ_SIZE), .REQQ_SIZE (`L2REQQ_SIZE),
.MRVQ_SIZE (`L2MRVQ_SIZE), .MRVQ_SIZE (`L2MRVQ_SIZE),
.DFPQ_SIZE (`L2DFPQ_SIZE), .DFPQ_SIZE (`L2DFPQ_SIZE),
@ -189,8 +218,12 @@ module Vortex_Cluster #(
.FFSQ_SIZE (`L2FFSQ_SIZE), .FFSQ_SIZE (`L2FFSQ_SIZE),
.PRFQ_SIZE (`L2PRFQ_SIZE), .PRFQ_SIZE (`L2PRFQ_SIZE),
.PRFQ_STRIDE (`L2PRFQ_STRIDE), .PRFQ_STRIDE (`L2PRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`L2FILL_INVALIDAOR_SIZE), .FILL_INVALIDAOR_SIZE (`L2FILL_INVALIDAOR_SIZE),
.DRAM_ENABLE (1),
.WRITE_ENABLE (1),
.SNOOP_FORWARDING_ENABLE(1),
.CORE_TAG_WIDTH (`DDRAM_TAG_WIDTH), .CORE_TAG_WIDTH (`DDRAM_TAG_WIDTH),
.CORE_TAG_ID_BITS (0),
.DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH) .DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH)
) gpu_l2cache ( ) gpu_l2cache (
.clk (clk), .clk (clk),
@ -266,8 +299,8 @@ module Vortex_Cluster #(
assign per_core_req_tag [i] = per_core_D_dram_req_tag[(i/2)]; assign per_core_req_tag [i] = per_core_D_dram_req_tag[(i/2)];
assign per_core_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)]; assign per_core_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)];
assign per_core_D_dram_req_ready[(i/2)] = per_core_req_ready[i]; assign per_core_D_dram_req_ready [(i/2)] = per_core_req_ready[i];
assign per_core_I_dram_req_ready[(i/2)] = per_core_req_ready[i+1]; assign per_core_I_dram_req_ready [(i/2)] = per_core_req_ready[i+1];
assign per_core_D_dram_rsp_valid [(i/2)] = per_core_rsp_valid[i]; assign per_core_D_dram_rsp_valid [(i/2)] = per_core_rsp_valid[i];
assign per_core_I_dram_rsp_valid [(i/2)] = per_core_rsp_valid[i+1]; assign per_core_I_dram_rsp_valid [(i/2)] = per_core_rsp_valid[i+1];

View file

@ -6,7 +6,7 @@ module Vortex_Socket (
input wire clk, input wire clk,
input wire reset, input wire reset,
// DRAM Req // DRAM request
output wire dram_req_read, output wire dram_req_read,
output wire dram_req_write, output wire dram_req_write,
output wire[`L3DRAM_ADDR_WIDTH-1:0] dram_req_addr, output wire[`L3DRAM_ADDR_WIDTH-1:0] dram_req_addr,
@ -14,21 +14,31 @@ module Vortex_Socket (
output wire[`L3DRAM_TAG_WIDTH-1:0] dram_req_tag, output wire[`L3DRAM_TAG_WIDTH-1:0] dram_req_tag,
input wire dram_req_ready, input wire dram_req_ready,
// DRAM Rsp // DRAM response
input wire dram_rsp_valid, input wire dram_rsp_valid,
input wire[`L3DRAM_LINE_WIDTH-1:0] dram_rsp_data, input wire[`L3DRAM_LINE_WIDTH-1:0] dram_rsp_data,
input wire[`L3DRAM_TAG_WIDTH-1:0] dram_rsp_tag, input wire[`L3DRAM_TAG_WIDTH-1:0] dram_rsp_tag,
output wire dram_rsp_ready, output wire dram_rsp_ready,
// Cache Snooping // Cache snooping
input wire llc_snp_req_valid, input wire llc_snp_req_valid,
input wire[`L3DRAM_ADDR_WIDTH-1:0] llc_snp_req_addr, input wire[`L3DRAM_ADDR_WIDTH-1:0] llc_snp_req_addr,
output wire llc_snp_req_ready, output wire llc_snp_req_ready,
// I/O // I/O request
output wire io_valid, output wire io_req_read,
output wire [31:0] io_data, output wire io_req_write,
input wire io_ready, output wire[31:0] io_req_addr,
output wire[31:0] io_req_data,
output wire[`BYTE_EN_BITS-1:0] io_req_byteen,
output wire[`CORE_REQ_TAG_WIDTH-1:0] io_req_tag,
input wire io_req_ready,
// I/O response
input wire io_rsp_valid,
input wire[31:0] io_rsp_data,
input wire[`CORE_REQ_TAG_WIDTH-1:0] io_rsp_tag,
output wire io_rsp_ready,
// Debug // Debug
output wire ebreak output wire ebreak
@ -57,24 +67,31 @@ module Vortex_Socket (
.llc_snp_req_addr (llc_snp_req_addr), .llc_snp_req_addr (llc_snp_req_addr),
.llc_snp_req_ready (llc_snp_req_ready), .llc_snp_req_ready (llc_snp_req_ready),
.io_valid (io_valid), .io_req_read (io_req_read),
.io_data (io_data), .io_req_write (io_req_write),
.io_ready (io_ready), .io_req_addr (io_req_addr),
.io_req_data (io_req_data),
.io_req_byteen (io_req_byteen),
.io_req_tag (io_req_tag),
.io_req_ready (io_req_ready),
.io_rsp_valid (io_rsp_valid),
.io_rsp_data (io_rsp_data),
.io_rsp_tag (io_rsp_tag),
.io_rsp_ready (io_rsp_ready),
.ebreak (ebreak) .ebreak (ebreak)
); );
end else begin end else begin
// DRAM Dcache Req
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_read; wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_read;
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_write; wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_write;
wire[`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr; wire[`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr;
wire[`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data; wire[`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data;
wire[`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag; wire[`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag;
wire l3_core_req_ready; wire l3_core_req_ready;
// DRAM Dcache Rsp
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_valid; wire[`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_valid;
wire[`NUM_CLUSTERS-1:0][`L3DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data; wire[`NUM_CLUSTERS-1:0][`L3DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data;
wire[`NUM_CLUSTERS-1:0][`L3DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag; wire[`NUM_CLUSTERS-1:0][`L3DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag;
@ -85,16 +102,17 @@ module Vortex_Socket (
wire[`NUM_CLUSTERS-1:0] per_cluster_snp_fwd_ready; wire[`NUM_CLUSTERS-1:0] per_cluster_snp_fwd_ready;
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
wire[`NUM_CLUSTERS-1:0] per_cluster_io_valid; wire[`NUM_CLUSTERS-1:0] per_cluster_io_req_read;
wire[`NUM_CLUSTERS-1:0][31:0] per_cluster_io_data; wire[`NUM_CLUSTERS-1:0] per_cluster_io_req_write;
wire[`NUM_CLUSTERS-1:0][31:0] per_cluster_io_req_addr;
wire[`NUM_CLUSTERS-1:0][31:0] per_cluster_io_req_data;
wire[`NUM_CLUSTERS-1:0][`BYTE_EN_BITS-1:0] per_cluster_io_req_byteen;
wire[`NUM_CLUSTERS-1:0][`CORE_REQ_TAG_WIDTH-1:0] per_cluster_io_req_tag;
wire[`NUM_CLUSTERS-1:0] per_cluster_io_rsp_ready;
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
wire[`NUM_CLUSTERS-1:0] per_cluster_ebreak; wire[`NUM_CLUSTERS-1:0] per_cluster_ebreak;
assign io_valid = per_cluster_io_valid[0];
assign io_data = per_cluster_io_data[0];
assign ebreak = (& per_cluster_ebreak);
genvar i; genvar i;
for (i = 0; i < `NUM_CLUSTERS; i=i+1) begin for (i = 0; i < `NUM_CLUSTERS; i=i+1) begin
@ -120,19 +138,39 @@ module Vortex_Socket (
.llc_snp_req_addr (snp_fwd_addr), .llc_snp_req_addr (snp_fwd_addr),
.llc_snp_req_ready (per_cluster_snp_fwd_ready [i]), .llc_snp_req_ready (per_cluster_snp_fwd_ready [i]),
.io_valid (per_cluster_io_valid [i]), .io_req_read (per_cluster_io_req_read [i]),
.io_data (per_cluster_io_data [i]), .io_req_write (per_cluster_io_req_write [i]),
.io_ready (io_ready), .io_req_addr (per_cluster_io_req_addr [i]),
.io_req_data (per_cluster_io_req_data [i]),
.io_req_byteen (per_cluster_io_req_byteen [i]),
.io_req_tag (per_cluster_io_req_tag [i]),
.io_req_ready (io_req_ready),
.io_rsp_valid (io_rsp_valid),
.io_rsp_data (io_rsp_data),
.io_rsp_tag (io_rsp_tag),
.io_rsp_ready (per_cluster_io_rsp_ready [i]),
.ebreak (per_cluster_ebreak [i]) .ebreak (per_cluster_ebreak [i])
); );
end end
assign io_req_read = per_cluster_io_req_read[0];
assign io_req_write = per_cluster_io_req_write[0];
assign io_req_addr = per_cluster_io_req_addr[0];
assign io_req_data = per_cluster_io_req_data[0];
assign io_req_byteen = per_cluster_io_req_byteen[0];
assign io_req_tag = per_cluster_io_req_tag[0];
assign io_rsp_ready = per_cluster_io_rsp_ready[0];
assign ebreak = (& per_cluster_ebreak);
// L3 Cache /////////////////////////////////////////////////////////// // L3 Cache ///////////////////////////////////////////////////////////
wire[`L3NUM_REQUESTS-1:0] l3_core_req_valid; wire[`L3NUM_REQUESTS-1:0] l3_core_req_valid;
wire[`L3NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l3_core_req_read; wire[`L3NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] l3_core_req_read;
wire[`L3NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l3_core_req_write; wire[`L3NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] l3_core_req_write;
wire[`L3NUM_REQUESTS-1:0][31:0] l3_core_req_addr; wire[`L3NUM_REQUESTS-1:0][31:0] l3_core_req_addr;
wire[`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_req_data; wire[`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_req_data;
wire[`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_req_tag; wire[`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_req_tag;
@ -145,8 +183,8 @@ module Vortex_Socket (
for (i = 0; i < `L3NUM_REQUESTS; i=i+1) begin for (i = 0; i < `L3NUM_REQUESTS; i=i+1) begin
// Core Request // Core Request
assign l3_core_req_valid [i] = (per_cluster_dram_req_read [i] | per_cluster_dram_req_write [i]); assign l3_core_req_valid [i] = (per_cluster_dram_req_read [i] | per_cluster_dram_req_write [i]);
assign l3_core_req_read [i] = per_cluster_dram_req_read [i] ? `WORD_SEL_LW : `WORD_SEL_NO; assign l3_core_req_read [i] = per_cluster_dram_req_read [i] ? `BYTE_EN_LW : `BYTE_EN_NO;
assign l3_core_req_write [i] = per_cluster_dram_req_write [i] ? `WORD_SEL_LW : `WORD_SEL_NO; assign l3_core_req_write [i] = per_cluster_dram_req_write [i] ? `BYTE_EN_LW : `BYTE_EN_NO;
assign l3_core_req_addr [i] = {per_cluster_dram_req_addr [i], {`LOG2UP(`L2BANK_LINE_SIZE){1'b0}}}; assign l3_core_req_addr [i] = {per_cluster_dram_req_addr [i], {`LOG2UP(`L2BANK_LINE_SIZE){1'b0}}};
assign l3_core_req_tag [i] = per_cluster_dram_req_tag [i]; assign l3_core_req_tag [i] = per_cluster_dram_req_tag [i];
assign l3_core_req_data [i] = per_cluster_dram_req_data [i]; assign l3_core_req_data [i] = per_cluster_dram_req_data [i];
@ -167,7 +205,6 @@ module Vortex_Socket (
.WORD_SIZE (`L3WORD_SIZE), .WORD_SIZE (`L3WORD_SIZE),
.NUM_REQUESTS (`L3NUM_REQUESTS), .NUM_REQUESTS (`L3NUM_REQUESTS),
.STAGE_1_CYCLES (`L3STAGE_1_CYCLES), .STAGE_1_CYCLES (`L3STAGE_1_CYCLES),
.FUNC_ID (`L2FUNC_ID),
.REQQ_SIZE (`L3REQQ_SIZE), .REQQ_SIZE (`L3REQQ_SIZE),
.MRVQ_SIZE (`L3MRVQ_SIZE), .MRVQ_SIZE (`L3MRVQ_SIZE),
.DFPQ_SIZE (`L3DFPQ_SIZE), .DFPQ_SIZE (`L3DFPQ_SIZE),
@ -180,7 +217,11 @@ module Vortex_Socket (
.PRFQ_SIZE (`L3PRFQ_SIZE), .PRFQ_SIZE (`L3PRFQ_SIZE),
.PRFQ_STRIDE (`L3PRFQ_STRIDE), .PRFQ_STRIDE (`L3PRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`L3FILL_INVALIDAOR_SIZE), .FILL_INVALIDAOR_SIZE (`L3FILL_INVALIDAOR_SIZE),
.DRAM_ENABLE (1),
.WRITE_ENABLE (1),
.SNOOP_FORWARDING_ENABLE(1),
.CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH), .CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH),
.CORE_TAG_ID_BITS (0),
.DRAM_TAG_WIDTH (`L3DRAM_TAG_WIDTH) .DRAM_TAG_WIDTH (`L3DRAM_TAG_WIDTH)
) gpu_l3cache ( ) gpu_l3cache (
.clk (clk), .clk (clk),

117
hw/rtl/cache/VX_bank.v vendored
View file

@ -12,9 +12,7 @@ module VX_bank #(
// Number of Word requests per cycle {1, 2, 4, 8, ...} // Number of Word requests per cycle {1, 2, 4, 8, ...}
parameter NUM_REQUESTS = 2, parameter NUM_REQUESTS = 2,
// Number of cycles to complete stage 1 (read from memory) // Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2, parameter STAGE_1_CYCLES = 2,
// Function ID, {Dcache=0, Icache=1, Sharedmemory=2}
parameter FUNC_ID = 0,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...} // Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Core Request Queue Size // Core Request Queue Size
@ -38,11 +36,23 @@ module VX_bank #(
// Fill Forward SNP Queue // Fill Forward SNP Queue
parameter FFSQ_SIZE = 8, parameter FFSQ_SIZE = 8,
// Fill Invalidator Size {Fill invalidator must be active} // Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16, parameter FILL_INVALIDAOR_SIZE = 16,
// caceh requests tag size // Enable cache writeable
parameter CORE_TAG_WIDTH = 1 parameter WRITE_ENABLE = 1,
// Enable dram update
parameter DRAM_ENABLE = 1,
// Enable snoop forwarding
parameter SNOOP_FORWARDING_ENABLE = 0,
// core request tag size
parameter CORE_TAG_WIDTH = 1,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@ -50,11 +60,11 @@ module VX_bank #(
// Core Request // Core Request
input wire core_req_ready, input wire core_req_ready,
input wire [NUM_REQUESTS-1:0] core_req_valids, input wire [NUM_REQUESTS-1:0] core_req_valids,
input wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] core_req_read, input wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] core_req_read,
input wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] core_req_write, input wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] core_req_write,
input wire [NUM_REQUESTS-1:0][31:0] core_req_addr, input wire [NUM_REQUESTS-1:0][31:0] core_req_addr,
input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_req_data, input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_req_data,
input wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag, input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_req_tag,
output wire core_req_full, output wire core_req_full,
// Core Response // Core Response
@ -98,7 +108,7 @@ module VX_bank #(
if (reset) begin if (reset) begin
snoop_state <= 0; snoop_state <= 0;
end else begin end else begin
snoop_state <= (snoop_state | snp_req_valid) && ((FUNC_ID == `L2FUNC_ID) || (FUNC_ID == `L3FUNC_ID)); snoop_state <= (snoop_state | snp_req_valid) && SNOOP_FORWARDING_ENABLE;
end end
end end
@ -156,12 +166,12 @@ module VX_bank #(
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
wire [`WORD_WIDTH-1:0] reqq_req_writeword_st0; wire [`WORD_WIDTH-1:0] reqq_req_writeword_st0;
wire [CORE_TAG_WIDTH-1:0] reqq_req_tag_st0; wire [CORE_TAG_WIDTH-1:0] reqq_req_tag_st0;
wire [`WORD_SEL_BITS-1:0] reqq_req_mem_read_st0; wire [`BYTE_EN_BITS-1:0] reqq_req_mem_read_st0;
wire [`WORD_SEL_BITS-1:0] reqq_req_mem_write_st0; wire [`BYTE_EN_BITS-1:0] reqq_req_mem_write_st0;
assign reqq_push = core_req_ready && (|core_req_valids); assign reqq_push = core_req_ready && (|core_req_valids);
VX_cache_req_queue #( VX_cache_req_queue #(
.CACHE_SIZE (CACHE_SIZE), .CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE), .BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS), .NUM_BANKS (NUM_BANKS),
@ -177,7 +187,8 @@ module VX_bank #(
.DFQQ_SIZE (DFQQ_SIZE), .DFQQ_SIZE (DFQQ_SIZE),
.LLVQ_SIZE (LLVQ_SIZE), .LLVQ_SIZE (LLVQ_SIZE),
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE), .FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH) .CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
) req_queue ( ) req_queue (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@ -212,17 +223,17 @@ module VX_bank #(
wire [`BASE_ADDR_BITS-1:0] mrvq_wsel_st0; wire [`BASE_ADDR_BITS-1:0] mrvq_wsel_st0;
wire [`WORD_WIDTH-1:0] mrvq_writeword_st0; wire [`WORD_WIDTH-1:0] mrvq_writeword_st0;
wire [CORE_TAG_WIDTH-1:0] mrvq_tag_st0; wire [CORE_TAG_WIDTH-1:0] mrvq_tag_st0;
wire [`WORD_SEL_BITS-1:0] mrvq_mem_read_st0; wire [`BYTE_EN_BITS-1:0] mrvq_mem_read_st0;
wire [`WORD_SEL_BITS-1:0] mrvq_mem_write_st0; wire [`BYTE_EN_BITS-1:0] mrvq_mem_write_st0;
wire miss_add; wire miss_add;
wire[`LINE_ADDR_WIDTH-1:0] miss_add_addr; wire[`LINE_ADDR_WIDTH-1:0] miss_add_addr;
wire[`BASE_ADDR_BITS-1:0] miss_add_wsel; wire[`BASE_ADDR_BITS-1:0] miss_add_wsel;
wire[`WORD_WIDTH-1:0] miss_add_data; wire[`WORD_WIDTH-1:0] miss_add_data;
wire[`LOG2UP(NUM_REQUESTS)-1:0] miss_add_tid; wire[`LOG2UP(NUM_REQUESTS)-1:0] miss_add_tid;
wire[CORE_TAG_WIDTH-1:0] miss_add_tag; wire[CORE_TAG_WIDTH-1:0] miss_add_tag;
wire[`WORD_SEL_BITS-1:0] miss_add_mem_read; wire[`BYTE_EN_BITS-1:0] miss_add_mem_read;
wire[`WORD_SEL_BITS-1:0] miss_add_mem_write; wire[`BYTE_EN_BITS-1:0] miss_add_mem_write;
wire[`LINE_ADDR_WIDTH-1:0] addr_st2; wire[`LINE_ADDR_WIDTH-1:0] addr_st2;
wire is_fill_st2; wire is_fill_st2;
@ -294,8 +305,8 @@ module VX_bank #(
0; 0;
assign qual_going_to_write_st0 = dfpq_pop ? 1 : assign qual_going_to_write_st0 = dfpq_pop ? 1 :
(mrvq_pop && (mrvq_mem_write_st0 != `WORD_SEL_NO)) ? 1 : (mrvq_pop && (mrvq_mem_write_st0 != `BYTE_EN_NO)) ? 1 :
(reqq_pop && (reqq_req_mem_write_st0 != `WORD_SEL_NO)) ? 1 : (reqq_pop && (reqq_req_mem_write_st0 != `BYTE_EN_NO)) ? 1 :
(snrq_pop) ? 1 : (snrq_pop) ? 1 :
0; 0;
@ -339,8 +350,8 @@ module VX_bank #(
wire [CORE_TAG_WIDTH-1:0] tag_st1e; wire [CORE_TAG_WIDTH-1:0] tag_st1e;
wire [`LOG2UP(NUM_REQUESTS)-1:0] tid_st1e; wire [`LOG2UP(NUM_REQUESTS)-1:0] tid_st1e;
`DEBUG_END `DEBUG_END
wire [`WORD_SEL_BITS-1:0] mem_read_st1e; wire [`BYTE_EN_BITS-1:0] mem_read_st1e;
wire [`WORD_SEL_BITS-1:0] mem_write_st1e; wire [`BYTE_EN_BITS-1:0] mem_write_st1e;
wire fill_saw_dirty_st1e; wire fill_saw_dirty_st1e;
wire is_snp_st1e; wire is_snp_st1e;
@ -354,7 +365,6 @@ module VX_bank #(
.WORD_SIZE (WORD_SIZE), .WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS), .NUM_REQUESTS (NUM_REQUESTS),
.STAGE_1_CYCLES (STAGE_1_CYCLES), .STAGE_1_CYCLES (STAGE_1_CYCLES),
.FUNC_ID (FUNC_ID),
.REQQ_SIZE (REQQ_SIZE), .REQQ_SIZE (REQQ_SIZE),
.MRVQ_SIZE (MRVQ_SIZE), .MRVQ_SIZE (MRVQ_SIZE),
.DFPQ_SIZE (DFPQ_SIZE), .DFPQ_SIZE (DFPQ_SIZE),
@ -363,7 +373,9 @@ module VX_bank #(
.DWBQ_SIZE (DWBQ_SIZE), .DWBQ_SIZE (DWBQ_SIZE),
.DFQQ_SIZE (DFQQ_SIZE), .DFQQ_SIZE (DFQQ_SIZE),
.LLVQ_SIZE (LLVQ_SIZE), .LLVQ_SIZE (LLVQ_SIZE),
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE) .FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
.DRAM_ENABLE (DRAM_ENABLE),
.WRITE_ENABLE (WRITE_ENABLE)
) tag_data_access ( ) tag_data_access (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@ -429,7 +441,17 @@ module VX_bank #(
wire invalidate_fill; wire invalidate_fill;
// Enqueue to miss reserv if it's a valid miss // Enqueue to miss reserv if it's a valid miss
assign miss_add = valid_st2 && !is_snp_st2 && miss_st2 && !mrvq_full && !(should_flush && dwbq_push) && !((is_snp_st2 && valid_st2 && ffsq_full) ||((valid_st2 && !miss_st2) && cwbq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full)); assign miss_add = valid_st2
&& !is_snp_st2
&& miss_st2
&& !mrvq_full
&& !(should_flush && dwbq_push)
&& !((is_snp_st2 && valid_st2 && ffsq_full)
|| ((valid_st2 && !miss_st2) && cwbq_full)
|| (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full)
|| (valid_st2 && miss_st2 && mrvq_full)
|| (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full));
assign miss_add_addr = addr_st2; assign miss_add_addr = addr_st2;
assign miss_add_wsel = wsel_st2; assign miss_add_wsel = wsel_st2;
assign miss_add_data = writeword_st2; assign miss_add_data = writeword_st2;
@ -484,7 +506,14 @@ module VX_bank #(
); );
// Enqueue to CWB Queue // Enqueue to CWB Queue
wire cwbq_push = (valid_st2 && !miss_st2) && !cwbq_full && !((FUNC_ID == `L2FUNC_ID) && (miss_add_mem_write == `WORD_SEL_NO)) && !((is_snp_st2 && valid_st2 && ffsq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full)); wire cwbq_push = (valid_st2 && !miss_st2)
&& !cwbq_full
&& !(SNOOP_FORWARDING_ENABLE && (miss_add_mem_write == `BYTE_EN_NO))
&& !((is_snp_st2 && valid_st2 && ffsq_full)
|| (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full)
|| (valid_st2 && miss_st2 && mrvq_full)
|| (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full));
wire [`WORD_WIDTH-1:0] cwbq_data = readword_st2; wire [`WORD_WIDTH-1:0] cwbq_data = readword_st2;
wire [`LOG2UP(NUM_REQUESTS)-1:0] cwbq_tid = miss_add_tid; wire [`LOG2UP(NUM_REQUESTS)-1:0] cwbq_tid = miss_add_tid;
wire [CORE_TAG_WIDTH-1:0] cwbq_tag = miss_add_tag; wire [CORE_TAG_WIDTH-1:0] cwbq_tag = miss_add_tag;
@ -507,18 +536,27 @@ module VX_bank #(
.full (cwbq_full) .full (cwbq_full)
); );
assign should_flush = snoop_state && valid_st2 && (miss_add_mem_write != `WORD_SEL_NO) && !is_snp_st2 && !is_fill_st2; assign should_flush = snoop_state
&& valid_st2
&& (miss_add_mem_write != `BYTE_EN_NO)
&& !is_snp_st2 && !is_fill_st2;
// Enqueue to DWB Queue // Enqueue to DWB Queue
assign dwbq_push = ((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2 || should_flush) && !dwbq_full && !((is_snp_st2 && valid_st2 && ffsq_full) ||((valid_st2 && !miss_st2) && cwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full)); assign dwbq_push = ((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2 || should_flush)
&& !dwbq_full
&& !((is_snp_st2 && valid_st2 && ffsq_full)
|| ((valid_st2 && !miss_st2) && cwbq_full)
|| (valid_st2 && miss_st2 && mrvq_full)
|| (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full));
wire[`LINE_ADDR_WIDTH-1:0] dwbq_req_addr; wire[`LINE_ADDR_WIDTH-1:0] dwbq_req_addr;
wire dwbq_empty; wire dwbq_empty;
wire[`BANK_LINE_WORDS-1:0][`WORD_WIDTH-1:0] dwbq_req_data; wire[`BANK_LINE_WORDS-1:0][`WORD_WIDTH-1:0] dwbq_req_data;
if ((FUNC_ID == `L2FUNC_ID) || (FUNC_ID == `L3FUNC_ID)) begin if (SNOOP_FORWARDING_ENABLE) begin
assign dwbq_req_data = (should_flush && dwbq_push) ? writeword_st2 : readdata_st2; assign dwbq_req_data = (should_flush && dwbq_push) ? writeword_st2 : readdata_st2;
assign dwbq_req_addr = (should_flush && dwbq_push) ? (addr_st2) : {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]}; assign dwbq_req_addr = (should_flush && dwbq_push) ? addr_st2 : {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]};
end else begin end else begin
assign dwbq_req_data = readdata_st2; assign dwbq_req_data = readdata_st2;
assign dwbq_req_addr = {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]}; assign dwbq_req_addr = {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]};
@ -579,7 +617,14 @@ module VX_bank #(
wire snp_fwd_push; wire snp_fwd_push;
wire ffsq_empty; wire ffsq_empty;
assign snp_fwd_push = is_snp_st2 && valid_st2 && !ffsq_full && !(((valid_st2 && !miss_st2) && cwbq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full)); assign snp_fwd_push = is_snp_st2
&& valid_st2
&& !ffsq_full
&& !(((valid_st2 && !miss_st2) && cwbq_full)
|| (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full)
|| (valid_st2 && miss_st2 && mrvq_full)
|| (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full));
assign snp_fwd_valid = !ffsq_empty; assign snp_fwd_valid = !ffsq_empty;
VX_generic_queue #( VX_generic_queue #(
@ -596,6 +641,10 @@ module VX_bank #(
.full (ffsq_full) .full (ffsq_full)
); );
assign stall_bank_pipe = (is_snp_st2 && valid_st2 && ffsq_full) || ((valid_st2 && !miss_st2) && cwbq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full); assign stall_bank_pipe = (is_snp_st2 && valid_st2 && ffsq_full)
|| ((valid_st2 && !miss_st2) && cwbq_full)
|| (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full)
|| (valid_st2 && miss_st2 && mrvq_full)
|| (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full);
endmodule : VX_bank endmodule : VX_bank

View file

@ -12,9 +12,7 @@ module VX_cache #(
// Number of Word requests per cycle {1, 2, 4, 8, ...} // Number of Word requests per cycle {1, 2, 4, 8, ...}
parameter NUM_REQUESTS = 2, parameter NUM_REQUESTS = 2,
// Number of cycles to complete stage 1 (read from memory) // Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2, parameter STAGE_1_CYCLES = 2,
// Function ID, {Dcache=0, Icache=1, Sharedmemory=2}
parameter FUNC_ID = 3,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...} // Queues feeding into banks Knobs {1, 2, 4, 8, ...}
@ -40,14 +38,28 @@ module VX_cache #(
parameter FFSQ_SIZE = 8, parameter FFSQ_SIZE = 8,
// Fill Invalidator Size {Fill invalidator must be active} // Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16, parameter FILL_INVALIDAOR_SIZE = 16,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// Enable dram update
parameter DRAM_ENABLE = 1,
// Enable snoop forwarding
parameter SNOOP_FORWARDING_ENABLE = 0,
// Prefetcher // Prefetcher
parameter PRFQ_SIZE = 64, parameter PRFQ_SIZE = 64,
parameter PRFQ_STRIDE = 0, parameter PRFQ_STRIDE = 0,
// caceh requests tag size // core request tag size
parameter CORE_TAG_WIDTH = 1, parameter CORE_TAG_WIDTH = 1,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0,
// dram request tag size
parameter DRAM_TAG_WIDTH = 1 parameter DRAM_TAG_WIDTH = 1
) ( ) (
input wire clk, input wire clk,
@ -55,17 +67,17 @@ module VX_cache #(
// Core request // Core request
input wire [NUM_REQUESTS-1:0] core_req_valid, input wire [NUM_REQUESTS-1:0] core_req_valid,
input wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] core_req_read, input wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] core_req_read,
input wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] core_req_write, input wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] core_req_write,
input wire [NUM_REQUESTS-1:0][31:0] core_req_addr, input wire [NUM_REQUESTS-1:0][31:0] core_req_addr,
input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_req_data, input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_req_data,
input wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag, input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_req_tag,
output wire core_req_ready, output wire core_req_ready,
// Core response // Core response
output wire [NUM_REQUESTS-1:0] core_rsp_valid, output wire [NUM_REQUESTS-1:0] core_rsp_valid,
output wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data, output wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
output wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag, output wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
input wire core_rsp_ready, input wire core_rsp_ready,
// DRAM request // DRAM request
@ -127,7 +139,7 @@ module VX_cache #(
assign snp_req_ready = ~(|per_bank_snp_req_full); assign snp_req_ready = ~(|per_bank_snp_req_full);
assign dram_rsp_ready = (|per_bank_dram_fill_rsp_ready); assign dram_rsp_ready = (|per_bank_dram_fill_rsp_ready);
VX_cache_core_req_bank_sel #( VX_cache_core_req_bank_sel #(
.CACHE_SIZE (CACHE_SIZE), .CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE), .BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS), .NUM_BANKS (NUM_BANKS),
@ -154,10 +166,10 @@ module VX_cache #(
for (i = 0; i < NUM_BANKS; i = i + 1) begin for (i = 0; i < NUM_BANKS; i = i + 1) begin
wire [NUM_REQUESTS-1:0] curr_bank_core_req_valids; wire [NUM_REQUESTS-1:0] curr_bank_core_req_valids;
wire [NUM_REQUESTS-1:0][31:0] curr_bank_core_req_addr; wire [NUM_REQUESTS-1:0][31:0] curr_bank_core_req_addr;
wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag; wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag;
wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] curr_bank_core_req_data; wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] curr_bank_core_req_data;
wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] curr_bank_core_req_read; wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] curr_bank_core_req_read;
wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] curr_bank_core_req_write; wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] curr_bank_core_req_write;
wire curr_bank_core_rsp_pop; wire curr_bank_core_rsp_pop;
wire curr_bank_core_rsp_valid; wire curr_bank_core_rsp_valid;
@ -241,7 +253,6 @@ module VX_cache #(
.WORD_SIZE (WORD_SIZE), .WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS), .NUM_REQUESTS (NUM_REQUESTS),
.STAGE_1_CYCLES (STAGE_1_CYCLES), .STAGE_1_CYCLES (STAGE_1_CYCLES),
.FUNC_ID (FUNC_ID),
.REQQ_SIZE (REQQ_SIZE), .REQQ_SIZE (REQQ_SIZE),
.MRVQ_SIZE (MRVQ_SIZE), .MRVQ_SIZE (MRVQ_SIZE),
.DFPQ_SIZE (DFPQ_SIZE), .DFPQ_SIZE (DFPQ_SIZE),
@ -252,7 +263,11 @@ module VX_cache #(
.LLVQ_SIZE (LLVQ_SIZE), .LLVQ_SIZE (LLVQ_SIZE),
.FFSQ_SIZE (FFSQ_SIZE), .FFSQ_SIZE (FFSQ_SIZE),
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE), .FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH) .DRAM_ENABLE (DRAM_ENABLE),
.WRITE_ENABLE (WRITE_ENABLE),
.SNOOP_FORWARDING_ENABLE(SNOOP_FORWARDING_ENABLE),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
) bank ( ) bank (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@ -304,14 +319,13 @@ module VX_cache #(
end end
endgenerate endgenerate
VX_cache_core_rsp_merge #( VX_cache_core_rsp_merge #(
.CACHE_SIZE (CACHE_SIZE), .CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE), .BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS), .NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE), .WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS), .NUM_REQUESTS (NUM_REQUESTS),
.STAGE_1_CYCLES (STAGE_1_CYCLES), .STAGE_1_CYCLES (STAGE_1_CYCLES),
.FUNC_ID (FUNC_ID),
.REQQ_SIZE (REQQ_SIZE), .REQQ_SIZE (REQQ_SIZE),
.MRVQ_SIZE (MRVQ_SIZE), .MRVQ_SIZE (MRVQ_SIZE),
.DFPQ_SIZE (DFPQ_SIZE), .DFPQ_SIZE (DFPQ_SIZE),
@ -321,7 +335,8 @@ module VX_cache #(
.DFQQ_SIZE (DFQQ_SIZE), .DFQQ_SIZE (DFQQ_SIZE),
.LLVQ_SIZE (LLVQ_SIZE), .LLVQ_SIZE (LLVQ_SIZE),
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE), .FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH) .CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
) cache_core_rsp_merge ( ) cache_core_rsp_merge (
.per_bank_core_rsp_tid (per_bank_core_rsp_tid), .per_bank_core_rsp_tid (per_bank_core_rsp_tid),
.per_bank_core_rsp_valid (per_bank_core_rsp_valid), .per_bank_core_rsp_valid (per_bank_core_rsp_valid),

View file

@ -3,19 +3,19 @@
`include "VX_define.vh" `include "VX_define.vh"
`define WORD_SEL_NO 3'h7 `define BYTE_EN_NO 3'h7
`define WORD_SEL_LB 3'h0 `define BYTE_EN_LB 3'h0
`define WORD_SEL_LH 3'h1 `define BYTE_EN_LH 3'h1
`define WORD_SEL_LW 3'h2 `define BYTE_EN_LW 3'h2
`define WORD_SEL_HB 3'h4 `define BYTE_EN_HB 3'h4
`define WORD_SEL_HH 3'h5 `define BYTE_EN_HH 3'h5
`define WORD_SEL_BITS 3 `define BYTE_EN_BITS 3
// data tid tag read write base addr // data tid tag read write base addr
`define MRVQ_METADATA_WIDTH (`WORD_WIDTH + `LOG2UP(NUM_REQUESTS) + CORE_TAG_WIDTH + `WORD_SEL_BITS + `WORD_SEL_BITS + `BASE_ADDR_BITS) `define MRVQ_METADATA_WIDTH (`WORD_WIDTH + `LOG2UP(NUM_REQUESTS) + CORE_TAG_WIDTH + `BYTE_EN_BITS + `BYTE_EN_BITS + `BASE_ADDR_BITS)
// tag read write reqs // tag read write reqs
`define REQ_INST_META_WIDTH (CORE_TAG_WIDTH + `WORD_SEL_BITS + `WORD_SEL_BITS + `LOG2UP(NUM_REQUESTS)) `define REQ_INST_META_WIDTH (CORE_TAG_WIDTH + `BYTE_EN_BITS + `BYTE_EN_BITS + `LOG2UP(NUM_REQUESTS))
`define WORD_WIDTH (8 * WORD_SIZE) `define WORD_WIDTH (8 * WORD_SIZE)
`define BYTE_WIDTH (`WORD_WIDTH / 4) `define BYTE_WIDTH (`WORD_WIDTH / 4)
@ -66,13 +66,8 @@
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Core request tag width pc, wb, rd, warp_num `define CORE_REQ_TAG_COUNT ((CORE_TAG_ID_BITS != 0) ? 1 : NUM_REQUESTS)
`define CORE_REQ_TAG_WIDTH (32 + 2 + 5 + `NW_BITS)
// Core request tag info rd + warp_num
`define CORE_REQ_TAG_WARP(x) x[(5 + `NW_BITS)-1:0]
// DRAM response tag bank info
`define DRAM_ADDR_BANK(x) x[`BANK_SELECT_BITS-1:0] `define DRAM_ADDR_BANK(x) x[`BANK_SELECT_BITS-1:0]
`define DRAM_TO_LINE_ADDR(x) x[`DRAM_ADDR_WIDTH-1:`BANK_SELECT_BITS] `define DRAM_TO_LINE_ADDR(x) x[`DRAM_ADDR_WIDTH-1:`BANK_SELECT_BITS]

View file

@ -13,8 +13,6 @@ module VX_cache_core_rsp_merge #(
parameter NUM_REQUESTS = 2, parameter NUM_REQUESTS = 2,
// Number of cycles to complete stage 1 (read from memory) // Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2, parameter STAGE_1_CYCLES = 2,
// Function ID, {Dcache=0, Icache=1, Sharedmemory=2}
parameter FUNC_ID = 0,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...} // Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Core Request Queue Size // Core Request Queue Size
@ -39,8 +37,12 @@ module VX_cache_core_rsp_merge #(
// Fill Invalidator Size {Fill invalidator must be active} // Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16, parameter FILL_INVALIDAOR_SIZE = 16,
// caceh requests tag size // core request tag size
parameter CORE_TAG_WIDTH = 1, parameter CORE_TAG_WIDTH = 1,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0,
// dram request tag size
parameter DRAM_TAG_WIDTH = 1 parameter DRAM_TAG_WIDTH = 1
) ( ) (
// Per Bank WB // Per Bank WB
@ -53,7 +55,7 @@ module VX_cache_core_rsp_merge #(
// Core Writeback // Core Writeback
output reg [NUM_REQUESTS-1:0] core_rsp_valid, output reg [NUM_REQUESTS-1:0] core_rsp_valid,
output reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data, output reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
output reg [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag, output reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
input wire core_rsp_ready input wire core_rsp_ready
); );
@ -73,43 +75,48 @@ module VX_cache_core_rsp_merge #(
); );
integer i; integer i;
// Merge the per-bank core responses into one per-request response bundle.
// A bank's response is accepted (and its pop flag raised) when a main bank was found,
// the bank holds a valid response, the destination request slot has not been claimed
// by an earlier bank in this pass, and the bank is either the main bank itself or
// targets a different request slot than the main bank.
if (CORE_TAG_ID_BITS != 0) begin
    // Single shared tag: core_rsp_tag has only one entry in this configuration, so a
    // bank may only be merged when the low CORE_TAG_ID_BITS bits of its tag match
    // those of the main bank.
    always @(*) begin
        core_rsp_valid = 0;
        core_rsp_data  = 0;
        // core_rsp_tag is an output reg, so it must have exactly one driver. It is
        // assigned here instead of through a separate continuous assign, which would
        // conflict with a procedural assignment in this block.
        core_rsp_tag   = per_bank_core_rsp_tag[main_bank_index];
        for (i = 0; i < NUM_BANKS; i = i + 1) begin
            if (found_bank
             && per_bank_core_rsp_valid[i]
             && !core_rsp_valid[per_bank_core_rsp_tid[i]]
             && ((main_bank_index == `LOG2UP(NUM_BANKS)'(i))
              || (per_bank_core_rsp_tid[i] != per_bank_core_rsp_tid[main_bank_index]))
             && (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == per_bank_core_rsp_tag[main_bank_index][CORE_TAG_ID_BITS-1:0])) begin
                core_rsp_valid[per_bank_core_rsp_tid[i]] = 1;
                core_rsp_data[per_bank_core_rsp_tid[i]]  = per_bank_core_rsp_data[i];
                per_bank_core_rsp_pop_unqual[i] = 1;
            end else begin
                per_bank_core_rsp_pop_unqual[i] = 0;
            end
        end
    end
end else begin
    // Per-request tags: every request slot carries its own tag, so no tag matching
    // between banks is needed and each accepted bank writes its tag into its slot.
    always @(*) begin
        core_rsp_valid = 0;
        core_rsp_data  = 0;
        core_rsp_tag   = 0;
        for (i = 0; i < NUM_BANKS; i = i + 1) begin
            if (found_bank
             && per_bank_core_rsp_valid[i]
             && !core_rsp_valid[per_bank_core_rsp_tid[i]]
             && ((main_bank_index == `LOG2UP(NUM_BANKS)'(i))
              || (per_bank_core_rsp_tid[i] != per_bank_core_rsp_tid[main_bank_index]))) begin
                core_rsp_valid[per_bank_core_rsp_tid[i]] = 1;
                core_rsp_data[per_bank_core_rsp_tid[i]]  = per_bank_core_rsp_data[i];
                core_rsp_tag[per_bank_core_rsp_tid[i]]   = per_bank_core_rsp_tag[i];
                per_bank_core_rsp_pop_unqual[i] = 1;
            end else begin
                per_bank_core_rsp_pop_unqual[i] = 0;
            end
        end
    end
end
endmodule endmodule

View file

@ -87,7 +87,7 @@ module VX_cache_dfq_queue #(
assign qual_bank_dram_fill_req_addr = use_empty ? out_per_bank_dram_fill_req_addr : use_per_bank_dram_fill_req_addr; assign qual_bank_dram_fill_req_addr = use_empty ? out_per_bank_dram_fill_req_addr : use_per_bank_dram_fill_req_addr;
wire[`LOG2UP(NUM_BANKS)-1:0] qual_request_index; wire[`LOG2UP(NUM_BANKS)-1:0] qual_request_index;
wire qual_has_request; wire qual_has_request;
VX_generic_priority_encoder #( VX_generic_priority_encoder #(
.N(NUM_BANKS) .N(NUM_BANKS)
@ -105,12 +105,12 @@ module VX_cache_dfq_queue #(
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
use_per_bank_dram_fill_req_valid <= 0; use_per_bank_dram_fill_req_valid <= 0;
use_per_bank_dram_fill_req_addr <= 0; use_per_bank_dram_fill_req_addr <= 0;
end else begin end else begin
if (dfqq_pop && qual_has_request) begin if (dfqq_pop && qual_has_request) begin
use_per_bank_dram_fill_req_valid <= updated_bank_dram_fill_req_valid; use_per_bank_dram_fill_req_valid <= updated_bank_dram_fill_req_valid;
use_per_bank_dram_fill_req_addr <= qual_bank_dram_fill_req_addr; use_per_bank_dram_fill_req_addr <= qual_bank_dram_fill_req_addr;
end end
end end
end end

View file

@ -79,7 +79,7 @@ module VX_cache_dram_req_arb #(
.PRFQ_STRIDE (PRFQ_STRIDE), .PRFQ_STRIDE (PRFQ_STRIDE),
.BANK_LINE_SIZE(BANK_LINE_SIZE), .BANK_LINE_SIZE(BANK_LINE_SIZE),
.WORD_SIZE (WORD_SIZE) .WORD_SIZE (WORD_SIZE)
) prfqq ( ) prfqq (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),

View file

@ -1,4 +1,3 @@
`include "VX_cache_config.vh" `include "VX_cache_config.vh"
module VX_cache_miss_resrv #( module VX_cache_miss_resrv #(
@ -51,8 +50,8 @@ module VX_cache_miss_resrv #(
input wire[`WORD_WIDTH-1:0] miss_add_data, input wire[`WORD_WIDTH-1:0] miss_add_data,
input wire[`LOG2UP(NUM_REQUESTS)-1:0] miss_add_tid, input wire[`LOG2UP(NUM_REQUESTS)-1:0] miss_add_tid,
input wire[CORE_TAG_WIDTH-1:0] miss_add_tag, input wire[CORE_TAG_WIDTH-1:0] miss_add_tag,
input wire[`WORD_SEL_BITS-1:0] miss_add_mem_read, input wire[`BYTE_EN_BITS-1:0] miss_add_mem_read,
input wire[`WORD_SEL_BITS-1:0] miss_add_mem_write, input wire[`BYTE_EN_BITS-1:0] miss_add_mem_write,
output wire miss_resrv_full, output wire miss_resrv_full,
output wire miss_resrv_stop, output wire miss_resrv_stop,
@ -72,8 +71,8 @@ module VX_cache_miss_resrv #(
output wire[`WORD_WIDTH-1:0] miss_resrv_data_st0, output wire[`WORD_WIDTH-1:0] miss_resrv_data_st0,
output wire[`LOG2UP(NUM_REQUESTS)-1:0] miss_resrv_tid_st0, output wire[`LOG2UP(NUM_REQUESTS)-1:0] miss_resrv_tid_st0,
output wire[CORE_TAG_WIDTH-1:0] miss_resrv_tag_st0, output wire[CORE_TAG_WIDTH-1:0] miss_resrv_tag_st0,
output wire[`WORD_SEL_BITS-1:0] miss_resrv_mem_read_st0, output wire[`BYTE_EN_BITS-1:0] miss_resrv_mem_read_st0,
output wire[`WORD_SEL_BITS-1:0] miss_resrv_mem_write_st0 output wire[`BYTE_EN_BITS-1:0] miss_resrv_mem_write_st0
); );
reg [`MRVQ_METADATA_WIDTH-1:0] metadata_table[MRVQ_SIZE-1:0]; reg [`MRVQ_METADATA_WIDTH-1:0] metadata_table[MRVQ_SIZE-1:0];
reg [MRVQ_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table; reg [MRVQ_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table;

View file

@ -37,8 +37,11 @@ module VX_cache_req_queue #(
// Fill Invalidator Size {Fill invalidator must be active} // Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16, parameter FILL_INVALIDAOR_SIZE = 16,
// caceh requests tag size // core request tag size
parameter CORE_TAG_WIDTH = 1 parameter CORE_TAG_WIDTH = 1,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@ -46,18 +49,18 @@ module VX_cache_req_queue #(
// Enqueue Data // Enqueue Data
input wire reqq_push, input wire reqq_push,
input wire [NUM_REQUESTS-1:0] bank_valids, input wire [NUM_REQUESTS-1:0] bank_valids,
input wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] bank_mem_read, input wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] bank_mem_read,
input wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] bank_mem_write, input wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] bank_mem_write,
input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] bank_writedata, input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] bank_writedata,
input wire [NUM_REQUESTS-1:0][31:0] bank_addr, input wire [NUM_REQUESTS-1:0][31:0] bank_addr,
input wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] bank_tag, input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] bank_tag,
// Dequeue Data // Dequeue Data
input wire reqq_pop, input wire reqq_pop,
output wire reqq_req_st0, output wire reqq_req_st0,
output wire [`LOG2UP(NUM_REQUESTS)-1:0] reqq_req_tid_st0, output wire [`LOG2UP(NUM_REQUESTS)-1:0] reqq_req_tid_st0,
output wire [`WORD_SEL_BITS-1:0] reqq_req_mem_read_st0, output wire [`BYTE_EN_BITS-1:0] reqq_req_mem_read_st0,
output wire [`WORD_SEL_BITS-1:0] reqq_req_mem_write_st0, output wire [`BYTE_EN_BITS-1:0] reqq_req_mem_write_st0,
output wire [`WORD_WIDTH-1:0] reqq_req_writedata_st0, output wire [`WORD_WIDTH-1:0] reqq_req_writedata_st0,
output wire [31:0] reqq_req_addr_st0, output wire [31:0] reqq_req_addr_st0,
output wire [CORE_TAG_WIDTH-1:0] reqq_req_tag_st0, output wire [CORE_TAG_WIDTH-1:0] reqq_req_tag_st0,
@ -70,23 +73,23 @@ module VX_cache_req_queue #(
wire [NUM_REQUESTS-1:0] out_per_valids; wire [NUM_REQUESTS-1:0] out_per_valids;
wire [NUM_REQUESTS-1:0][31:0] out_per_addr; wire [NUM_REQUESTS-1:0][31:0] out_per_addr;
wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] out_per_writedata; wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] out_per_writedata;
wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] out_per_mem_read; wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] out_per_mem_read;
wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] out_per_mem_write; wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] out_per_mem_write;
wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] out_per_tag; wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] out_per_tag;
reg [NUM_REQUESTS-1:0] use_per_valids; reg [NUM_REQUESTS-1:0] use_per_valids;
reg [NUM_REQUESTS-1:0][31:0] use_per_addr; reg [NUM_REQUESTS-1:0][31:0] use_per_addr;
reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] use_per_writedata; reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] use_per_writedata;
reg [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] use_per_mem_read; reg [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] use_per_mem_read;
reg [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] use_per_mem_write; reg [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] use_per_mem_write;
reg [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] use_per_tag; reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] use_per_tag;
wire [NUM_REQUESTS-1:0] qual_valids; wire [NUM_REQUESTS-1:0] qual_valids;
wire [NUM_REQUESTS-1:0][31:0] qual_addr; wire [NUM_REQUESTS-1:0][31:0] qual_addr;
wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] qual_writedata; wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] qual_writedata;
wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] qual_mem_read; wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] qual_mem_read;
wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] qual_mem_write; wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] qual_mem_write;
wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] qual_tag; wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] qual_tag;
`DEBUG_BEGIN `DEBUG_BEGIN
reg [NUM_REQUESTS-1:0] updated_valids; reg [NUM_REQUESTS-1:0] updated_valids;
@ -123,8 +126,8 @@ module VX_cache_req_queue #(
assign qual_mem_read = use_per_mem_read; assign qual_mem_read = use_per_mem_read;
assign qual_mem_write = use_per_mem_write; assign qual_mem_write = use_per_mem_write;
wire[`LOG2UP(NUM_REQUESTS)-1:0]qual_request_index; wire[`LOG2UP(NUM_REQUESTS)-1:0] qual_request_index;
wire qual_has_request; wire qual_has_request;
VX_generic_priority_encoder #( VX_generic_priority_encoder #(
.N(NUM_REQUESTS) .N(NUM_REQUESTS)
@ -139,9 +142,15 @@ module VX_cache_req_queue #(
assign reqq_req_tid_st0 = qual_request_index; assign reqq_req_tid_st0 = qual_request_index;
assign reqq_req_addr_st0 = qual_addr[qual_request_index]; assign reqq_req_addr_st0 = qual_addr[qual_request_index];
assign reqq_req_writedata_st0 = qual_writedata[qual_request_index]; assign reqq_req_writedata_st0 = qual_writedata[qual_request_index];
assign reqq_req_tag_st0 = qual_tag[qual_request_index];
assign reqq_req_mem_read_st0 = qual_mem_read [qual_request_index]; if (CORE_TAG_ID_BITS != 0) begin
assign reqq_req_mem_write_st0 = qual_mem_write[qual_request_index]; assign reqq_req_tag_st0 = qual_tag;
end else begin
assign reqq_req_tag_st0 = qual_tag[qual_request_index];
end
assign reqq_req_mem_read_st0 = qual_mem_read [qual_request_index];
assign reqq_req_mem_write_st0 = qual_mem_write[qual_request_index];
always @(*) begin always @(*) begin
updated_valids = qual_valids; updated_valids = qual_valids;

View file

@ -13,8 +13,6 @@ module VX_tag_data_access #(
parameter NUM_REQUESTS = 2, parameter NUM_REQUESTS = 2,
// Number of cycles to complete stage 1 (read from memory) // Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2, parameter STAGE_1_CYCLES = 2,
// Function ID, {Dcache=0, Icache=1, Sharedmemory=2}
parameter FUNC_ID = 0,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...} // Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Core Request Queue Size // Core Request Queue Size
@ -37,7 +35,13 @@ module VX_tag_data_access #(
parameter LLVQ_SIZE = 16, parameter LLVQ_SIZE = 16,
// Fill Invalidator Size {Fill invalidator must be active} // Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16 parameter FILL_INVALIDAOR_SIZE = 16,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// Enable dram update
parameter DRAM_ENABLE = 1
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@ -55,8 +59,8 @@ module VX_tag_data_access #(
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
input wire[`WORD_SELECT_ADDR_END:0] writewsel_st1e, input wire[`WORD_SELECT_ADDR_END:0] writewsel_st1e,
input wire[`WORD_SEL_BITS-1:0] mem_write_st1e, input wire[`BYTE_EN_BITS-1:0] mem_write_st1e,
input wire[`WORD_SEL_BITS-1:0] mem_read_st1e, input wire[`BYTE_EN_BITS-1:0] mem_read_st1e,
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
output wire[`WORD_WIDTH-1:0] readword_st1e, output wire[`WORD_WIDTH-1:0] readword_st1e,
@ -101,7 +105,6 @@ module VX_tag_data_access #(
.WORD_SIZE (WORD_SIZE), .WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS), .NUM_REQUESTS (NUM_REQUESTS),
.STAGE_1_CYCLES (STAGE_1_CYCLES), .STAGE_1_CYCLES (STAGE_1_CYCLES),
.FUNC_ID (FUNC_ID),
.REQQ_SIZE (REQQ_SIZE), .REQQ_SIZE (REQQ_SIZE),
.MRVQ_SIZE (MRVQ_SIZE), .MRVQ_SIZE (MRVQ_SIZE),
.DFPQ_SIZE (DFPQ_SIZE), .DFPQ_SIZE (DFPQ_SIZE),
@ -139,8 +142,8 @@ module VX_tag_data_access #(
.reset(reset), .reset(reset),
.stall(stall), .stall(stall),
.flush(0), .flush(0),
.in ({qual_read_valid_st1, qual_read_dirty_st1, qual_read_tag_st1, qual_read_data_st1}), .in({qual_read_valid_st1, qual_read_dirty_st1, qual_read_tag_st1, qual_read_data_st1}),
.out ({read_valid_st1c[0], read_dirty_st1c[0], read_tag_st1c[0], read_data_st1c[0]}) .out({read_valid_st1c[0], read_dirty_st1c[0], read_tag_st1c[0], read_data_st1c[0]})
); );
genvar i; genvar i;
@ -152,14 +155,14 @@ module VX_tag_data_access #(
.reset(reset), .reset(reset),
.stall(stall), .stall(stall),
.flush(0), .flush(0),
.in ({read_valid_st1c[i-1], read_dirty_st1c[i-1], read_tag_st1c[i-1], read_data_st1c[i-1]}), .in({read_valid_st1c[i-1], read_dirty_st1c[i-1], read_tag_st1c[i-1], read_data_st1c[i-1]}),
.out ({read_valid_st1c[i], read_dirty_st1c[i], read_tag_st1c[i], read_data_st1c[i]}) .out({read_valid_st1c[i], read_dirty_st1c[i], read_tag_st1c[i], read_data_st1c[i]})
); );
end end
assign use_read_valid_st1e = read_valid_st1c[STAGE_1_CYCLES-1] || (FUNC_ID == `SFUNC_ID); // If shared memory, always valid assign use_read_valid_st1e = read_valid_st1c[STAGE_1_CYCLES-1] || ~DRAM_ENABLE; // If shared memory, always valid
assign use_read_dirty_st1e = read_dirty_st1c[STAGE_1_CYCLES-1] && (FUNC_ID != `SFUNC_ID); // Dirty only applies in Dcache assign use_read_dirty_st1e = read_dirty_st1c[STAGE_1_CYCLES-1] && DRAM_ENABLE; // Dirty only applies in Dcache
assign use_read_tag_st1e = (FUNC_ID == `SFUNC_ID) ? writeaddr_st1e[`TAG_LINE_ADDR_RNG] : read_tag_st1c[STAGE_1_CYCLES-1]; // Tag is always the same in SM assign use_read_tag_st1e = DRAM_ENABLE ? read_tag_st1c[STAGE_1_CYCLES-1] : writeaddr_st1e[`TAG_LINE_ADDR_RNG]; // Tag is always the same in SM
for (i = 0; i < `BANK_LINE_WORDS; i = i + 1) begin for (i = 0; i < `BANK_LINE_WORDS; i = i + 1) begin
assign use_read_data_st1e[i * `WORD_WIDTH +: `WORD_WIDTH] = read_data_st1c[STAGE_1_CYCLES-1][i * `WORD_WIDTH +: `WORD_WIDTH]; assign use_read_data_st1e[i * `WORD_WIDTH +: `WORD_WIDTH] = read_data_st1c[STAGE_1_CYCLES-1][i * `WORD_WIDTH +: `WORD_WIDTH];
@ -170,30 +173,49 @@ module VX_tag_data_access #(
wire [`BANK_LINE_WORDS-1:0][3:0] we; wire [`BANK_LINE_WORDS-1:0][3:0] we;
wire [`BANK_LINE_WIDTH-1:0] data_write; wire [`BANK_LINE_WIDTH-1:0] data_write;
if (WORD_SIZE == 4) begin if (WORD_SIZE == BANK_LINE_SIZE) begin
wire should_write = ((mem_write_st1e != `BYTE_EN_NO))
&& valid_req_st1e
&& use_read_valid_st1e
&& !miss_st1e
&& !is_snp_st1e;
for (i = 0; i < `BANK_LINE_WORDS; i = i + 1) begin
assign we[i] = (force_write || (should_write && !real_writefill)) ? 4'b1111 : 4'b0000;
end
assign readword_st1e = read_data_st1c[STAGE_1_CYCLES-1];
assign data_write = force_write ? writedata_st1e : writeword_st1e;
end else begin
wire[`OFFSET_ADDR_BITS-1:0] byte_select = writewsel_st1e[`OFFSET_ADDR_RNG]; wire[`OFFSET_ADDR_BITS-1:0] byte_select = writewsel_st1e[`OFFSET_ADDR_RNG];
wire[`WORD_SELECT_BITS-1:0] block_offset = writewsel_st1e[`WORD_SELECT_ADDR_RNG]; wire[`WORD_SELECT_BITS-1:0] block_offset = writewsel_st1e[`WORD_SELECT_ADDR_RNG];
wire lb = valid_req_st1e && (mem_read_st1e == `WORD_SEL_LB); wire lb = valid_req_st1e && (mem_read_st1e == `BYTE_EN_LB);
wire lh = valid_req_st1e && (mem_read_st1e == `WORD_SEL_LH); wire lh = valid_req_st1e && (mem_read_st1e == `BYTE_EN_LH);
wire lbu = valid_req_st1e && (mem_read_st1e == `WORD_SEL_HB); wire lbu = valid_req_st1e && (mem_read_st1e == `BYTE_EN_HB);
wire lhu = valid_req_st1e && (mem_read_st1e == `WORD_SEL_HH); wire lhu = valid_req_st1e && (mem_read_st1e == `BYTE_EN_HH);
wire lw = valid_req_st1e && (mem_read_st1e == `WORD_SEL_LW); wire lw = valid_req_st1e && (mem_read_st1e == `BYTE_EN_LW);
wire b0 = (byte_select == 0); wire b0 = (byte_select == 0);
wire b1 = (byte_select == 1); wire b1 = (byte_select == 1);
wire b2 = (byte_select == 2); wire b2 = (byte_select == 2);
wire b3 = (byte_select == 3); wire b3 = (byte_select == 3);
wire sb = valid_req_st1e && (mem_write_st1e == `WORD_SEL_LB); wire sb = valid_req_st1e && (mem_write_st1e == `BYTE_EN_LB);
wire sh = valid_req_st1e && (mem_write_st1e == `WORD_SEL_LH); wire sh = valid_req_st1e && (mem_write_st1e == `BYTE_EN_LH);
wire sw = valid_req_st1e && (mem_write_st1e == `WORD_SEL_LW); wire sw = valid_req_st1e && (mem_write_st1e == `BYTE_EN_LW);
wire [3:0] sb_mask = (b0 ? 4'b0001 : (b1 ? 4'b0010 : (b2 ? 4'b0100 : 4'b1000))); wire [3:0] sb_mask = (b0 ? 4'b0001 : (b1 ? 4'b0010 : (b2 ? 4'b0100 : 4'b1000)));
wire [3:0] sh_mask = (b0 ? 4'b0011 : 4'b1100); wire [3:0] sh_mask = (b0 ? 4'b0011 : 4'b1100);
wire should_write = (sw || sb || sh) && valid_req_st1e && use_read_valid_st1e && !miss_st1e && !is_snp_st1e; wire should_write = (sw || sb || sh)
&& valid_req_st1e
&& use_read_valid_st1e
&& !miss_st1e
&& !is_snp_st1e;
wire[`WORD_WIDTH-1:0] data_unmod = read_data_st1c[STAGE_1_CYCLES-1][block_offset * 32 +: 32]; wire[`WORD_WIDTH-1:0] data_unmod = read_data_st1c[STAGE_1_CYCLES-1][block_offset * 32 +: 32];
wire[`WORD_WIDTH-1:0] data_unQual = (b0 || lw) ? (data_unmod) : wire[`WORD_WIDTH-1:0] data_unQual = (b0 || lw) ? (data_unmod) :
@ -234,13 +256,6 @@ module VX_tag_data_access #(
assign data_write[i * `WORD_WIDTH +: `WORD_WIDTH] = force_write ? writedata_st1e[i * `WORD_WIDTH +: `WORD_WIDTH] : use_write_dat; assign data_write[i * `WORD_WIDTH +: `WORD_WIDTH] = force_write ? writedata_st1e[i * `WORD_WIDTH +: `WORD_WIDTH] : use_write_dat;
end end
end else begin
wire should_write = ((mem_write_st1e != `WORD_SEL_NO)) && valid_req_st1e && use_read_valid_st1e && !miss_st1e && !is_snp_st1e;
for (i = 0; i < `BANK_LINE_WORDS; i = i + 1) begin
assign we[i] = (force_write || (should_write && !real_writefill)) ? 4'b1111 : 4'b0000;
end
assign readword_st1e = read_data_st1c[STAGE_1_CYCLES-1];
assign data_write = force_write ? writedata_st1e : writeword_st1e;
end end
assign use_write_enable = (writefill_st1e && !real_writefill) ? 0 : we; assign use_write_enable = (writefill_st1e && !real_writefill) ? 0 : we;

View file

@ -4,17 +4,18 @@
`include "../cache/VX_cache_config.vh" `include "../cache/VX_cache_config.vh"
interface VX_cache_core_req_if #( interface VX_cache_core_req_if #(
parameter NUM_REQUESTS = 1, parameter NUM_REQUESTS = 1,
parameter WORD_SIZE = 1, parameter WORD_SIZE = 1,
parameter CORE_TAG_WIDTH = 1 parameter CORE_TAG_WIDTH = 1,
parameter CORE_TAG_ID_BITS = 0
) (); ) ();
wire [NUM_REQUESTS-1:0] core_req_valid; wire [NUM_REQUESTS-1:0] core_req_valid;
wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] core_req_read; wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] core_req_read;
wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] core_req_write; wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] core_req_write;
wire [NUM_REQUESTS-1:0][31:0] core_req_addr; wire [NUM_REQUESTS-1:0][31:0] core_req_addr;
wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_req_data; wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_req_data;
wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag; wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_req_tag;
wire core_req_ready; wire core_req_ready;
endinterface endinterface

View file

@ -4,14 +4,15 @@
`include "../cache/VX_cache_config.vh" `include "../cache/VX_cache_config.vh"
interface VX_cache_core_rsp_if #( interface VX_cache_core_rsp_if #(
parameter NUM_REQUESTS = 1, parameter NUM_REQUESTS = 1,
parameter WORD_SIZE = 1, parameter WORD_SIZE = 1,
parameter CORE_TAG_WIDTH = 1 parameter CORE_TAG_WIDTH = 1,
parameter CORE_TAG_ID_BITS = 0
) (); ) ();
wire [NUM_REQUESTS-1:0] core_rsp_valid; wire [NUM_REQUESTS-1:0] core_rsp_valid;
wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data; wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data;
wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag; wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag;
wire core_rsp_ready; wire core_rsp_ready;
endinterface endinterface

View file

@ -16,8 +16,8 @@ interface VX_frE_to_bckE_req_if ();
wire [1:0] wb; wire [1:0] wb;
wire rs2_src; wire rs2_src;
wire [31:0] itype_immed; wire [31:0] itype_immed;
wire [`WORD_SEL_BITS-1:0] mem_read; wire [`BYTE_EN_BITS-1:0] mem_read;
wire [`WORD_SEL_BITS-1:0] mem_write; wire [`BYTE_EN_BITS-1:0] mem_write;
wire [2:0] branch_type; wire [2:0] branch_type;
wire [19:0] upper_immed; wire [19:0] upper_immed;
wire [31:0] curr_PC; wire [31:0] curr_PC;

View file

@ -12,8 +12,8 @@ interface VX_lsu_req_if ();
wire [`NUM_THREADS-1:0][31:0] store_data; wire [`NUM_THREADS-1:0][31:0] store_data;
wire [`NUM_THREADS-1:0][31:0] base_address; // A reg data wire [`NUM_THREADS-1:0][31:0] base_address; // A reg data
wire [31:0] offset; // itype_immed wire [31:0] offset; // itype_immed
wire [`WORD_SEL_BITS-1:0] mem_read; wire [`BYTE_EN_BITS-1:0] mem_read;
wire [`WORD_SEL_BITS-1:0] mem_write; wire [`BYTE_EN_BITS-1:0] mem_write;
wire [4:0] rd; // dest register wire [4:0] rd; // dest register
wire [1:0] wb; // wire [1:0] wb; //

View file

@ -1,5 +1,5 @@
module VX_generic_stack #( module VX_generic_stack #(
parameter WIDTH = 40, parameter WIDTH = 40,
parameter DEPTH = 2 parameter DEPTH = 2
) ( ) (

View file

@ -99,12 +99,13 @@ void Simulator::dbus_driver() {
} }
void Simulator::io_driver() { void Simulator::io_driver() {
if (vortex_->io_valid) { if (vortex_->io_req_write
uint32_t data_write = (uint32_t)vortex_->io_data; && vortex_->io_req_addr == IO_BUS_ADDR_COUT) {
uint32_t data_write = (uint32_t)vortex_->io_req_data;
char c = (char)data_write; char c = (char)data_write;
std::cerr << c; std::cerr << c;
} }
vortex_->io_ready = true; vortex_->io_req_ready = true;
} }
void Simulator::reset() { void Simulator::reset() {

View file

@ -29,5 +29,5 @@ vx_printc:
.section .data .section .data
print_addr: print_addr:
.word IO_BUS_ADDR .word IO_BUS_ADDR_COUT

View file

@ -24,7 +24,7 @@ unsigned y[] = {1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1}; 1, 1, 1, 1};
F
unsigned z[] = {0, 0, 0, 0, unsigned z[] = {0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff