DCRS refactoring

This commit is contained in:
Blaise Tine 2022-07-05 00:09:32 -04:00
parent 9bc70b04b3
commit fa6daabe35
36 changed files with 328 additions and 510 deletions

View file

@ -118,7 +118,7 @@ extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
*value = ALLOC_BASE_ADDR;
break;
case VX_CAPS_KERNEL_BASE_ADDR:
*value = device->dcrs.read(DCR_STARTUP_ADDR);
*value = device->dcrs.read(DCR_BASE_STARTUP_ADDR);
break;
case VX_CAPS_ISA_FLAGS:
*value = device->isa_caps;
@ -239,14 +239,16 @@ extern int vx_dev_open(vx_device_h* hdevice) {
return ret;
}
}
#endif
dcr_initialize(device);
#ifdef DUMP_PERF_STATS
perf_add_device(device);
#endif
*hdevice = device;
#ifdef DUMP_PERF_STATS
perf_add_device(*hdevice);
#endif
return 0;
}

View file

@ -20,13 +20,6 @@ bool is_aligned(uint64_t addr, uint64_t alignment) {
///////////////////////////////////////////////////////////////////////////////
DeviceConfig::DeviceConfig() {
data_[DCR_STARTUP_ADDR] = STARTUP_ADDR;
data_[DCR_MPM_CLASS] = 0;
}
///////////////////////////////////////////////////////////////////////////////
class AutoPerfDump {
public:
AutoPerfDump() : perf_class_(0) {}
@ -38,10 +31,10 @@ public:
}
void add_device(vx_device_h device) {
auto perf_class_s = getenv ("PERF_CLASS");
auto perf_class_s = getenv("PERF_CLASS");
if (perf_class_s) {
perf_class_ = std::atoi(perf_class_s);
vx_dcr_write(device, DCR_MPM_CLASS, perf_class_);
vx_dcr_write(device, DCR_BASE_MPM_CLASS, perf_class_);
}
devices_.push_back(device);
}
@ -156,10 +149,35 @@ extern int vx_upload_kernel_file(vx_device_h device, const char* filename) {
///////////////////////////////////////////////////////////////////////////////
/*static uint32_t get_csr_32(const uint32_t* buffer, int addr) {
uint32_t value_lo = buffer[addr - CSR_MPM_BASE];
return value_lo;
}*/
// Records `value` as the contents of the configuration register at `addr`.
// The entry is created on the first write and overwritten on later writes.
void DeviceConfig::write(uint32_t addr, uint64_t value) {
  auto& slot = data_[addr];
  slot = value;
}
// Returns the value stored for the configuration register at `addr`.
// The lookup uses the container's at(), so no entry is created by a read;
// for a standard map type, at() throws std::out_of_range on a missing key.
uint64_t DeviceConfig::read(uint32_t addr) const {
  const uint64_t& stored = data_.at(addr);
  return stored;
}
// Writes initial values to the device configuration registers (DCRs) of
// `device` through vx_dcr_write:
//   - base range: startup address = STARTUP_ADDR, MPM class = 0
//   - raster and ROP ranges: every register in the range = 0
//   - texture range: for each stage, select the stage and zero its registers
// Return codes from vx_dcr_write are not checked; the function returns void.
void dcr_initialize(vx_device_h device) {
  vx_dcr_write(device, DCR_BASE_STARTUP_ADDR, STARTUP_ADDR);
  vx_dcr_write(device, DCR_BASE_MPM_CLASS, 0);

  for (int i = 0; i < DCR_RASTER_STATE_COUNT; ++i) {
    vx_dcr_write(device, DCR_RASTER_STATE_BEGIN + i, 0);
  }

  for (int i = 0; i < DCR_ROP_STATE_COUNT; ++i) {
    vx_dcr_write(device, DCR_ROP_STATE_BEGIN + i, 0);
  }

  for (int stage = 0; stage < TEX_STAGE_COUNT; ++stage) {
    // Select the stage by writing its index to the stage register. Writing 0
    // to (DCR_TEX_STAGE + stage) would leave the selector at 0 and overwrite
    // unrelated registers in the range, so only stage 0 would get cleared.
    // NOTE(review): assumes DCR_TEX_STAGE is the stage selector at offset 0 of
    // the texture range (the loop below skips offset 0) - confirm against the
    // DCR address map.
    vx_dcr_write(device, DCR_TEX_STAGE, stage);
    for (int j = 1; j < DCR_TEX_STATE_COUNT; ++j) {
      vx_dcr_write(device, DCR_TEX_STATE_BEGIN + j, 0);
    }
  }
}
///////////////////////////////////////////////////////////////////////////////
static uint64_t get_csr_64(const uint32_t* buffer, int addr) {
uint32_t value_lo = buffer[addr - CSR_MPM_BASE];

View file

@ -5,22 +5,14 @@
#include <unordered_map>
class DeviceConfig {
public:
DeviceConfig();
void write(uint32_t addr, uint64_t value) {
data_[addr] = value;
}
uint64_t read(uint32_t addr) const {
return data_.at(addr);
}
public:
void write(uint32_t addr, uint64_t value);
uint64_t read(uint32_t addr) const;
private:
std::unordered_map<uint32_t, uint64_t> data_;
};
void dcr_initialize();
void dcr_initialize(vx_device_h device);
uint64_t aligned_size(uint64_t size, uint64_t alignment);

View file

@ -206,7 +206,7 @@ extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
*value = ALLOC_BASE_ADDR;
break;
case VX_CAPS_KERNEL_BASE_ADDR:
*value = device->read_dcr(DCR_STARTUP_ADDR);
*value = device->read_dcr(DCR_BASE_STARTUP_ADDR);
break;
case VX_CAPS_ISA_FLAGS:
*value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
@ -226,9 +226,11 @@ extern int vx_dev_open(vx_device_h* hdevice) {
*hdevice = new vx_device();
dcr_initialize(*hdevice);
#ifdef DUMP_PERF_STATS
perf_add_device(*hdevice);
#endif
#endif
return 0;
}

View file

@ -183,11 +183,13 @@ extern int vx_dev_open(vx_device_h* hdevice) {
if (nullptr == hdevice)
return -1;
*hdevice = new vx_device();
*hdevice = new vx_device();
dcr_initialize(*hdevice);
#ifdef DUMP_PERF_STATS
perf_add_device(*hdevice);
#endif
#endif
return 0;
}
@ -236,7 +238,7 @@ extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
*value = ALLOC_BASE_ADDR;
break;
case VX_CAPS_KERNEL_BASE_ADDR:
*value = device->read_dcr(DCR_STARTUP_ADDR);
*value = device->read_dcr(DCR_BASE_STARTUP_ADDR);
break;
case VX_CAPS_ISA_FLAGS:
*value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;

View file

@ -2,6 +2,18 @@
`include "VX_gpu_types.vh"
`include "VX_cache_types.vh"
`ifdef EXT_TEX_ENABLE
`include "VX_tex_define.vh"
`endif
`ifdef EXT_RASTER_ENABLE
`include "VX_raster_define.vh"
`endif
`ifdef EXT_ROP_ENABLE
`include "VX_rop_define.vh"
`endif
`IGNORE_WARNINGS_BEGIN
import VX_gpu_types::*;
import VX_cache_types::*;
@ -21,7 +33,7 @@ module VX_cluster #(
VX_perf_memsys_if.slave perf_memsys_total_if,
`endif
input base_dcrs_t base_dcrs,
VX_dcr_write_if.slave dcr_write_if,
`ifdef EXT_TEX_ENABLE
`ifdef PERF_ENABLE
@ -30,7 +42,6 @@ module VX_cluster #(
VX_tex_perf_if.slave perf_tex_total_if,
VX_perf_cache_if.slave perf_tcache_total_if,
`endif
VX_tex_dcr_if.slave tex_dcr_if,
`endif
`ifdef EXT_RASTER_ENABLE
@ -40,7 +51,6 @@ module VX_cluster #(
VX_raster_perf_if.slave perf_raster_total_if,
VX_perf_cache_if.slave perf_rcache_total_if,
`endif
VX_raster_dcr_if.slave raster_dcr_if,
`endif
`ifdef EXT_ROP_ENABLE
@ -50,7 +60,6 @@ module VX_cluster #(
VX_rop_perf_if.slave perf_rop_total_if,
VX_perf_cache_if.slave perf_ocache_total_if,
`endif
VX_rop_dcr_if.slave rop_dcr_if,
`endif
// Memory
@ -88,11 +97,18 @@ module VX_cluster #(
.NUM_LANES (`NUM_THREADS)
) raster_req_if[`NUM_RASTER_UNITS]();
VX_dcr_write_if raster_dcr_write_if();
assign raster_dcr_write_if.valid = dcr_write_if.valid && (dcr_write_if.addr >= `DCR_RASTER_STATE_BEGIN && dcr_write_if.addr < `DCR_RASTER_STATE_END);
assign raster_dcr_write_if.addr = dcr_write_if.addr;
assign raster_dcr_write_if.data = dcr_write_if.data;
// Generate all raster units
for (genvar i = 0; i < `NUM_RASTER_UNITS; ++i) begin
`RESET_RELAY (raster_reset, reset);
`BUFFER_DCR_WRITE_IF(unit_dcr_write_if, raster_dcr_write_if, (`NUM_RASTER_UNITS > 1));
VX_raster_unit #(
.INSTANCE_ID ($sformatf("cluster%0d-raster%0d", CLUSTER_ID, i)),
.INSTANCE_IDX (CLUSTER_ID * `NUM_RASTER_UNITS + i),
@ -109,7 +125,7 @@ module VX_cluster #(
`ifdef PERF_ENABLE
.perf_raster_if(perf_raster_unit_if[i]),
`endif
.raster_dcr_if (raster_dcr_if),
.dcr_write_if (unit_dcr_write_if),
.raster_req_if (raster_req_if[i]),
.cache_req_if (rcache_req_if[i]),
.cache_rsp_if (rcache_rsp_if[i])
@ -175,11 +191,18 @@ module VX_cluster #(
.req_out_if (rop_req_if)
);
VX_dcr_write_if rop_dcr_write_if();
assign rop_dcr_write_if.valid = dcr_write_if.valid && (dcr_write_if.addr >= `DCR_ROP_STATE_BEGIN && dcr_write_if.addr < `DCR_ROP_STATE_END);
assign rop_dcr_write_if.addr = dcr_write_if.addr;
assign rop_dcr_write_if.data = dcr_write_if.data;
// Generate all rop units
for (genvar i = 0; i < `NUM_ROP_UNITS; ++i) begin
`RESET_RELAY (rop_reset, reset);
`BUFFER_DCR_WRITE_IF(unit_dcr_write_if, rop_dcr_write_if, (`NUM_ROP_UNITS > 1));
VX_rop_unit #(
.INSTANCE_ID ($sformatf("cluster%0d-rop%0d", CLUSTER_ID, i)),
.NUM_LANES (`NUM_THREADS)
@ -189,7 +212,7 @@ module VX_cluster #(
`ifdef PERF_ENABLE
.perf_rop_if (perf_rop_unit_if[i]),
`endif
.rop_dcr_if (rop_dcr_if),
.dcr_write_if (unit_dcr_write_if),
.rop_req_if (rop_req_if[i]),
.cache_req_if (ocache_req_if[i]),
.cache_rsp_if (ocache_rsp_if[i])
@ -253,11 +276,18 @@ module VX_cluster #(
.rsp_out_if (tex_rsp_if)
);
VX_dcr_write_if tex_dcr_write_if();
assign tex_dcr_write_if.valid = dcr_write_if.valid && (dcr_write_if.addr >= `DCR_TEX_STATE_BEGIN && dcr_write_if.addr < `DCR_TEX_STATE_END);
assign tex_dcr_write_if.addr = dcr_write_if.addr;
assign tex_dcr_write_if.data = dcr_write_if.data;
// Generate all texture units
for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin
`RESET_RELAY (tex_reset, reset);
`BUFFER_DCR_WRITE_IF(unit_dcr_write_if, tex_dcr_write_if, (`NUM_TEX_UNITS > 1));
VX_tex_unit #(
.INSTANCE_ID ($sformatf("cluster%0d-tex%0d", CLUSTER_ID, i)),
.NUM_LANES (`NUM_THREADS),
@ -268,7 +298,7 @@ module VX_cluster #(
`ifdef PERF_ENABLE
.perf_tex_if (perf_tex_unit_if[i]),
`endif
.tex_dcr_if (tex_dcr_if),
.dcr_write_if (unit_dcr_write_if),
.tex_req_if (tex_req_if[i]),
.tex_rsp_if (tex_rsp_if[i]),
.cache_req_if (tcache_req_if[i]),
@ -412,6 +442,11 @@ module VX_cluster #(
`UNUSED_VAR (per_socket_sim_ebreak)
`UNUSED_VAR (per_socket_sim_wb_value)
VX_dcr_write_if base_dcr_write_if();
assign base_dcr_write_if.valid = dcr_write_if.valid && (dcr_write_if.addr >= `DCR_BASE_STATE_BEGIN && dcr_write_if.addr < `DCR_BASE_STATE_END);
assign base_dcr_write_if.addr = dcr_write_if.addr;
assign base_dcr_write_if.data = dcr_write_if.data;
wire [`NUM_SOCKETS-1:0] per_socket_busy;
// Generate all sockets
@ -419,7 +454,7 @@ module VX_cluster #(
`RESET_RELAY_EX (socket_reset, reset, (`NUM_SOCKETS > 1));
`BUFFER_EX (socket_base_dcrs, base_dcrs, (`NUM_SOCKETS > 1));
`BUFFER_DCR_WRITE_IF(socket_dcr_write_if, base_dcr_write_if, (`NUM_SOCKETS > 1));
VX_socket #(
.SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + i)
@ -433,7 +468,7 @@ module VX_cluster #(
.perf_memsys_if (perf_memsys_total_if),
`endif
.base_dcrs (socket_base_dcrs),
.dcr_write_if (socket_dcr_write_if),
.dcache_req_if (per_socket_dcache_req_if[i]),
.dcache_rsp_if (per_socket_dcache_rsp_if[i]),

View file

@ -111,7 +111,7 @@
`define RESET_DELAY 6
`ifndef STALL_TIMEOUT
`define STALL_TIMEOUT (10000 * (1 ** (`L2_ENABLED + `L3_ENABLED)))
`define STALL_TIMEOUT (100000 * (1 ** (`L2_ENABLED + `L3_ENABLED)))
`endif
`ifndef DEBUG_LEVEL

View file

@ -18,7 +18,7 @@ module VX_core #(
VX_perf_memsys_if.slave perf_memsys_if,
`endif
input base_dcrs_t base_dcrs,
VX_dcr_write_if.slave dcr_write_if,
VX_cache_req_if.master dcache_req_if,
VX_cache_rsp_if.slave dcache_rsp_if,
@ -98,13 +98,22 @@ module VX_core #(
`RESET_RELAY (execute_reset, reset);
`RESET_RELAY (commit_reset, reset);
base_dcrs_t base_dcrs;
VX_dcr_data dcr_data (
.clk (clk),
.reset (reset),
.dcr_write_if(dcr_write_if),
.base_dcrs (base_dcrs)
);
VX_fetch #(
.CORE_ID(CORE_ID)
) fetch (
`SCOPE_BIND_VX_core_fetch
.clk (clk),
.base_dcrs (base_dcrs),
.reset (fetch_reset),
.base_dcrs (base_dcrs),
.icache_req_if (icache_req_if),
.icache_rsp_if (icache_rsp_if),
.wrelease_if (wrelease_if),

View file

@ -6,135 +6,39 @@
import VX_gpu_types::*;
`IGNORE_WARNINGS_END
module VX_dcr_data (
input wire clk,
input wire reset,
VX_dcr_base_if.master dcr_base_if,
// Inputs
VX_dcr_write_if.slave dcr_write_if,
`ifdef EXT_TEX_ENABLE
VX_tex_dcr_if.master tex_dcr_if,
`endif
`ifdef EXT_RASTER_ENABLE
VX_raster_dcr_if.master raster_dcr_if,
`endif
`ifdef EXT_ROP_ENABLE
VX_rop_dcr_if.master rop_dcr_if,
`endif
input wire dcr_wr_valid,
input wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr,
input wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data,
output wire dcr_wr_ready
// Outputs
output base_dcrs_t base_dcrs
);
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
wire is_base_dcr = (dcr_wr_addr >= `DCR_BASE_STATE_BEGIN && dcr_wr_addr < `DCR_BASE_STATE_END);
`ifdef EXT_TEX_ENABLE
wire is_tex_dcr = (dcr_wr_addr >= `DCR_TEX_STATE_BEGIN && dcr_wr_addr < `DCR_TEX_STATE_END);
`endif
`ifdef EXT_RASTER_ENABLE
wire is_raster_dcr = (dcr_wr_addr >= `DCR_RASTER_STATE_BEGIN && dcr_wr_addr < `DCR_RASTER_STATE_END);
`endif
`ifdef EXT_ROP_ENABLE
wire is_rop_dcr = (dcr_wr_addr >= `DCR_ROP_STATE_BEGIN && dcr_wr_addr < `DCR_ROP_STATE_END);
`endif
reg dcr_addr_valid;
always @(*) begin
dcr_addr_valid = is_base_dcr;
`ifdef EXT_TEX_ENABLE
if (is_tex_dcr) begin
dcr_addr_valid = 1;
end
`endif
`ifdef EXT_RASTER_ENABLE
if (is_raster_dcr) begin
dcr_addr_valid = 1;
end
`endif
`ifdef EXT_ROP_ENABLE
if (is_rop_dcr) begin
dcr_addr_valid = 1;
end
`endif
end
`RUNTIME_ASSERT(~dcr_wr_valid || dcr_addr_valid, ("%t: *** invalid device configuration register write address: 0x%0h, data=0x%0h", $time, dcr_wr_addr, dcr_wr_data));
assign dcr_wr_ready = 1'b1; // no handshaking needed
///////////////////////////////////////////////////////////////////////////
base_dcrs_t base_dcrs;
base_dcrs_t dcrs;
always @(posedge clk) begin
if (reset) begin
base_dcrs <= '0;
base_dcrs.startup_addr <= `STARTUP_ADDR;
end else if (dcr_wr_valid) begin
case (dcr_wr_addr)
`DCR_STARTUP_ADDR : base_dcrs.startup_addr <= dcr_wr_data[`XLEN-1:0];
`DCR_MPM_CLASS : base_dcrs.mpm_class <= dcr_wr_data[7:0];
if (dcr_write_if.valid) begin
case (dcr_write_if.addr)
`DCR_BASE_STARTUP_ADDR : dcrs.startup_addr <= dcr_write_if.data[`XLEN-1:0];
`DCR_BASE_MPM_CLASS : dcrs.mpm_class <= dcr_write_if.data[7:0];
default:;
endcase
end
end
assign dcr_base_if.data = base_dcrs;
///////////////////////////////////////////////////////////////////////////
`ifdef EXT_TEX_ENABLE
VX_tex_dcr #(
.NUM_STAGES (`TEX_STAGE_COUNT)
) tex_dcr (
.clk (clk),
.reset (reset),
.dcr_wr_valid (dcr_wr_valid && is_tex_dcr),
.dcr_wr_addr (dcr_wr_addr),
.dcr_wr_data (dcr_wr_data),
.tex_dcr_if (tex_dcr_if)
);
`endif
`ifdef EXT_RASTER_ENABLE
VX_raster_dcr raster_dcr (
.clk (clk),
.reset (reset),
.dcr_wr_valid (dcr_wr_valid && is_raster_dcr),
.dcr_wr_addr (dcr_wr_addr),
.dcr_wr_data (dcr_wr_data),
.raster_dcr_if (raster_dcr_if)
);
`endif
`ifdef EXT_ROP_ENABLE
VX_rop_dcr rop_dcr (
.clk (clk),
.reset (reset),
.dcr_wr_valid (dcr_wr_valid && is_rop_dcr),
.dcr_wr_addr (dcr_wr_addr),
.dcr_wr_data (dcr_wr_data),
.rop_dcr_if (rop_dcr_if)
);
`endif
assign base_dcrs = dcrs;
`ifdef DBG_TRACE_CORE_PIPELINE
always @(posedge clk) begin
if (dcr_wr_valid && is_base_dcr) begin
if (dcr_write_if.valid) begin
`TRACE(1, ("%d: base-dcr: state=", $time));
trace_base_dcr(1, dcr_wr_addr);
`TRACE(1, (", data=0x%0h\n", dcr_wr_data));
trace_base_dcr(1, dcr_write_if.addr);
`TRACE(1, (", data=0x%0h\n", dcr_write_if.data));
end
end
`endif

View file

@ -384,4 +384,14 @@
end \
assign ``dst.``field = __reduce_add_r_``dst``field
// Declares a VX_dcr_write_if instance named `dst` and drives its
// {valid, addr, data} from `src` through a VX_pipe_register instance named
// __<dst>. ENABLE is forwarded as the register's DEPTH parameter. The register
// is clocked by `clk` from the instantiating scope, its reset is tied low and
// its enable is tied high.
// NOTE(review): presumably DEPTH=0 makes the register a pass-through - confirm
// against VX_pipe_register.
`define BUFFER_DCR_WRITE_IF(dst, src, ENABLE) \
VX_dcr_write_if dst(); \
VX_pipe_register #(.DATAW(1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH), .DEPTH(ENABLE)) __``dst ( \
.clk (clk), \
.reset (1'b0), \
.enable (1'b1), \
.data_in ({src.valid, src.addr, src.data}), \
.data_out ({dst.valid, dst.addr, dst.data}) \
)
`endif

View file

@ -158,26 +158,6 @@
.reset_o (dst) \
)
// Declares a wire `dst` with the same bit width as `src` and connects it to
// `src` through a VX_pipe_register instance named __<dst>. The register uses
// `clk` from the instantiating scope, with reset tied low and enable tied high.
`define BUFFER(dst, src) \
wire [$bits(src)-1:0] dst; \
VX_pipe_register #(.DATAW($bits(src))) __``dst ( \
.clk (clk), \
.reset (1'b0), \
.enable (1'b1), \
.data_in (src), \
.data_out (dst) \
)
// Same as BUFFER, but forwards ENABLE as the DEPTH parameter of the
// VX_pipe_register instance.
// NOTE(review): presumably DEPTH=0 makes the register a pass-through - confirm
// against VX_pipe_register.
`define BUFFER_EX(dst, src, ENABLE) \
wire [$bits(src)-1:0] dst; \
VX_pipe_register #(.DATAW($bits(src)), .DEPTH(ENABLE)) __``dst ( \
.clk (clk), \
.reset (1'b0), \
.enable (1'b1), \
.data_in (src), \
.data_out (dst) \
)
`define POP_COUNT(out, in) \
VX_popcount #( \
.N ($bits(in)) \

View file

@ -20,7 +20,7 @@ module VX_socket #(
VX_perf_memsys_if.slave perf_memsys_if,
`endif
input base_dcrs_t base_dcrs,
VX_dcr_write_if.slave dcr_write_if,
VX_cache_req_if.master dcache_req_if,
VX_cache_rsp_if.slave dcache_rsp_if,
@ -310,7 +310,7 @@ module VX_socket #(
`RESET_RELAY_EX (core_reset, reset, (`SOCKET_SIZE > 1));
`BUFFER_EX (core_base_dcrs, base_dcrs, (`SOCKET_SIZE > 1));
`BUFFER_DCR_WRITE_IF(core_dcr_write_if, dcr_write_if, (`SOCKET_SIZE > 1));
VX_core #(
.CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + i)
@ -324,7 +324,8 @@ module VX_socket #(
.perf_memsys_if (perf_memsys_if),
`endif
.base_dcrs (core_base_dcrs),
.dcr_write_if (core_dcr_write_if),
.dcache_req_if (per_core_dcache_req_if[i]),
.dcache_rsp_if (per_core_dcache_rsp_if[i]),

View file

@ -155,7 +155,8 @@ task trace_base_dcr (
input [`DCR_ADDR_BITS-1:0] addr
);
case (addr)
`DCR_MPM_CLASS: `TRACE(level, ("MPM_CLASS"));
`DCR_BASE_STARTUP_ADDR: `TRACE(level, ("STARTUP_ADDR"));
`DCR_BASE_MPM_CLASS: `TRACE(level, ("MPM_CLASS"));
default: `TRACE(level, ("?"));
endcase
endtask

View file

@ -7,8 +7,8 @@
`define DCR_BITS 12
`define DCR_BASE_STATE_BEGIN 12'h001
`define DCR_STARTUP_ADDR 12'h001
`define DCR_MPM_CLASS 12'h002
`define DCR_BASE_STARTUP_ADDR 12'h001
`define DCR_BASE_MPM_CLASS 12'h002
`define DCR_BASE_STATE_END 12'h003
`define DCR_BASE_STATE(addr) ((addr) - `DCR_BASE_STATE_BEGIN)

View file

@ -45,10 +45,8 @@ module Vortex (
input wire dcr_wr_valid,
input wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr,
input wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data,
output wire dcr_wr_ready,
// Control / status
input wire start,
// Status
output wire busy
);
@ -82,9 +80,6 @@ module Vortex (
assign mem_rsp_if.tag = mem_rsp_tag;
assign mem_rsp_ready = mem_rsp_if.ready;
VX_dcr_base_if #(
) dcr_base_if();
`ifdef EXT_TEX_ENABLE
`ifdef PERF_ENABLE
VX_tex_perf_if perf_tex_if[`NUM_CLUSTERS]();
@ -94,9 +89,6 @@ module Vortex (
`PERF_TEX_ADD (perf_tex_total_if, perf_tex_if, `NUM_CLUSTERS);
`PERF_CACHE_ADD (perf_tcache_total_if, perf_tcache_if, `NUM_CLUSTERS);
`endif
VX_tex_dcr_if #(
.NUM_STAGES (`TEX_STAGE_COUNT)
) tex_dcr_if();
`endif
`ifdef EXT_RASTER_ENABLE
@ -108,7 +100,6 @@ module Vortex (
`PERF_RASTER_ADD (perf_raster_total_if, perf_raster_if, `NUM_CLUSTERS);
`PERF_CACHE_ADD (perf_rcache_total_if, perf_rcache_if, `NUM_CLUSTERS);
`endif
VX_raster_dcr_if raster_dcr_if();
`endif
`ifdef EXT_ROP_ENABLE
@ -120,30 +111,8 @@ module Vortex (
`PERF_ROP_ADD (perf_rop_total_if, perf_rop_if, `NUM_CLUSTERS);
`PERF_CACHE_ADD (perf_ocache_total_if, perf_ocache_if, `NUM_CLUSTERS);
`endif
VX_rop_dcr_if rop_dcr_if();
`endif
`RESET_RELAY (dcr_reset, reset);
VX_dcr_data dcr_data(
.clk (clk),
.reset (dcr_reset),
.dcr_base_if (dcr_base_if),
`ifdef EXT_TEX_ENABLE
.tex_dcr_if (tex_dcr_if),
`endif
`ifdef EXT_RASTER_ENABLE
.raster_dcr_if (raster_dcr_if),
`endif
`ifdef EXT_ROP_ENABLE
.rop_dcr_if (rop_dcr_if),
`endif
.dcr_wr_valid (dcr_wr_valid),
.dcr_wr_addr (dcr_wr_addr),
.dcr_wr_data (dcr_wr_data),
.dcr_wr_ready (dcr_wr_ready)
);
wire sim_ebreak /* verilator public */;
wire [`NUM_REGS-1:0][31:0] sim_wb_value /* verilator public */;
wire [`NUM_CLUSTERS-1:0] per_cluster_sim_ebreak;
@ -153,9 +122,6 @@ module Vortex (
`UNUSED_VAR (per_cluster_sim_ebreak)
`UNUSED_VAR (per_cluster_sim_wb_value)
// also reset device on start
wire reset_or_start = reset || start;
VX_mem_req_if #(
.DATA_WIDTH (L2_MEM_DATA_WIDTH),
.TAG_WIDTH (L2_MEM_TAG_WIDTH)
@ -166,18 +132,19 @@ module Vortex (
.TAG_WIDTH (L2_MEM_TAG_WIDTH)
) per_cluster_mem_rsp_if[`NUM_CLUSTERS]();
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
VX_dcr_write_if dcr_write_if();
assign dcr_write_if.valid = dcr_wr_valid;
assign dcr_write_if.addr = dcr_wr_addr;
assign dcr_write_if.data = dcr_wr_data;
base_dcrs_t base_dcrs;
assign base_dcrs = dcr_base_if.data;
`UNUSED_VAR (base_dcrs)
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
// Generate all clusters
for (genvar i = 0; i < `NUM_CLUSTERS; ++i) begin
`RESET_RELAY_EX (cluster_reset, reset_or_start, (`NUM_CLUSTERS > 1));
`RESET_RELAY_EX (cluster_reset, reset, (`NUM_CLUSTERS > 1));
`BUFFER_EX (cluster_base_dcrs, base_dcrs, (`NUM_CLUSTERS > 1));
`BUFFER_DCR_WRITE_IF(cluster_dcr_write_if, dcr_write_if, (`NUM_CLUSTERS > 1));
VX_cluster #(
.CLUSTER_ID (i)
@ -192,7 +159,7 @@ module Vortex (
.perf_memsys_total_if (perf_memsys_total_if),
`endif
.base_dcrs (cluster_base_dcrs),
.dcr_write_if (cluster_dcr_write_if),
`ifdef EXT_TEX_ENABLE
`ifdef PERF_ENABLE
@ -201,7 +168,6 @@ module Vortex (
.perf_tex_total_if (perf_tex_total_if),
.perf_tcache_total_if (perf_tcache_total_if),
`endif
.tex_dcr_if (tex_dcr_if),
`endif
`ifdef EXT_RASTER_ENABLE
@ -211,7 +177,6 @@ module Vortex (
.perf_raster_total_if (perf_raster_total_if),
.perf_rcache_total_if (perf_rcache_total_if),
`endif
.raster_dcr_if (raster_dcr_if),
`endif
`ifdef EXT_ROP_ENABLE
@ -221,7 +186,6 @@ module Vortex (
.perf_rop_total_if (perf_rop_total_if),
.perf_ocache_total_if (perf_ocache_total_if),
`endif
.rop_dcr_if (rop_dcr_if),
`endif
.mem_req_if (per_cluster_mem_req_if[i]),
@ -236,7 +200,7 @@ module Vortex (
assign busy = (| per_cluster_busy);
`RESET_RELAY (l3_reset, reset_or_start);
`RESET_RELAY (l3_reset, reset);
VX_cache_wrap #(
.INSTANCE_ID ("l3cache"),

View file

@ -66,10 +66,8 @@ module Vortex_axi #(
input wire dcr_wr_valid,
input wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr,
input wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data,
output wire dcr_wr_ready,
// Control / status
input wire start,
// Status
output wire busy
);
wire mem_req_valid;
@ -174,9 +172,7 @@ module Vortex_axi #(
.dcr_wr_valid (dcr_wr_valid),
.dcr_wr_addr (dcr_wr_addr),
.dcr_wr_data (dcr_wr_data),
.dcr_wr_ready (dcr_wr_ready),
.start (start),
.busy (busy)
);

View file

@ -299,7 +299,6 @@ end
wire cmd_mem_rd_done;
reg cmd_mem_wr_done;
wire cmd_dcr_wr_done;
wire cmd_run_done;
reg vx_started;
@ -374,12 +373,10 @@ always @(posedge clk) begin
end
STATE_DCR_WRITE: begin
if (cmd_dcr_wr_done) begin
state <= STATE_IDLE;
`ifdef DBG_TRACE_AFU
`TRACE(2, ("%d: STATE IDLE\n", $time));
`endif
end
state <= STATE_IDLE;
`ifdef DBG_TRACE_AFU
`TRACE(2, ("%d: STATE IDLE\n", $time));
`endif
end
STATE_RUN: begin
@ -873,13 +870,12 @@ assign cci_mem_req_tag = cci_mem_req_rw ? cci_mem_wr_req_ctr : cci_mem_rd_req_
wire vx_dcr_wr_valid = (STATE_DCR_WRITE == state);
wire [`VX_DCR_ADDR_WIDTH-1:0] vx_dcr_wr_addr = cmd_dcr_addr;
wire [`VX_DCR_DATA_WIDTH-1:0] vx_dcr_wr_data = cmd_dcr_data;
wire vx_dcr_wr_ready;
Vortex vortex (
`SCOPE_BIND_afu_vortex
.clk (clk),
.reset (reset),
.reset (reset || vx_start),
// Memory request
.mem_req_valid (vx_mem_req_valid),
@ -900,14 +896,11 @@ Vortex vortex (
.dcr_wr_valid (vx_dcr_wr_valid),
.dcr_wr_addr (vx_dcr_wr_addr),
.dcr_wr_data (vx_dcr_wr_data),
.dcr_wr_ready (vx_dcr_wr_ready),
// Control / status
.start (vx_start),
// Status
.busy (vx_busy)
);
assign cmd_dcr_wr_done = vx_dcr_wr_ready;
assign cmd_run_done = !vx_busy;
// COUT HANDLING //////////////////////////////////////////////////////////////

View file

@ -1,20 +0,0 @@
`include "VX_define.vh"
`include "VX_gpu_types.vh"
`IGNORE_WARNINGS_BEGIN
import VX_gpu_types::*;
`IGNORE_WARNINGS_END
// Carries a base_dcrs_t value from a producer to its consumers.
//   master: drives `data`
//   slave : reads `data`
interface VX_dcr_base_if ();

    base_dcrs_t data;

    modport master (output data);
    modport slave  (input  data);

endinterface

View file

@ -0,0 +1,21 @@
`include "VX_define.vh"
// Write port for device configuration registers (DCRs).
// Bundles a valid strobe with the target address and the data to store.
// The interface declares no ready/acknowledge signal.
//   master: drives valid, addr and data
//   slave : reads valid, addr and data
interface VX_dcr_write_if ();

    wire                          valid;
    wire [`VX_DCR_ADDR_WIDTH-1:0] addr;
    wire [`VX_DCR_DATA_WIDTH-1:0] data;

    modport master (output valid, addr, data);
    modport slave  (input  valid, addr, data);

endinterface

View file

@ -7,52 +7,50 @@ module VX_raster_dcr #(
input wire reset,
// Inputs
input wire dcr_wr_valid,
input wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr,
input wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data,
VX_dcr_write_if.slave dcr_write_if,
// Output
VX_raster_dcr_if.master raster_dcr_if
output raster_dcrs_t raster_dcrs
);
`UNUSED_VAR (reset)
// DCR registers
raster_dcrs_t dcrs;
// DCRs write
always @(posedge clk) begin
if (reset) begin
dcrs <= '0;
end else if (dcr_wr_valid) begin
case (dcr_wr_addr)
if (dcr_write_if.valid) begin
case (dcr_write_if.addr)
`DCR_RASTER_TBUF_ADDR: begin
dcrs.tbuf_addr <= dcr_wr_data[`RASTER_DCR_DATA_BITS-1:0];
dcrs.tbuf_addr <= dcr_write_if.data[`RASTER_DCR_DATA_BITS-1:0];
end
`DCR_RASTER_TILE_COUNT: begin
dcrs.tile_count <= dcr_wr_data[`RASTER_TILE_BITS-1:0];
dcrs.tile_count <= dcr_write_if.data[`RASTER_TILE_BITS-1:0];
end
`DCR_RASTER_PBUF_ADDR: begin
dcrs.pbuf_addr <= dcr_wr_data[`RASTER_DCR_DATA_BITS-1:0];
dcrs.pbuf_addr <= dcr_write_if.data[`RASTER_DCR_DATA_BITS-1:0];
end
`DCR_RASTER_PBUF_STRIDE: begin
dcrs.pbuf_stride <= dcr_wr_data[`RASTER_STRIDE_BITS-1:0];
dcrs.pbuf_stride <= dcr_write_if.data[`RASTER_STRIDE_BITS-1:0];
end
`DCR_RASTER_DST_SIZE: begin
dcrs.dst_width <= dcr_wr_data[0 +: `RASTER_DIM_BITS];
dcrs.dst_height <= dcr_wr_data[16 +: `RASTER_DIM_BITS];
dcrs.dst_width <= dcr_write_if.data[0 +: `RASTER_DIM_BITS];
dcrs.dst_height <= dcr_write_if.data[16 +: `RASTER_DIM_BITS];
end
endcase
end
end
// DCRs read
assign raster_dcr_if.data = dcrs;
assign raster_dcrs = dcrs;
`ifdef DBG_TRACE_RASTER
always @(posedge clk) begin
if (dcr_wr_valid) begin
if (dcr_write_if.valid) begin
`TRACE(1, ("%d: raster-dcr: state=", $time));
trace_raster_state(1, dcr_wr_addr);
`TRACE(1, (", data=0x%0h\n", dcr_wr_data));
trace_raster_state(1, dcr_write_if.addr);
`TRACE(1, (", data=0x%0h\n", dcr_write_if.data));
end
end
`endif

View file

@ -1,15 +0,0 @@
`include "VX_raster_define.vh"
// Carries a raster_dcrs_t value from a producer to its consumers.
//   master: drives `data`
//   slave : reads `data`
interface VX_raster_dcr_if ();

    raster_dcrs_t data;

    modport master (output data);
    modport slave  (input  data);

endinterface

View file

@ -9,8 +9,7 @@ module VX_raster_unit #(
parameter BLOCK_LOGSIZE = 2, // block log size
parameter MEM_FIFO_DEPTH = 4, // memory queue size
parameter QUAD_FIFO_DEPTH = 4, // quad queue size
parameter OUTPUT_QUADS = 4 // number of output quads
parameter OUTPUT_QUADS = 4 // number of output quads
) (
input wire clk,
input wire reset,
@ -25,7 +24,7 @@ module VX_raster_unit #(
VX_cache_rsp_if.slave cache_rsp_if,
// Inputs
VX_raster_dcr_if.slave raster_dcr_if,
VX_dcr_write_if.slave dcr_write_if,
// Outputs
VX_raster_req_if.master raster_req_if
@ -36,10 +35,19 @@ module VX_raster_unit #(
localparam PRIM_DATA_WIDTH = 2 * `RASTER_DIM_BITS + `RASTER_PID_BITS + 9 * `RASTER_DATA_BITS + 3 * `RASTER_DATA_BITS;
`STATIC_ASSERT(TILE_LOGSIZE > BLOCK_LOGSIZE, ("invalid parameter"))
// DCRs
raster_dcrs_t raster_dcrs;
assign raster_dcrs = raster_dcr_if.data;
`UNUSED_VAR (raster_dcrs)
VX_raster_dcr raster_dcr (
.clk (clk),
.reset (reset),
.dcr_write_if(dcr_write_if),
.raster_dcrs(raster_dcrs)
);
///////////////////////////////////////////////////////////////////////////
// Output from the request
wire [`RASTER_DIM_BITS-1:0] mem_x_loc;

View file

@ -5,14 +5,14 @@ module VX_rop_dcr (
input wire reset,
// Inputs
input wire dcr_wr_valid,
input wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr,
input wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data,
VX_dcr_write_if.slave dcr_write_if,
// Output
VX_rop_dcr_if.master rop_dcr_if
output rop_dcrs_t rop_dcrs
);
`UNUSED_VAR (reset)
`define DEPTH_TEST_ENABLE(func, writemask) \
~((func == `ROP_DEPTH_FUNC_ALWAYS) && ~writemask)
@ -34,98 +34,96 @@ module VX_rop_dcr (
// DCRs write
always @(posedge clk) begin
if (reset) begin
dcrs <= '0;
end else if (dcr_wr_valid) begin
case (dcr_wr_addr)
if (dcr_write_if.valid) begin
case (dcr_write_if.addr)
`DCR_ROP_CBUF_ADDR: begin
dcrs.cbuf_addr <= dcr_wr_data[31:0];
dcrs.cbuf_addr <= dcr_write_if.data[31:0];
end
`DCR_ROP_CBUF_PITCH: begin
dcrs.cbuf_pitch <= dcr_wr_data[`ROP_PITCH_BITS-1:0];
dcrs.cbuf_pitch <= dcr_write_if.data[`ROP_PITCH_BITS-1:0];
end
`DCR_ROP_CBUF_WRITEMASK: begin
dcrs.cbuf_writemask <= dcr_wr_data[3:0];
dcrs.cbuf_writemask <= dcr_write_if.data[3:0];
end
`DCR_ROP_ZBUF_ADDR: begin
dcrs.zbuf_addr <= dcr_wr_data[31:0];
dcrs.zbuf_addr <= dcr_write_if.data[31:0];
end
`DCR_ROP_ZBUF_PITCH: begin
dcrs.zbuf_pitch <= dcr_wr_data[`ROP_PITCH_BITS-1:0];
dcrs.zbuf_pitch <= dcr_write_if.data[`ROP_PITCH_BITS-1:0];
end
`DCR_ROP_DEPTH_FUNC: begin
dcrs.depth_func <= dcr_wr_data[0 +: `ROP_DEPTH_FUNC_BITS];
dcrs.depth_enable <= `DEPTH_TEST_ENABLE(dcr_wr_data[0 +: `ROP_DEPTH_FUNC_BITS], dcrs.depth_writemask);
dcrs.depth_func <= dcr_write_if.data[0 +: `ROP_DEPTH_FUNC_BITS];
dcrs.depth_enable <= `DEPTH_TEST_ENABLE(dcr_write_if.data[0 +: `ROP_DEPTH_FUNC_BITS], dcrs.depth_writemask);
end
`DCR_ROP_DEPTH_WRITEMASK: begin
dcrs.depth_writemask <= dcr_wr_data[0];
dcrs.depth_enable <= `DEPTH_TEST_ENABLE(dcrs.depth_func, dcr_wr_data[0]);
dcrs.depth_writemask <= dcr_write_if.data[0];
dcrs.depth_enable <= `DEPTH_TEST_ENABLE(dcrs.depth_func, dcr_write_if.data[0]);
end
`DCR_ROP_STENCIL_FUNC: begin
dcrs.stencil_func[0] <= dcr_wr_data[0 +: `ROP_DEPTH_FUNC_BITS];
dcrs.stencil_func[1] <= dcr_wr_data[16 +: `ROP_DEPTH_FUNC_BITS];
dcrs.stencil_enable[0] <= `STENCIL_TEST_ENABLE(dcr_wr_data[0 +: `ROP_DEPTH_FUNC_BITS], dcrs.stencil_zpass[0], dcrs.stencil_zfail[0]);
dcrs.stencil_enable[1] <= `STENCIL_TEST_ENABLE(dcr_wr_data[16 +: `ROP_DEPTH_FUNC_BITS], dcrs.stencil_zpass[1], dcrs.stencil_zfail[1]);
dcrs.stencil_func[0] <= dcr_write_if.data[0 +: `ROP_DEPTH_FUNC_BITS];
dcrs.stencil_func[1] <= dcr_write_if.data[16 +: `ROP_DEPTH_FUNC_BITS];
dcrs.stencil_enable[0] <= `STENCIL_TEST_ENABLE(dcr_write_if.data[0 +: `ROP_DEPTH_FUNC_BITS], dcrs.stencil_zpass[0], dcrs.stencil_zfail[0]);
dcrs.stencil_enable[1] <= `STENCIL_TEST_ENABLE(dcr_write_if.data[16 +: `ROP_DEPTH_FUNC_BITS], dcrs.stencil_zpass[1], dcrs.stencil_zfail[1]);
end
`DCR_ROP_STENCIL_ZPASS: begin
dcrs.stencil_zpass[0] <= dcr_wr_data[0 +: `ROP_STENCIL_OP_BITS];
dcrs.stencil_zpass[1] <= dcr_wr_data[16 +: `ROP_STENCIL_OP_BITS];
dcrs.stencil_enable[0] <= `STENCIL_TEST_ENABLE(dcrs.stencil_func[0], dcr_wr_data[0 +: `ROP_STENCIL_OP_BITS], dcrs.stencil_zfail[0]);
dcrs.stencil_enable[1] <= `STENCIL_TEST_ENABLE(dcrs.stencil_func[1], dcr_wr_data[16 +: `ROP_STENCIL_OP_BITS], dcrs.stencil_zfail[1]);
dcrs.stencil_zpass[0] <= dcr_write_if.data[0 +: `ROP_STENCIL_OP_BITS];
dcrs.stencil_zpass[1] <= dcr_write_if.data[16 +: `ROP_STENCIL_OP_BITS];
dcrs.stencil_enable[0] <= `STENCIL_TEST_ENABLE(dcrs.stencil_func[0], dcr_write_if.data[0 +: `ROP_STENCIL_OP_BITS], dcrs.stencil_zfail[0]);
dcrs.stencil_enable[1] <= `STENCIL_TEST_ENABLE(dcrs.stencil_func[1], dcr_write_if.data[16 +: `ROP_STENCIL_OP_BITS], dcrs.stencil_zfail[1]);
end
`DCR_ROP_STENCIL_ZFAIL: begin
dcrs.stencil_zfail[0] <= dcr_wr_data[0 +: `ROP_STENCIL_OP_BITS];
dcrs.stencil_zfail[1] <= dcr_wr_data[16 +: `ROP_STENCIL_OP_BITS];
dcrs.stencil_enable[0] <= `STENCIL_TEST_ENABLE(dcrs.stencil_func[0], dcrs.stencil_zpass[0], dcr_wr_data[0 +: `ROP_STENCIL_OP_BITS]);
dcrs.stencil_enable[1] <= `STENCIL_TEST_ENABLE(dcrs.stencil_func[1], dcrs.stencil_zpass[1], dcr_wr_data[16 +: `ROP_STENCIL_OP_BITS]);
dcrs.stencil_zfail[0] <= dcr_write_if.data[0 +: `ROP_STENCIL_OP_BITS];
dcrs.stencil_zfail[1] <= dcr_write_if.data[16 +: `ROP_STENCIL_OP_BITS];
dcrs.stencil_enable[0] <= `STENCIL_TEST_ENABLE(dcrs.stencil_func[0], dcrs.stencil_zpass[0], dcr_write_if.data[0 +: `ROP_STENCIL_OP_BITS]);
dcrs.stencil_enable[1] <= `STENCIL_TEST_ENABLE(dcrs.stencil_func[1], dcrs.stencil_zpass[1], dcr_write_if.data[16 +: `ROP_STENCIL_OP_BITS]);
end
`DCR_ROP_STENCIL_FAIL: begin
dcrs.stencil_fail[0] <= dcr_wr_data[0 +: `ROP_STENCIL_OP_BITS];
dcrs.stencil_fail[1] <= dcr_wr_data[16 +: `ROP_STENCIL_OP_BITS];
dcrs.stencil_fail[0] <= dcr_write_if.data[0 +: `ROP_STENCIL_OP_BITS];
dcrs.stencil_fail[1] <= dcr_write_if.data[16 +: `ROP_STENCIL_OP_BITS];
end
`DCR_ROP_STENCIL_REF: begin
dcrs.stencil_ref[0] <= dcr_wr_data[0 +: `ROP_STENCIL_BITS];
dcrs.stencil_ref[1] <= dcr_wr_data[16 +: `ROP_STENCIL_BITS];
dcrs.stencil_ref[0] <= dcr_write_if.data[0 +: `ROP_STENCIL_BITS];
dcrs.stencil_ref[1] <= dcr_write_if.data[16 +: `ROP_STENCIL_BITS];
end
`DCR_ROP_STENCIL_MASK: begin
dcrs.stencil_mask[0] <= dcr_wr_data[0 +: `ROP_STENCIL_BITS];
dcrs.stencil_mask[1] <= dcr_wr_data[16 +: `ROP_STENCIL_BITS];
dcrs.stencil_mask[0] <= dcr_write_if.data[0 +: `ROP_STENCIL_BITS];
dcrs.stencil_mask[1] <= dcr_write_if.data[16 +: `ROP_STENCIL_BITS];
end
`DCR_ROP_STENCIL_WRITEMASK: begin
dcrs.stencil_writemask[0] <= dcr_wr_data[0 +: `ROP_STENCIL_BITS];
dcrs.stencil_writemask[1] <= dcr_wr_data[16 +: `ROP_STENCIL_BITS];
dcrs.stencil_writemask[0] <= dcr_write_if.data[0 +: `ROP_STENCIL_BITS];
dcrs.stencil_writemask[1] <= dcr_write_if.data[16 +: `ROP_STENCIL_BITS];
end
`DCR_ROP_BLEND_MODE: begin
dcrs.blend_mode_rgb <= dcr_wr_data[0 +: `ROP_BLEND_MODE_BITS];
dcrs.blend_mode_a <= dcr_wr_data[16 +: `ROP_BLEND_MODE_BITS];
dcrs.blend_enable <= `BLEND_ENABLE(dcr_wr_data[0 +: `ROP_BLEND_MODE_BITS], dcr_wr_data[16 +: `ROP_BLEND_MODE_BITS], dcrs.blend_src_rgb, dcrs.blend_src_a, dcrs.blend_dst_rgb, dcrs.blend_dst_a);
dcrs.blend_mode_rgb <= dcr_write_if.data[0 +: `ROP_BLEND_MODE_BITS];
dcrs.blend_mode_a <= dcr_write_if.data[16 +: `ROP_BLEND_MODE_BITS];
dcrs.blend_enable <= `BLEND_ENABLE(dcr_write_if.data[0 +: `ROP_BLEND_MODE_BITS], dcr_write_if.data[16 +: `ROP_BLEND_MODE_BITS], dcrs.blend_src_rgb, dcrs.blend_src_a, dcrs.blend_dst_rgb, dcrs.blend_dst_a);
end
`DCR_ROP_BLEND_FUNC: begin
dcrs.blend_src_rgb <= dcr_wr_data[0 +: `ROP_BLEND_FUNC_BITS];
dcrs.blend_src_a <= dcr_wr_data[8 +: `ROP_BLEND_FUNC_BITS];
dcrs.blend_dst_rgb <= dcr_wr_data[16 +: `ROP_BLEND_FUNC_BITS];
dcrs.blend_dst_a <= dcr_wr_data[24 +: `ROP_BLEND_FUNC_BITS];
dcrs.blend_enable <= `BLEND_ENABLE(dcrs.blend_mode_rgb, dcrs.blend_mode_a, dcr_wr_data[0 +: `ROP_BLEND_FUNC_BITS], dcr_wr_data[8 +: `ROP_BLEND_FUNC_BITS], dcr_wr_data[16 +: `ROP_BLEND_FUNC_BITS], dcr_wr_data[24 +: `ROP_BLEND_FUNC_BITS]);
dcrs.blend_src_rgb <= dcr_write_if.data[0 +: `ROP_BLEND_FUNC_BITS];
dcrs.blend_src_a <= dcr_write_if.data[8 +: `ROP_BLEND_FUNC_BITS];
dcrs.blend_dst_rgb <= dcr_write_if.data[16 +: `ROP_BLEND_FUNC_BITS];
dcrs.blend_dst_a <= dcr_write_if.data[24 +: `ROP_BLEND_FUNC_BITS];
dcrs.blend_enable <= `BLEND_ENABLE(dcrs.blend_mode_rgb, dcrs.blend_mode_a, dcr_write_if.data[0 +: `ROP_BLEND_FUNC_BITS], dcr_write_if.data[8 +: `ROP_BLEND_FUNC_BITS], dcr_write_if.data[16 +: `ROP_BLEND_FUNC_BITS], dcr_write_if.data[24 +: `ROP_BLEND_FUNC_BITS]);
end
`DCR_ROP_BLEND_CONST: begin
dcrs.blend_const <= dcr_wr_data[0 +: 32];
dcrs.blend_const <= dcr_write_if.data[0 +: 32];
end
`DCR_ROP_LOGIC_OP: begin
dcrs.logic_op <= dcr_wr_data[0 +: `ROP_LOGIC_OP_BITS];
dcrs.logic_op <= dcr_write_if.data[0 +: `ROP_LOGIC_OP_BITS];
end
endcase
end
end
// DCRs read
assign rop_dcr_if.data = dcrs;
assign rop_dcrs = dcrs;
`ifdef DBG_TRACE_ROP
always @(posedge clk) begin
if (dcr_wr_valid) begin
if (dcr_write_if.valid) begin
`TRACE(1, ("%d: rop-dcr: state=", $time));
trace_rop_state(1, dcr_wr_addr);
`TRACE(1, (", data=0x%0h\n", dcr_wr_data));
trace_rop_state(1, dcr_write_if.addr);
`TRACE(1, (", data=0x%0h\n", dcr_write_if.data));
end
end
`endif

View file

@ -1,15 +0,0 @@
`include "VX_rop_define.vh"
// Interface that carries the ROP unit's DCR state as a single rop_dcrs_t value.
// NOTE(review): rop_dcrs_t is not declared here; presumably it comes from
// VX_rop_define.vh, which is included just above - confirm.
interface VX_rop_dcr_if ();
// Current ROP DCR values.
rop_dcrs_t data;
// master: the side that drives `data`.
modport master (
output data
);
// slave: the side that only reads `data`.
modport slave (
input data
);
endinterface

View file

@ -9,21 +9,34 @@ module VX_rop_unit #(
// PERF
`ifdef PERF_ENABLE
VX_rop_perf_if.master perf_rop_if,
VX_rop_perf_if.master perf_rop_if,
`endif
// Memory interface
VX_cache_req_if.master cache_req_if,
VX_cache_rsp_if.slave cache_rsp_if,
VX_cache_req_if.master cache_req_if,
VX_cache_rsp_if.slave cache_rsp_if,
// Inputs
VX_rop_dcr_if.slave rop_dcr_if,
VX_rop_req_if.slave rop_req_if
VX_dcr_write_if.slave dcr_write_if,
VX_rop_req_if.slave rop_req_if
);
localparam MEM_TAG_WIDTH = NUM_LANES * (`ROP_DIM_BITS + `ROP_DIM_BITS + 32 + `ROP_DEPTH_BITS + 1);
localparam DS_TAG_WIDTH = NUM_LANES * (`ROP_DIM_BITS + `ROP_DIM_BITS + 1 + 1 + 32);
localparam BLEND_TAG_WIDTH = NUM_LANES * (`ROP_DIM_BITS + `ROP_DIM_BITS + 1);
// DCRs
rop_dcrs_t rop_dcrs;
VX_rop_dcr rop_dcr (
.clk (clk),
.reset (reset),
.dcr_write_if(dcr_write_if),
.rop_dcrs (rop_dcrs)
);
///////////////////////////////////////////////////////////////////////////
wire mem_req_valid, mem_req_valid_r;
wire [NUM_LANES-1:0] mem_req_mask, mem_req_mask_r;
wire [NUM_LANES-1:0] mem_req_ds_pass, mem_req_ds_pass_r;
@ -45,9 +58,6 @@ module VX_rop_unit #(
wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag;
wire mem_rsp_ready;
rop_dcrs_t dcrs;
assign dcrs = rop_dcr_if.data;
VX_rop_mem #(
.INSTANCE_ID (INSTANCE_ID),
.NUM_LANES (NUM_LANES),
@ -56,7 +66,7 @@ module VX_rop_unit #(
.clk (clk),
.reset (reset),
.dcrs (dcrs),
.dcrs (rop_dcrs),
.cache_req_if (cache_req_if),
.cache_rsp_if (cache_rsp_if),
@ -110,7 +120,7 @@ module VX_rop_unit #(
.clk (clk),
.reset (reset),
.dcrs (dcrs),
.dcrs (rop_dcrs),
.valid_in (ds_valid_in),
.tag_in (ds_tag_in),
@ -151,7 +161,7 @@ module VX_rop_unit #(
.clk (clk),
.reset (reset),
.dcrs (dcrs),
.dcrs (rop_dcrs),
.valid_in (blend_valid_in),
.tag_in (blend_tag_in),
@ -168,20 +178,20 @@ module VX_rop_unit #(
///////////////////////////////////////////////////////////////////////////
wire color_writeen = (dcrs.cbuf_writemask != 0);
wire color_writeen = (rop_dcrs.cbuf_writemask != 0);
wire depth_enable = dcrs.depth_enable;
wire depth_writeen = dcrs.depth_enable && (dcrs.depth_writemask != 0);
wire depth_enable = rop_dcrs.depth_enable;
wire depth_writeen = rop_dcrs.depth_enable && (rop_dcrs.depth_writemask != 0);
wire stencil_enable = (| dcrs.stencil_enable);
wire stencil_writeen = (dcrs.stencil_enable[0] && (dcrs.stencil_writemask[0] != 0))
| (dcrs.stencil_enable[1] && (dcrs.stencil_writemask[1] != 0));
wire stencil_enable = (| rop_dcrs.stencil_enable);
wire stencil_writeen = (rop_dcrs.stencil_enable[0] && (rop_dcrs.stencil_writemask[0] != 0))
| (rop_dcrs.stencil_enable[1] && (rop_dcrs.stencil_writemask[1] != 0));
wire ds_enable = depth_enable | stencil_enable;
wire ds_writeen = depth_writeen | stencil_writeen;
wire blend_enable = dcrs.blend_enable;
wire blend_writeen = dcrs.blend_enable & color_writeen;
wire blend_enable = rop_dcrs.blend_enable;
wire blend_writeen = rop_dcrs.blend_enable & color_writeen;
wire mem_readen = ds_enable | blend_enable;

View file

@ -7,57 +7,51 @@ module VX_tex_dcr #(
input wire reset,
// Inputs
input wire dcr_wr_valid,
input wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr,
input wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data,
VX_dcr_write_if.slave dcr_write_if,
// Output
VX_tex_dcr_if.master tex_dcr_if
input wire [`TEX_STAGE_BITS-1:0] stage,
output tex_dcrs_t tex_dcrs
);
`UNUSED_VAR (reset)
// DCR registers
reg [$clog2(NUM_STAGES)-1:0] tex_stage;
tex_dcrs_t tex_dcrs [NUM_STAGES-1:0];
reg [$clog2(NUM_STAGES)-1:0] dcr_stage;
tex_dcrs_t dcrs [NUM_STAGES-1:0];
// DCRs write
always @(posedge clk) begin
if (reset) begin
tex_stage <= 0;
for (integer i = 0; i < NUM_STAGES; ++i) begin
tex_dcrs[i] <= '0;
end
end else if (dcr_wr_valid) begin
case (dcr_wr_addr)
if (dcr_write_if.valid) begin
case (dcr_write_if.addr)
`DCR_TEX_STAGE: begin
tex_stage <= dcr_wr_data[$clog2(NUM_STAGES)-1:0];
dcr_stage <= dcr_write_if.data[$clog2(NUM_STAGES)-1:0];
end
`DCR_TEX_ADDR: begin
tex_dcrs[tex_stage].baseaddr <= dcr_wr_data[`TEX_ADDR_BITS-1:0];
dcrs[dcr_stage].baseaddr <= dcr_write_if.data[`TEX_ADDR_BITS-1:0];
end
`DCR_TEX_FORMAT: begin
tex_dcrs[tex_stage].format <= dcr_wr_data[`TEX_FORMAT_BITS-1:0];
dcrs[dcr_stage].format <= dcr_write_if.data[`TEX_FORMAT_BITS-1:0];
end
`DCR_TEX_FILTER: begin
tex_dcrs[tex_stage].filter <= dcr_wr_data[`TEX_FILTER_BITS-1:0];
dcrs[dcr_stage].filter <= dcr_write_if.data[`TEX_FILTER_BITS-1:0];
end
`DCR_TEX_WRAP: begin
tex_dcrs[tex_stage].wraps[0] <= dcr_wr_data[0 +: `TEX_WRAP_BITS];
tex_dcrs[tex_stage].wraps[1] <= dcr_wr_data[16 +: `TEX_WRAP_BITS];
dcrs[dcr_stage].wraps[0] <= dcr_write_if.data[0 +: `TEX_WRAP_BITS];
dcrs[dcr_stage].wraps[1] <= dcr_write_if.data[16 +: `TEX_WRAP_BITS];
end
`DCR_TEX_LOGDIM: begin
tex_dcrs[tex_stage].logdims[0] <= dcr_wr_data[0 +: `TEX_LOD_BITS];
tex_dcrs[tex_stage].logdims[1] <= dcr_wr_data[16 +: `TEX_LOD_BITS];
dcrs[dcr_stage].logdims[0] <= dcr_write_if.data[0 +: `TEX_LOD_BITS];
dcrs[dcr_stage].logdims[1] <= dcr_write_if.data[16 +: `TEX_LOD_BITS];
end
default: begin
for (integer j = 0; j <= `TEX_LOD_MAX; ++j) begin
`IGNORE_WARNINGS_BEGIN
if (dcr_wr_addr == `DCR_TEX_MIPOFF(j)) begin
if (dcr_write_if.addr == `DCR_TEX_MIPOFF(j)) begin
`IGNORE_WARNINGS_END
tex_dcrs[tex_stage].mipoff[j] <= dcr_wr_data[`TEX_MIPOFF_BITS-1:0];
dcrs[dcr_stage].mipoff[j] <= dcr_write_if.data[`TEX_MIPOFF_BITS-1:0];
end
end
end
@ -66,14 +60,14 @@ module VX_tex_dcr #(
end
// DCRs read
assign tex_dcr_if.data = tex_dcrs;
assign tex_dcrs = dcrs[stage];
`ifdef DBG_TRACE_TEX
always @(posedge clk) begin
if (dcr_wr_valid) begin
`TRACE(1, ("%d: tex-dcr: stage=%0d, state=", $time, tex_stage));
trace_tex_dcr(1, dcr_wr_addr);
`TRACE(1, (", data=0x%0h\n", dcr_wr_data));
if (dcr_write_if.valid) begin
`TRACE(1, ("%d: tex-dcr: stage=%0d, state=", $time, dcr_stage));
trace_tex_dcr(1, dcr_write_if.addr);
`TRACE(1, (", data=0x%0h\n", dcr_write_if.data));
end
end
`endif

View file

@ -1,16 +0,0 @@
`include "VX_tex_define.vh"
// Interface that carries the texture unit's DCR state, one tex_dcrs_t entry
// per stage.
interface VX_tex_dcr_if #(
// Number of entries in `data`; defaults to 1.
parameter NUM_STAGES = 1
);
// Per-stage DCR values, indexed 0 .. NUM_STAGES-1.
tex_dcrs_t data [NUM_STAGES-1:0];
// master: the side that drives `data`.
modport master (
output data
);
// slave: the side that only reads `data`.
modport slave (
input data
);
endinterface

View file

@ -18,7 +18,7 @@ module VX_tex_unit #(
VX_cache_rsp_if.slave cache_rsp_if,
// Inputs
VX_tex_dcr_if.slave tex_dcr_if,
VX_dcr_write_if.slave dcr_write_if,
VX_tex_req_if.slave tex_req_if,
// Outputs
@ -27,6 +27,20 @@ module VX_tex_unit #(
localparam BLEND_FRAC_W = (2 * NUM_LANES * `TEX_BLEND_FRAC);
// DCRs
tex_dcrs_t tex_dcrs;
VX_tex_dcr #(
.NUM_STAGES (`TEX_STAGE_COUNT)
) tex_dcr (
.clk (clk),
.reset (reset),
.dcr_write_if(dcr_write_if),
.stage (tex_req_if.stage),
.tex_dcrs (tex_dcrs)
);
// Texture stage select
wire req_valid;
@ -42,9 +56,6 @@ module VX_tex_unit #(
wire [TAG_WIDTH-1:0] req_tag;
wire req_ready;
tex_dcrs_t tex_dcrs;
assign tex_dcrs = tex_dcr_if.data[tex_req_if.stage];
for (genvar i = 0; i < NUM_LANES; ++i) begin
assign sel_miplevel[i] = tex_req_if.lod[i][`TEX_LOD_BITS-1:0];
assign sel_mipoff[i] = tex_dcrs.mipoff[sel_miplevel[i]];

View file

@ -599,10 +599,9 @@ private:
void eval_dcr_bus(bool clk) {
if (!clk) {
dcr_wr_ready_ = device_->dcr_wr_ready;
return;
}
if (device_->dcr_wr_valid && dcr_wr_ready_) {
if (device_->dcr_wr_valid) {
device_->dcr_wr_valid = 0;
}
}
@ -658,8 +657,6 @@ private:
bool mem_wr_rsp_active_;
bool mem_wr_rsp_ready_;
bool dcr_wr_ready_;
RAM *ram_;
ramulator::Gem5Wrapper* dram_;

View file

@ -463,7 +463,7 @@ uint32_t Core::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
if ((addr >= CSR_MPM_BASE && addr < (CSR_MPM_BASE + 32))
|| (addr >= CSR_MPM_BASE_H && addr < (CSR_MPM_BASE_H + 32))) {
// user-defined MPM CSRs
auto perf_class = dcrs_.base_dcrs.read(DCR_MPM_CLASS);
auto perf_class = dcrs_.base_dcrs.read(DCR_BASE_MPM_CLASS);
switch (perf_class) {
case DCR_MPM_CLASS_NONE:
break;

View file

@ -2,19 +2,6 @@
using namespace vortex;
// Constructs the DCR container and immediately resets it through clear().
DCRS::DCRS() {
this->clear();
}
// Destructor with an empty body; no explicit cleanup is performed here.
DCRS::~DCRS() {}
// Resets every DCR group by forwarding to each member's own clear(),
// in the order base, texture, raster, ROP.
void DCRS::clear() {
base_dcrs.clear();
tex_dcrs.clear();
raster_dcrs.clear();
rop_dcrs.clear();
}
void DCRS::write(uint32_t addr, uint64_t value) {
if (addr >= DCR_BASE_STATE_BEGIN
&& addr < DCR_BASE_STATE_END) {

View file

@ -12,17 +12,6 @@ private:
std::array<uint32_t, DCR_BASE_STATE_COUNT> states_;
public:
// Constructs the base DCR block and puts it into its reset state via clear().
BaseDCRS() {
this->clear();
}
// Resets the base DCR block: zeroes every entry of states_, then stores
// STARTUP_ADDR into the DCR_STARTUP_ADDR register through write().
void clear() {
for (auto& state : states_) {
state = 0;
}
// The startup-address register is the only one given a non-zero reset value.
this->write(DCR_STARTUP_ADDR, STARTUP_ADDR);
}
uint32_t read(uint32_t addr) const {
uint32_t state = DCR_BASE_STATE(addr);
return states_.at(state);
@ -36,17 +25,12 @@ public:
// Groups the DCR state objects of the base, texture, raster and ROP units
// behind a single write() entry point.
class DCRS {
public:
DCRS();
~DCRS();
// Resets all member DCR groups.
void clear();
// Writes `value` to the DCR at `addr`; defined out of line.
void write(uint32_t addr, uint64_t value);
// NOTE(review): base_dcrs, tex_dcrs and rop_dcrs are each declared twice
// below. This looks like the pre- and post-change lines of a diff both being
// retained - confirm and keep only one declaration per member.
BaseDCRS base_dcrs;
TexUnit::DCRS tex_dcrs;
BaseDCRS base_dcrs;
TexUnit::DCRS tex_dcrs;
RasterUnit::DCRS raster_dcrs;
RopUnit::DCRS rop_dcrs;
RopUnit::DCRS rop_dcrs;
};
}

View file

@ -81,16 +81,6 @@ public:
std::array<uint32_t, DCR_RASTER_STATE_COUNT> states_;
public:
// Constructs the raster DCR block and resets it via clear().
DCRS() {
this->clear();
}
// Resets the raster DCR block by setting every entry of states_ to 0.
void clear() {
for (auto& state : states_) {
state = 0;
}
}
uint32_t read(uint32_t addr) const {
uint32_t state = DCR_RASTER_STATE(addr);
return states_.at(state);

View file

@ -46,16 +46,6 @@ public:
std::array<uint32_t, DCR_ROP_STATE_COUNT> states_;
public:
// Constructs the ROP DCR block and resets it via clear().
DCRS() {
this->clear();
}
// Resets the ROP DCR block by setting every entry of states_ to 0.
void clear() {
for (auto& state : states_) {
state = 0;
}
}
uint32_t read(uint32_t addr) const {
uint32_t state = DCR_ROP_STATE(addr);
return states_.at(state);

View file

@ -26,19 +26,6 @@ public:
uint32_t stage_;
public:
// Constructs the texture DCR block and resets it via clear().
DCRS() {
this->clear();
}
// Resets the texture DCR block: sets stage_ to 0 and zeroes every state
// of every element of states_.
void clear() {
stage_ = 0;
// states_ is iterated as a container of containers (outer element, then
// each state inside it).
for (auto& states : states_) {
for (auto& state : states) {
state = 0;
}
}
}
uint32_t read(uint32_t stage, uint32_t addr) const {
uint32_t state = DCR_TEX_STATE(addr-1);
return states_.at(stage).at(state);

View file

@ -23,7 +23,7 @@ Warp::Warp(Core *core, uint32_t warp_id)
void Warp::clear() {
active_ = false;
PC_ = core_->dcrs().base_dcrs.read(DCR_STARTUP_ADDR);
PC_ = core_->dcrs().base_dcrs.read(DCR_BASE_STARTUP_ADDR);
tmask_.reset();
issued_instrs_ = 0;
for (uint32_t i = 0, n = arch_.num_threads(); i < n; ++i) {