Merge branch 'graphics' of https://github.com/vortexgpgpu/vortex-dev into graphics

This commit is contained in:
Blaise Tine 2022-03-12 16:26:36 -05:00
commit a306f89334
34 changed files with 1888 additions and 105 deletions

View file

@ -11,10 +11,12 @@ module VX_bank #(
parameter CACHE_SIZE = 1,
// Size of line inside a bank in bytes
parameter CACHE_LINE_SIZE = 1,
// Number of bankS
// Number of banks
parameter NUM_BANKS = 1,
// Number of ports per banks
parameter NUM_PORTS = 1,
// Number of associative ways
parameter NUM_WAYS = 8,
// Size of a word in bytes
parameter WORD_SIZE = 1,
@ -36,9 +38,6 @@ module VX_bank #(
// bank offset from beginning of index range
parameter BANK_ADDR_OFFSET = 0,
//Swetha: added ways
parameter WAYS = 8, //dummy value - change this to 1 later
parameter MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE),
parameter WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
) (
@ -234,11 +233,26 @@ module VX_bank #(
wire tag_match_st0;
//Swetha: added for associativity
wire [WAYS-1:0] tag_match_way_st0;
wire [WAYS-1:0] tag_match_way_st1;
// localparam way_width = $clog2(WAYS);
// wire[way_width-1:0] tag_match_way_num_st0;
wire [NUM_WAYS-1:0] tag_match_way_st0;
wire [NUM_WAYS-1:0] tag_match_way_st1;
//Swetha: added for eviction
wire[NUM_WAYS-1:0] repl_way_st0;
wire[NUM_WAYS-1:0] repl_way_st1;
//Swetha: using left shift register to determine replacement way
genvar i;
for (i = 0; i < NUM_WAYS; i = i+1) begin
assign repl_way_st0[i] = 1;
end
// if (is_flush_st0) begin
// //assign 1 as the initial value
// assign repl_way_st0[0] = 1'b1;
// for (i = 1; i < NUM_WAYS; i = i+1) begin
// assign repl_way_st0[i] = 0;
// end
// end else begin
// assign repl_way_st0 = {repl_way_st0[NUM_WAYS-2:0],repl_way_st0[NUM_WAYS-1]};
// end
VX_tag_access #(
.BANK_ID (BANK_ID),
.CACHE_ID (CACHE_ID),
@ -247,7 +261,7 @@ module VX_bank #(
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.BANK_ADDR_OFFSET (BANK_ADDR_OFFSET),
.WAYS(WAYS)
.NUM_WAYS(NUM_WAYS)
) tag_access (
.clk (clk),
.reset (reset),
@ -263,8 +277,8 @@ module VX_bank #(
.flush (do_flush_st0),
//Swetha: added for associativity
.tag_match_way (tag_match_way_st0),
//.tag_match_way_num(tag_match_way_num_st0),
//Swetha: added for eviction
.repl_way (repl_way_st0),
.tag_match (tag_match_st0)
);
@ -274,14 +288,14 @@ module VX_bank #(
wire [MSHR_ADDR_WIDTH-1:0] mshr_id_a_st0 = (is_read_st0 || is_write_st0) ? mshr_alloc_id : mshr_id_st0;
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + NUM_PORTS * (WORD_SELECT_BITS + WORD_SIZE + `REQS_BITS + 1 + CORE_TAG_WIDTH) + MSHR_ADDR_WIDTH + 1 + WAYS),
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + NUM_PORTS * (WORD_SELECT_BITS + WORD_SIZE + `REQS_BITS + 1 + CORE_TAG_WIDTH) + MSHR_ADDR_WIDTH + 1 + NUM_WAYS + NUM_WAYS),
.RESETW (1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.enable (!crsq_stall),
.data_in ({valid_st0, is_mshr_st0, is_fill_st0, is_read_st0, is_write_st0, miss_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_id_a_st0, mshr_pending_st0, tag_match_way_st0}),
.data_out ({valid_st1, is_mshr_st1, is_fill_st1, is_read_st1, is_write_st1, miss_st1, addr_st1, wdata_st1, wsel_st1, byteen_st1, req_tid_st1, pmask_st1, tag_st1, mshr_id_st1, mshr_pending_st1, tag_match_way_st1})
.data_in ({valid_st0, is_mshr_st0, is_fill_st0, is_read_st0, is_write_st0, miss_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_id_a_st0, mshr_pending_st0, tag_match_way_st0, repl_way_st0}),
.data_out ({valid_st1, is_mshr_st1, is_fill_st1, is_read_st1, is_write_st1, miss_st1, addr_st1, wdata_st1, wsel_st1, byteen_st1, req_tid_st1, pmask_st1, tag_st1, mshr_id_st1, mshr_pending_st1, tag_match_way_st1, repl_way_st1})
);
assign req_id_st1 = tag_st1[0][`CACHE_REQ_ID_RNG];
@ -304,7 +318,7 @@ module VX_bank #(
.NUM_PORTS (NUM_PORTS),
.WORD_SIZE (WORD_SIZE),
.WRITE_ENABLE (WRITE_ENABLE),
.WAYS(WAYS)
.NUM_WAYS(NUM_WAYS)
) data_access (
.clk (clk),
.reset (reset),
@ -314,7 +328,8 @@ module VX_bank #(
.stall (crsq_stall),
//Swetha: added for associativity
.tag_match_way (tag_match_way_st1),
//.tag_match_way_num(tag_match_way_num_st0),
//Swetha: added for eviction
.repl_way (repl_way_st1),
.read (do_read_st1 || do_mshr_st1),
.fill (do_fill_st1),

View file

@ -14,6 +14,8 @@ module VX_cache #(
parameter NUM_BANKS = NUM_REQS,
// Number of ports per banks
parameter NUM_PORTS = 1,
// Number of associative ways
parameter NUM_WAYS = 8,
// Size of a word in bytes
parameter WORD_SIZE = 4,
@ -43,9 +45,6 @@ module VX_cache #(
// bank offset from beginning of index range
parameter BANK_ADDR_OFFSET = 0,
//Swetha: added ways
parameter WAYS = 8, //dummy value - change this to 1 later
// enable bypass for non-cacheable addresses
parameter NC_ENABLE = 0,
@ -406,7 +405,7 @@ module VX_cache #(
.CACHE_SIZE (CACHE_SIZE),
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
//Swetha: added ways here
.WAYS(WAYS),
.NUM_WAYS(NUM_WAYS),
.NUM_BANKS (NUM_BANKS)
) flush_ctrl (
.clk (clk),
@ -462,7 +461,7 @@ module VX_cache #(
.NUM_REQS (NUM_REQS),
.CORE_TAG_WIDTH (CORE_TAG_X_WIDTH),
//Swetha: added ways here
.WAYS(WAYS),
.NUM_WAYS(NUM_WAYS),
.BANK_ADDR_OFFSET(BANK_ADDR_OFFSET)
) core_req_bank_sel (
.clk (clk),
@ -588,7 +587,7 @@ module VX_cache #(
.WRITE_ENABLE (WRITE_ENABLE),
.CORE_TAG_WIDTH (CORE_TAG_X_WIDTH),
//Swetha: added ways here
.WAYS(WAYS),
.NUM_WAYS(NUM_WAYS),
.BANK_ADDR_OFFSET (BANK_ADDR_OFFSET)
) bank (
`SCOPE_BIND_VX_cache_bank(i)

View file

@ -20,11 +20,10 @@
`define BANK_SIZE (CACHE_SIZE / NUM_BANKS)
//Swetha: added ways
`define WAYS 8 //dummy value
`define WAY_SEL_WIDTH `CLOG2(WAYS)
//`define LINES_PER_BANK (`BANK_SIZE / CACHE_LINE_SIZE)
`define NUM_WAYS 8 //dummy value
`define WAY_SEL_WIDTH `CLOG2(NUM_WAYS)
//Swetha: modified LINES_PER_BANK definition
`define LINES_PER_BANK (`BANK_SIZE / (CACHE_LINE_SIZE*WAYS))
`define LINES_PER_BANK (`BANK_SIZE / (CACHE_LINE_SIZE*NUM_WAYS))
`define WORDS_PER_LINE (CACHE_LINE_SIZE / WORD_SIZE)
`define WORD_ADDR_WIDTH (32-`CLOG2(WORD_SIZE))

View file

@ -13,10 +13,11 @@ module VX_core_req_bank_sel #(
parameter NUM_PORTS = 1,
// Number of Word requests per cycle
parameter NUM_REQS = 4,
// Number of associative ways
parameter NUM_WAYS = 8,
// core request tag size
parameter CORE_TAG_WIDTH = 3,
//Swetha: added ways
parameter WAYS = 1, //dummy value - change this to 1 later
// bank offset from beginning of index range
parameter BANK_ADDR_OFFSET = 0
) (

View file

@ -11,14 +11,13 @@ module VX_data_access #(
parameter NUM_BANKS = 1,
// Number of ports per banks
parameter NUM_PORTS = 1,
// Number of associative ways
parameter NUM_WAYS = 8,
// Size of a word in bytes
parameter WORD_SIZE = 1,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
//Swetha: added ways
parameter WAYS = 8, //dummy value - change this to 1 later
parameter WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
) (
input wire clk,
@ -40,8 +39,10 @@ module VX_data_access #(
input wire [`WORDS_PER_LINE-1:0][`WORD_WIDTH-1:0] fill_data,
input wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] write_data,
//Swetha: added for associativity
input wire[WAYS-1:0] tag_match_way,
input wire[NUM_WAYS-1:0] tag_match_way,
//input wire[$clog2(WAYS)-1:0] tag_match_way_num,
//Swetha: added for eviction
input wire[NUM_WAYS-1:0] repl_way,
output wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] read_data
);
@ -107,13 +108,13 @@ module VX_data_access #(
/* CHANGES START HERE */
//Swetha: Local variable to capture data from all ways before assigning to output wire
wire [`WORDS_PER_LINE-1:0][`WORD_WIDTH-1:0] read_data_local [WAYS-1:0];
wire [`WORDS_PER_LINE-1:0][`WORD_WIDTH-1:0] read_data_local [NUM_WAYS-1:0];
localparam [`WAY_SEL_WIDTH-1:0] which_way = 0; //dummy assignment
generate
genvar m;
for (m = 0; m < WAYS; m = m+1) begin
assign which_way = tag_match_way[m] ? m : 'z;
for (m = 0; m < NUM_WAYS; m = m+1) begin
assign which_way = ((fill & repl_way[m]) || (!fill & tag_match_way[m])) ? m : 'z;
end
endgenerate
@ -157,7 +158,7 @@ module VX_data_access #(
localparam temp = which_way + 1;
generate
genvar k;
for (k = temp; k < WAYS; k = k+1) begin
for (k = temp; k < NUM_WAYS; k = k+1) begin
//assign which_way = tag_match_way[j] ? j : 'z;
//assign wren = (tag_match_way[j] == 1'b0) ? {BYTEENW{1'b0}} : wren;
VX_sp_ram #(
@ -187,7 +188,7 @@ localparam temp = which_way + 1;
//localparam [`WAY_SEL_WIDTH-1:0] which_way = 0;
//output wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] read_data
if (WAYS > 1) begin
if (NUM_WAYS > 1) begin
assign rdata = read_data_local[which_way];
end else begin
//`UNUSED_VAR (sel_in)

View file

@ -5,10 +5,10 @@ module VX_flush_ctrl #(
parameter CACHE_SIZE = 16384,
// Size of line inside a bank in bytes
parameter CACHE_LINE_SIZE = 1,
//Swetha: added ways
parameter WAYS = 1, //dummy value - change this to 1 later
// Number of banks
parameter NUM_BANKS = 1
parameter NUM_BANKS = 1,
// Number of associative ways
parameter NUM_WAYS = 8
) (
input wire clk,
input wire reset,

View file

@ -6,7 +6,9 @@ module VX_shared_mem #(
// Size of cache in bytes
parameter CACHE_SIZE = (1024*16),
// Number of banks
parameter NUM_BANKS = 2,
parameter NUM_BANKS = 2,
// Number of associative ways
parameter NUM_WAYS = 1,
// Size of a word in bytes
parameter WORD_SIZE = 4,
// Number of Word requests per cycle
@ -21,10 +23,7 @@ module VX_shared_mem #(
parameter CORE_TAG_ID_BITS = 8,
// core request tag size
parameter CORE_TAG_WIDTH = (2 + CORE_TAG_ID_BITS),
//Swetha: added ways
parameter WAYS = 1, //dummy value - change this to 1 later
parameter CORE_TAG_WIDTH = (2 + CORE_TAG_ID_BITS),
// bank offset from beginning of index range
parameter BANK_ADDR_OFFSET = `CLOG2(256)

View file

@ -9,10 +9,11 @@ module VX_tag_access #(
parameter CACHE_LINE_SIZE = 1,
// Number of banks
parameter NUM_BANKS = 1,
// Number of associative ways
parameter NUM_WAYS = 8,
// Size of a word in bytes
parameter WORD_SIZE = 1,
//Swetha: added ways
parameter WAYS = 8, //dummy value - change this to 1 later
// bank offset from beginning of index range
parameter BANK_ADDR_OFFSET = 0
) (
@ -30,8 +31,10 @@ module VX_tag_access #(
input wire[`LINE_ADDR_WIDTH-1:0] addr,
input wire fill,
input wire flush,
//Swetha: added for eviction
input wire[NUM_WAYS-1:0] repl_way,
//Swetha: added for associativity
output wire[WAYS-1:0] tag_match_way,
output wire[NUM_WAYS-1:0] tag_match_way,
//output wire[$clog2(WAYS)-1:0] tag_match_way_num,
output wire tag_match
);
@ -51,9 +54,16 @@ module VX_tag_access #(
/* CHANGES START HERE */
//We use a tag match array to check if each of the arrays has a match
//assign the output wire to the ANDed result of tag_match_array
//Swetha: added for eviction
wire fill_local[NUM_WAYS-1:0];
for (i = 0; i < NUM_WAYS; i = i+1) begin
assign fill_local[i] = (fill && repl_way[i]) ? 1 : 0;
end
generate
genvar i;
for (i = 0; i < WAYS; i = i+1) begin
for (i = 0; i < NUM_WAYS; i = i+1) begin
VX_sp_ram #(
.DATAW (`TAG_SELECT_BITS + 1),
.SIZE (`LINES_PER_BANK),
@ -61,8 +71,8 @@ module VX_tag_access #(
) tag_store (
.clk( clk),
.addr (line_addr),
.wren (fill || flush),
.wdata ({!flush, line_tag}),
.wren (fill_local[i] || flush),
.wdata ({!flush, line_tag}), //Swetha: modified this line for eviction
.rdata ({read_valid, read_tag})
);
assign tag_match_way[i] = read_valid && (line_tag == read_tag);

View file

@ -0,0 +1,150 @@
// Block level evaluator
// Functionality: Receives a block of AxA (where A is pow(2))
// 1. Breaks it into quad and runs quad evaluators on it
// 2. Stores the result in quad queues
// 3. Queues direction read as outputs
`include "VX_raster_define.vh"
// Block evaluator.
// Splits a RASTER_BLOCK_SIZE x RASTER_BLOCK_SIZE block into 2x2 quads,
// evaluates every quad combinationally with VX_raster_qe, captures the results
// when input_valid is asserted, and then streams them out through
// RASTER_QUAD_OUTPUT_RATE parallel FIFOs (one group of quads per push).
//
// Parameters:
//   RASTER_BLOCK_SIZE       - block edge length in pixels
//   RASTER_QUAD_OUTPUT_RATE - number of quads presented per pop (one FIFO each)
//   RASTER_QUAD_FIFO_DEPTH  - depth of each quad FIFO
module VX_raster_be #(
    parameter RASTER_BLOCK_SIZE       = 4,
    parameter RASTER_QUAD_OUTPUT_RATE = 2,
    parameter RASTER_QUAD_FIFO_DEPTH  = 16
) (
    // Standard inputs
    input logic clk, reset,
    input logic input_valid, // to indicate current input is a valid update
    input logic pop,         // to fetch data from the quad queue
    output logic empty,      // asserted when every quad FIFO is empty
    output logic ready,      // asserted once the last quad group has been reached
    // Block related input data
    input logic [`RASTER_TILE_DATA_BITS-1:0] x_loc, y_loc,
    // edge equation data for the 3 edges and ax+by+c
    input logic signed [`RASTER_PRIMITIVE_DATA_BITS-1:0] edges[2:0][2:0],
    // edge function computation value propagated
    input logic signed [`RASTER_PRIMITIVE_DATA_BITS-1:0] edge_func_val[2:0],
    // Quad related output data
    output logic [`RASTER_TILE_DATA_BITS-1:0] out_quad_x_loc[RASTER_QUAD_OUTPUT_RATE-1:0],
    output logic [`RASTER_TILE_DATA_BITS-1:0] out_quad_y_loc[RASTER_QUAD_OUTPUT_RATE-1:0],
    output logic [3:0] out_quad_masks[RASTER_QUAD_OUTPUT_RATE-1:0],
    output logic valid[RASTER_QUAD_OUTPUT_RATE-1:0]
);

    // Local parameter setup
    localparam RASTER_QUAD_NUM           = RASTER_BLOCK_SIZE/2;                       // quads per block edge
    localparam RASTER_QUAD_SPACE         = RASTER_QUAD_NUM*RASTER_QUAD_NUM;           // quads per block
    localparam RASTER_QUAD_ARBITER_RANGE = RASTER_QUAD_SPACE/RASTER_QUAD_OUTPUT_RATE; // pushes per block
    localparam ARBITER_BITS              = $clog2(RASTER_QUAD_ARBITER_RANGE) + 1;
    // x location + y location + 4-bit mask + valid bit
    localparam FIFO_DATA_WIDTH           = 2*`RASTER_TILE_DATA_BITS + 4 + 1;

    // Temporary (temp_) for combinatorial part, quad_ register for data storage
    logic [`RASTER_TILE_DATA_BITS-1:0] temp_quad_x_loc[RASTER_QUAD_SPACE-1:0],
        quad_x_loc[RASTER_QUAD_SPACE-1:0];
    logic [`RASTER_TILE_DATA_BITS-1:0] temp_quad_y_loc[RASTER_QUAD_SPACE-1:0],
        quad_y_loc[RASTER_QUAD_SPACE-1:0];
    logic [3:0] temp_quad_masks[RASTER_QUAD_SPACE-1:0],
        quad_masks[RASTER_QUAD_SPACE-1:0];

    // Edge function values at the origin of every quad
    logic signed [`RASTER_PRIMITIVE_DATA_BITS-1:0] local_edge_func_val[RASTER_QUAD_SPACE-1:0][2:0];

    // Status signal to log if it is working on valid data
    logic valid_data;

    // Fifo and arbiter signals
    logic full, push;
    logic [ARBITER_BITS-1:0] arbiter_index;

    // Per fifo signals
    logic [RASTER_QUAD_OUTPUT_RATE-1:0] full_flag, empty_flag;

    // Generate the RASTER_QUAD_NUM x RASTER_QUAD_NUM quad evaluators
    for (genvar i = 0; i < RASTER_QUAD_NUM; ++i) begin
        for (genvar j = 0; j < RASTER_QUAD_NUM; ++j) begin
            always_comb begin
                // Quad (i, j) starts 2*i pixels in x and 2*j pixels in y from the block origin
                temp_quad_x_loc[i*RASTER_QUAD_NUM+j] = x_loc + i*2;
                temp_quad_y_loc[i*RASTER_QUAD_NUM+j] = y_loc + j*2;
                // Step every edge function from the block origin to the quad origin
                for (integer k = 0; k < 3; ++k)
                    local_edge_func_val[i*RASTER_QUAD_NUM+j][k] = edge_func_val[k] + i*2*edges[k][0] + j*2*edges[k][1];
            end

            VX_raster_qe qe (
                .edges(edges),
                .edge_func_val(local_edge_func_val[i*RASTER_QUAD_NUM+j]),
                .masks(temp_quad_masks[i*RASTER_QUAD_NUM+j])
            );
        end
    end

    // Store the temp results in registers
    for (genvar i = 0; i < RASTER_QUAD_SPACE; ++i) begin
        // Capture only on input_valid so later changes of the inputs do not
        // overwrite the data that is still being streamed out
        always @(posedge clk) begin
            if (input_valid == 1) begin
                quad_x_loc[i] <= temp_quad_x_loc[i];
                quad_y_loc[i] <= temp_quad_y_loc[i];
                quad_masks[i] <= temp_quad_masks[i];
            end
        end
    end

    // Simple arbiter implementation: arbiter_index selects which group of
    // RASTER_QUAD_OUTPUT_RATE quads is pushed into the FIFOs this cycle.
    always @(posedge clk) begin
        // Reset condition: park the index on the last group
        if (reset == 1) begin
            arbiter_index <= RASTER_QUAD_ARBITER_RANGE[ARBITER_BITS-1:0] - 1;
            valid_data    <= 0;
        end
        // Initialization condition: a new block restarts the sequence
        else if (input_valid == 1) begin
            arbiter_index <= 0;
            valid_data    <= 1;
        end
        // Arbitration condition: advance after every successful push
        else if (full == 0 && push == 1)
            arbiter_index <= arbiter_index + 1;
        else if (ready)
            valid_data <= 0;
    end

    // NOTE: push is not qualified by valid_data, so entries written while
    // valid_data is low carry a cleared valid bit and are masked at the output.
    assign push  = (arbiter_index < (RASTER_QUAD_ARBITER_RANGE[ARBITER_BITS-1:0])) && !full;
    assign ready = (arbiter_index >= (RASTER_QUAD_ARBITER_RANGE[ARBITER_BITS-1:0]-1)) && !full;

    // Generate the required number of FIFOs
    for (genvar i = 0; i < RASTER_QUAD_OUTPUT_RATE; ++i) begin
        // Quad queue
        logic [FIFO_DATA_WIDTH-1:0] fifo_push_data, fifo_pop_data;

        // When the selected quad index is out of range, drive an all-zero word
        // (valid bit cleared). The previous high-Z value is not synthesizable on
        // an internal bus and would propagate X through the valid bit.
        assign fifo_push_data = (arbiter_index*RASTER_QUAD_OUTPUT_RATE + i) < RASTER_QUAD_SPACE ?
            {
                quad_x_loc[arbiter_index*RASTER_QUAD_OUTPUT_RATE + i],
                quad_y_loc[arbiter_index*RASTER_QUAD_OUTPUT_RATE + i],
                quad_masks[arbiter_index*RASTER_QUAD_OUTPUT_RATE + i],
                valid_data
            } : {FIFO_DATA_WIDTH{1'b0}};

        logic fifo_valid;
        assign {out_quad_x_loc[i], out_quad_y_loc[i], out_quad_masks[i], fifo_valid} = fifo_pop_data;
        // An output is valid only if it was pushed as valid and its FIFO holds data
        assign valid[i] = fifo_valid && !empty_flag[i];

        VX_fifo_queue #(
            .DATAW   (FIFO_DATA_WIDTH),
            .SIZE    (RASTER_QUAD_FIFO_DEPTH),
            .OUT_REG (1)
        ) tile_fifo_queue (
            .clk      (clk),
            .reset    (reset),
            .push     (push),
            .pop      (pop),
            .data_in  (fifo_push_data),
            .data_out (fifo_pop_data),
            .full     (full_flag[i]),
            .empty    (empty_flag[i]),
            `UNUSED_PIN (alm_full),
            `UNUSED_PIN (alm_empty),
            `UNUSED_PIN (size)
        );
    end

    // All FIFOs share the same push/pop strobes, so the flags are reduced together
    assign full  = &(full_flag);
    assign empty = &(empty_flag);

endmodule

View file

@ -1,52 +1,69 @@
`include "VX_raster_define.vh"
module VX_raster_dcr (
module VX_raster_dcr #(
parameter CORE_ID = 0
// TODO
) (
input wire clk,
input wire reset,
// Inputs
input wire dcr_wr_valid,
input wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr,
input wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data,
// Inputs
VX_raster_dcr_if.slave raster_dcr_if,
// TODO: Not used
//VX_raster_req_if.slave raster_req_if,
// Output
VX_raster_dcr_if.master raster_dcr_if
output raster_dcrs_t raster_dcrs
);
raster_dcrs_t dcrs;
// DCRs write
// DCR registers
raster_dcrs_t reg_dcrs;
// DCR read
// DCR write port: on reset every register field is cleared; otherwise a write
// on raster_dcr_if updates the field(s) selected by write_addr.
always @(posedge clk) begin
    if (reset) begin
        reg_dcrs.pidx_addr   <= 0;
        reg_dcrs.pidx_size   <= 0;
        reg_dcrs.pbuf_addr   <= 0; // was missing from the reset list
        reg_dcrs.pbuf_stride <= 0;
        reg_dcrs.tile_left   <= 0;
        reg_dcrs.tile_top    <= 0;
        reg_dcrs.tile_width  <= 0;
        reg_dcrs.tile_height <= 0;
    end else if (raster_dcr_if.write_enable) begin
        case (raster_dcr_if.write_addr)
            `DCR_RASTER_PIDX_ADDR: begin
                reg_dcrs.pidx_addr <= raster_dcr_if.write_data[`RASTER_DCR_DATA_BITS-1:0];
            end
            `DCR_RASTER_PIDX_SIZE: begin
                reg_dcrs.pidx_size <= raster_dcr_if.write_data[`RASTER_DCR_DATA_BITS-1:0];
            end
            `DCR_RASTER_PBUF_ADDR: begin
                reg_dcrs.pbuf_addr <= raster_dcr_if.write_data[`RASTER_DCR_DATA_BITS-1:0];
            end
            `DCR_RASTER_PBUF_STRIDE: begin
                // Slice is [BITS-1:0]; the previous [BITS:0] read one bit too many
                reg_dcrs.pbuf_stride <= raster_dcr_if.write_data[`RASTER_DCR_DATA_BITS-1:0];
            end
            `DCR_RASTER_TILE_XY: begin
                // Low half carries the left coordinate, high half the top coordinate
                reg_dcrs.tile_left <= raster_dcr_if.write_data[`RASTER_TILE_DATA_BITS-1:0];
                reg_dcrs.tile_top  <= raster_dcr_if.write_data[`RASTER_DCR_DATA_BITS-1:`RASTER_TILE_DATA_BITS];
            end
            `DCR_RASTER_TILE_WH: begin
                // Same packing as TILE_XY: width in the low half, height in the high half.
                // NOTE(review): packing mirrored from TILE_XY - confirm against the driver.
                reg_dcrs.tile_width  <= raster_dcr_if.write_data[`RASTER_TILE_DATA_BITS-1:0];
                reg_dcrs.tile_height <= raster_dcr_if.write_data[`RASTER_DCR_DATA_BITS-1:`RASTER_TILE_DATA_BITS];
            end
            default:;
        endcase
    end
end
// DCRs read
assign raster_dcr_if.data = dcrs;
// Data write to output
assign raster_dcrs = reg_dcrs;
`ifdef DBG_TRACE_RASTER
always @(posedge clk) begin
if (dcr_wr_valid) begin
dpi_trace("%d: raster-dcr: state=", $time);
trace_raster_state(dcr_wr_addr);
dpi_trace(", data=0x%0h\n", dcr_wr_data);
if (raster_dcr_if.write_enable) begin
dpi_trace("%d: core%0d-raster-dcr: state=", $time, CORE_ID);
trace_raster_state(raster_dcr_if.write_addr);
dpi_trace(", data=0x%0h (#%0d)\n", raster_dcr_if.write_data, raster_dcr_if.write_uuid);
end
end
`endif

View file

@ -3,6 +3,11 @@
`include "VX_define.vh"
`define RASTER_ADDR_BITS 32
`define RASTER_DCR_DATA_BITS 32
`define RASTER_TILE_DATA_BITS 16
`define RASTER_PRIMITIVE_DATA_BITS 32
task trace_raster_state (
input [`DCR_ADDR_BITS-1:0] state
);

View file

@ -1,6 +1,11 @@
`include "VX_raster_define.vh"
module VX_raster_fetch #(
// Module for primitive fetch
// Description: Performs strided fetch
// of primitive data from the buffer
module VX_raster_fetch #(
parameter CORE_ID = 0
// TODO
) (
input wire clk,

View file

@ -1,13 +1,35 @@
// Quad evaluator block
// Functionality: Receives a 2x2 quad with primitive information
// check whether quad pixels are within the primitive
`include "VX_raster_define.vh"
module VX_raster_qe #(
// TODO
) (
input wire clk,
input wire reset
// TODO
module VX_raster_qe (
    // Edge equations of the primitive: edges[e] holds the coefficients of edge e
    input logic signed [`RASTER_PRIMITIVE_DATA_BITS-1:0] edges[2:0][2:0],
    // Edge function values at the quad origin, one per edge
    input logic signed [`RASTER_PRIMITIVE_DATA_BITS-1:0] edge_func_val[2:0],
    // Coverage mask of the 2x2 quad, bit (2*row + col) per pixel
    output logic [3:0] masks
);

    // Edge function values stepped to each of the 4 pixels (0,0) (0,1) (1,0) (1,1)
    logic signed [`RASTER_PRIMITIVE_DATA_BITS-1:0] new_edge_val [2:0][1:0][1:0];

    generate
        for (genvar row = 0; row < 2; ++row) begin
            for (genvar col = 0; col < 2; ++col) begin
                always_comb begin
                    integer e;
                    // Offset every edge function by the pixel position inside the quad
                    for (e = 0; e < 3; ++e)
                        new_edge_val[e][row][col] = edge_func_val[e]
                                                  + row*edges[e][0]
                                                  + col*edges[e][1];

                    // A pixel is covered when all three edge functions are non-negative
                    if (new_edge_val[0][row][col] >= 0
                     && new_edge_val[1][row][col] >= 0
                     && new_edge_val[2][row][col] >= 0) begin
                        masks[row*2 + col] = 1;
                    end else begin
                        masks[row*2 + col] = 0;
                    end
                end
            end
        end
    endgenerate

endmodule

View file

@ -1,6 +1,7 @@
`include "VX_raster_define.vh"
module VX_raster_req_switch #(
parameter CORE_ID = 0
// TODO
) (
input wire clk,

View file

@ -1,6 +1,7 @@
`include "VX_raster_define.vh"
module VX_raster_rsp_switch #(
module VX_raster_rsp_switch #(
parameter CORE_ID = 0
// TODO
) (
input wire clk,

View file

@ -1,6 +1,10 @@
`include "VX_raster_define.vh"
module VX_raster_setup #(
// Module for triangle setup
// Description: Perform edge equation computation
module VX_raster_setup #(
parameter CORE_ID = 0
// TODO
) (
input wire clk,

View file

@ -1,13 +1,268 @@
// Raster slice
// Functionality:
// 1. Recursive descent
// 2. Tile evaluation
// 3. Quad evaluation and storage
// 4. Return the quad(s)
`include "VX_raster_define.vh"
// Raster slice: recursive-descent rasterizer for a single tile.
// A tile is loaded from the inputs, evaluated by VX_raster_te and either
// sub-divided into 4 sub-tiles (queued in VX_raster_te_arbiter) or, once it
// reaches block size, queued for VX_raster_be which emits the quads.
module VX_raster_slice #(
    parameter RASTER_BLOCK_SIZE       = 8,
    parameter RASTER_TILE_SIZE        = 16,
    parameter RASTER_QUAD_OUTPUT_RATE = 4,
    parameter RASTER_QUAD_FIFO_DEPTH  = 64,
    parameter RASTER_TILE_FIFO_DEPTH  = 16
) (
    // Standard inputs
    input logic clk, reset,
    // To indicate valid input provided
    input logic input_valid,
    // Tile information
    input logic [`RASTER_TILE_DATA_BITS-1:0] x_loc, y_loc,
    // Primitive information
    input logic signed [`RASTER_PRIMITIVE_DATA_BITS-1:0] edges[2:0][2:0],
    input logic signed [`RASTER_PRIMITIVE_DATA_BITS-1:0] edge_func_val[2:0],
    input logic signed [`RASTER_PRIMITIVE_DATA_BITS-1:0] extents[2:0],
    // Hand-shaking signals
    input logic pop_quad,
    output logic ready, quad_queue_empty,
    // Output quad data
    output logic [`RASTER_TILE_DATA_BITS-1:0] out_quad_x_loc[RASTER_QUAD_OUTPUT_RATE-1:0],
        out_quad_y_loc[RASTER_QUAD_OUTPUT_RATE-1:0],
    output logic [3:0] out_quad_masks[RASTER_QUAD_OUTPUT_RATE-1:0],
    output logic valid[RASTER_QUAD_OUTPUT_RATE-1:0]
);

    localparam RASTER_LEVEL_DATA_BITS = $clog2(RASTER_TILE_SIZE/RASTER_BLOCK_SIZE) + 1;
    localparam RASTER_FIFO_DATA_WIDTH = (RASTER_LEVEL_DATA_BITS + 2*`RASTER_TILE_DATA_BITS + 3*`RASTER_PRIMITIVE_DATA_BITS);
    localparam BLOCK_FIFO_DATA_WIDTH  = 2*`RASTER_TILE_DATA_BITS + 3*`RASTER_PRIMITIVE_DATA_BITS;

    // Store data which will stay same for tile throughout operation
    logic signed [`RASTER_PRIMITIVE_DATA_BITS-1:0] global_extents[2:0];
    logic signed [`RASTER_PRIMITIVE_DATA_BITS-1:0] global_edges[2:0][2:0];

    // Store the tile relevant data as global regs as TE is combinatorial
    logic [`RASTER_TILE_DATA_BITS-1:0]             tile_x_loc, tile_y_loc;
    logic [RASTER_LEVEL_DATA_BITS-1:0]             level;
    logic signed [`RASTER_PRIMITIVE_DATA_BITS-1:0] tile_edge_func_val[2:0];

    // Level assigned to the sub-tiles of the tile currently being evaluated
    logic [RASTER_LEVEL_DATA_BITS-1:0] level_1;
    assign level_1 = level + 1;

    // Control signals
    logic valid_tile, valid_block;
    logic fifo_full, fifo_empty, fifo_tile_valid;
    logic block_fifo_full, block_fifo_empty;
    logic stall;
    logic be_ready; // to track the status of the block evaluator
    logic done;     // set once the last block of the current tile has been queued

    // Incoming tile data from fifo
    // (edge function values are signed everywhere so that the unpacked array
    //  assignments and port connections below use matching element types)
    logic [`RASTER_TILE_DATA_BITS-1:0]             fifo_tile_x_loc, fifo_tile_y_loc;
    logic [RASTER_LEVEL_DATA_BITS-1:0]             fifo_tile_level;
    logic signed [`RASTER_PRIMITIVE_DATA_BITS-1:0] fifo_tile_edge_func_val[2:0];

    // Sub-tile data output from tile-evaluator
    logic [`RASTER_TILE_DATA_BITS-1:0]             subtile_x_loc[3:0], subtile_y_loc[3:0];
    logic signed [`RASTER_PRIMITIVE_DATA_BITS-1:0] subtile_edge_func_val[3:0][2:0];

    // Block evaluator data
    logic [`RASTER_TILE_DATA_BITS-1:0]             be_in_x_loc, be_in_y_loc;
    logic signed [`RASTER_PRIMITIVE_DATA_BITS-1:0] be_in_edge_func_val[2:0];

    // Stall used to wait for block queue to complete run if another needs to be inserted
    assign stall = (valid_block == 1 && block_fifo_full == 1);

    // Tile data selector to choose tile data from:
    //  1. Input tile
    //  2. Sub-tile from the queue
    //  3. Sub-tile forwarded directly for the first iteration: the input tile
    //     has just been divided into 4 but the sub-tile fifo is still empty,
    //     so sub-tile 0 is bypassed here instead of wasting a cycle.
    // NOTE(review): tile_x_loc, tile_y_loc and level are not reset (as in the
    // original); they are first written when an input tile is accepted.
    always @(posedge clk) begin
        if (reset) begin
            // Reset all globals and signals
            for (integer i = 0; i < 3; ++i) begin
                global_extents[i]     <= 0;
                tile_edge_func_val[i] <= 0;
                for (integer j = 0; j < 3; ++j) begin
                    global_edges[i][j] <= 0;
                end
            end
        end
        else if (stall == 0) begin
            // if slice ready and input valid, read data from input
            if (ready == 1 && input_valid == 1) begin
                tile_x_loc         <= x_loc;
                tile_y_loc         <= y_loc;
                level              <= 0;
                tile_edge_func_val <= edge_func_val;
                // Update globals
                global_extents     <= extents;
                global_edges       <= edges;
            end
            // sub-tile rerouter, used only once for the parent tile
            else if (level == 0 && fifo_empty == 1 && fifo_tile_valid == 0) begin
                tile_x_loc         <= subtile_x_loc[0];
                tile_y_loc         <= subtile_y_loc[0];
                level              <= level_1;
                tile_edge_func_val <= subtile_edge_func_val[0];
            end
            // else read from the fifo if it is not empty and the fifo tile is valid
            else if (fifo_empty == 0 && fifo_tile_valid == 1) begin
                tile_x_loc         <= fifo_tile_x_loc;
                tile_y_loc         <= fifo_tile_y_loc;
                level              <= fifo_tile_level;
                tile_edge_func_val <= fifo_tile_edge_func_val;
            end
        end
    end

    // Decide the ready flag:
    //  1. Sub-tile FIFO empty
    //  2. Block FIFO empty
    //  3. Tile evaluator is not producing sub-tiles
    assign ready = (fifo_empty == 1) && (block_fifo_empty == 1) && (valid_tile == 0);

    /**********************************
            TILE EVALUATOR
    ***********************************/
    VX_raster_te #(
        .RASTER_TILE_SIZE       (RASTER_TILE_SIZE),
        .RASTER_BLOCK_SIZE      (RASTER_BLOCK_SIZE),
        .RASTER_LEVEL_DATA_BITS (RASTER_LEVEL_DATA_BITS)
    ) tile_evaluator (
        .level              (level),
        .x_loc              (tile_x_loc),
        .y_loc              (tile_y_loc),
        .edges              (global_edges),
        .edge_func_val      (tile_edge_func_val),
        .extents            (global_extents),
        .valid_tile         (valid_tile),
        .valid_block        (valid_block),
        .tile_x_loc         (subtile_x_loc),
        .tile_y_loc         (subtile_y_loc),
        .tile_edge_func_val (subtile_edge_func_val)
    );

    /**********************************
            TILE ARBITER
    ***********************************/

    // Create mask for sub-tile push into fifo: sub-tile 0 of the parent tile
    // (level 0) is forwarded directly by the rerouter above, so it is not queued
    logic [3:0] fifo_push_mask;
    for (genvar i = 0; i < 4; ++i) begin
        assign fifo_push_mask[i] = ~(i == 0 && level == 0);
    end

    // Create data_push data
    logic [RASTER_FIFO_DATA_WIDTH-1:0] fifo_data_push[3:0];
    for (genvar i = 0; i < 4; ++i) begin
        assign fifo_data_push[i] = {level_1, subtile_x_loc[i], subtile_y_loc[i],
            subtile_edge_func_val[i][0], subtile_edge_func_val[i][1],
            subtile_edge_func_val[i][2]};
    end

    logic [RASTER_FIFO_DATA_WIDTH-1:0] fifo_data_pop;
    // Create sub-tile data from the fifo output
    assign {fifo_tile_level, fifo_tile_x_loc, fifo_tile_y_loc, fifo_tile_edge_func_val[0],
        fifo_tile_edge_func_val[1], fifo_tile_edge_func_val[2]} = fifo_data_pop;

    // Assert that fifo cannot be full when tile is valid
    always_comb
        `ASSERT(!(fifo_full == 1 && valid_tile == 1), ("Raster insufficient subtile fifo depth"));
    // NOTE: condition not added in fifo_push check as it will lead to deadlock => Assertion added

    // Set the pop logic based on stall: if not stalled & not empty, then it will definitely pop
    logic [3:0] fifo_pop, fifo_index_onehot;
    for (genvar i = 0; i < 4; ++i) begin
        // Updated based on conditions and from the onehot received from arbiter
        assign fifo_pop[i] = (stall == 0) && (fifo_empty == 0) && (fifo_index_onehot[i] == 1);
    end

    VX_raster_te_arbiter #(
        .RASTER_TILE_SIZE  (RASTER_TILE_SIZE),
        .RASTER_BLOCK_SIZE (RASTER_BLOCK_SIZE)
    ) tile_arbiter (
        .clk               (clk),
        .reset             (reset),
        .fifo_push         ({4{valid_tile}} & fifo_push_mask), // Push only tiles, not blocks
        .fifo_pop          (fifo_pop),
        .data_push         (fifo_data_push),
        .data_pop          (fifo_data_pop),
        .fifo_full         (fifo_full),
        .fifo_empty        (fifo_empty),
        .fifo_data_valid   (fifo_tile_valid),
        .fifo_index_onehot (fifo_index_onehot)
    );

    /**********************************
            BLOCK EVALUATOR
    ***********************************/

    logic be_fifo_pop;
    assign be_fifo_pop = (be_ready == 1 && block_fifo_empty == 0);

    // Stop pushing the last block back in.
    // 'done' has a single driver here (it used to be written from two always
    // blocks). It is cleared on reset and whenever a new input tile is
    // accepted, so the slice can process more than one tile per reset, and it
    // is only set in the cycle in which the last block is actually pushed
    // (block FIFO not full), so that block is not lost while stalled.
    always @(posedge clk) begin
        if (reset) begin
            done <= 0;
        end
        else if (stall == 0 && ready == 1 && input_valid == 1) begin
            done <= 0;
        end
        // the current block is the last one: nothing queued and no sub-tiles produced
        else if (fifo_empty == 1 && valid_tile == 0 && valid_block == 1 && block_fifo_full == 0) begin
            done <= 1;
        end
    end

    // Block fifo
    VX_fifo_queue #(
        .DATAW   (BLOCK_FIFO_DATA_WIDTH),
        .SIZE    (RASTER_TILE_FIFO_DEPTH),
        .OUT_REG (1)
    ) block_fifo_queue (
        .clk      (clk),
        .reset    (reset),
        .push     (valid_block == 1 && block_fifo_full == 0 && done == 0),
        .pop      (be_fifo_pop),
        .data_in  ({
            tile_x_loc, tile_y_loc,
            tile_edge_func_val[0], tile_edge_func_val[1], tile_edge_func_val[2]
        }),
        .data_out ({
            be_in_x_loc, be_in_y_loc,
            be_in_edge_func_val[0], be_in_edge_func_val[1], be_in_edge_func_val[2]
        }),
        .full     (block_fifo_full),
        .empty    (block_fifo_empty),
        `UNUSED_PIN (alm_full),
        `UNUSED_PIN (alm_empty),
        `UNUSED_PIN (size)
    );

    VX_raster_be #(
        .RASTER_BLOCK_SIZE       (RASTER_BLOCK_SIZE),
        .RASTER_QUAD_OUTPUT_RATE (RASTER_QUAD_OUTPUT_RATE),
        .RASTER_QUAD_FIFO_DEPTH  (RASTER_QUAD_FIFO_DEPTH)
    ) block_evaluator (
        .clk            (clk),
        .reset          (reset),
        .input_valid    (be_fifo_pop),
        .x_loc          (be_in_x_loc),
        .y_loc          (be_in_y_loc),
        .edges          (global_edges),
        .edge_func_val  (be_in_edge_func_val),
        .out_quad_x_loc (out_quad_x_loc),
        .out_quad_y_loc (out_quad_y_loc),
        .out_quad_masks (out_quad_masks),
        .valid          (valid),
        .ready          (be_ready),
        .pop            (pop_quad),
        .empty          (quad_queue_empty)
    );

endmodule

View file

@ -0,0 +1,77 @@
// Tile evaluator block
// Functionality: Evaluates the input tile to check:
// 1. If it is valid => overlaps triangle
// 2. If it is a block
// 3. Else divides it into 4
`include "VX_raster_define.vh"
// Tile evaluator (purely combinational).
// For the tile at (x_loc, y_loc) on recursion level 'level' it reports:
//   valid_tile  = 1 : the tile passes the edge test and is still larger than a
//                     block; tile_x_loc / tile_y_loc / tile_edge_func_val carry
//                     its 4 sub-tiles
//   valid_block = 1 : the tile passes the edge test and has reached block size
//   both 0          : the tile fails the edge test
module VX_raster_te #(
    parameter RASTER_TILE_SIZE       = 16,
    parameter RASTER_BLOCK_SIZE      = 4,
    parameter RASTER_LEVEL_DATA_BITS = ($clog2(RASTER_TILE_SIZE/RASTER_BLOCK_SIZE) + 1)
) (
    // Level value in recursive descent
    input logic [RASTER_LEVEL_DATA_BITS-1:0] level,
    // Tile data
    input logic [`RASTER_TILE_DATA_BITS-1:0] x_loc, y_loc,
    // Primitive data
    // edge equation data for the 3 edges and ax+by+c
    input logic signed [`RASTER_PRIMITIVE_DATA_BITS-1:0] edges[2:0][2:0],
    // edge function computation value propagated
    input logic signed [`RASTER_PRIMITIVE_DATA_BITS-1:0] edge_func_val[2:0],
    input logic signed [`RASTER_PRIMITIVE_DATA_BITS-1:0] extents[2:0],
    // Status signals
    output logic valid_tile, valid_block,
    // Sub-tile related data
    output logic [`RASTER_TILE_DATA_BITS-1:0] tile_x_loc[3:0],
        tile_y_loc[3:0],
    output logic signed [`RASTER_PRIMITIVE_DATA_BITS-1:0] tile_edge_func_val[3:0][2:0]
);

    localparam RASTER_TILE_FIFO_DEPTH = RASTER_TILE_SIZE/RASTER_BLOCK_SIZE;
    localparam RASTER_TILE_SIZE_BITS  = $clog2(RASTER_TILE_SIZE);
    localparam RASTER_BLOCK_SIZE_BITS = $clog2(RASTER_BLOCK_SIZE);

    // Check if primitive within tile.
    // The arithmetic shift (>>>) keeps the sign of the sum; a logical shift
    // would clear the sign bit for level > 0 and make the "< 0" tests below
    // always false.
    logic signed [`RASTER_PRIMITIVE_DATA_BITS-1:0] eval0, eval1, eval2;
    assign eval0 = (edge_func_val[0] + extents[0]) >>> level;
    assign eval1 = (edge_func_val[1] + extents[1]) >>> level;
    assign eval2 = (edge_func_val[2] + extents[2]) >>> level;

    // Sub-tile specs info
    logic [`RASTER_TILE_DATA_BITS-1:0] sub_tile_size; // sub-tile edge length in pixels
    logic [`RASTER_TILE_DATA_BITS-1:0] sub_tile_bits; // log2 of sub_tile_size

    always_comb begin
        // Defaults on every path so that no latch is inferred
        valid_block   = 0;
        sub_tile_bits = '0;
        sub_tile_size = '0;
        for (integer i = 0; i < 4; ++i) begin
            tile_x_loc[i] = '0;
            tile_y_loc[i] = '0;
            for (integer k = 0; k < 3; ++k) begin
                tile_edge_func_val[i][k] = '0;
            end
        end

        // Check if tile has triangle: all three edge tests must be non-negative
        valid_tile = !((eval0 < 0) || (eval1 < 0) || (eval2 < 0));

        // If tile valid => sub-divide into sub-tiles
        if (valid_tile) begin
            sub_tile_bits = `RASTER_TILE_DATA_BITS'(RASTER_TILE_SIZE_BITS) - `RASTER_TILE_DATA_BITS'(level) - 1;
            sub_tile_size = 1 << sub_tile_bits;
            if (sub_tile_bits >= `RASTER_TILE_DATA_BITS'(RASTER_BLOCK_SIZE_BITS)) begin
                // divide into sub tiles as still bigger than block
                valid_block = 0;
                // generate sub-tile data
                for (integer i = 0; i < 2; ++i) begin
                    for (integer j = 0; j < 2; ++j) begin
                        tile_x_loc[i*2+j] = x_loc + `RASTER_TILE_DATA_BITS'(i)*sub_tile_size;
                        tile_y_loc[i*2+j] = y_loc + `RASTER_TILE_DATA_BITS'(j)*sub_tile_size;
                        // Step every edge function by the sub-tile offset
                        for (integer k = 0; k < 3; ++k) begin
                            tile_edge_func_val[i*2+j][k] = edge_func_val[k]
                                + i*(edges[k][0] << sub_tile_bits)
                                + j*(edges[k][1] << sub_tile_bits);
                        end
                    end
                end
            end
            else begin
                // run block evaluator on valid block
                valid_block = 1;
                // Deassert valid_tile so that it tells whether it generated a block or tile or neither
                valid_tile = 0;
            end
        end
    end

endmodule

View file

@ -0,0 +1,71 @@
// Rasterization tile arbiter
// Functionality: Stores 4 sub-tiles and returns 1 based on arbitration
`include "VX_raster_define.vh"
// Sub-tile arbiter: four FIFOs (one per sub-tile slot) feeding a single output.
// An arbiter picks one non-empty FIFO; its head entry is presented on data_pop
// and its index is reported one-hot on fifo_index_onehot.
//
// Parameters:
//   RASTER_TILE_SIZE / RASTER_BLOCK_SIZE - their ratio sets the depth of each FIFO
//   RASTER_LEVEL_DATA_BITS               - width of the level field
//   RASTER_FIFO_DATA_WIDTH               - width of one FIFO entry
module VX_raster_te_arbiter #(
    parameter RASTER_TILE_SIZE       = 16,
    parameter RASTER_BLOCK_SIZE      = 4,
    parameter RASTER_LEVEL_DATA_BITS = $clog2(RASTER_TILE_SIZE/RASTER_BLOCK_SIZE) + 1,
    parameter RASTER_FIFO_DATA_WIDTH = (RASTER_LEVEL_DATA_BITS + 2*`RASTER_TILE_DATA_BITS + 3*`RASTER_PRIMITIVE_DATA_BITS)
) (
    input logic clk, reset,
    // Per-FIFO push / pop strobes (bit i drives FIFO i)
    input logic [3:0]                           fifo_push, fifo_pop,
    input logic [RASTER_FIFO_DATA_WIDTH-1:0]    data_push[3:0],
    // Head entry of the FIFO selected by the arbiter
    output logic [RASTER_FIFO_DATA_WIDTH-1:0]   data_pop,
    output logic [3:0]                          fifo_index_onehot,
    output logic                                fifo_full, fifo_empty, fifo_data_valid
);
    localparam RASTER_TILE_FIFO_DEPTH = RASTER_TILE_SIZE/RASTER_BLOCK_SIZE;

    // Per FIFO flags
    logic [3:0] empty_flag, full_flag;
    logic [RASTER_FIFO_DATA_WIDTH-1:0] data_pop_array[3:0];

    // Index selected from arbitration
    logic [1:0] fifo_index;

    // Generate 4 queues for 4 sub-tiles
    for (genvar i = 0; i < 4; ++i) begin
        // Sub-tile queue
        VX_fifo_queue #(
            .DATAW   (RASTER_FIFO_DATA_WIDTH),
            .SIZE    (RASTER_TILE_FIFO_DEPTH),
            .OUT_REG (1)
        ) tile_fifo_queue (
            .clk      (clk),
            .reset    (reset),
            .push     (fifo_push[i]),
            .pop      (fifo_pop[i]),
            .data_in  (data_push[i]),
            .data_out (data_pop_array[i]),
            .full     (full_flag[i]),
            .empty    (empty_flag[i]),
            `UNUSED_PIN (alm_full),
            `UNUSED_PIN (alm_empty),
            `UNUSED_PIN (size)
        );
    end

    // Aggregate flags: asserted only when ALL four FIFOs are empty / full.
    // NOTE(review): fifo_full stays low while a single FIFO is full - confirm
    // that callers never push into an individually full FIFO.
    assign fifo_empty = &empty_flag;
    assign fifo_full  = &full_flag;

    // Arbitrate over the available entries to pop and generate index to pop for sub-tile
    VX_fair_arbiter #(
        .NUM_REQS (4)   // no trailing comma: it is a syntax error in a parameter list
    ) tile_fifo_arbiter (
        .clk          (clk),
        .reset        (reset),
        .enable       (!fifo_empty),
        .requests     (~empty_flag),
        .grant_index  (fifo_index),
        .grant_onehot (fifo_index_onehot),
        .grant_valid  (fifo_data_valid)
    );

    assign data_pop = data_pop_array[fifo_index];
endmodule

View file

@ -3,15 +3,17 @@
`include "VX_define.vh"
`define RASTER_DIM_BITS 15
// Shared type definitions for the raster unit.
package raster_types;
// Raster device configuration registers, packed into a single struct.
// NOTE(review): pbuf_addr / pbuf_stride appear twice below with different
// widths; this looks like old and new diff lines shown together - confirm
// against the actual file before relying on the field list.
typedef struct packed {
logic [31:0] tbuf_addr;
logic [31:0] tile_count;
logic [31:0] pbuf_addr;
logic [31:0] pbuf_stride;
logic [`RASTER_DCR_DATA_BITS-1:0] pidx_addr; // Fetch index for pixels
logic [`RASTER_DCR_DATA_BITS-1:0] pidx_size; // Fetch size for pixels
logic [`RASTER_DCR_DATA_BITS-1:0] pbuf_addr; // Primitive (triangle) data buffer start address
logic [`RASTER_DCR_DATA_BITS-1:0] pbuf_stride; // Primitive data stride to fetch vertices
logic [`RASTER_TILE_DATA_BITS-1:0] tile_left; // left location of input tile
logic [`RASTER_TILE_DATA_BITS-1:0] tile_top; // top location of input tile
logic [`RASTER_TILE_DATA_BITS-1:0] tile_width; // width of input tile
logic [`RASTER_TILE_DATA_BITS-1:0] tile_height; // height of input tile
} raster_dcrs_t;
endpackage

View file

@ -1,7 +1,15 @@
`include "VX_raster_define.vh"
module VX_raster_unit #(
parameter NUM_SLICES = 1
// Top unit for the raster unit
// Instantiates the following modules:
// 1. DCR connections
// 2. Requests switch
// 3. Raster slices
// 4. Response switch
module VX_raster_unit #(
parameter CORE_ID = 0,
parameter NUM_SLICES = 1 // number of raster slices
// TODO
) (
input wire clk,
@ -23,11 +31,53 @@ module VX_raster_unit #(
// Outputs
VX_raster_rsp_if.master raster_rsp_if
);
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
// TODO: remove
raster_dcrs_t raster_dcrs = raster_dcr_if.data;
raster_dcrs_t raster_dcrs;
// Raster unit dcr block
VX_raster_dcr #(
.CORE_ID (CORE_ID)
) raster_dcr (
.clk (clk),
.reset (reset),
// inputs
.raster_dcr_if (raster_dcr_if),
// TODO: Remove if not used
//.raster_req_if (raster_req_if),
// outputs
.raster_dcrs (raster_dcrs)
);
// TODO: Add requests switch here
// Placeholder instance: only clk/reset are connected so far.
VX_raster_req_switch #(
    .CORE_ID (CORE_ID)
) raster_req_switch (
    .clk   (clk),
    .reset (reset)
);

// TODO: Add raster slices in generate block here
// One slice instance per NUM_SLICES; only clk/reset are connected so far.
for (genvar i = 0; i < NUM_SLICES; ++i) begin
    VX_raster_slice #(
        .CORE_ID (CORE_ID)
    ) raster_slice (
        .clk   (clk),
        .reset (reset)
    );
end

// TODO: Add response switch here
// Placeholder instance: only clk/reset are connected so far.
VX_raster_rsp_switch #(
    .CORE_ID (CORE_ID)
) raster_rsp_switch (
    .clk   (clk),
    .reset (reset)
);
// TODO: remove
`UNUSED_VAR (raster_dcrs)
// TODO: remove
@ -52,6 +102,12 @@ module VX_raster_unit #(
assign raster_rsp_if.rem = 0;
`UNUSED_VAR (raster_rsp_if.ready)
// TODO: remove
`UNUSED_VAR (raster_dcr_if.write_enable);
`UNUSED_VAR (raster_dcr_if.write_addr);
`UNUSED_VAR (raster_dcr_if.write_data);
`UNUSED_VAR (raster_dcr_if.write_uuid);
// TODO: remove
assign perf_raster_if.mem_reads = 0;
assign perf_raster_if.mem_latency = 0;

View file

@ -0,0 +1,76 @@
PARAMS ?=
DEBUG ?= 1
RTL_DIR = ../../../rtl
DPI_DIR = ../../../dpi
# control RTL debug tracing states
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_PIPELINE
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_ICACHE
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_DCACHE
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_MEM
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_BANK
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_MSHR
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_TAG
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_DATA
DBG_TRACE_FLAGS += -DDBG_TRACE_AFU
DBG_TRACE_FLAGS += -DDBG_TRACE_SCOPE
DBG_TRACE_FLAGS += -DDBG_TRACE_TEX
DBG_TRACE_FLAGS += -DDBG_TRACE_RASTER
DBG_TRACE_FLAGS += -DDBG_TRACE_ROP
DBG_FLAGS += $(DBG_TRACE_FLAGS)
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
TEX_INCLUDE = -I$(RTL_DIR)/tex_unit
RASTER_INCLUDE = -I$(RTL_DIR)/raster_unit
ROP_INCLUDE = -I$(RTL_DIR)/rop_unit
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/simulate $(FPU_INCLUDE) -I$(DPI_DIR)/..
RTL_INCLUDE += $(TEX_INCLUDE) $(RASTER_INCLUDE) $(ROP_INCLUDE)
TOP = VX_raster_be
TESTBENCH = testbench.cpp
SRCS = $(TESTBENCH) $(DPI_DIR)/util_dpi.cpp
# C++ flags for the testbench.
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds -Wno-maybe-uninitialized
# Include paths are derived from DPI_DIR only. The former absolute
# /nethome/<user>/vortex-dev/hw entry was specific to one developer's machine
# (presumably the same directory as $(DPI_DIR)/..).
CXXFLAGS += -I$(DPI_DIR)/.. -I$(DPI_DIR) -I$(DPI_DIR)/../common
VL_FLAGS += --exe --cc $(TOP).sv --top-module $(TOP)
VL_FLAGS += --language 1800-2009 --assert -Wall
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += --x-initial unique --x-assign unique -Wno-UNUSED -Wno-UNDRIVEN
VL_FLAGS += $(RTL_INCLUDE)
VL_FLAGS += $(PARAMS)
# Enable Verilator multithreaded simulation
THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())')
VL_FLAGS += -j $(THREADS)
#VL_FLAGS += --threads $(THREADS)
# Debugging
ifdef DEBUG
VL_FLAGS += --trace --trace-structs -DVCD_OUTPUT $(DBG_FLAGS)
CXXFLAGS += -g -O0 -DVCD_OUTPUT $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CXXFLAGS += -O2 -DNDEBUG
endif
# ALU backend
VL_FLAGS += -DIMUL_DPI
VL_FLAGS += -DIDIV_DPI
# FPU backend
FPU_CORE ?= FPU_DPI
VL_FLAGS += -D$(FPU_CORE)
# all/run/clean never produce a file with their own name.
.PHONY: all run clean

all: $(TOP)

# Verilate and build the testbench; the executable is written next to this Makefile.
$(TOP): $(SRCS)
	verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -o ../$(TOP)

run: $(TOP)
	./$(TOP)

clean:
	rm -rf obj_dir $(TOP)

View file

@ -0,0 +1,93 @@
#include <stdlib.h>
#include <iostream>
#include <verilated.h>
#include <verilated_vcd_c.h>
#include "VVX_raster_be.h"
#include "VX_config.h"
#define MAX_SIM_TIME 200
vluint64_t sim_time = 0;
static bool trace_enabled = false;
static uint64_t trace_start_time = 0;
static uint64_t trace_stop_time = -1ull;
static uint64_t timestamp = 0;
double sc_time_stamp() {
return timestamp;
}
bool sim_trace_enabled() {
if (timestamp >= trace_start_time
&& timestamp < trace_stop_time)
return true;
return trace_enabled;
}
void sim_trace_enable (bool enable) {
trace_enabled = enable;
}
// Advance the DUT by one full clock period (two clock toggles of 10 time
// units each), dumping the waveform after every toggle, then print a one-line
// summary of the visible DUT ports.
void eval(VVX_raster_be* dut, VerilatedVcdC* m_trace)
{
    for (int half = 0; half < 2; ++half) {
        dut->clk = not dut->clk;
        dut->eval();
        timestamp += 10;
        m_trace->dump(timestamp);
    }
    // The format string must have exactly one conversion per argument: the
    // former trailing "id=%d" had no argument, and the 64-bit timestamp needs
    // a 64-bit conversion.
    printf("time=%llu clk=%d ready=%d x=%d y=%d masks=%d in_x=%d in_y=%d\n",
        static_cast<unsigned long long>(timestamp), dut->clk, dut->ready,
        dut->out_quad_x_loc, dut->out_quad_y_loc,
        dut->out_quad_masks,
        dut->x_loc, dut->y_loc);
}
// Testbench entry point for VX_raster_be: drives one primitive into the DUT,
// lets it run for ten clock periods, then pops results for five periods while
// the output queue is not empty. A VCD trace is written to waveform.vcd.
int main(int argc, char** argv, char** env) {
    (void)argc; (void)argv; (void)env; // command-line arguments are not used

    VVX_raster_be *dut = new VVX_raster_be();
    Verilated::traceEverOn(true);
    VerilatedVcdC *m_trace = new VerilatedVcdC;
    dut->trace(m_trace, 10);
    m_trace->open("waveform.vcd");

    // Stimulus: tile location, edge function values and edge equations
    dut->x_loc = 0; dut->y_loc = 256;
    dut->edge_func_val[0] = 56210; dut->edge_func_val[1] = 40000; dut->edge_func_val[2] = 5000;
    dut->edges[0][0] = -73; dut->edges[0][1] = -36; dut->edges[0][2] = 65000;
    dut->edges[1][0] = 5; dut->edges[1][1] = -89; dut->edges[1][2] = 65000;
    dut->edges[2][0] = 0; dut->edges[2][1] = 255; dut->edges[2][2] = -65000;

    // One idle period, one period in reset, then a single-period valid pulse
    dut->clk = 1;
    eval(dut, m_trace);
    dut->reset = 1;
    eval(dut, m_trace);
    dut->reset = 0;
    dut->input_valid = 1;
    eval(dut, m_trace);
    dut->input_valid = 0;

    for (int i = 0; i < 10; ++i)
    {
        eval(dut, m_trace);
    }

    // Re-evaluate pop every period: previously it was set once and stayed
    // asserted even after the queue reported empty.
    for (int i = 0; i < 5; ++i)
    {
        dut->pop = (dut->empty == 0) ? 1 : 0;
        eval(dut, m_trace);
    }
    dut->pop = 0;

    dut->final();
    m_trace->close();
    delete dut;
    delete m_trace;
    return EXIT_SUCCESS;
}

View file

@ -0,0 +1,76 @@
PARAMS ?=
DEBUG ?= 1
RTL_DIR = ../../../rtl
DPI_DIR = ../../../dpi
# control RTL debug tracing states
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_PIPELINE
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_ICACHE
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_DCACHE
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_MEM
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_BANK
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_MSHR
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_TAG
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_DATA
DBG_TRACE_FLAGS += -DDBG_TRACE_AFU
DBG_TRACE_FLAGS += -DDBG_TRACE_SCOPE
DBG_TRACE_FLAGS += -DDBG_TRACE_TEX
DBG_TRACE_FLAGS += -DDBG_TRACE_RASTER
DBG_TRACE_FLAGS += -DDBG_TRACE_ROP
DBG_FLAGS += $(DBG_TRACE_FLAGS)
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
TEX_INCLUDE = -I$(RTL_DIR)/tex_unit
RASTER_INCLUDE = -I$(RTL_DIR)/raster_unit
ROP_INCLUDE = -I$(RTL_DIR)/rop_unit
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/simulate $(FPU_INCLUDE) -I$(DPI_DIR)/..
RTL_INCLUDE += $(TEX_INCLUDE) $(RASTER_INCLUDE) $(ROP_INCLUDE)
TOP = VX_raster_qe
TESTBENCH = testbench.cpp
SRCS = $(TESTBENCH) $(DPI_DIR)/util_dpi.cpp
# C++ flags for the testbench.
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds -Wno-maybe-uninitialized
# Include paths are derived from DPI_DIR only. The former absolute
# /nethome/<user>/vortex-dev/hw entry was specific to one developer's machine
# (presumably the same directory as $(DPI_DIR)/..).
CXXFLAGS += -I$(DPI_DIR)/.. -I$(DPI_DIR) -I$(DPI_DIR)/../common
VL_FLAGS += --exe --cc $(TOP).sv --top-module $(TOP)
VL_FLAGS += --language 1800-2009 --assert -Wall
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += --x-initial unique --x-assign unique -Wno-UNUSED -Wno-UNDRIVEN
VL_FLAGS += $(RTL_INCLUDE)
VL_FLAGS += $(PARAMS)
# Enable Verilator multithreaded simulation
THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())')
VL_FLAGS += -j $(THREADS)
#VL_FLAGS += --threads $(THREADS)
# Debugging
ifdef DEBUG
VL_FLAGS += --trace --trace-structs -DVCD_OUTPUT $(DBG_FLAGS)
CXXFLAGS += -g -O0 -DVCD_OUTPUT $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CXXFLAGS += -O2 -DNDEBUG
endif
# ALU backend
VL_FLAGS += -DIMUL_DPI
VL_FLAGS += -DIDIV_DPI
# FPU backend
FPU_CORE ?= FPU_DPI
VL_FLAGS += -D$(FPU_CORE)
# all/run/clean never produce a file with their own name.
.PHONY: all run clean

all: $(TOP)

# Verilate and build the testbench; the executable is written next to this Makefile.
$(TOP): $(SRCS)
	verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -o ../$(TOP)

run: $(TOP)
	./$(TOP)

clean:
	rm -rf obj_dir $(TOP)

View file

@ -0,0 +1,73 @@
#include <stdlib.h>
#include <iostream>
#include <verilated.h>
#include <verilated_vcd_c.h>
#include "VVX_raster_qe.h"
#define MAX_SIM_TIME 20
vluint64_t sim_time = 0;
static bool trace_enabled = false;
static uint64_t trace_start_time = 0;
static uint64_t trace_stop_time = -1ull;
static uint64_t timestamp = 0;
double sc_time_stamp() {
return timestamp;
}
bool sim_trace_enabled() {
if (timestamp >= trace_start_time
&& timestamp < trace_stop_time)
return true;
return trace_enabled;
}
void sim_trace_enable (bool enable) {
trace_enabled = enable;
}
// Testbench entry point for VX_raster_qe: applies one set of edge function
// values and edge equations, evaluates the DUT seven times (dumping the trace
// at consecutive timestamps) and prints the resulting mask.
int main(int argc, char** argv, char** env) {
    (void)argc; (void)argv; (void)env; // command-line arguments are not used

    VVX_raster_qe *dut = new VVX_raster_qe();
    Verilated::traceEverOn(true);
    VerilatedVcdC *m_trace = new VerilatedVcdC;
    dut->trace(m_trace, 10);
    m_trace->open("waveform.vcd");

    // Alternative stimulus (was assigned and then immediately overwritten):
    // dut->edge_func_val[0] = 56210; dut->edge_func_val[1] = 40000; dut->edge_func_val[2] = 5000;
    dut->edge_func_val[0] = 10; dut->edge_func_val[1] = 40000; dut->edge_func_val[2] = 5000;
    dut->edges[0][0] = -73; dut->edges[0][1] = -36; dut->edges[0][2] = 65000;
    dut->edges[1][0] = 5; dut->edges[1][1] = -89; dut->edges[1][2] = 65000;
    dut->edges[2][0] = 0; dut->edges[2][1] = 255; dut->edges[2][2] = -65000;

    // No clock is toggled here; the inputs are simply evaluated repeatedly.
    for (int step = 0; step < 7; ++step) {
        dut->eval();
        m_trace->dump(timestamp++);
    }

    printf("Here %d\n", dut->masks);

    dut->final();
    m_trace->close();
    delete dut;
    delete m_trace;
    return EXIT_SUCCESS;
}

View file

@ -0,0 +1,77 @@
PARAMS ?=
DEBUG ?= 1
RTL_DIR = ../../../rtl
DPI_DIR = ../../../dpi
# control RTL debug tracing states
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_PIPELINE
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_ICACHE
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_DCACHE
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_MEM
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_BANK
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_MSHR
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_TAG
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_DATA
DBG_TRACE_FLAGS += -DDBG_TRACE_AFU
DBG_TRACE_FLAGS += -DDBG_TRACE_SCOPE
DBG_TRACE_FLAGS += -DDBG_TRACE_TEX
DBG_TRACE_FLAGS += -DDBG_TRACE_RASTER
DBG_TRACE_FLAGS += -DDBG_TRACE_ROP
DBG_FLAGS += $(DBG_TRACE_FLAGS)
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
TEX_INCLUDE = -I$(RTL_DIR)/tex_unit
RASTER_INCLUDE = -I$(RTL_DIR)/raster_unit -I$(RTL_DIR)/raster_unit/tile_evaluator
ROP_INCLUDE = -I$(RTL_DIR)/rop_unit
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/simulate $(FPU_INCLUDE) -I$(DPI_DIR)/..
RTL_INCLUDE += $(TEX_INCLUDE) $(RASTER_INCLUDE) $(ROP_INCLUDE)
TOP = VX_raster_slice
TESTBENCH = testbench.cpp
SRCS = $(TESTBENCH) $(DPI_DIR)/util_dpi.cpp
# C++ flags for the testbench.
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds -Wno-maybe-uninitialized
# Include paths are derived from DPI_DIR only. The former absolute
# /nethome/<user>/vortex-dev/hw entry was specific to one developer's machine
# (presumably the same directory as $(DPI_DIR)/..).
CXXFLAGS += -I$(DPI_DIR)/.. -I$(DPI_DIR) -I$(DPI_DIR)/../common
VL_FLAGS += --exe --cc $(TOP).sv --top-module $(TOP)
VL_FLAGS += --language 1800-2009 --assert -Wall
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += --x-initial unique --x-assign unique --Wno-UNUSED
VL_FLAGS += $(RTL_INCLUDE)
VL_FLAGS += $(PARAMS)
# Enable Verilator multithreaded simulation
THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())')
VL_FLAGS += -j $(THREADS)
#VL_FLAGS += --threads $(THREADS)
# Debugging
ifdef DEBUG
VL_FLAGS += --trace --trace-structs -DVCD_OUTPUT $(DBG_FLAGS)
CXXFLAGS += -g -O0 -DVCD_OUTPUT $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CXXFLAGS += -O2 -DNDEBUG
endif
# ALU backend
VL_FLAGS += -DIMUL_DPI
VL_FLAGS += -DIDIV_DPI
# FPU backend
FPU_CORE ?= FPU_DPI
VL_FLAGS += -D$(FPU_CORE)
# all/run/clean never produce a file with their own name.
.PHONY: all run clean

all: $(TOP)

# Verilate and build the testbench; the executable is written next to this Makefile.
$(TOP): $(SRCS)
	verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -o ../$(TOP)

# Run the simulation, capture its output, and compare it with the golden data.
run: $(TOP)
	./$(TOP) > out_v.log
	python compare.py

# out_v.log is generated by the run target, so it is removed as well.
clean:
	rm -rf obj_dir $(TOP) out_v.log

View file

@ -0,0 +1,32 @@
"""Compare the simulator output (out_v.log) with the golden reference data.

Both files are read line by line (surrounding whitespace stripped) and compared
as multisets, so ordering does not matter but duplicated or missing lines do.
Exits with status 0 on a match and -1 on any mismatch.
"""
import sys
from collections import Counter

GOLDEN_PATH = "golden_data/test_data.txt"
OUTPUT_PATH = "out_v.log"


def load_lines(path):
    """Return the stripped lines of the text file at `path`."""
    with open(path) as handle:
        return [line.strip() for line in handle]


def find_mismatches(expected, actual):
    """Compare two lists of lines as multisets.

    Returns a tuple (unexpected, missing) of sorted lists: lines present in
    `actual` more often than in `expected`, and lines present in `expected`
    more often than in `actual`. Both lists are empty when the inputs match.
    """
    expected_counts = Counter(expected)
    actual_counts = Counter(actual)
    unexpected = sorted((actual_counts - expected_counts).elements())
    missing = sorted((expected_counts - actual_counts).elements())
    return unexpected, missing


def main():
    """Run the comparison and return the process exit status."""
    expected = load_lines(GOLDEN_PATH)
    actual = load_lines(OUTPUT_PATH)
    unexpected, missing = find_mismatches(expected, actual)

    # Unexpected entries are printed bare, as before; missing ones are tagged.
    for entry in unexpected:
        print(entry)
    for entry in missing:
        print("missing: " + entry)

    if unexpected or missing:
        print("Matching failed")
        return -1
    return 0


if __name__ == "__main__":
    sys.exit(main())

View file

@ -0,0 +1,64 @@
0 256
1 256
2 256
3 256
4 256
5 256
6 256
7 256
0 257
1 257
2 257
3 257
4 257
5 257
6 257
0 258
1 258
2 258
3 258
4 258
5 258
6 258
0 259
1 259
2 259
3 259
4 259
5 259
0 260
1 260
2 260
3 260
4 260
5 260
0 261
1 261
2 261
3 261
4 261
0 262
1 262
2 262
3 262
4 262
0 263
1 263
2 263
3 263
0 264
1 264
2 264
3 264
0 265
1 265
2 265
0 266
1 266
2 266
0 267
1 267
0 268
1 268
0 269
0 270

View file

@ -0,0 +1,106 @@
#include <stdlib.h>
#include <iostream>
#include <verilated.h>
#include <verilated_vcd_c.h>
#include "VVX_raster_slice.h"
#include "VX_config.h"
#define MAX_SIM_TIME 200
vluint64_t sim_time = 0;
static bool trace_enabled = false;
static uint64_t trace_start_time = 0;
static uint64_t trace_stop_time = -1ull;
static uint64_t timestamp = 0;
double sc_time_stamp() {
return timestamp;
}
bool sim_trace_enabled() {
if (timestamp >= trace_start_time
&& timestamp < trace_stop_time)
return true;
return trace_enabled;
}
void sim_trace_enable (bool enable) {
trace_enabled = enable;
}
// Advance the DUT by one full clock period: two clock toggles, each followed
// by an evaluation, a 5-unit time step and a waveform dump.
void eval(VVX_raster_slice* dut, VerilatedVcdC* m_trace)
{
    for (int half = 0; half < 2; ++half) {
        dut->clk = not dut->clk;
        dut->eval();
        timestamp += 5;
        m_trace->dump(timestamp);
        // Optional debug print after the first toggle (disabled):
        // if (dut->out_valid_block && dut->ready == 0)
        //     printf("%u %u %u\n", dut->clk, dut->out_quad_x_loc, dut->out_quad_y_loc, dut->out_quad_masks);
    }
}
// Testbench entry point for VX_raster_slice: resets the DUT, feeds one
// primitive, lets it run for 100 clock periods, then pops quads and prints one
// "x y" line per covered pixel on stdout.
int main(int argc, char** argv, char** env) {
    VVX_raster_slice *dut = new VVX_raster_slice();
    Verilated::traceEverOn(true);
    VerilatedVcdC *m_trace = new VerilatedVcdC;
    dut->trace(m_trace, 20);
    m_trace->open("waveform.vcd");

    // Config #1: Tile = 16, Block = 8
    // Reset for one clock period
    dut->clk = 1;
    dut->reset = 1;
    eval(dut, m_trace);
    dut->reset = 0;

    // Present the primitive for one clock period
    dut->input_valid = 1;
    dut->x_loc = 0; dut->y_loc = 256;
    dut->edge_func_val[0] = 518; dut->edge_func_val[1] = 42976; dut->edge_func_val[2] = 0;
    // Alternative stimulus (disabled):
    // dut->edge_func_val[0] = 500; dut->edge_func_val[1] = 200; dut->edge_func_val[2] = 500;
    dut->edges[0][0] = -73; dut->edges[0][1] = -36; dut->edges[0][2] = 65456;
    dut->edges[1][0] = 5; dut->edges[1][1] = -89; dut->edges[1][2] = 65440;
    dut->edges[2][0] = 0; dut->edges[2][1] = 255; dut->edges[2][2] = -65280;
    dut->extents[0] = 0; dut->extents[1] = 320; dut->extents[2] = 16320;
    eval(dut, m_trace);
    dut->input_valid = 0;

    // Let the slice work
    for (int settle = 100; settle > 0; --settle) {
        eval(dut, m_trace);
    }
    assert(dut->quad_queue_empty == 0);

    // Drain the quad queue: at most 100 periods, stopping once it is empty
    dut->pop_quad = 1;
    for (int cycle = 0; cycle < 100 && dut->quad_queue_empty != 1; ++cycle) {
        eval(dut, m_trace);
        for (int quad = 0; quad < 4; ++quad) {
            // Mask bit b corresponds to pixel offset (b / 2, b % 2) within the quad
            for (int bit = 0; bit < 4; ++bit) {
                if (dut->out_quad_masks[quad] & 1 << (bit)) {
                    printf("%d %d\n", dut->out_quad_x_loc[quad]+(bit / 2), dut->out_quad_y_loc[quad]+(bit % 2));
                }
            }
        }
    }
    dut->pop_quad = 0;

    // Two trailing periods
    eval(dut, m_trace);
    eval(dut, m_trace);

    m_trace->close();
    delete dut;
    exit(EXIT_SUCCESS);
}

View file

@ -0,0 +1,76 @@
PARAMS ?=
DEBUG ?= 1
RTL_DIR = ../../../../rtl
DPI_DIR = ../../../../dpi
# control RTL debug tracing states
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_PIPELINE
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_ICACHE
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_DCACHE
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_MEM
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_BANK
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_MSHR
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_TAG
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_DATA
DBG_TRACE_FLAGS += -DDBG_TRACE_AFU
DBG_TRACE_FLAGS += -DDBG_TRACE_SCOPE
DBG_TRACE_FLAGS += -DDBG_TRACE_TEX
DBG_TRACE_FLAGS += -DDBG_TRACE_RASTER
DBG_TRACE_FLAGS += -DDBG_TRACE_ROP
DBG_FLAGS += $(DBG_TRACE_FLAGS)
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
TEX_INCLUDE = -I$(RTL_DIR)/tex_unit
RASTER_INCLUDE = -I$(RTL_DIR)/raster_unit -I$(RTL_DIR)/raster_unit/tile_evaluator
ROP_INCLUDE = -I$(RTL_DIR)/rop_unit
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/simulate $(FPU_INCLUDE) -I$(DPI_DIR)/..
RTL_INCLUDE += $(TEX_INCLUDE) $(RASTER_INCLUDE) $(ROP_INCLUDE)
TOP = VX_raster_te_arbiter
TESTBENCH = testbench.cpp
SRCS = $(TESTBENCH) $(DPI_DIR)/util_dpi.cpp
# C++ flags for the testbench.
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds -Wno-maybe-uninitialized
# Include paths are derived from DPI_DIR only. The former absolute
# /nethome/<user>/vortex-dev/hw entry was specific to one developer's machine
# (presumably the same directory as $(DPI_DIR)/..).
CXXFLAGS += -I$(DPI_DIR)/.. -I$(DPI_DIR) -I$(DPI_DIR)/../common
VL_FLAGS += --exe --cc $(TOP).sv --top-module $(TOP)
VL_FLAGS += --language 1800-2009 --assert -Wall
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += --x-initial unique --x-assign unique -Wno-UNUSED -Wno-UNDRIVEN
VL_FLAGS += $(RTL_INCLUDE)
VL_FLAGS += $(PARAMS)
# Enable Verilator multithreaded simulation
THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())')
VL_FLAGS += -j $(THREADS)
#VL_FLAGS += --threads $(THREADS)
# Debugging
ifdef DEBUG
VL_FLAGS += --trace --trace-structs -DVCD_OUTPUT $(DBG_FLAGS)
CXXFLAGS += -g -O0 -DVCD_OUTPUT $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CXXFLAGS += -O2 -DNDEBUG
endif
# ALU backend
VL_FLAGS += -DIMUL_DPI
VL_FLAGS += -DIDIV_DPI
# FPU backend
FPU_CORE ?= FPU_DPI
VL_FLAGS += -D$(FPU_CORE)
# all/run/clean never produce a file with their own name.
.PHONY: all run clean

all: $(TOP)

# Verilate and build the testbench; the executable is written next to this Makefile.
$(TOP): $(SRCS)
	verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -o ../$(TOP)

run: $(TOP)
	./$(TOP)

clean:
	rm -rf obj_dir $(TOP)

View file

@ -0,0 +1,119 @@
#include <stdlib.h>
#include <iostream>
#include <verilated.h>
#include <verilated_vcd_c.h>
#include "VVX_raster_te_arbiter.h"
#include "VX_config.h"
#define MAX_SIM_TIME 200
vluint64_t sim_time = 0;
static bool trace_enabled = false;
static uint64_t trace_start_time = 0;
static uint64_t trace_stop_time = -1ull;
static uint64_t timestamp = 0;
double sc_time_stamp() {
return timestamp;
}
bool sim_trace_enabled() {
if (timestamp >= trace_start_time
&& timestamp < trace_stop_time)
return true;
return trace_enabled;
}
void sim_trace_enable (bool enable) {
trace_enabled = enable;
}
// Advance the DUT by one full clock period (two clock toggles, 5 time units
// each, with a waveform dump after every toggle) and print the push/pop
// signals before and after the period.
// NOTE(review): main() indexes dut->data_push as an array, so passing it to a
// "%d" conversion here looks mismatched; dut->data_pop may also be wider than
// an int - confirm against the generated VVX_raster_te_arbiter.h.
void eval(VVX_raster_te_arbiter* dut, VerilatedVcdC* m_trace)
{ printf("Running: Clk=%d, Push=%d data=%d, Pop=%d data=%d\n", dut->clk, dut->fifo_push, dut->data_push, dut->fifo_pop, dut->data_pop);
// first clock toggle
dut->clk = not dut->clk;
dut->eval();
timestamp += 5;
m_trace->dump(timestamp);
// second clock toggle: clk ends at the level it had on entry
dut->clk = not dut->clk;
dut->eval();
timestamp += 5;
m_trace->dump(timestamp);
printf("Clk=%d, Push=%d data=%d, Pop=%d data=%d\n", dut->clk, dut->fifo_push, dut->data_push, dut->fifo_pop, dut->data_pop);
}
// Testbench entry point for VX_raster_te_arbiter: resets the DUT, pushes for
// four clock periods with fifo_push = 1, then pops for eleven periods using
// the arbiter's one-hot grant as the pop mask.
int main(int argc, char** argv, char** env) {
    VVX_raster_te_arbiter *dut = new VVX_raster_te_arbiter();
    Verilated::traceEverOn(true);
    VerilatedVcdC *m_trace = new VerilatedVcdC;
    dut->trace(m_trace, 20);
    m_trace->open("waveform.vcd");

    // Config #1: Tile = 16, Block = 8
    // Reset for one clock period
    dut->clk = 1;
    dut->reset = 1;
    eval(dut, m_trace);
    dut->reset = 0;

    // Disabled single-FIFO test (Test #1), kept for reference:
    //   for i in 0..3: fifo_push = 1; fifo_pop = 0; data_push = 12 + i*i; eval
    //   for i in 0..3: fifo_push = 0; fifo_pop = 1; eval
    //   then two idle evals

    // Push phase: four periods, data_push[lane] = 4*batch + lane
    dut->fifo_push = 1;
    for (int batch = 0; batch < 4; ++batch) {
        dut->fifo_pop = 0;
        for (int lane = 0; lane < 4; ++lane) {
            dut->data_push[lane] = batch*4 + lane;
        }
        eval(dut, m_trace);
    }
    dut->fifo_push = 0;

    // Pop phase: eleven periods, popping whichever FIFO the arbiter granted
    for (int remaining = 11; remaining > 0; --remaining) {
        dut->fifo_pop = dut->fifo_index_onehot;
        eval(dut, m_trace);
    }

    m_trace->close();
    delete dut;
    exit(EXIT_SUCCESS);
}

View file

@ -0,0 +1,76 @@
PARAMS ?=
DEBUG ?= 1
RTL_DIR = ../../../../rtl
DPI_DIR = ../../../../dpi
# control RTL debug tracing states
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_PIPELINE
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_ICACHE
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_DCACHE
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_MEM
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_BANK
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_MSHR
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_TAG
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_DATA
DBG_TRACE_FLAGS += -DDBG_TRACE_AFU
DBG_TRACE_FLAGS += -DDBG_TRACE_SCOPE
DBG_TRACE_FLAGS += -DDBG_TRACE_TEX
DBG_TRACE_FLAGS += -DDBG_TRACE_RASTER
DBG_TRACE_FLAGS += -DDBG_TRACE_ROP
DBG_FLAGS += $(DBG_TRACE_FLAGS)
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
TEX_INCLUDE = -I$(RTL_DIR)/tex_unit
RASTER_INCLUDE = -I$(RTL_DIR)/raster_unit -I$(RTL_DIR)/raster_unit/tile_evaluator
ROP_INCLUDE = -I$(RTL_DIR)/rop_unit
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/simulate $(FPU_INCLUDE) -I$(DPI_DIR)/..
RTL_INCLUDE += $(TEX_INCLUDE) $(RASTER_INCLUDE) $(ROP_INCLUDE)
TOP = VX_raster_te
TESTBENCH = testbench.cpp
SRCS = $(TESTBENCH) $(DPI_DIR)/util_dpi.cpp
# C++ flags for the testbench.
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds -Wno-maybe-uninitialized
# Include paths are derived from DPI_DIR only. The former absolute
# /nethome/<user>/vortex-dev/hw entry was specific to one developer's machine
# (presumably the same directory as $(DPI_DIR)/..).
CXXFLAGS += -I$(DPI_DIR)/.. -I$(DPI_DIR) -I$(DPI_DIR)/../common
VL_FLAGS += --exe --cc $(TOP).sv --top-module $(TOP)
VL_FLAGS += --language 1800-2009 --assert -Wall
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += --x-initial unique --x-assign unique -Wno-UNUSED -Wno-UNDRIVEN
VL_FLAGS += $(RTL_INCLUDE)
VL_FLAGS += $(PARAMS)
# Enable Verilator multithreaded simulation
THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())')
VL_FLAGS += -j $(THREADS)
#VL_FLAGS += --threads $(THREADS)
# Debugging
ifdef DEBUG
VL_FLAGS += --trace --trace-structs -DVCD_OUTPUT $(DBG_FLAGS)
CXXFLAGS += -g -O0 -DVCD_OUTPUT $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CXXFLAGS += -O2 -DNDEBUG
endif
# ALU backend
VL_FLAGS += -DIMUL_DPI
VL_FLAGS += -DIDIV_DPI
# FPU backend
FPU_CORE ?= FPU_DPI
VL_FLAGS += -D$(FPU_CORE)
# all/run/clean never produce a file with their own name.
.PHONY: all run clean

all: $(TOP)

# Verilate and build the testbench; the executable is written next to this Makefile.
$(TOP): $(SRCS)
	verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -o ../$(TOP)

run: $(TOP)
	./$(TOP)

clean:
	rm -rf obj_dir $(TOP)

View file

@ -0,0 +1,166 @@
#include <stdlib.h>
#include <iostream>
#include <verilated.h>
#include <verilated_vcd_c.h>
#include "VVX_raster_te.h"
#include "VX_config.h"
#define MAX_SIM_TIME 200
vluint64_t sim_time = 0;
static bool trace_enabled = false;
static uint64_t trace_start_time = 0;
static uint64_t trace_stop_time = -1ull;
static uint64_t timestamp = 0;
double sc_time_stamp() {
return timestamp;
}
bool sim_trace_enabled() {
if (timestamp >= trace_start_time
&& timestamp < trace_stop_time)
return true;
return trace_enabled;
}
void sim_trace_enable (bool enable) {
trace_enabled = enable;
}
// Evaluate the DUT once (no clock is toggled here), advance simulation time
// by 20 units and dump the waveform at the new time.
void eval(VVX_raster_te* dut, VerilatedVcdC* m_trace)
{
    dut->eval();
    m_trace->dump(timestamp += 20);

    // Optional debug dump of all outputs (disabled):
    // printf("time=%d valid_tile=%d valid_block=%d\n", timestamp,
    //        dut->valid_tile, dut->valid_block);
    // for (int k = 0; k < 4; ++k) {
    //     printf("\tx=%d, y=%d\n\t", dut->tile_x_loc[k], dut->tile_y_loc[k]);
    //     for (int i = 0; i < 3; ++i)
    //         printf("%d val=%d ", i, dut->tile_edge_func_val[k][i]);
    //     printf("\n");
    // }
}
int main(int argc, char** argv, char** env) {
VVX_raster_te *dut = new VVX_raster_te();
Verilated::traceEverOn(true);
VerilatedVcdC *m_trace = new VerilatedVcdC;
dut->trace(m_trace, 10);
m_trace->open("waveform.vcd");
// Config #1: Tile = 16, Block = 8
dut->x_loc = 0; dut->y_loc = 256;
dut->edge_func_val[0] = 56210; dut->edge_func_val[1] = 26112; dut->edge_func_val[2] = 0;
//dut->edge_func_val[0] = 0; dut->edge_func_val[1] = -7000; dut->edge_func_val[2] = 0;
dut->edges[0][0] = -73; dut->edges[0][1] = -36; dut->edges[0][2] = 65456;
dut->edges[1][0] = 102; dut->edges[1][1] = -153; dut->edges[1][2] = 65280;
dut->edges[2][0] = 0; dut->edges[2][1] = 255; dut->edges[2][2] = -65280;
dut->extents[0] = 0; dut->extents[1] = 1632; dut->extents[2] = 4080;
dut->level = 0;
for (int i = 0; i < 10; ++i)
{
eval(dut, m_trace);
}
assert(dut->tile_edge_func_val[0][0] == 56210);
assert(dut->tile_edge_func_val[0][1] == 26112);
assert(dut->tile_edge_func_val[0][2] == 0);
assert(dut->tile_edge_func_val[1][0] == 55922);
assert(dut->tile_edge_func_val[1][1] == 24888);
assert(dut->tile_edge_func_val[1][2] == 2040);
assert(dut->tile_edge_func_val[2][0] == 55626);
assert(dut->tile_edge_func_val[2][1] == 26928);
assert(dut->tile_edge_func_val[2][2] == 0);
assert(dut->tile_edge_func_val[3][0] == 55338);
assert(dut->tile_edge_func_val[3][1] == 25704);
assert(dut->tile_edge_func_val[3][2] == 2040);
assert(dut->tile_x_loc[0] == 0);
assert(dut->tile_y_loc[0] == 256);
assert(dut->tile_x_loc[1] == 0);
assert(dut->tile_y_loc[1] == 264);
assert(dut->tile_x_loc[2] == 8);
assert(dut->tile_y_loc[2] == 256);
assert(dut->tile_x_loc[3] == 8);
assert(dut->tile_y_loc[3] == 264);
assert(dut->valid_block == 0);
assert(dut->valid_tile == 1);
// Config #3; Tile = 16, Block = 8
dut->x_loc = 8; dut->y_loc = 264;
dut->edge_func_val[0] = 55338; dut->edge_func_val[1] = 25704; dut->edge_func_val[2] = 2040;
//dut->edge_func_val[0] = 0; dut->edge_func_val[1] = -7000; dut->edge_func_val[2] = 0;
dut->edges[0][0] = -73; dut->edges[0][1] = -36; dut->edges[0][2] = 65456;
dut->edges[1][0] = 102; dut->edges[1][1] = -153; dut->edges[1][2] = 65280;
dut->edges[2][0] = 0; dut->edges[2][1] = 255; dut->edges[2][2] = -65280;
dut->extents[0] = 0; dut->extents[1] = 1632; dut->extents[2] = 4080;
dut->level = 1;
eval(dut, m_trace);
eval(dut, m_trace);
eval(dut, m_trace);
for (int i = 0; i < 2; ++i)
{
eval(dut, m_trace);
//if (dut->ready == 1) break;
}
assert(dut->valid_tile == 0);
assert(dut->valid_block == 1);
/*
// Config #2, Tile = 16, Block = 4
dut->x_loc = 8; dut->y_loc = 264;
dut->edge_func_val[0] = 55338; dut->edge_func_val[1] = 25704; dut->edge_func_val[2] = 2040;
//dut->edge_func_val[0] = 0; dut->edge_func_val[1] = -7000; dut->edge_func_val[2] = 0;
dut->edges[0][0] = -73; dut->edges[0][1] = -36; dut->edges[0][2] = 65456;
dut->edges[1][0] = 102; dut->edges[1][1] = -153; dut->edges[1][2] = 65280;
dut->edges[2][0] = 0; dut->edges[2][1] = 255; dut->edges[2][2] = -65280;
dut->extents[0] = 0; dut->extents[1] = 1632; dut->extents[2] = 4080;
dut->level = 1;
eval(dut, m_trace);
eval(dut, m_trace);
eval(dut, m_trace);
for (int i = 0; i < 2; ++i)
{
eval(dut, m_trace);
//if (dut->ready == 1) break;
}
assert(dut->tile_edge_func_val[0][0] == 55338);
assert(dut->tile_edge_func_val[0][1] == 25704);
assert(dut->tile_edge_func_val[0][2] == 2040);
assert(dut->tile_edge_func_val[1][0] == 55194);
assert(dut->tile_edge_func_val[1][1] == 25092);
assert(dut->tile_edge_func_val[1][2] == 3060);
assert(dut->tile_edge_func_val[2][0] == 55046);
assert(dut->tile_edge_func_val[2][1] == 26112);
assert(dut->tile_edge_func_val[2][2] == 2040);
assert(dut->tile_edge_func_val[3][0] == 54902);
assert(dut->tile_edge_func_val[3][1] == 25500);
assert(dut->tile_edge_func_val[3][2] == 3060);
assert(dut->tile_x_loc[0] == 8);
assert(dut->tile_y_loc[0] == 264);
assert(dut->tile_x_loc[1] == 8);
assert(dut->tile_y_loc[1] == 268);
assert(dut->tile_x_loc[2] == 12);
assert(dut->tile_y_loc[2] == 264);
assert(dut->tile_x_loc[3] == 12);
assert(dut->tile_y_loc[3] == 268);
assert(dut->valid_block == 0);
assert(dut->valid_tile == 1);
*/
std::cout << "NOTE: On assertion failure double check the tile and block sizes\n";
m_trace->close();
delete dut;
exit(EXIT_SUCCESS);
}

View file

@ -0,0 +1,59 @@
`timescale 1ns/1ns
`include "VX_rop_blend.sv"
// check(x, y): compare an observed value x against an expected value y.
//   - (x == y) evaluates to 0   -> definite mismatch, reported with $error
//   - (x == y) evaluates to X/Z -> comparison is unknown, reported with $warning
//   - (x == y) evaluates to 1   -> silent
// The macro deliberately ends without a semicolon; the caller supplies it.
`define check(x, y) \
    if ((x == y) !== 1) \
        if ((x == y) === 0) $error("x=%h, expected=%h", x, y); \
        else $warning("x=%h, expected=%h", x, y)
// Directed testbench for VX_rop_blend.
//
// Drives a fixed source/destination colour pair through three blend
// configurations and compares `color_out` against hard-coded expected values
// using the `check macro:
//   1. src factor ONE, dst factor ZERO, RGB mode ADD, alpha mode SUB
//   2. src and dst factors SRC_RGB / SRC_A (modes unchanged)
//   3. both modes LOGICOP with logic_op = AND_INVERTED
//
// valid_in is tied high and ready_out is tied low; ready_in and valid_out are
// left unconnected, so the handshake itself is not exercised here.
module testbench();

    reg clk;
    reg reset;
    // reg valid_in;
    // reg ready_out;

    rop_dcrs_t dcrs;
    rgba_t     src_color;
    rgba_t     dst_color;
    rgba_t     color_out;
    // Not connected to the DUT; only initialised and printed by $monitor.
    rgba_t     src_factor;
    rgba_t     dst_factor;

    VX_rop_blend #() dut (
        .clk       (clk),
        .reset     (reset),
        .ready_in  (),
        .valid_in  (1'b1), // only one needed
        .ready_out (1'b0), // only one needed
        .valid_out (),
        .dcrs      (dcrs),
        .src_color (src_color),
        .dst_color (dst_color),
        .color_out (color_out)
    );

    // Free-running clock with a period of 2 time units; clk is given its
    // initial value at time 0 by the stimulus block below.
    always begin
        #1 clk = !clk;
    end

    initial begin
        $monitor ("%d: clk=%b rst=%b mode_rgb=%h mode_a=%h src_color=%p, dst_color=%p, src_factor=%p, dst_factor=%p, out_color=%p",
                  $time, clk, reset, dcrs.blend_mode_rgb, dcrs.blend_mode_a, src_color, dst_color, src_factor, dst_factor, color_out);

        // Time 0: assert reset and zero the driven inputs.
        #0 clk=0; reset=1; dcrs.blend_mode_rgb=0; dcrs.blend_mode_a=0; src_color=0; dst_color=0; src_factor=0; dst_factor=0; dcrs.logic_op=0;

        // Case 1: release reset; source factor ONE, destination factor ZERO.
        #2 reset=0; dcrs.blend_src_rgb=`ROP_BLEND_FUNC_ONE; dcrs.blend_src_a=`ROP_BLEND_FUNC_ONE;
        dcrs.blend_dst_rgb=`ROP_BLEND_FUNC_ZERO; dcrs.blend_dst_a=`ROP_BLEND_FUNC_ZERO;
        dcrs.blend_mode_rgb=`ROP_BLEND_MODE_ADD; dcrs.blend_mode_a=`ROP_BLEND_MODE_SUB;
        dcrs.blend_const=32'h0;
        src_color='{8'hb4, 8'hef, 8'h4b, 8'h7b}; dst_color='{8'hc2, 8'hc4, 8'h26, 8'hf5};
        // Expected values carry an explicit rgba_t type prefix: an untyped
        // assignment pattern is only legal in an assignment-like context,
        // not as an operand of ==.
        #2 `check(color_out, rgba_t'{8'hb4, 8'hef, 8'h4b, 8'h7b});

        // Case 2: both factors taken from the source colour / source alpha.
        #2 dcrs.blend_src_rgb=`ROP_BLEND_FUNC_SRC_RGB; dcrs.blend_src_a=`ROP_BLEND_FUNC_SRC_A;
        dcrs.blend_dst_rgb=`ROP_BLEND_FUNC_SRC_RGB; dcrs.blend_dst_a=`ROP_BLEND_FUNC_SRC_A;
        #2 `check(color_out, rgba_t'{8'h2b, 8'hff, 8'hc7, 8'hb0});

        // Case 3: logic-op mode on both RGB and alpha, op = AND_INVERTED.
        #2 dcrs.blend_mode_rgb=`ROP_BLEND_MODE_LOGICOP; dcrs.blend_mode_a=`ROP_BLEND_MODE_LOGICOP;
        dcrs.logic_op=`ROP_LOGIC_OP_AND_INVERTED;
        #2 `check(color_out, rgba_t'{8'h42, 8'h00, 8'h24, 8'h84});

        #1 $finish;
    end

endmodule