many fixes

This commit is contained in:
Blaise Tine 2021-03-27 20:58:12 -04:00
parent 2d48fe13c8
commit 39a8579c27
34 changed files with 5021 additions and 515 deletions

View file

@ -8,14 +8,14 @@ CFLAGS += -I../../include -I../../../hw/simulate -I../../../hw
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSHR
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
#DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
#DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSHR
#DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
#DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
#DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
#DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
#DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
#DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
DBG_PRINT_FLAGS += -DDBG_PRINT_TEX
DBG_FLAGS += $(DBG_PRINT_FLAGS)

View file

@ -24,7 +24,7 @@ LDFLAGS +=
PROJECT = basic
SRCS = basic.cpp
SRCS = main.cpp
all: $(PROJECT) kernel.bin kernel.dump

View file

@ -22,7 +22,7 @@ CXXFLAGS += -I../../include
PROJECT = demo
SRCS = demo.cpp
SRCS = main.cpp
all: $(PROJECT) kernel.bin kernel.dump

View file

@ -24,7 +24,7 @@ CXXFLAGS += -I../../include -I../../../hw
PROJECT = dogfood
SRCS = dogfood.cpp
SRCS = main.cpp
all: $(PROJECT) kernel.bin kernel.dump

View file

@ -1,2 +1,2 @@
demo.o: demo.cpp ../../include/vortex.h common.h utils.h
main.o: main.cpp ../../include/vortex.h common.h utils.h
utils.o: utils.cpp utils.h

View file

@ -13,6 +13,8 @@ VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
VX_LDFLAGS += -lm
VX_SRCS = kernel.c
#CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -pedantic -Wfatal-errors
@ -22,7 +24,7 @@ CXXFLAGS += -I../../include
PROJECT = demo
SRCS = demo.cpp utils.cpp
SRCS = main.cpp utils.cpp
all: $(PROJECT) kernel.bin kernel.dump

View file

@ -1,5 +1,8 @@
#ifndef _COMMON_H_
#define _COMMON_H_
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
struct kernel_arg_t {
uint32_t num_tasks;
uint32_t src_width;

Binary file not shown.

Binary file not shown.

View file

@ -37,16 +37,15 @@ void kernel_body(int task_id, void* arg) {
}
int main() {
struct kernel_arg_t* arg = (struct kernel_arg_t*)0x0;
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
// configure texture unit
vx_csr_write(CSR_TEX_ADDR(0), arg->src_ptr);
vx_csr_write(CSR_TEX_FORMAT(0), 0);
vx_csr_write(CSR_TEX_MIPOFF(0), 0);
vx_csr_write(CSR_TEX_WIDTH(0), ilog2(arg->src_width));
vx_csr_write(CSR_TEX_HEIGHT(0), ilog2(arg->src_height));
vx_csr_write(CSR_TEX_STRIDE(0), ilog2(arg->src_stride));
vx_csr_write(CSR_TEX_WRAP_U(0), 0);
vx_csr_write(CSR_TEX_WRAP_V(0), 0);
vx_csr_write(CSR_TEX_FORMAT(0), 0);
vx_csr_write(CSR_TEX_WRAP(0), 0);
vx_csr_write(CSR_TEX_FILTER(0), 0);
struct tile_arg_t targ;
@ -56,5 +55,5 @@ int main() {
targ.deltaX = 1.0f / arg->dst_width;
targ.deltaY = 1.0f / arg->dst_height;
vx_spawn_tasks(arg->num_tasks, kernel_body, targ);
vx_spawn_tasks(arg->num_tasks, kernel_body, &targ);
}

File diff suppressed because it is too large Load diff

Binary file not shown.

View file

@ -134,14 +134,10 @@ int main(int argc, char *argv[]) {
// allocate device memory
std::cout << "allocate device memory" << std::endl;
size_t arg_addr, src_addr, dst_addr;
RT_CHECK(vx_alloc_dev_mem(device, sizeof(kernel_arg_t), &arg_addr));
size_t src_addr, dst_addr;
RT_CHECK(vx_alloc_dev_mem(device, src_bufsize, &src_addr));
RT_CHECK(vx_alloc_dev_mem(device, dst_bufsize, &dst_addr));
assert(arg_addr == ALLOC_BASE_ADDR);
std::cout << "arg_addr=" << std::hex << arg_addr << std::endl;
std::cout << "src_addr=" << std::hex << src_addr << std::endl;
std::cout << "dst_addr=" << std::hex << dst_addr << std::endl;
@ -154,20 +150,22 @@ int main(int argc, char *argv[]) {
std::cout << "upload kernel argument" << std::endl;
{
kernel_arg.num_tasks = std::min<uint32_t>(num_tasks, dst_height);
kernel_arg.src_width = src_width;
kernel_arg.src_height = src_height;
kernel_arg.src_stride = src_bpp;
kernel_arg.src_pitch = src_bpp * src_width * src_height;
kernel_arg.src_ptr = src_addr;
kernel_arg.dst_width = dst_width;
kernel_arg.dst_height = dst_height;
kernel_arg.dst_stride = dst_bpp;
kernel_arg.dst_pitch = dst_bpp * dst_width * dst_height;
kernel_arg.src_ptr = src_addr;
kernel_arg.dst_pitch = dst_bpp * dst_width * dst_height;
kernel_arg.dst_ptr = dst_addr;
auto buf_ptr = (int*)vx_host_ptr(buffer);
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
RT_CHECK(vx_copy_to_dev(buffer, arg_addr, sizeof(kernel_arg_t), 0));
RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
}
// upload source buffer0

View file

@ -241,17 +241,16 @@
`define NUM_TEX_UNITS 2
`define CSR_TEX_STATES 8
`define CSR_TEX_STATES 7
`define CSR_TEX_BEGIN(x) (12'hFD0 + (x) * `CSR_TEX_STATES)
`define CSR_TEX_ADDR(x) (`CSR_TEX_BEGIN(x) + 12'h00)
`define CSR_TEX_FORMAT(x) (`CSR_TEX_BEGIN(x) + 12'h01)
`define CSR_TEX_WIDTH(x) (`CSR_TEX_BEGIN(x) + 12'h02)
`define CSR_TEX_HEIGHT(x) (`CSR_TEX_BEGIN(x) + 12'h03)
`define CSR_TEX_STRIDE(x) (`CSR_TEX_BEGIN(x) + 12'h04)
`define CSR_TEX_WRAP_U(x) (`CSR_TEX_BEGIN(x) + 12'h05)
`define CSR_TEX_WRAP_V(x) (`CSR_TEX_BEGIN(x) + 12'h06)
`define CSR_TEX_FILTER(x) (`CSR_TEX_BEGIN(x) + 12'h07)
`define CSR_TEX_WRAP(x) (`CSR_TEX_BEGIN(x) + 12'h02)
`define CSR_TEX_FILTER(x) (`CSR_TEX_BEGIN(x) + 12'h03)
`define CSR_TEX_MIPOFF(x) (`CSR_TEX_BEGIN(x) + 12'h04)
`define CSR_TEX_WIDTH(x) (`CSR_TEX_BEGIN(x) + 12'h05)
`define CSR_TEX_HEIGHT(x) (`CSR_TEX_BEGIN(x) + 12'h06)
// Pipeline Queues ////////////////////////////////////////////////////////////

View file

@ -203,8 +203,9 @@ module VX_csr_data #(
`CSR_MARCHID : read_data_r = `ARCHITECTURE_ID;
`CSR_MIMPID : read_data_r = `IMPLEMENTATION_ID;
default: begin
assert(~read_enable) else $error("%t: invalid CSR read address: %0h", $time, read_addr);
default: begin
assert (~read_enable || read_addr >= `CSR_TEX_BEGIN(0) && read_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES))
else $error("%t: invalid CSR read address: %0h", $time, read_addr);
end
endcase
end

View file

@ -31,6 +31,7 @@ module VX_decode #(
wire [6:0] opcode = instr[6:0];
wire [2:0] func3 = instr[14:12];
wire [6:0] func7 = instr[31:25];
wire [1:0] func2 = instr[26:25];
wire [11:0] u_12 = instr[31:20];
wire [4:0] rd = instr[11:7];
@ -361,7 +362,7 @@ module VX_decode #(
`ifdef EXT_TEX_ENABLE
3'h5: begin
op_type = `OP_BITS'(`GPU_TEX);
op_mod = `MOD_BITS'(instr[26:25]);
op_mod = `MOD_BITS'(func2);
use_rd = 1;
use_rs1 = 1;
use_rs2 = 1;

View file

@ -19,18 +19,19 @@ module VX_tex_addr #(
input wire [31:0] req_PC,
input wire [REQ_INFO_WIDTH-1:0] req_info,
input wire [`TEX_FORMAT_BITS-1:0] format,
input wire [`TEX_FILTER_BITS-1:0] filter,
input wire [`TEX_WRAP_BITS-1:0] wrap_u,
input wire [`TEX_WRAP_BITS-1:0] wrap_v,
input wire [`TEX_ADDR_BITS-1:0] base_addr,
input wire [`TEX_STRIDE_BITS-1:0] log_stride,
input wire [`TEX_WIDTH_BITS-1:0] log_width,
input wire [`TEX_HEIGHT_BITS-1:0] log_height,
input wire [`NUM_THREADS-1:0][`TEX_MIPOFF_BITS-1:0] mip_offsets,
input wire [`NUM_THREADS-1:0][`TEX_WIDTH_BITS-1:0] log_widths,
input wire [`NUM_THREADS-1:0][`TEX_HEIGHT_BITS-1:0] log_heights,
input wire [`NUM_THREADS-1:0][31:0] coord_u,
input wire [`NUM_THREADS-1:0][31:0] coord_v,
input wire [`NUM_THREADS-1:0][31:0] lod,
// outputs
@ -48,10 +49,19 @@ module VX_tex_addr #(
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_VAR (lod)
wire [1:0][`NUM_THREADS-1:0][`FIXED_FRAC-1:0] u;
wire [1:0][`NUM_THREADS-1:0][`FIXED_FRAC-1:0] v;
wire [`TEX_STRIDE_BITS-1:0] log_stride;
// stride
VX_tex_stride #(
.CORE_ID (CORE_ID)
) tex_stride (
.format (format),
.log_stride (log_stride)
);
// addressing mode
@ -60,10 +70,10 @@ module VX_tex_addr #(
wire [31:0] fu[1:0];
wire [31:0] fv[1:0];
assign fu[0] = coord_u[i] - (filter ? (`FIXED_HALF >> log_width) : 0);
assign fv[0] = coord_v[i] - (filter ? (`FIXED_HALF >> log_height) : 0);
assign fu[1] = coord_u[i] + (filter ? (`FIXED_HALF >> log_width) : 0);
assign fv[1] = coord_v[i] + (filter ? (`FIXED_HALF >> log_height) : 0);
assign fu[0] = coord_u[i] - (filter ? (`FIXED_HALF >> log_widths[i]) : 0);
assign fv[0] = coord_v[i] - (filter ? (`FIXED_HALF >> log_heights[i]) : 0);
assign fu[1] = coord_u[i] + (filter ? (`FIXED_HALF >> log_widths[i]) : 0);
assign fv[1] = coord_v[i] + (filter ? (`FIXED_HALF >> log_heights[i]) : 0);
VX_tex_wrap #(
.CORE_ID (CORE_ID)
@ -107,15 +117,15 @@ module VX_tex_addr #(
wire [`FIXED_FRAC-1:0] x [1:0];
wire [`FIXED_FRAC-1:0] y [1:0];
assign x[0] = u[0][i] >> ((`FIXED_FRAC) - log_width);
assign x[1] = u[1][i] >> ((`FIXED_FRAC) - log_width);
assign y[0] = v[0][i] >> ((`FIXED_FRAC) - log_height);
assign y[1] = v[1][i] >> ((`FIXED_FRAC) - log_height);
assign x[0] = u[0][i] >> ((`FIXED_FRAC) - log_widths[i]);
assign x[1] = u[1][i] >> ((`FIXED_FRAC) - log_widths[i]);
assign y[0] = v[0][i] >> ((`FIXED_FRAC) - log_heights[i]);
assign y[1] = v[1][i] >> ((`FIXED_FRAC) - log_heights[i]);
assign addr[i][0] = base_addr + (32'(x[0]) + (32'(y[0]) << log_width)) << log_stride;
assign addr[i][1] = base_addr + (32'(x[1]) + (32'(y[0]) << log_width)) << log_stride;
assign addr[i][2] = base_addr + (32'(x[0]) + (32'(y[1]) << log_width)) << log_stride;
assign addr[i][3] = base_addr + (32'(x[1]) + (32'(y[1]) << log_width)) << log_stride;
assign addr[i][0] = base_addr + 32'(mip_offsets[i]) + (32'(x[0]) + (32'(y[0]) << log_widths[i])) << log_stride;
assign addr[i][1] = base_addr + 32'(mip_offsets[i]) + (32'(x[1]) + (32'(y[0]) << log_widths[i])) << log_stride;
assign addr[i][2] = base_addr + 32'(mip_offsets[i]) + (32'(x[0]) + (32'(y[1]) << log_widths[i])) << log_stride;
assign addr[i][3] = base_addr + 32'(mip_offsets[i]) + (32'(x[1]) + (32'(y[1]) << log_widths[i])) << log_stride;
end
wire stall_out = mem_req_valid && ~mem_req_ready;

View file

@ -15,25 +15,33 @@
`define LERP_64(x1,x2,frac) ((x2 + (((x1 - x2) * frac) >> `BLEND_FRAC_64)) & 64'h00ff00ff00ff00ff)
`define TEX_ADDR_BITS 32
`define TEX_FORMAT_BITS 3
`define TEX_WRAP_BITS 2
`define TEX_WIDTH_BITS 4
`define TEX_HEIGHT_BITS 4
`define TEX_STRIDE_BITS 2
`define TEX_FILTER_BITS 1
`define TEX_ADDR_BITS 32
`define TEX_FORMAT_BITS 3
`define TEX_WRAP_BITS 2
`define TEX_WIDTH_BITS 4
`define TEX_HEIGHT_BITS 4
`define TEX_FILTER_BITS 1
`define TEX_WRAP_REPEAT 0
`define TEX_WRAP_CLAMP 1
`define TEX_WRAP_MIRROR 2
`define TEX_MIPOFF_BITS (2*12+1)
`define TEX_STRIDE_BITS 2
`define MAX_COLOR_WIDTH 8
`define NUM_COLOR_CHANNEL 4
`define TEX_LOD_BITS 4
`define TEX_MIP_BITS (`NTEX_BITS + `TEX_LOD_BITS)
`define TEX_COLOR_BITS 8
`define TEX_WRAP_REPEAT 0
`define TEX_WRAP_CLAMP 1
`define TEX_WRAP_MIRROR 2
`define TEX_FORMAT_R5G6B5 `TEX_FORMAT_BITS'(1)
`define TEX_FORMAT_R8G8B8 `TEX_FORMAT_BITS'(2)
`define TEX_FORMAT_R8G8B8A8 `TEX_FORMAT_BITS'(3)
`define MAX_COLOR_WIDTH 8
`define NUM_COLOR_CHANNEL 4
`define TEX_COLOR_BITS 8
`define TEX_FORMAT_R8G8B8A8 `TEX_FORMAT_BITS'(0)
`define TEX_FORMAT_R5G6B5 `TEX_FORMAT_BITS'(1)
`define TEX_FORMAT_R4G4B4A4 `TEX_FORMAT_BITS'(2)
`define TEX_FORMAT_L8A8 `TEX_FORMAT_BITS'(3)
`define TEX_FORMAT_L8 `TEX_FORMAT_BITS'(4)
`define TEX_FORMAT_A8 `TEX_FORMAT_BITS'(5)
`endif

View file

@ -1,7 +1,7 @@
`include "VX_tex_define.vh"
module VX_tex_format #(
parameter CORE_ID = 0,
parameter CORE_ID = 0,
parameter NUM_TEXELS = 4 //BILINEAR
) (
input wire [NUM_TEXELS-1:0][31:0] texel_data,
@ -13,32 +13,32 @@ module VX_tex_format #(
`UNUSED_PARAM (CORE_ID)
reg [`NUM_COLOR_CHANNEL-1:0] color_enable_r;
reg [NUM_TEXELS-1:0][63:0] formatted_texel_r;
reg [NUM_TEXELS-1:0][63:0] formatted_texel_r;
always @(*) begin
for (integer i = 0; i<NUM_TEXELS ;i++ ) begin
case (format)
`TEX_FORMAT_R5G6B5: begin
formatted_texel_r[i][55:48] = `TEX_COLOR_BITS'(texel_data[i][15:11]);
formatted_texel_r[i][39:32] = `TEX_COLOR_BITS'(texel_data[i][10:5]);
formatted_texel_r[i][23:16] = `TEX_COLOR_BITS'(texel_data[i][4:0]);
formatted_texel_r[i][7:0] = {`TEX_COLOR_BITS{1'b0}};
formatted_texel_r[i][07:00] = `TEX_COLOR_BITS'(texel_data[i][4:0]);
formatted_texel_r[i][23:16] = `TEX_COLOR_BITS'(texel_data[i][10:5]);
formatted_texel_r[i][39:32] = `TEX_COLOR_BITS'(texel_data[i][15:11]);
formatted_texel_r[i][55:48] = {`TEX_COLOR_BITS{1'b0}};
if (i == 0)
color_enable_r = 4'b1110;
color_enable_r = 4'b0111;
end
`TEX_FORMAT_R8G8B8: begin
formatted_texel_r[i][55:48] = `TEX_COLOR_BITS'(texel_data[i][23:16]);
formatted_texel_r[i][39:32] = `TEX_COLOR_BITS'(texel_data[i][15:8]);
formatted_texel_r[i][23:16] = `TEX_COLOR_BITS'(texel_data[i][7:0]);
formatted_texel_r[i][7:0] = {`TEX_COLOR_BITS{1'b0}};
`TEX_FORMAT_R4G4B4A4: begin
formatted_texel_r[i][07:00] = `TEX_COLOR_BITS'(texel_data[i][3:0]);
formatted_texel_r[i][23:16] = `TEX_COLOR_BITS'(texel_data[i][7:4]);
formatted_texel_r[i][39:32] = `TEX_COLOR_BITS'(texel_data[i][11:8]);
formatted_texel_r[i][55:48] = `TEX_COLOR_BITS'(texel_data[i][15:12]);
if (i == 0)
color_enable_r = 4'b1110;
color_enable_r = 4'b0111;
end
default: begin // `TEX_FORMAT_R8G8B8A8:
formatted_texel_r[i][55:48] = `TEX_COLOR_BITS'(texel_data[i][31:24]);
formatted_texel_r[i][39:32] = `TEX_COLOR_BITS'(texel_data[i][23:16]);
formatted_texel_r[i][07:00] = `TEX_COLOR_BITS'(texel_data[i][7:0]);
formatted_texel_r[i][23:16] = `TEX_COLOR_BITS'(texel_data[i][15:8]);
formatted_texel_r[i][7:0] = `TEX_COLOR_BITS'(texel_data[i][7:0]);
formatted_texel_r[i][39:32] = `TEX_COLOR_BITS'(texel_data[i][23:16]);
formatted_texel_r[i][55:48] = `TEX_COLOR_BITS'(texel_data[i][31:24]);
if (i == 0)
color_enable_r = 4'b1111;
end
@ -46,9 +46,9 @@ module VX_tex_format #(
end
end
assign color_enable = color_enable_r;
assign color_enable = color_enable_r;
for (genvar i = 0;i<NUM_TEXELS ;i++ ) begin
for (genvar i = 0; i < NUM_TEXELS; i++) begin
assign formatted_texel[i] = formatted_texel_r[i] & 64'h00ff00ff00ff00ff;
end

View file

@ -33,8 +33,8 @@ module VX_tex_sampler #(
`UNUSED_PARAM (CORE_ID)
wire [`NUM_THREADS-1:0][31:0] req_data ;
wire [`NUM_THREADS-1:0][31:0] req_data_bilerp ;
wire [`NUM_THREADS-1:0][31:0] req_data;
wire [`NUM_THREADS-1:0][31:0] req_data_bilerp;
wire stall_out;

View file

@ -0,0 +1,27 @@
`include "VX_tex_define.vh"
// Combinational lookup that maps a texture format code to `log_stride`,
// the shift amount used to scale a texel index into a byte offset.
// Judging by the format names, this is log2 of the texel size in bytes:
//   8-bit formats  (A8, L8)                      -> 0
//   16-bit formats (L8A8, R5G6B5, R4G4B4A4)      -> 1
//   anything else  (R8G8B8A8 and unknown codes)  -> 2
module VX_tex_stride #(
    parameter CORE_ID = 0
) (
    input wire [`TEX_FORMAT_BITS-1:0]  format,
    output wire [`TEX_STRIDE_BITS-1:0] log_stride
);
    // CORE_ID is accepted for interface uniformity only; it is not used here.
    `UNUSED_PARAM (CORE_ID)

    reg [`TEX_STRIDE_BITS-1:0] stride_sel;

    always @(*) begin
        case (format)
            // one byte per texel
            `TEX_FORMAT_A8,
            `TEX_FORMAT_L8:       stride_sel = 0;
            // two bytes per texel
            `TEX_FORMAT_L8A8,
            `TEX_FORMAT_R5G6B5,
            `TEX_FORMAT_R4G4B4A4: stride_sel = 1;
            // `TEX_FORMAT_R8G8B8A8 and any unlisted format code
            default:              stride_sel = 2;
        endcase
    end

    assign log_stride = stride_sel;
endmodule

View file

@ -24,25 +24,24 @@ module VX_tex_unit #(
`UNUSED_PARAM (CORE_ID)
`UNUSED_VAR (reset)
reg [`TEX_ADDR_BITS-1:0] tex_addr [`NUM_TEX_UNITS-1: 0];
reg [`TEX_FORMAT_BITS-1:0] tex_format [`NUM_TEX_UNITS-1: 0];
reg [`TEX_WIDTH_BITS-1:0] tex_width [`NUM_TEX_UNITS-1: 0];
reg [`TEX_HEIGHT_BITS-1:0] tex_height [`NUM_TEX_UNITS-1: 0];
reg [`TEX_STRIDE_BITS-1:0] tex_stride [`NUM_TEX_UNITS-1: 0];
reg [`TEX_WRAP_BITS-1:0] tex_wrap_u [`NUM_TEX_UNITS-1: 0];
reg [`TEX_WRAP_BITS-1:0] tex_wrap_v [`NUM_TEX_UNITS-1: 0];
reg [`TEX_FILTER_BITS-1:0] tex_filter [`NUM_TEX_UNITS-1: 0];
reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [(1 << `TEX_MIP_BITS)-1:0];
reg [`TEX_WIDTH_BITS-1:0] tex_width [(1 << `TEX_MIP_BITS)-1:0];
reg [`TEX_HEIGHT_BITS-1:0] tex_height [(1 << `TEX_MIP_BITS)-1:0];
// CSRs programming
reg [`TEX_ADDR_BITS-1:0] tex_addr [`NUM_TEX_UNITS-1:0];
reg [`TEX_FORMAT_BITS-1:0] tex_format [`NUM_TEX_UNITS-1:0];
reg [`TEX_WRAP_BITS-1:0] tex_wrap_u [`NUM_TEX_UNITS-1:0];
reg [`TEX_WRAP_BITS-1:0] tex_wrap_v [`NUM_TEX_UNITS-1:0];
reg [`TEX_FILTER_BITS-1:0] tex_filter [`NUM_TEX_UNITS-1:0];
// CSRs programming
for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin
wire [`TEX_MIP_BITS-1:0] mip_waddr = tex_csr_if.write_data[24 +: `TEX_MIP_BITS];
always @(posedge clk ) begin
if (reset) begin
if (reset) begin
tex_addr[i] <= 0;
tex_format[i] <= 0;
tex_width[i] <= 0;
tex_height[i] <= 0;
tex_stride[i] <= 0;
tex_wrap_u[i] <= 0;
tex_wrap_v[i] <= 0;
tex_filter[i] <= 0;
@ -51,12 +50,20 @@ module VX_tex_unit #(
case (tex_csr_if.write_addr)
`CSR_TEX_ADDR(i) : tex_addr[i] <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0];
`CSR_TEX_FORMAT(i) : tex_format[i] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0];
`CSR_TEX_WIDTH(i) : tex_width[i] <= tex_csr_if.write_data[`TEX_WIDTH_BITS-1:0];
`CSR_TEX_HEIGHT(i) : tex_height[i] <= tex_csr_if.write_data[`TEX_HEIGHT_BITS-1:0];
`CSR_TEX_STRIDE(i) : tex_stride[i] <= tex_csr_if.write_data[`TEX_STRIDE_BITS-1:0];
`CSR_TEX_WRAP_U(i) : tex_wrap_u[i] <= tex_csr_if.write_data[`TEX_WRAP_BITS-1:0];
`CSR_TEX_WRAP_V(i) : tex_wrap_v[i] <= tex_csr_if.write_data[`TEX_WRAP_BITS-1:0];
`CSR_TEX_FILTER(i) : tex_filter[i] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0];
`CSR_TEX_WRAP(i) : begin
tex_wrap_u[i] <= tex_csr_if.write_data[0 +: `TEX_WRAP_BITS];
tex_wrap_v[i] <= tex_csr_if.write_data[`TEX_WRAP_BITS +: `TEX_WRAP_BITS];
end
`CSR_TEX_FILTER(i) : tex_filter[i] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0];
`CSR_TEX_MIPOFF(i) : begin
tex_mipoff[mip_waddr] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
end
`CSR_TEX_WIDTH(i) : begin
tex_width[mip_waddr] <= tex_csr_if.write_data[`TEX_WIDTH_BITS-1:0];
end
`CSR_TEX_HEIGHT(i) : begin
tex_height[mip_waddr] <= tex_csr_if.write_data[`TEX_HEIGHT_BITS-1:0];
end
default:
assert(tex_csr_if.write_addr >= `CSR_TEX_BEGIN(0)
&& tex_csr_if.write_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES));
@ -66,6 +73,19 @@ module VX_tex_unit #(
end
end
// mipmap attributes
wire [`NUM_THREADS-1:0][`TEX_MIPOFF_BITS-1:0] tex_mipoffs;
wire [`NUM_THREADS-1:0][`TEX_WIDTH_BITS-1:0] tex_widths;
wire [`NUM_THREADS-1:0][`TEX_HEIGHT_BITS-1:0] tex_heights;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
wire [`TEX_MIP_BITS-1:0] mip_raddr = {tex_req_if.unit[`NTEX_BITS-1:0], tex_req_if.lod[i][`TEX_LOD_BITS-1:0]};
assign tex_mipoffs[i] = tex_mipoff[mip_raddr];
assign tex_widths[i] = tex_width[mip_raddr];
assign tex_heights[i] = tex_height[mip_raddr];
end
// address generation
wire mem_req_valid;
@ -87,7 +107,7 @@ module VX_tex_unit #(
wire [`TEX_FILTER_BITS-1:0] mem_rsp_filter;
wire [`NUM_THREADS-1:0][3:0][31:0] mem_rsp_data;
wire [REQ_INFO_WIDTH_M-1:0] mem_rsp_info;
wire mem_rsp_ready;
wire mem_rsp_ready;
VX_tex_addr #(
.REQ_INFO_WIDTH (REQ_INFO_WIDTH_A)
@ -103,18 +123,18 @@ module VX_tex_unit #(
.req_PC (tex_req_if.PC),
.req_info ({tex_format[tex_req_if.unit], tex_req_if.rd, tex_req_if.wb}),
.format (tex_format[tex_req_if.unit]),
.filter (tex_filter[tex_req_if.unit]),
.wrap_u (tex_wrap_u[tex_req_if.unit]),
.wrap_v (tex_wrap_v[tex_req_if.unit]),
.base_addr (tex_addr[tex_req_if.unit]),
.log_stride (tex_stride[tex_req_if.unit]),
.log_width (tex_width[tex_req_if.unit]),
.log_height (tex_height[tex_req_if.unit]),
.base_addr (tex_addr[tex_req_if.unit]),
.mip_offsets (tex_mipoffs),
.log_widths (tex_widths),
.log_heights (tex_heights),
.coord_u (tex_req_if.u),
.coord_v (tex_req_if.v),
.lod (tex_req_if.lod),
.mem_req_valid (mem_req_valid),
.mem_req_wid (mem_req_wid),
@ -211,10 +231,9 @@ module VX_tex_unit #(
&& (tex_csr_if.write_addr >= `CSR_TEX_BEGIN(i)
&& tex_csr_if.write_addr < `CSR_TEX_BEGIN(i+1))) begin
$display("%t: core%0d-tex_csr: csr_tex%d_addr, csr_data=%0h", $time, CORE_ID, i, tex_addr[i]);
$display("%t: core%0d-tex_csr: csr_tex%d_format, csr_data=%0h", $time, CORE_ID, i, tex_format[i]);
$display("%t: core%0d-tex_csr: csr_tex%d_width, csr_data=%0h", $time, CORE_ID, i, tex_width[i]);
$display("%t: core%0d-tex_csr: csr_tex%d_height, csr_data=%0h", $time, CORE_ID, i, tex_height[i]);
$display("%t: core%0d-tex_csr: csr_tex%d_stride, csr_data=%0h", $time, CORE_ID, i, tex_stride[i]);
$display("%t: core%0d-tex_csr: csr_tex%d_format, csr_data=%0h", $time, CORE_ID, i, tex_format[i]);
$display("%t: core%0d-tex_csr: csr_tex%d_wrap_u, csr_data=%0h", $time, CORE_ID, i, tex_wrap_u[i]);
$display("%t: core%0d-tex_csr: csr_tex%d_wrap_v, csr_data=%0h", $time, CORE_ID, i, tex_wrap_v[i]);
$display("%t: core%0d-tex_csr: csr_tex%d_filter, csr_data=%0h", $time, CORE_ID, i, tex_filter[i]);

View file

@ -10,7 +10,7 @@ CFLAGS += -I./include -I../hw
PROJECT = libvortexrt
SRCS = ./src/vx_start.S ./src/vx_print.S ./src/vx_print.c ./src/vx_spawn.c ./src/vx_tex.c
SRCS = ./src/vx_start.S ./src/vx_print.S ./src/vx_print.c ./src/vx_spawn.c
OBJS := $(addsuffix .o, $(notdir $(SRCS)))

View file

@ -74,16 +74,9 @@ inline void vx_join() {
// Warp Barrier
inline void vx_barrier(unsigned barried_id, unsigned num_warps) {
asm volatile (".insn s 0x6b, 4, %1, 0cd (%0)" :: "r"(barried_id), "r"(num_warps));
asm volatile (".insn s 0x6b, 4, %1, 0(%0)" :: "r"(barried_id), "r"(num_warps));
}
// Texture load
#define vx_tex_ld(unit, u, v, lod) ({ \
register unsigned result; \
asm volatile (".insn r4 0x6b, 5, " __ASM_STR(unit) ", %0, %1, %2, %3" :: "r"(result), "r"(u), "r"(v), "r"(lod)); \
result; \
})
// Return active warp's thread id
inline int vx_thread_id() {
int result;

View file

@ -1,14 +1,16 @@
#ifndef VX_API_H
#define VX_API_H
#include <stdint.h>
#include <stdio.h>
#ifndef VX_TEX_H
#define VX_TEX_H
#ifdef __cplusplus
extern "C" {
#endif
int vx_tex(unsigned t, unsigned u, unsigned v, unsigned lod);
// Issue a texture-sample instruction and return the value the hardware
// writes to the destination register.
//
//   unit : texture unit index; packed into bits [31:24] of the third operand
//   u, v : texture coordinates, passed through unchanged as the first and
//          second source operands
//   lod  : level of detail; occupies the low bits of the third operand
//          (NOTE(review): values wider than 24 bits would overlap `unit` -
//          callers are assumed to keep lod small; confirm against the RTL)
//
// The function is `static inline` because it is defined in a header: without
// internal linkage every translation unit including this file would emit its
// own external `vx_tex` symbol and the link would fail with a
// multiple-definition error.
static inline unsigned vx_tex(unsigned unit, unsigned u, unsigned v, unsigned lod) {
    unsigned result;
    // Unit and lod share one register so the instruction needs only three sources.
    unsigned lod_unit = (unit << 24) | lod;
    // Custom R4-type encoding: opcode 0x6b, funct3 5, funct2 0.
    asm volatile (".insn r4 0x6b, 5, 0, %0, %1, %2, %3" : "=r"(result) : "r"(u), "r"(v), "r"(lod_unit));
    return result;
}
#ifdef __cplusplus
}

Binary file not shown.

View file

@ -1028,46 +1028,3 @@ Disassembly of section .riscv.attributes:
1c: 326d jal fffff9c6 <.L50+0xfffff79e>
1e: 3070 fld fa2,224(s0)
20: 665f 7032 0030 0x307032665f
vx_tex.c.o: file format elf32-littleriscv
Disassembly of section .text.vx_tex:
00000000 <vx_tex>:
0: 00869693 slli a3,a3,0x8
4: 00a6e6b3 or a3,a3,a0
8: 00000513 li a0,0
c: 6ac5d56b 0x6ac5d56b
10: 00008067 ret
Disassembly of section .comment:
00000000 <.comment>:
0: 4700 lw s0,8(a4)
2: 203a4343 fmadd.s ft6,fs4,ft3,ft4,rmm
6: 4728 lw a0,72(a4)
8: 554e lw a0,240(sp)
a: 2029 jal 14 <vx_tex+0x14>
c: 2e39 jal 32a <vx_tex+0x32a>
e: 2e32 fld ft8,264(sp)
10: 0030 addi a2,sp,8
Disassembly of section .riscv.attributes:
00000000 <.riscv.attributes>:
0: 2541 jal 680 <vx_tex+0x680>
2: 0000 unimp
4: 7200 flw fs0,32(a2)
6: 7369 lui t1,0xffffa
8: 01007663 bgeu zero,a6,14 <vx_tex+0x14>
c: 0000001b 0x1b
10: 1004 addi s1,sp,32
12: 7205 lui tp,0xfffe1
14: 3376 fld ft6,376(sp)
16: 6932 flw fs2,12(sp)
18: 7032 flw ft0,44(sp)
1a: 5f30 lw a2,120(a4)
1c: 326d jal fffff9c6 <vx_tex+0xfffff9c6>
1e: 3070 fld fa2,224(s0)
20: 665f 7032 0030 0x307032665f

View file

@ -1,9 +0,0 @@
#include <VX_config.h>
@ .type vx_tex_ld, @function
@ .global vx_tex_ld
@ vx_tex_ld:
@ slli a1,a1,0x8
@ or a1,a1,a0
@ .word 0x5ae7952b
@ ret

View file

@ -1,13 +0,0 @@
#include <vx_tex.h>
#include <vx_intrinsics.h>
#include <inttypes.h>
#ifdef __cplusplus
extern "C" {
#endif
#define NUM_CORES_MAX 32
int vx_tex(unsigned t, unsigned u, unsigned v, unsigned lod){
return vx_tex_ld(t,u,v,lod);
}

Binary file not shown.

BIN
simX/simX

Binary file not shown.