mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
many fixes
This commit is contained in:
parent
2d48fe13c8
commit
39a8579c27
34 changed files with 5021 additions and 515 deletions
|
@ -8,14 +8,14 @@ CFLAGS += -I../../include -I../../../hw/simulate -I../../../hw
|
|||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSHR
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
||||
#DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
|
||||
#DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSHR
|
||||
#DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
|
||||
#DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
|
||||
#DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
||||
#DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
#DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
|
||||
#DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_TEX
|
||||
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
|
|
|
@ -24,7 +24,7 @@ LDFLAGS +=
|
|||
|
||||
PROJECT = basic
|
||||
|
||||
SRCS = basic.cpp
|
||||
SRCS = main.cpp
|
||||
|
||||
all: $(PROJECT) kernel.bin kernel.dump
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ CXXFLAGS += -I../../include
|
|||
|
||||
PROJECT = demo
|
||||
|
||||
SRCS = demo.cpp
|
||||
SRCS = main.cpp
|
||||
|
||||
all: $(PROJECT) kernel.bin kernel.dump
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ CXXFLAGS += -I../../include -I../../../hw
|
|||
|
||||
PROJECT = dogfood
|
||||
|
||||
SRCS = dogfood.cpp
|
||||
SRCS = main.cpp
|
||||
|
||||
all: $(PROJECT) kernel.bin kernel.dump
|
||||
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
demo.o: demo.cpp ../../include/vortex.h common.h utils.h
|
||||
main.o: main.cpp ../../include/vortex.h common.h utils.h
|
||||
utils.o: utils.cpp utils.h
|
||||
|
|
|
@ -13,6 +13,8 @@ VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
|
|||
|
||||
VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
|
||||
|
||||
VX_LDFLAGS += -lm
|
||||
|
||||
VX_SRCS = kernel.c
|
||||
|
||||
#CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
@ -22,7 +24,7 @@ CXXFLAGS += -I../../include
|
|||
|
||||
PROJECT = demo
|
||||
|
||||
SRCS = demo.cpp utils.cpp
|
||||
SRCS = main.cpp utils.cpp
|
||||
|
||||
all: $(PROJECT) kernel.bin kernel.dump
|
||||
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
#ifndef _COMMON_H_
|
||||
#define _COMMON_H_
|
||||
|
||||
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
|
||||
|
||||
struct kernel_arg_t {
|
||||
uint32_t num_tasks;
|
||||
uint32_t src_width;
|
||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -37,16 +37,15 @@ void kernel_body(int task_id, void* arg) {
|
|||
}
|
||||
|
||||
int main() {
|
||||
struct kernel_arg_t* arg = (struct kernel_arg_t*)0x0;
|
||||
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
||||
|
||||
// configure texture unit
|
||||
vx_csr_write(CSR_TEX_ADDR(0), arg->src_ptr);
|
||||
vx_csr_write(CSR_TEX_FORMAT(0), 0);
|
||||
vx_csr_write(CSR_TEX_MIPOFF(0), 0);
|
||||
vx_csr_write(CSR_TEX_WIDTH(0), ilog2(arg->src_width));
|
||||
vx_csr_write(CSR_TEX_HEIGHT(0), ilog2(arg->src_height));
|
||||
vx_csr_write(CSR_TEX_STRIDE(0), ilog2(arg->src_stride));
|
||||
vx_csr_write(CSR_TEX_WRAP_U(0), 0);
|
||||
vx_csr_write(CSR_TEX_WRAP_V(0), 0);
|
||||
vx_csr_write(CSR_TEX_FORMAT(0), 0);
|
||||
vx_csr_write(CSR_TEX_WRAP(0), 0);
|
||||
vx_csr_write(CSR_TEX_FILTER(0), 0);
|
||||
|
||||
struct tile_arg_t targ;
|
||||
|
@ -56,5 +55,5 @@ int main() {
|
|||
targ.deltaX = 1.0f / arg->dst_width;
|
||||
targ.deltaY = 1.0f / arg->dst_height;
|
||||
|
||||
vx_spawn_tasks(arg->num_tasks, kernel_body, targ);
|
||||
vx_spawn_tasks(arg->num_tasks, kernel_body, &targ);
|
||||
}
|
File diff suppressed because it is too large
Load diff
Binary file not shown.
|
@ -134,14 +134,10 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
size_t arg_addr, src_addr, dst_addr;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, sizeof(kernel_arg_t), &arg_addr));
|
||||
size_t src_addr, dst_addr;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, src_bufsize, &src_addr));
|
||||
RT_CHECK(vx_alloc_dev_mem(device, dst_bufsize, &dst_addr));
|
||||
|
||||
assert(arg_addr == ALLOC_BASE_ADDR);
|
||||
|
||||
std::cout << "arg_addr=" << std::hex << arg_addr << std::endl;
|
||||
std::cout << "src_addr=" << std::hex << src_addr << std::endl;
|
||||
std::cout << "dst_addr=" << std::hex << dst_addr << std::endl;
|
||||
|
||||
|
@ -154,20 +150,22 @@ int main(int argc, char *argv[]) {
|
|||
std::cout << "upload kernel argument" << std::endl;
|
||||
{
|
||||
kernel_arg.num_tasks = std::min<uint32_t>(num_tasks, dst_height);
|
||||
|
||||
kernel_arg.src_width = src_width;
|
||||
kernel_arg.src_height = src_height;
|
||||
kernel_arg.src_stride = src_bpp;
|
||||
kernel_arg.src_pitch = src_bpp * src_width * src_height;
|
||||
kernel_arg.src_ptr = src_addr;
|
||||
|
||||
kernel_arg.dst_width = dst_width;
|
||||
kernel_arg.dst_height = dst_height;
|
||||
kernel_arg.dst_stride = dst_bpp;
|
||||
kernel_arg.dst_pitch = dst_bpp * dst_width * dst_height;
|
||||
kernel_arg.src_ptr = src_addr;
|
||||
kernel_arg.dst_pitch = dst_bpp * dst_width * dst_height;
|
||||
kernel_arg.dst_ptr = dst_addr;
|
||||
|
||||
auto buf_ptr = (int*)vx_host_ptr(buffer);
|
||||
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
|
||||
RT_CHECK(vx_copy_to_dev(buffer, arg_addr, sizeof(kernel_arg_t), 0));
|
||||
RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
|
||||
}
|
||||
|
||||
// upload source buffer0
|
|
@ -241,17 +241,16 @@
|
|||
|
||||
`define NUM_TEX_UNITS 2
|
||||
|
||||
`define CSR_TEX_STATES 8
|
||||
`define CSR_TEX_STATES 7
|
||||
`define CSR_TEX_BEGIN(x) (12'hFD0 + (x) * `CSR_TEX_STATES)
|
||||
|
||||
`define CSR_TEX_ADDR(x) (`CSR_TEX_BEGIN(x) + 12'h00)
|
||||
`define CSR_TEX_FORMAT(x) (`CSR_TEX_BEGIN(x) + 12'h01)
|
||||
`define CSR_TEX_WIDTH(x) (`CSR_TEX_BEGIN(x) + 12'h02)
|
||||
`define CSR_TEX_HEIGHT(x) (`CSR_TEX_BEGIN(x) + 12'h03)
|
||||
`define CSR_TEX_STRIDE(x) (`CSR_TEX_BEGIN(x) + 12'h04)
|
||||
`define CSR_TEX_WRAP_U(x) (`CSR_TEX_BEGIN(x) + 12'h05)
|
||||
`define CSR_TEX_WRAP_V(x) (`CSR_TEX_BEGIN(x) + 12'h06)
|
||||
`define CSR_TEX_FILTER(x) (`CSR_TEX_BEGIN(x) + 12'h07)
|
||||
`define CSR_TEX_WRAP(x) (`CSR_TEX_BEGIN(x) + 12'h02)
|
||||
`define CSR_TEX_FILTER(x) (`CSR_TEX_BEGIN(x) + 12'h03)
|
||||
`define CSR_TEX_MIPOFF(x) (`CSR_TEX_BEGIN(x) + 12'h04)
|
||||
`define CSR_TEX_WIDTH(x) (`CSR_TEX_BEGIN(x) + 12'h05)
|
||||
`define CSR_TEX_HEIGHT(x) (`CSR_TEX_BEGIN(x) + 12'h06)
|
||||
|
||||
// Pipeline Queues ////////////////////////////////////////////////////////////
|
||||
|
||||
|
|
|
@ -203,8 +203,9 @@ module VX_csr_data #(
|
|||
`CSR_MARCHID : read_data_r = `ARCHITECTURE_ID;
|
||||
`CSR_MIMPID : read_data_r = `IMPLEMENTATION_ID;
|
||||
|
||||
default: begin
|
||||
assert(~read_enable) else $error("%t: invalid CSR read address: %0h", $time, read_addr);
|
||||
default: begin
|
||||
assert (~read_enable || read_addr >= `CSR_TEX_BEGIN(0) && read_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES))
|
||||
else $error("%t: invalid CSR read address: %0h", $time, read_addr);
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
|
|
@ -31,6 +31,7 @@ module VX_decode #(
|
|||
wire [6:0] opcode = instr[6:0];
|
||||
wire [2:0] func3 = instr[14:12];
|
||||
wire [6:0] func7 = instr[31:25];
|
||||
wire [1:0] func2 = instr[26:25];
|
||||
wire [11:0] u_12 = instr[31:20];
|
||||
|
||||
wire [4:0] rd = instr[11:7];
|
||||
|
@ -361,7 +362,7 @@ module VX_decode #(
|
|||
`ifdef EXT_TEX_ENABLE
|
||||
3'h5: begin
|
||||
op_type = `OP_BITS'(`GPU_TEX);
|
||||
op_mod = `MOD_BITS'(instr[26:25]);
|
||||
op_mod = `MOD_BITS'(func2);
|
||||
use_rd = 1;
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
|
|
|
@ -19,18 +19,19 @@ module VX_tex_addr #(
|
|||
input wire [31:0] req_PC,
|
||||
input wire [REQ_INFO_WIDTH-1:0] req_info,
|
||||
|
||||
input wire [`TEX_FORMAT_BITS-1:0] format,
|
||||
input wire [`TEX_FILTER_BITS-1:0] filter,
|
||||
input wire [`TEX_WRAP_BITS-1:0] wrap_u,
|
||||
input wire [`TEX_WRAP_BITS-1:0] wrap_v,
|
||||
|
||||
input wire [`TEX_ADDR_BITS-1:0] base_addr,
|
||||
input wire [`TEX_STRIDE_BITS-1:0] log_stride,
|
||||
input wire [`TEX_WIDTH_BITS-1:0] log_width,
|
||||
input wire [`TEX_HEIGHT_BITS-1:0] log_height,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][`TEX_MIPOFF_BITS-1:0] mip_offsets,
|
||||
input wire [`NUM_THREADS-1:0][`TEX_WIDTH_BITS-1:0] log_widths,
|
||||
input wire [`NUM_THREADS-1:0][`TEX_HEIGHT_BITS-1:0] log_heights,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] coord_u,
|
||||
input wire [`NUM_THREADS-1:0][31:0] coord_v,
|
||||
input wire [`NUM_THREADS-1:0][31:0] lod,
|
||||
|
||||
// outputs
|
||||
|
||||
|
@ -48,10 +49,19 @@ module VX_tex_addr #(
|
|||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_VAR (lod)
|
||||
|
||||
wire [1:0][`NUM_THREADS-1:0][`FIXED_FRAC-1:0] u;
|
||||
wire [1:0][`NUM_THREADS-1:0][`FIXED_FRAC-1:0] v;
|
||||
wire [`TEX_STRIDE_BITS-1:0] log_stride;
|
||||
|
||||
// stride
|
||||
|
||||
VX_tex_stride #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) tex_stride (
|
||||
.format (format),
|
||||
.log_stride (log_stride)
|
||||
);
|
||||
|
||||
// addressing mode
|
||||
|
||||
|
@ -60,10 +70,10 @@ module VX_tex_addr #(
|
|||
wire [31:0] fu[1:0];
|
||||
wire [31:0] fv[1:0];
|
||||
|
||||
assign fu[0] = coord_u[i] - (filter ? (`FIXED_HALF >> log_width) : 0);
|
||||
assign fv[0] = coord_v[i] - (filter ? (`FIXED_HALF >> log_height) : 0);
|
||||
assign fu[1] = coord_u[i] + (filter ? (`FIXED_HALF >> log_width) : 0);
|
||||
assign fv[1] = coord_v[i] + (filter ? (`FIXED_HALF >> log_height) : 0);
|
||||
assign fu[0] = coord_u[i] - (filter ? (`FIXED_HALF >> log_widths[i]) : 0);
|
||||
assign fv[0] = coord_v[i] - (filter ? (`FIXED_HALF >> log_heights[i]) : 0);
|
||||
assign fu[1] = coord_u[i] + (filter ? (`FIXED_HALF >> log_widths[i]) : 0);
|
||||
assign fv[1] = coord_v[i] + (filter ? (`FIXED_HALF >> log_heights[i]) : 0);
|
||||
|
||||
VX_tex_wrap #(
|
||||
.CORE_ID (CORE_ID)
|
||||
|
@ -107,15 +117,15 @@ module VX_tex_addr #(
|
|||
wire [`FIXED_FRAC-1:0] x [1:0];
|
||||
wire [`FIXED_FRAC-1:0] y [1:0];
|
||||
|
||||
assign x[0] = u[0][i] >> ((`FIXED_FRAC) - log_width);
|
||||
assign x[1] = u[1][i] >> ((`FIXED_FRAC) - log_width);
|
||||
assign y[0] = v[0][i] >> ((`FIXED_FRAC) - log_height);
|
||||
assign y[1] = v[1][i] >> ((`FIXED_FRAC) - log_height);
|
||||
assign x[0] = u[0][i] >> ((`FIXED_FRAC) - log_widths[i]);
|
||||
assign x[1] = u[1][i] >> ((`FIXED_FRAC) - log_widths[i]);
|
||||
assign y[0] = v[0][i] >> ((`FIXED_FRAC) - log_heights[i]);
|
||||
assign y[1] = v[1][i] >> ((`FIXED_FRAC) - log_heights[i]);
|
||||
|
||||
assign addr[i][0] = base_addr + (32'(x[0]) + (32'(y[0]) << log_width)) << log_stride;
|
||||
assign addr[i][1] = base_addr + (32'(x[1]) + (32'(y[0]) << log_width)) << log_stride;
|
||||
assign addr[i][2] = base_addr + (32'(x[0]) + (32'(y[1]) << log_width)) << log_stride;
|
||||
assign addr[i][3] = base_addr + (32'(x[1]) + (32'(y[1]) << log_width)) << log_stride;
|
||||
assign addr[i][0] = base_addr + 32'(mip_offsets[i]) + (32'(x[0]) + (32'(y[0]) << log_widths[i])) << log_stride;
|
||||
assign addr[i][1] = base_addr + 32'(mip_offsets[i]) + (32'(x[1]) + (32'(y[0]) << log_widths[i])) << log_stride;
|
||||
assign addr[i][2] = base_addr + 32'(mip_offsets[i]) + (32'(x[0]) + (32'(y[1]) << log_widths[i])) << log_stride;
|
||||
assign addr[i][3] = base_addr + 32'(mip_offsets[i]) + (32'(x[1]) + (32'(y[1]) << log_widths[i])) << log_stride;
|
||||
end
|
||||
|
||||
wire stall_out = mem_req_valid && ~mem_req_ready;
|
||||
|
|
|
@ -15,25 +15,33 @@
|
|||
|
||||
`define LERP_64(x1,x2,frac) ((x2 + (((x1 - x2) * frac) >> `BLEND_FRAC_64)) & 64'h00ff00ff00ff00ff)
|
||||
|
||||
`define TEX_ADDR_BITS 32
|
||||
`define TEX_FORMAT_BITS 3
|
||||
`define TEX_WRAP_BITS 2
|
||||
`define TEX_WIDTH_BITS 4
|
||||
`define TEX_HEIGHT_BITS 4
|
||||
`define TEX_STRIDE_BITS 2
|
||||
`define TEX_FILTER_BITS 1
|
||||
`define TEX_ADDR_BITS 32
|
||||
`define TEX_FORMAT_BITS 3
|
||||
`define TEX_WRAP_BITS 2
|
||||
`define TEX_WIDTH_BITS 4
|
||||
`define TEX_HEIGHT_BITS 4
|
||||
`define TEX_FILTER_BITS 1
|
||||
|
||||
`define TEX_WRAP_REPEAT 0
|
||||
`define TEX_WRAP_CLAMP 1
|
||||
`define TEX_WRAP_MIRROR 2
|
||||
`define TEX_MIPOFF_BITS (2*12+1)
|
||||
`define TEX_STRIDE_BITS 2
|
||||
|
||||
`define MAX_COLOR_WIDTH 8
|
||||
`define NUM_COLOR_CHANNEL 4
|
||||
`define TEX_LOD_BITS 4
|
||||
`define TEX_MIP_BITS (`NTEX_BITS + `TEX_LOD_BITS)
|
||||
|
||||
`define TEX_COLOR_BITS 8
|
||||
`define TEX_WRAP_REPEAT 0
|
||||
`define TEX_WRAP_CLAMP 1
|
||||
`define TEX_WRAP_MIRROR 2
|
||||
|
||||
`define TEX_FORMAT_R5G6B5 `TEX_FORMAT_BITS'(1)
|
||||
`define TEX_FORMAT_R8G8B8 `TEX_FORMAT_BITS'(2)
|
||||
`define TEX_FORMAT_R8G8B8A8 `TEX_FORMAT_BITS'(3)
|
||||
`define MAX_COLOR_WIDTH 8
|
||||
`define NUM_COLOR_CHANNEL 4
|
||||
|
||||
`define TEX_COLOR_BITS 8
|
||||
|
||||
`define TEX_FORMAT_R8G8B8A8 `TEX_FORMAT_BITS'(0)
|
||||
`define TEX_FORMAT_R5G6B5 `TEX_FORMAT_BITS'(1)
|
||||
`define TEX_FORMAT_R4G4B4A4 `TEX_FORMAT_BITS'(2)
|
||||
`define TEX_FORMAT_L8A8 `TEX_FORMAT_BITS'(3)
|
||||
`define TEX_FORMAT_L8 `TEX_FORMAT_BITS'(4)
|
||||
`define TEX_FORMAT_A8 `TEX_FORMAT_BITS'(5)
|
||||
|
||||
`endif
|
|
@ -1,7 +1,7 @@
|
|||
`include "VX_tex_define.vh"
|
||||
|
||||
module VX_tex_format #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter CORE_ID = 0,
|
||||
parameter NUM_TEXELS = 4 //BILINEAR
|
||||
) (
|
||||
input wire [NUM_TEXELS-1:0][31:0] texel_data,
|
||||
|
@ -13,32 +13,32 @@ module VX_tex_format #(
|
|||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
reg [`NUM_COLOR_CHANNEL-1:0] color_enable_r;
|
||||
reg [NUM_TEXELS-1:0][63:0] formatted_texel_r;
|
||||
reg [NUM_TEXELS-1:0][63:0] formatted_texel_r;
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i<NUM_TEXELS ;i++ ) begin
|
||||
case (format)
|
||||
`TEX_FORMAT_R5G6B5: begin
|
||||
formatted_texel_r[i][55:48] = `TEX_COLOR_BITS'(texel_data[i][15:11]);
|
||||
formatted_texel_r[i][39:32] = `TEX_COLOR_BITS'(texel_data[i][10:5]);
|
||||
formatted_texel_r[i][23:16] = `TEX_COLOR_BITS'(texel_data[i][4:0]);
|
||||
formatted_texel_r[i][7:0] = {`TEX_COLOR_BITS{1'b0}};
|
||||
formatted_texel_r[i][07:00] = `TEX_COLOR_BITS'(texel_data[i][4:0]);
|
||||
formatted_texel_r[i][23:16] = `TEX_COLOR_BITS'(texel_data[i][10:5]);
|
||||
formatted_texel_r[i][39:32] = `TEX_COLOR_BITS'(texel_data[i][15:11]);
|
||||
formatted_texel_r[i][55:48] = {`TEX_COLOR_BITS{1'b0}};
|
||||
if (i == 0)
|
||||
color_enable_r = 4'b1110;
|
||||
color_enable_r = 4'b0111;
|
||||
end
|
||||
`TEX_FORMAT_R8G8B8: begin
|
||||
formatted_texel_r[i][55:48] = `TEX_COLOR_BITS'(texel_data[i][23:16]);
|
||||
formatted_texel_r[i][39:32] = `TEX_COLOR_BITS'(texel_data[i][15:8]);
|
||||
formatted_texel_r[i][23:16] = `TEX_COLOR_BITS'(texel_data[i][7:0]);
|
||||
formatted_texel_r[i][7:0] = {`TEX_COLOR_BITS{1'b0}};
|
||||
`TEX_FORMAT_R4G4B4A4: begin
|
||||
formatted_texel_r[i][07:00] = `TEX_COLOR_BITS'(texel_data[i][3:0]);
|
||||
formatted_texel_r[i][23:16] = `TEX_COLOR_BITS'(texel_data[i][7:4]);
|
||||
formatted_texel_r[i][39:32] = `TEX_COLOR_BITS'(texel_data[i][11:8]);
|
||||
formatted_texel_r[i][55:48] = `TEX_COLOR_BITS'(texel_data[i][15:12]);
|
||||
if (i == 0)
|
||||
color_enable_r = 4'b1110;
|
||||
color_enable_r = 4'b0111;
|
||||
end
|
||||
default: begin // `TEX_FORMAT_R8G8B8A8:
|
||||
formatted_texel_r[i][55:48] = `TEX_COLOR_BITS'(texel_data[i][31:24]);
|
||||
formatted_texel_r[i][39:32] = `TEX_COLOR_BITS'(texel_data[i][23:16]);
|
||||
formatted_texel_r[i][07:00] = `TEX_COLOR_BITS'(texel_data[i][7:0]);
|
||||
formatted_texel_r[i][23:16] = `TEX_COLOR_BITS'(texel_data[i][15:8]);
|
||||
formatted_texel_r[i][7:0] = `TEX_COLOR_BITS'(texel_data[i][7:0]);
|
||||
formatted_texel_r[i][39:32] = `TEX_COLOR_BITS'(texel_data[i][23:16]);
|
||||
formatted_texel_r[i][55:48] = `TEX_COLOR_BITS'(texel_data[i][31:24]);
|
||||
if (i == 0)
|
||||
color_enable_r = 4'b1111;
|
||||
end
|
||||
|
@ -46,9 +46,9 @@ module VX_tex_format #(
|
|||
end
|
||||
end
|
||||
|
||||
assign color_enable = color_enable_r;
|
||||
assign color_enable = color_enable_r;
|
||||
|
||||
for (genvar i = 0;i<NUM_TEXELS ;i++ ) begin
|
||||
for (genvar i = 0; i < NUM_TEXELS; i++) begin
|
||||
assign formatted_texel[i] = formatted_texel_r[i] & 64'h00ff00ff00ff00ff;
|
||||
end
|
||||
|
||||
|
|
|
@ -33,8 +33,8 @@ module VX_tex_sampler #(
|
|||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] req_data ;
|
||||
wire [`NUM_THREADS-1:0][31:0] req_data_bilerp ;
|
||||
wire [`NUM_THREADS-1:0][31:0] req_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] req_data_bilerp;
|
||||
|
||||
wire stall_out;
|
||||
|
||||
|
|
27
hw/rtl/tex_unit/VX_tex_stride.v
Normal file
27
hw/rtl/tex_unit/VX_tex_stride.v
Normal file
|
@ -0,0 +1,27 @@
|
|||
`include "VX_tex_define.vh"
|
||||
|
||||
module VX_tex_stride #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire [`TEX_FORMAT_BITS-1:0] format,
|
||||
output wire [`TEX_STRIDE_BITS-1:0] log_stride
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
reg [`TEX_STRIDE_BITS-1:0] log_stride_r;
|
||||
|
||||
always @(*) begin
|
||||
case (format)
|
||||
`TEX_FORMAT_A8: log_stride_r = 0;
|
||||
`TEX_FORMAT_L8: log_stride_r = 0;
|
||||
`TEX_FORMAT_L8A8: log_stride_r = 1;
|
||||
`TEX_FORMAT_R5G6B5: log_stride_r = 1;
|
||||
`TEX_FORMAT_R4G4B4A4: log_stride_r = 1;
|
||||
// `TEX_FORMAT_R8G8B8A8
|
||||
default: log_stride_r = 2;
|
||||
endcase
|
||||
end
|
||||
|
||||
assign log_stride = log_stride_r;
|
||||
|
||||
endmodule
|
|
@ -24,25 +24,24 @@ module VX_tex_unit #(
|
|||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
reg [`TEX_ADDR_BITS-1:0] tex_addr [`NUM_TEX_UNITS-1: 0];
|
||||
reg [`TEX_FORMAT_BITS-1:0] tex_format [`NUM_TEX_UNITS-1: 0];
|
||||
reg [`TEX_WIDTH_BITS-1:0] tex_width [`NUM_TEX_UNITS-1: 0];
|
||||
reg [`TEX_HEIGHT_BITS-1:0] tex_height [`NUM_TEX_UNITS-1: 0];
|
||||
reg [`TEX_STRIDE_BITS-1:0] tex_stride [`NUM_TEX_UNITS-1: 0];
|
||||
reg [`TEX_WRAP_BITS-1:0] tex_wrap_u [`NUM_TEX_UNITS-1: 0];
|
||||
reg [`TEX_WRAP_BITS-1:0] tex_wrap_v [`NUM_TEX_UNITS-1: 0];
|
||||
reg [`TEX_FILTER_BITS-1:0] tex_filter [`NUM_TEX_UNITS-1: 0];
|
||||
reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [(1 << `TEX_MIP_BITS)-1:0];
|
||||
reg [`TEX_WIDTH_BITS-1:0] tex_width [(1 << `TEX_MIP_BITS)-1:0];
|
||||
reg [`TEX_HEIGHT_BITS-1:0] tex_height [(1 << `TEX_MIP_BITS)-1:0];
|
||||
|
||||
// CSRs programming
|
||||
reg [`TEX_ADDR_BITS-1:0] tex_addr [`NUM_TEX_UNITS-1:0];
|
||||
reg [`TEX_FORMAT_BITS-1:0] tex_format [`NUM_TEX_UNITS-1:0];
|
||||
reg [`TEX_WRAP_BITS-1:0] tex_wrap_u [`NUM_TEX_UNITS-1:0];
|
||||
reg [`TEX_WRAP_BITS-1:0] tex_wrap_v [`NUM_TEX_UNITS-1:0];
|
||||
reg [`TEX_FILTER_BITS-1:0] tex_filter [`NUM_TEX_UNITS-1:0];
|
||||
|
||||
// CSRs programming
|
||||
|
||||
for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin
|
||||
wire [`TEX_MIP_BITS-1:0] mip_waddr = tex_csr_if.write_data[24 +: `TEX_MIP_BITS];
|
||||
always @(posedge clk ) begin
|
||||
if (reset) begin
|
||||
if (reset) begin
|
||||
tex_addr[i] <= 0;
|
||||
tex_format[i] <= 0;
|
||||
tex_width[i] <= 0;
|
||||
tex_height[i] <= 0;
|
||||
tex_stride[i] <= 0;
|
||||
tex_wrap_u[i] <= 0;
|
||||
tex_wrap_v[i] <= 0;
|
||||
tex_filter[i] <= 0;
|
||||
|
@ -51,12 +50,20 @@ module VX_tex_unit #(
|
|||
case (tex_csr_if.write_addr)
|
||||
`CSR_TEX_ADDR(i) : tex_addr[i] <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0];
|
||||
`CSR_TEX_FORMAT(i) : tex_format[i] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0];
|
||||
`CSR_TEX_WIDTH(i) : tex_width[i] <= tex_csr_if.write_data[`TEX_WIDTH_BITS-1:0];
|
||||
`CSR_TEX_HEIGHT(i) : tex_height[i] <= tex_csr_if.write_data[`TEX_HEIGHT_BITS-1:0];
|
||||
`CSR_TEX_STRIDE(i) : tex_stride[i] <= tex_csr_if.write_data[`TEX_STRIDE_BITS-1:0];
|
||||
`CSR_TEX_WRAP_U(i) : tex_wrap_u[i] <= tex_csr_if.write_data[`TEX_WRAP_BITS-1:0];
|
||||
`CSR_TEX_WRAP_V(i) : tex_wrap_v[i] <= tex_csr_if.write_data[`TEX_WRAP_BITS-1:0];
|
||||
`CSR_TEX_FILTER(i) : tex_filter[i] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0];
|
||||
`CSR_TEX_WRAP(i) : begin
|
||||
tex_wrap_u[i] <= tex_csr_if.write_data[0 +: `TEX_WRAP_BITS];
|
||||
tex_wrap_v[i] <= tex_csr_if.write_data[`TEX_WRAP_BITS +: `TEX_WRAP_BITS];
|
||||
end
|
||||
`CSR_TEX_FILTER(i) : tex_filter[i] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0];
|
||||
`CSR_TEX_MIPOFF(i) : begin
|
||||
tex_mipoff[mip_waddr] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
|
||||
end
|
||||
`CSR_TEX_WIDTH(i) : begin
|
||||
tex_width[mip_waddr] <= tex_csr_if.write_data[`TEX_WIDTH_BITS-1:0];
|
||||
end
|
||||
`CSR_TEX_HEIGHT(i) : begin
|
||||
tex_height[mip_waddr] <= tex_csr_if.write_data[`TEX_HEIGHT_BITS-1:0];
|
||||
end
|
||||
default:
|
||||
assert(tex_csr_if.write_addr >= `CSR_TEX_BEGIN(0)
|
||||
&& tex_csr_if.write_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES));
|
||||
|
@ -66,6 +73,19 @@ module VX_tex_unit #(
|
|||
end
|
||||
end
|
||||
|
||||
// mipmap attributes
|
||||
|
||||
wire [`NUM_THREADS-1:0][`TEX_MIPOFF_BITS-1:0] tex_mipoffs;
|
||||
wire [`NUM_THREADS-1:0][`TEX_WIDTH_BITS-1:0] tex_widths;
|
||||
wire [`NUM_THREADS-1:0][`TEX_HEIGHT_BITS-1:0] tex_heights;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
wire [`TEX_MIP_BITS-1:0] mip_raddr = {tex_req_if.unit[`NTEX_BITS-1:0], tex_req_if.lod[i][`TEX_LOD_BITS-1:0]};
|
||||
assign tex_mipoffs[i] = tex_mipoff[mip_raddr];
|
||||
assign tex_widths[i] = tex_width[mip_raddr];
|
||||
assign tex_heights[i] = tex_height[mip_raddr];
|
||||
end
|
||||
|
||||
// address generation
|
||||
|
||||
wire mem_req_valid;
|
||||
|
@ -87,7 +107,7 @@ module VX_tex_unit #(
|
|||
wire [`TEX_FILTER_BITS-1:0] mem_rsp_filter;
|
||||
wire [`NUM_THREADS-1:0][3:0][31:0] mem_rsp_data;
|
||||
wire [REQ_INFO_WIDTH_M-1:0] mem_rsp_info;
|
||||
wire mem_rsp_ready;
|
||||
wire mem_rsp_ready;
|
||||
|
||||
VX_tex_addr #(
|
||||
.REQ_INFO_WIDTH (REQ_INFO_WIDTH_A)
|
||||
|
@ -103,18 +123,18 @@ module VX_tex_unit #(
|
|||
.req_PC (tex_req_if.PC),
|
||||
.req_info ({tex_format[tex_req_if.unit], tex_req_if.rd, tex_req_if.wb}),
|
||||
|
||||
.format (tex_format[tex_req_if.unit]),
|
||||
.filter (tex_filter[tex_req_if.unit]),
|
||||
.wrap_u (tex_wrap_u[tex_req_if.unit]),
|
||||
.wrap_v (tex_wrap_v[tex_req_if.unit]),
|
||||
|
||||
.base_addr (tex_addr[tex_req_if.unit]),
|
||||
.log_stride (tex_stride[tex_req_if.unit]),
|
||||
.log_width (tex_width[tex_req_if.unit]),
|
||||
.log_height (tex_height[tex_req_if.unit]),
|
||||
.base_addr (tex_addr[tex_req_if.unit]),
|
||||
.mip_offsets (tex_mipoffs),
|
||||
.log_widths (tex_widths),
|
||||
.log_heights (tex_heights),
|
||||
|
||||
.coord_u (tex_req_if.u),
|
||||
.coord_v (tex_req_if.v),
|
||||
.lod (tex_req_if.lod),
|
||||
|
||||
.mem_req_valid (mem_req_valid),
|
||||
.mem_req_wid (mem_req_wid),
|
||||
|
@ -211,10 +231,9 @@ module VX_tex_unit #(
|
|||
&& (tex_csr_if.write_addr >= `CSR_TEX_BEGIN(i)
|
||||
&& tex_csr_if.write_addr < `CSR_TEX_BEGIN(i+1))) begin
|
||||
$display("%t: core%0d-tex_csr: csr_tex%d_addr, csr_data=%0h", $time, CORE_ID, i, tex_addr[i]);
|
||||
$display("%t: core%0d-tex_csr: csr_tex%d_format, csr_data=%0h", $time, CORE_ID, i, tex_format[i]);
|
||||
$display("%t: core%0d-tex_csr: csr_tex%d_width, csr_data=%0h", $time, CORE_ID, i, tex_width[i]);
|
||||
$display("%t: core%0d-tex_csr: csr_tex%d_height, csr_data=%0h", $time, CORE_ID, i, tex_height[i]);
|
||||
$display("%t: core%0d-tex_csr: csr_tex%d_stride, csr_data=%0h", $time, CORE_ID, i, tex_stride[i]);
|
||||
$display("%t: core%0d-tex_csr: csr_tex%d_format, csr_data=%0h", $time, CORE_ID, i, tex_format[i]);
|
||||
$display("%t: core%0d-tex_csr: csr_tex%d_wrap_u, csr_data=%0h", $time, CORE_ID, i, tex_wrap_u[i]);
|
||||
$display("%t: core%0d-tex_csr: csr_tex%d_wrap_v, csr_data=%0h", $time, CORE_ID, i, tex_wrap_v[i]);
|
||||
$display("%t: core%0d-tex_csr: csr_tex%d_filter, csr_data=%0h", $time, CORE_ID, i, tex_filter[i]);
|
||||
|
|
|
@ -10,7 +10,7 @@ CFLAGS += -I./include -I../hw
|
|||
|
||||
PROJECT = libvortexrt
|
||||
|
||||
SRCS = ./src/vx_start.S ./src/vx_print.S ./src/vx_print.c ./src/vx_spawn.c ./src/vx_tex.c
|
||||
SRCS = ./src/vx_start.S ./src/vx_print.S ./src/vx_print.c ./src/vx_spawn.c
|
||||
|
||||
OBJS := $(addsuffix .o, $(notdir $(SRCS)))
|
||||
|
||||
|
|
|
@ -74,16 +74,9 @@ inline void vx_join() {
|
|||
|
||||
// Warp Barrier
|
||||
inline void vx_barrier(unsigned barried_id, unsigned num_warps) {
|
||||
asm volatile (".insn s 0x6b, 4, %1, 0cd (%0)" :: "r"(barried_id), "r"(num_warps));
|
||||
asm volatile (".insn s 0x6b, 4, %1, 0(%0)" :: "r"(barried_id), "r"(num_warps));
|
||||
}
|
||||
|
||||
// Texture load
|
||||
#define vx_tex_ld(unit, u, v, lod) ({ \
|
||||
register unsigned result; \
|
||||
asm volatile (".insn r4 0x6b, 5, " __ASM_STR(unit) ", %0, %1, %2, %3" :: "r"(result), "r"(u), "r"(v), "r"(lod)); \
|
||||
result; \
|
||||
})
|
||||
|
||||
// Return active warp's thread id
|
||||
inline int vx_thread_id() {
|
||||
int result;
|
||||
|
|
|
@ -1,14 +1,16 @@
|
|||
#ifndef VX_API_H
|
||||
#define VX_API_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#ifndef VX_TEX_H
|
||||
#define VX_TEX_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
int vx_tex(unsigned t, unsigned u, unsigned v, unsigned lod);
|
||||
unsigned vx_tex(unsigned unit, unsigned u, unsigned v, unsigned lod) {
|
||||
unsigned result;
|
||||
unsigned lod_unit = (unit << 24) | lod;
|
||||
asm volatile (".insn r4 0x6b, 5, 0, %0, %1, %2, %3" : "=r"(result) : "r"(u), "r"(v), "r"(lod_unit));
|
||||
return result;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
Binary file not shown.
|
@ -1028,46 +1028,3 @@ Disassembly of section .riscv.attributes:
|
|||
1c: 326d jal fffff9c6 <.L50+0xfffff79e>
|
||||
1e: 3070 fld fa2,224(s0)
|
||||
20: 665f 7032 0030 0x307032665f
|
||||
|
||||
vx_tex.c.o: file format elf32-littleriscv
|
||||
|
||||
|
||||
Disassembly of section .text.vx_tex:
|
||||
|
||||
00000000 <vx_tex>:
|
||||
0: 00869693 slli a3,a3,0x8
|
||||
4: 00a6e6b3 or a3,a3,a0
|
||||
8: 00000513 li a0,0
|
||||
c: 6ac5d56b 0x6ac5d56b
|
||||
10: 00008067 ret
|
||||
|
||||
Disassembly of section .comment:
|
||||
|
||||
00000000 <.comment>:
|
||||
0: 4700 lw s0,8(a4)
|
||||
2: 203a4343 fmadd.s ft6,fs4,ft3,ft4,rmm
|
||||
6: 4728 lw a0,72(a4)
|
||||
8: 554e lw a0,240(sp)
|
||||
a: 2029 jal 14 <vx_tex+0x14>
|
||||
c: 2e39 jal 32a <vx_tex+0x32a>
|
||||
e: 2e32 fld ft8,264(sp)
|
||||
10: 0030 addi a2,sp,8
|
||||
|
||||
Disassembly of section .riscv.attributes:
|
||||
|
||||
00000000 <.riscv.attributes>:
|
||||
0: 2541 jal 680 <vx_tex+0x680>
|
||||
2: 0000 unimp
|
||||
4: 7200 flw fs0,32(a2)
|
||||
6: 7369 lui t1,0xffffa
|
||||
8: 01007663 bgeu zero,a6,14 <vx_tex+0x14>
|
||||
c: 0000001b 0x1b
|
||||
10: 1004 addi s1,sp,32
|
||||
12: 7205 lui tp,0xfffe1
|
||||
14: 3376 fld ft6,376(sp)
|
||||
16: 6932 flw fs2,12(sp)
|
||||
18: 7032 flw ft0,44(sp)
|
||||
1a: 5f30 lw a2,120(a4)
|
||||
1c: 326d jal fffff9c6 <vx_tex+0xfffff9c6>
|
||||
1e: 3070 fld fa2,224(s0)
|
||||
20: 665f 7032 0030 0x307032665f
|
||||
|
|
|
@ -1,9 +0,0 @@
|
|||
#include <VX_config.h>
|
||||
|
||||
@ .type vx_tex_ld, @function
|
||||
@ .global vx_tex_ld
|
||||
@ vx_tex_ld:
|
||||
@ slli a1,a1,0x8
|
||||
@ or a1,a1,a0
|
||||
@ .word 0x5ae7952b
|
||||
@ ret
|
|
@ -1,13 +0,0 @@
|
|||
#include <vx_tex.h>
|
||||
#include <vx_intrinsics.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define NUM_CORES_MAX 32
|
||||
|
||||
int vx_tex(unsigned t, unsigned u, unsigned v, unsigned lod){
|
||||
return vx_tex_ld(t,u,v,lod);
|
||||
}
|
Binary file not shown.
BIN
simX/simX
BIN
simX/simX
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue