minor updates

This commit is contained in:
Blaise Tine 2021-03-28 18:08:04 -04:00
parent 6514a3b782
commit f968dbccd3
9 changed files with 80 additions and 86 deletions

Binary file not shown.

View file

@ -27,7 +27,7 @@ void kernel_body(int task_id, void* arg) {
for (uint32_t x = 0; x < _arg->tile_width; ++x) {
int32_t u = (int32_t)(fu * (1<<20));
int32_t v = (int32_t)(fv * (1<<20));
dst_ptr[x] = vx_tex(0, u, v, 0);
dst_ptr[x] = vx_tex(0, u, v, 0x0);
fu += _arg->deltaX;
}
dst_ptr += _arg->karg.dst_pitch;

View file

@ -36,23 +36,23 @@ Disassembly of section .text:
80000060: 04912223 sw s1,68(sp)
80000064: 01442783 lw a5,20(s0) # 7ffff014 <__stack_size+0x7fffec14>
80000068: fd079073 csrw 0xfd0,a5
8000006c: fd105073 csrwi 0xfd1,0
8000006c: fd405073 csrwi 0xfd4,0
80000070: 00442503 lw a0,4(s0)
80000074: 01f00493 li s1,31
80000078: 00151513 slli a0,a0,0x1
8000007c: fff50513 addi a0,a0,-1
80000080: 400000ef jal ra,80000480 <__clzsi2>
80000084: 40a48533 sub a0,s1,a0
80000088: fd251073 csrw 0xfd2,a0
80000088: fd551073 csrw 0xfd5,a0
8000008c: 00842503 lw a0,8(s0)
80000090: 00151513 slli a0,a0,0x1
80000094: fff50513 addi a0,a0,-1
80000098: 3e8000ef jal ra,80000480 <__clzsi2>
8000009c: 40a484b3 sub s1,s1,a0
800000a0: fd349073 csrw 0xfd3,s1
800000a4: fd405073 csrwi 0xfd4,0
800000a8: fd505073 csrwi 0xfd5,0
800000ac: fd605073 csrwi 0xfd6,0
800000a0: fd649073 csrw 0xfd6,s1
800000a4: fd105073 csrwi 0xfd1,0
800000a8: fd205073 csrwi 0xfd2,0
800000ac: fd305073 csrwi 0xfd3,0
800000b0: 01442503 lw a0,20(s0)
800000b4: 01842583 lw a1,24(s0)
800000b8: 01c42603 lw a2,28(s0)

Binary file not shown.

View file

@ -204,7 +204,7 @@ module VX_csr_data #(
`CSR_MIMPID : read_data_r = `IMPLEMENTATION_ID;
default: begin
assert (~read_enable || read_addr >= `CSR_TEX_BEGIN(0) && read_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES))
assert (~read_enable || (read_addr >= `CSR_TEX_BEGIN(0) && read_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES)))
else $error("%t: invalid CSR read address: %0h", $time, read_addr);
end
endcase

View file

@ -24,7 +24,7 @@ module VX_decode #(
reg [`MOD_BITS-1:0] op_mod;
reg [31:0] imm;
reg use_rd, use_rs1, use_rs2, use_rs3, use_PC, use_imm;
reg rd_fp, rs1_fp, rs2_fp;
reg rd_fp, rs1_fp, rs2_fp, rs3_fp;
reg is_join, is_wstall;
wire [31:0] instr = ifetch_rsp_if.instr;
@ -59,6 +59,7 @@ module VX_decode #(
rd_fp = 0;
rs1_fp = 0;
rs2_fp = 0;
rs3_fp = 1;
is_join = 0;
is_wstall = 0;
@ -367,6 +368,7 @@ module VX_decode #(
use_rs1 = 1;
use_rs2 = 1;
use_rs3 = 1;
rs3_fp = 0;
end
`endif
default:;
@ -395,7 +397,7 @@ module VX_decode #(
assign decode_if.rd = {rd_fp, rd};
assign decode_if.rs1 = {rs1_fp, rs1_qual};
assign decode_if.rs2 = {rs2_fp, rs2};
assign decode_if.rs3 = {1'b1, rs3};
assign decode_if.rs3 = {rs3_fp, rs3};
`else
`UNUSED_VAR (rd_fp)
`UNUSED_VAR (rs1_fp)

View file

@ -50,8 +50,8 @@ module VX_tex_addr #(
`UNUSED_PARAM (CORE_ID)
wire [1:0][`NUM_THREADS-1:0][`FIXED_FRAC-1:0] u;
wire [1:0][`NUM_THREADS-1:0][`FIXED_FRAC-1:0] v;
wire [`NUM_THREADS-1:0][1:0][`FIXED_FRAC-1:0] clamped_u;
wire [`NUM_THREADS-1:0][1:0][`FIXED_FRAC-1:0] clamped_v;
wire [`TEX_STRIDE_BITS-1:0] log_stride;
// stride
@ -70,9 +70,10 @@ module VX_tex_addr #(
wire [31:0] fu[1:0];
wire [31:0] fv[1:0];
assign fu[0] = coord_u[i] - (filter ? (`FIXED_HALF >> log_widths[i]) : 0);
assign fv[0] = coord_v[i] - (filter ? (`FIXED_HALF >> log_heights[i]) : 0);
assign fu[0] = coord_u[i] - (filter ? (`FIXED_HALF >> log_widths[i]) : 0);
assign fu[1] = coord_u[i] + (filter ? (`FIXED_HALF >> log_widths[i]) : 0);
assign fv[0] = coord_v[i] - (filter ? (`FIXED_HALF >> log_heights[i]) : 0);
assign fv[1] = coord_v[i] + (filter ? (`FIXED_HALF >> log_heights[i]) : 0);
VX_tex_wrap #(
@ -80,15 +81,7 @@ module VX_tex_addr #(
) tex_wrap_u0 (
.wrap_i (wrap_u),
.coord_i (fu[0]),
.coord_o (u[0][i])
);
VX_tex_wrap #(
.CORE_ID (CORE_ID)
) tex_wrap_v0 (
.wrap_i (wrap_v),
.coord_i (fv[0]),
.coord_o (v[0][i])
.coord_o (clamped_u[i][0])
);
VX_tex_wrap #(
@ -96,7 +89,15 @@ module VX_tex_addr #(
) tex_wrap_u1 (
.wrap_i (wrap_u),
.coord_i (fu[1]),
.coord_o (u[1][i])
.coord_o (clamped_u[i][1])
);
VX_tex_wrap #(
.CORE_ID (CORE_ID)
) tex_wrap_v0 (
.wrap_i (wrap_v),
.coord_i (fv[0]),
.coord_o (clamped_v[i][0])
);
VX_tex_wrap #(
@ -104,7 +105,7 @@ module VX_tex_addr #(
) tex_wrap_v1 (
.wrap_i (wrap_v),
.coord_i (fv[1]),
.coord_o (v[1][i])
.coord_o (clamped_v[i][1])
);
end
@ -117,10 +118,11 @@ module VX_tex_addr #(
wire [`FIXED_FRAC-1:0] x [1:0];
wire [`FIXED_FRAC-1:0] y [1:0];
assign x[0] = u[0][i] >> ((`FIXED_FRAC) - log_widths[i]);
assign x[1] = u[1][i] >> ((`FIXED_FRAC) - log_widths[i]);
assign y[0] = v[0][i] >> ((`FIXED_FRAC) - log_heights[i]);
assign y[1] = v[1][i] >> ((`FIXED_FRAC) - log_heights[i]);
assign x[0] = clamped_u[i][0] >> ((`FIXED_FRAC) - log_widths[i]);
assign x[1] = clamped_u[i][1] >> ((`FIXED_FRAC) - log_widths[i]);
assign y[0] = clamped_v[i][0] >> ((`FIXED_FRAC) - log_heights[i]);
assign y[1] = clamped_v[i][1] >> ((`FIXED_FRAC) - log_heights[i]);
assign addr[i][0] = base_addr + 32'(mip_offsets[i]) + (32'(x[0]) + (32'(y[0]) << log_widths[i])) << log_stride;
assign addr[i][1] = base_addr + 32'(mip_offsets[i]) + (32'(x[1]) + (32'(y[0]) << log_widths[i])) << log_stride;
@ -128,6 +130,12 @@ module VX_tex_addr #(
assign addr[i][3] = base_addr + 32'(mip_offsets[i]) + (32'(x[1]) + (32'(y[1]) << log_widths[i])) << log_stride;
end
wire [`NUM_THREADS-1:0][`FIXED_FRAC-1:0] u0, v0;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign u0[i] = clamped_u[i][0];
assign v0[i] = clamped_v[i][0];
end
wire stall_out = mem_req_valid && ~mem_req_ready;
VX_pipe_register #(
@ -137,7 +145,7 @@ module VX_tex_addr #(
.clk (clk),
.reset (reset),
.enable (~stall_out),
.data_in ({valid_in, req_wid, req_tmask, req_PC, filter, log_stride, addr, u[0], v[0], req_info}),
.data_in ({valid_in, req_wid, req_tmask, req_PC, filter, log_stride, addr, u0, v0, req_info}),
.data_out ({mem_req_valid, mem_req_wid, mem_req_tmask, mem_req_PC, mem_req_filter, mem_req_stride, mem_req_addr, mem_req_u, mem_req_v, mem_req_info})
);

View file

@ -24,9 +24,9 @@ module VX_tex_unit #(
`UNUSED_PARAM (CORE_ID)
`UNUSED_VAR (reset)
reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [(1 << `TEX_MIP_BITS)-1:0];
reg [`TEX_WIDTH_BITS-1:0] tex_width [(1 << `TEX_MIP_BITS)-1:0];
reg [`TEX_HEIGHT_BITS-1:0] tex_height [(1 << `TEX_MIP_BITS)-1:0];
reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0];
reg [`TEX_WIDTH_BITS-1:0] tex_width [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0];
reg [`TEX_HEIGHT_BITS-1:0] tex_height [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0];
reg [`TEX_ADDR_BITS-1:0] tex_baddr [`NUM_TEX_UNITS-1:0];
reg [`TEX_FORMAT_BITS-1:0] tex_format [`NUM_TEX_UNITS-1:0];
@ -36,50 +36,29 @@ module VX_tex_unit #(
// CSRs programming
for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin
always @(posedge clk) begin
if (reset) begin
tex_baddr[i] <= 0;
tex_format[i] <= 0;
tex_wrap_u[i] <= 0;
tex_wrap_v[i] <= 0;
tex_filter[i] <= 0;
end begin
if (tex_csr_if.write_enable) begin
case (tex_csr_if.write_addr)
`CSR_TEX_ADDR(i) : tex_baddr[i] <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0];
`CSR_TEX_FORMAT(i) : tex_format[i] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0];
`CSR_TEX_WRAP(i) : begin
tex_wrap_u[i] <= tex_csr_if.write_data[0 +: `TEX_WRAP_BITS];
tex_wrap_v[i] <= tex_csr_if.write_data[`TEX_WRAP_BITS +: `TEX_WRAP_BITS];
end
`CSR_TEX_FILTER(i) : tex_filter[i] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0];
`CSR_TEX_MIPOFF(i),
`CSR_TEX_WIDTH(i),
`CSR_TEX_HEIGHT(i):;
default:
assert(tex_csr_if.write_addr >= `CSR_TEX_BEGIN(0)
&& tex_csr_if.write_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES));
endcase
end
for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin
wire [`TEX_LOD_BITS-1:0] mip_level = tex_csr_if.write_data[28 +: `TEX_LOD_BITS];
always @(posedge clk) begin
if (tex_csr_if.write_enable) begin
case (tex_csr_if.write_addr)
`CSR_TEX_ADDR(i) : tex_baddr[i] <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0];
`CSR_TEX_FORMAT(i) : tex_format[i] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0];
`CSR_TEX_WRAP(i) : begin
tex_wrap_u[i] <= tex_csr_if.write_data[0 +: `TEX_WRAP_BITS];
tex_wrap_v[i] <= tex_csr_if.write_data[`TEX_WRAP_BITS +: `TEX_WRAP_BITS];
end
`CSR_TEX_FILTER(i) : tex_filter[i] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0];
`CSR_TEX_MIPOFF(i) : tex_mipoff[i][mip_level] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
`CSR_TEX_WIDTH(i) : tex_width[i][mip_level] <= tex_csr_if.write_data[`TEX_WIDTH_BITS-1:0];
`CSR_TEX_HEIGHT(i) : tex_height[i][mip_level] <= tex_csr_if.write_data[`TEX_HEIGHT_BITS-1:0];
default:
assert(tex_csr_if.write_addr >= `CSR_TEX_BEGIN(0)
&& tex_csr_if.write_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES));
endcase
end
end
end
for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin
wire [`TEX_MIP_BITS-1:0] mip_waddr = tex_csr_if.write_data[24 +: `TEX_MIP_BITS];
always @(posedge clk) begin
if (tex_csr_if.write_enable && tex_csr_if.write_addr == `CSR_TEX_MIPOFF(i))
tex_mipoff[mip_waddr] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
if (tex_csr_if.write_enable && tex_csr_if.write_addr == `CSR_TEX_WIDTH(i))
tex_width[mip_waddr] <= tex_csr_if.write_data[`TEX_WIDTH_BITS-1:0];
if (tex_csr_if.write_enable && tex_csr_if.write_addr == `CSR_TEX_HEIGHT(i))
tex_height[mip_waddr] <= tex_csr_if.write_data[`TEX_HEIGHT_BITS-1:0];
end
end
// mipmap attributes
wire [`NUM_THREADS-1:0][`TEX_MIPOFF_BITS-1:0] tex_mipoffs;
@ -87,10 +66,11 @@ module VX_tex_unit #(
wire [`NUM_THREADS-1:0][`TEX_HEIGHT_BITS-1:0] tex_heights;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
wire [`TEX_MIP_BITS-1:0] mip_raddr = {tex_req_if.unit[`NTEX_BITS-1:0], tex_req_if.lod[i][`TEX_LOD_BITS-1:0]};
assign tex_mipoffs[i] = tex_mipoff[mip_raddr];
assign tex_widths[i] = tex_width[mip_raddr];
assign tex_heights[i] = tex_height[mip_raddr];
wire [`NTEX_BITS-1:0] unit = tex_req_if.unit[`NTEX_BITS-1:0];
wire [`TEX_LOD_BITS-1:0] mip_level = tex_req_if.lod[i][20+:`TEX_LOD_BITS];
assign tex_mipoffs[i] = tex_mipoff[unit][mip_level];
assign tex_widths[i] = tex_width[unit][mip_level];
assign tex_heights[i] = tex_height[unit][mip_level];
end
// address generation
@ -237,13 +217,14 @@ module VX_tex_unit #(
if (tex_csr_if.write_enable
&& (tex_csr_if.write_addr >= `CSR_TEX_BEGIN(i)
&& tex_csr_if.write_addr < `CSR_TEX_BEGIN(i+1))) begin
$display("%t: core%0d-tex_csr: csr_tex%d_addr, csr_data=%0h", $time, CORE_ID, i, tex_baddr[i]);
$display("%t: core%0d-tex_csr: csr_tex%d_width, csr_data=%0h", $time, CORE_ID, i, tex_width[i]);
$display("%t: core%0d-tex_csr: csr_tex%d_height, csr_data=%0h", $time, CORE_ID, i, tex_height[i]);
$display("%t: core%0d-tex_csr: csr_tex%d_format, csr_data=%0h", $time, CORE_ID, i, tex_format[i]);
$display("%t: core%0d-tex_csr: csr_tex%d_wrap_u, csr_data=%0h", $time, CORE_ID, i, tex_wrap_u[i]);
$display("%t: core%0d-tex_csr: csr_tex%d_wrap_v, csr_data=%0h", $time, CORE_ID, i, tex_wrap_v[i]);
$display("%t: core%0d-tex_csr: csr_tex%d_filter, csr_data=%0h", $time, CORE_ID, i, tex_filter[i]);
$display("%t: core%0d-tex_unit: tex%0d_addr=%0h", $time, CORE_ID, i, tex_baddr[i]);
$display("%t: core%0d-tex_unit: tex%0d_format=%0h", $time, CORE_ID, i, tex_format[i]);
$display("%t: core%0d-tex_unit: tex%0d_wrap_u=%0h", $time, CORE_ID, i, tex_wrap_u[i]);
$display("%t: core%0d-tex_unit: tex%0d_wrap_v=%0h", $time, CORE_ID, i, tex_wrap_v[i]);
$display("%t: core%0d-tex_unit: tex%0d_filter=%0h", $time, CORE_ID, i, tex_filter[i]);
$display("%t: core%0d-tex_unit: tex%0d_mipoff[0]=%0h", $time, CORE_ID, i, tex_mipoff[i][0]);
$display("%t: core%0d-tex_unit: tex%0d_width[0]=%0h", $time, CORE_ID, i, tex_width[i][0]);
$display("%t: core%0d-tex_unit: tex%0d_height[0]=%0h", $time, CORE_ID, i, tex_height[i][0]);
end
end
end

View file

@ -54,8 +54,11 @@ extern "C" {
/*
 * Texture load.
 *
 * Emits one R4-type custom instruction (opcode 0x6b, funct3 5) and evaluates
 * to the value the instruction writes to its destination register.
 *
 *   unit - placed in the instruction's funct2 field via __ASM_STR(unit), so
 *          it is pasted into the assembly text rather than passed in a
 *          register (presumably must be a literal constant - confirm against
 *          the __ASM_STR definition).
 *   u, v - passed in rs1 / rs2.
 *   l    - passed in rs3.
 *
 * Each argument is evaluated exactly once and copied into an unsigned
 * temporary before the asm statement, so expressions with side effects and
 * non-register operands are safe. The arguments are parenthesised so that an
 * argument containing a low-precedence operator still initialises the
 * temporary with the whole expression.
 *
 * Uses the GNU statement-expression extension; a single asm statement and a
 * single declaration of __r are emitted per expansion.
 */
#define vx_tex(unit, u, v, l) ({ \
unsigned __r; \
unsigned __u = (u); \
unsigned __v = (v); \
unsigned __l = (l); \
__asm__ __volatile__ (".insn r4 0x6b, 5, " __ASM_STR(unit) ", %0, %1, %2, %3" : "=r"(__r) : "r"(__u), "r"(__v), "r"(__l)); \
__r; \
})