mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
minor updates
This commit is contained in:
parent
6514a3b782
commit
f968dbccd3
9 changed files with 80 additions and 86 deletions
Binary file not shown.
|
@ -27,7 +27,7 @@ void kernel_body(int task_id, void* arg) {
|
|||
for (uint32_t x = 0; x < _arg->tile_width; ++x) {
|
||||
int32_t u = (int32_t)(fu * (1<<20));
|
||||
int32_t v = (int32_t)(fv * (1<<20));
|
||||
dst_ptr[x] = vx_tex(0, u, v, 0);
|
||||
dst_ptr[x] = vx_tex(0, u, v, 0x0);
|
||||
fu += _arg->deltaX;
|
||||
}
|
||||
dst_ptr += _arg->karg.dst_pitch;
|
||||
|
|
|
@ -36,23 +36,23 @@ Disassembly of section .text:
|
|||
80000060: 04912223 sw s1,68(sp)
|
||||
80000064: 01442783 lw a5,20(s0) # 7ffff014 <__stack_size+0x7fffec14>
|
||||
80000068: fd079073 csrw 0xfd0,a5
|
||||
8000006c: fd105073 csrwi 0xfd1,0
|
||||
8000006c: fd405073 csrwi 0xfd4,0
|
||||
80000070: 00442503 lw a0,4(s0)
|
||||
80000074: 01f00493 li s1,31
|
||||
80000078: 00151513 slli a0,a0,0x1
|
||||
8000007c: fff50513 addi a0,a0,-1
|
||||
80000080: 400000ef jal ra,80000480 <__clzsi2>
|
||||
80000084: 40a48533 sub a0,s1,a0
|
||||
80000088: fd251073 csrw 0xfd2,a0
|
||||
80000088: fd551073 csrw 0xfd5,a0
|
||||
8000008c: 00842503 lw a0,8(s0)
|
||||
80000090: 00151513 slli a0,a0,0x1
|
||||
80000094: fff50513 addi a0,a0,-1
|
||||
80000098: 3e8000ef jal ra,80000480 <__clzsi2>
|
||||
8000009c: 40a484b3 sub s1,s1,a0
|
||||
800000a0: fd349073 csrw 0xfd3,s1
|
||||
800000a4: fd405073 csrwi 0xfd4,0
|
||||
800000a8: fd505073 csrwi 0xfd5,0
|
||||
800000ac: fd605073 csrwi 0xfd6,0
|
||||
800000a0: fd649073 csrw 0xfd6,s1
|
||||
800000a4: fd105073 csrwi 0xfd1,0
|
||||
800000a8: fd205073 csrwi 0xfd2,0
|
||||
800000ac: fd305073 csrwi 0xfd3,0
|
||||
800000b0: 01442503 lw a0,20(s0)
|
||||
800000b4: 01842583 lw a1,24(s0)
|
||||
800000b8: 01c42603 lw a2,28(s0)
|
||||
|
|
Binary file not shown.
|
@ -204,7 +204,7 @@ module VX_csr_data #(
|
|||
`CSR_MIMPID : read_data_r = `IMPLEMENTATION_ID;
|
||||
|
||||
default: begin
|
||||
assert (~read_enable || read_addr >= `CSR_TEX_BEGIN(0) && read_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES))
|
||||
assert (~read_enable || (read_addr >= `CSR_TEX_BEGIN(0) && read_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES)))
|
||||
else $error("%t: invalid CSR read address: %0h", $time, read_addr);
|
||||
end
|
||||
endcase
|
||||
|
|
|
@ -24,7 +24,7 @@ module VX_decode #(
|
|||
reg [`MOD_BITS-1:0] op_mod;
|
||||
reg [31:0] imm;
|
||||
reg use_rd, use_rs1, use_rs2, use_rs3, use_PC, use_imm;
|
||||
reg rd_fp, rs1_fp, rs2_fp;
|
||||
reg rd_fp, rs1_fp, rs2_fp, rs3_fp;
|
||||
reg is_join, is_wstall;
|
||||
|
||||
wire [31:0] instr = ifetch_rsp_if.instr;
|
||||
|
@ -59,6 +59,7 @@ module VX_decode #(
|
|||
rd_fp = 0;
|
||||
rs1_fp = 0;
|
||||
rs2_fp = 0;
|
||||
rs3_fp = 1;
|
||||
is_join = 0;
|
||||
is_wstall = 0;
|
||||
|
||||
|
@ -367,6 +368,7 @@ module VX_decode #(
|
|||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
use_rs3 = 1;
|
||||
rs3_fp = 0;
|
||||
end
|
||||
`endif
|
||||
default:;
|
||||
|
@ -395,7 +397,7 @@ module VX_decode #(
|
|||
assign decode_if.rd = {rd_fp, rd};
|
||||
assign decode_if.rs1 = {rs1_fp, rs1_qual};
|
||||
assign decode_if.rs2 = {rs2_fp, rs2};
|
||||
assign decode_if.rs3 = {1'b1, rs3};
|
||||
assign decode_if.rs3 = {rs3_fp, rs3};
|
||||
`else
|
||||
`UNUSED_VAR (rd_fp)
|
||||
`UNUSED_VAR (rs1_fp)
|
||||
|
|
|
@ -50,8 +50,8 @@ module VX_tex_addr #(
|
|||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
wire [1:0][`NUM_THREADS-1:0][`FIXED_FRAC-1:0] u;
|
||||
wire [1:0][`NUM_THREADS-1:0][`FIXED_FRAC-1:0] v;
|
||||
wire [`NUM_THREADS-1:0][1:0][`FIXED_FRAC-1:0] clamped_u;
|
||||
wire [`NUM_THREADS-1:0][1:0][`FIXED_FRAC-1:0] clamped_v;
|
||||
wire [`TEX_STRIDE_BITS-1:0] log_stride;
|
||||
|
||||
// stride
|
||||
|
@ -70,9 +70,10 @@ module VX_tex_addr #(
|
|||
wire [31:0] fu[1:0];
|
||||
wire [31:0] fv[1:0];
|
||||
|
||||
assign fu[0] = coord_u[i] - (filter ? (`FIXED_HALF >> log_widths[i]) : 0);
|
||||
assign fv[0] = coord_v[i] - (filter ? (`FIXED_HALF >> log_heights[i]) : 0);
|
||||
assign fu[0] = coord_u[i] - (filter ? (`FIXED_HALF >> log_widths[i]) : 0);
|
||||
assign fu[1] = coord_u[i] + (filter ? (`FIXED_HALF >> log_widths[i]) : 0);
|
||||
|
||||
assign fv[0] = coord_v[i] - (filter ? (`FIXED_HALF >> log_heights[i]) : 0);
|
||||
assign fv[1] = coord_v[i] + (filter ? (`FIXED_HALF >> log_heights[i]) : 0);
|
||||
|
||||
VX_tex_wrap #(
|
||||
|
@ -80,15 +81,7 @@ module VX_tex_addr #(
|
|||
) tex_wrap_u0 (
|
||||
.wrap_i (wrap_u),
|
||||
.coord_i (fu[0]),
|
||||
.coord_o (u[0][i])
|
||||
);
|
||||
|
||||
VX_tex_wrap #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) tex_wrap_v0 (
|
||||
.wrap_i (wrap_v),
|
||||
.coord_i (fv[0]),
|
||||
.coord_o (v[0][i])
|
||||
.coord_o (clamped_u[i][0])
|
||||
);
|
||||
|
||||
VX_tex_wrap #(
|
||||
|
@ -96,7 +89,15 @@ module VX_tex_addr #(
|
|||
) tex_wrap_u1 (
|
||||
.wrap_i (wrap_u),
|
||||
.coord_i (fu[1]),
|
||||
.coord_o (u[1][i])
|
||||
.coord_o (clamped_u[i][1])
|
||||
);
|
||||
|
||||
VX_tex_wrap #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) tex_wrap_v0 (
|
||||
.wrap_i (wrap_v),
|
||||
.coord_i (fv[0]),
|
||||
.coord_o (clamped_v[i][0])
|
||||
);
|
||||
|
||||
VX_tex_wrap #(
|
||||
|
@ -104,7 +105,7 @@ module VX_tex_addr #(
|
|||
) tex_wrap_v1 (
|
||||
.wrap_i (wrap_v),
|
||||
.coord_i (fv[1]),
|
||||
.coord_o (v[1][i])
|
||||
.coord_o (clamped_v[i][1])
|
||||
);
|
||||
end
|
||||
|
||||
|
@ -117,10 +118,11 @@ module VX_tex_addr #(
|
|||
wire [`FIXED_FRAC-1:0] x [1:0];
|
||||
wire [`FIXED_FRAC-1:0] y [1:0];
|
||||
|
||||
assign x[0] = u[0][i] >> ((`FIXED_FRAC) - log_widths[i]);
|
||||
assign x[1] = u[1][i] >> ((`FIXED_FRAC) - log_widths[i]);
|
||||
assign y[0] = v[0][i] >> ((`FIXED_FRAC) - log_heights[i]);
|
||||
assign y[1] = v[1][i] >> ((`FIXED_FRAC) - log_heights[i]);
|
||||
assign x[0] = clamped_u[i][0] >> ((`FIXED_FRAC) - log_widths[i]);
|
||||
assign x[1] = clamped_u[i][1] >> ((`FIXED_FRAC) - log_widths[i]);
|
||||
|
||||
assign y[0] = clamped_v[i][0] >> ((`FIXED_FRAC) - log_heights[i]);
|
||||
assign y[1] = clamped_v[i][1] >> ((`FIXED_FRAC) - log_heights[i]);
|
||||
|
||||
assign addr[i][0] = base_addr + 32'(mip_offsets[i]) + (32'(x[0]) + (32'(y[0]) << log_widths[i])) << log_stride;
|
||||
assign addr[i][1] = base_addr + 32'(mip_offsets[i]) + (32'(x[1]) + (32'(y[0]) << log_widths[i])) << log_stride;
|
||||
|
@ -128,6 +130,12 @@ module VX_tex_addr #(
|
|||
assign addr[i][3] = base_addr + 32'(mip_offsets[i]) + (32'(x[1]) + (32'(y[1]) << log_widths[i])) << log_stride;
|
||||
end
|
||||
|
||||
wire [`NUM_THREADS-1:0][`FIXED_FRAC-1:0] u0, v0;
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign u0[i] = clamped_u[i][0];
|
||||
assign v0[i] = clamped_v[i][0];
|
||||
end
|
||||
|
||||
wire stall_out = mem_req_valid && ~mem_req_ready;
|
||||
|
||||
VX_pipe_register #(
|
||||
|
@ -137,7 +145,7 @@ module VX_tex_addr #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall_out),
|
||||
.data_in ({valid_in, req_wid, req_tmask, req_PC, filter, log_stride, addr, u[0], v[0], req_info}),
|
||||
.data_in ({valid_in, req_wid, req_tmask, req_PC, filter, log_stride, addr, u0, v0, req_info}),
|
||||
.data_out ({mem_req_valid, mem_req_wid, mem_req_tmask, mem_req_PC, mem_req_filter, mem_req_stride, mem_req_addr, mem_req_u, mem_req_v, mem_req_info})
|
||||
);
|
||||
|
||||
|
|
|
@ -24,9 +24,9 @@ module VX_tex_unit #(
|
|||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [(1 << `TEX_MIP_BITS)-1:0];
|
||||
reg [`TEX_WIDTH_BITS-1:0] tex_width [(1 << `TEX_MIP_BITS)-1:0];
|
||||
reg [`TEX_HEIGHT_BITS-1:0] tex_height [(1 << `TEX_MIP_BITS)-1:0];
|
||||
reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0];
|
||||
reg [`TEX_WIDTH_BITS-1:0] tex_width [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0];
|
||||
reg [`TEX_HEIGHT_BITS-1:0] tex_height [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0];
|
||||
|
||||
reg [`TEX_ADDR_BITS-1:0] tex_baddr [`NUM_TEX_UNITS-1:0];
|
||||
reg [`TEX_FORMAT_BITS-1:0] tex_format [`NUM_TEX_UNITS-1:0];
|
||||
|
@ -36,50 +36,29 @@ module VX_tex_unit #(
|
|||
|
||||
// CSRs programming
|
||||
|
||||
for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
tex_baddr[i] <= 0;
|
||||
tex_format[i] <= 0;
|
||||
tex_wrap_u[i] <= 0;
|
||||
tex_wrap_v[i] <= 0;
|
||||
tex_filter[i] <= 0;
|
||||
end begin
|
||||
if (tex_csr_if.write_enable) begin
|
||||
case (tex_csr_if.write_addr)
|
||||
`CSR_TEX_ADDR(i) : tex_baddr[i] <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0];
|
||||
`CSR_TEX_FORMAT(i) : tex_format[i] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0];
|
||||
`CSR_TEX_WRAP(i) : begin
|
||||
tex_wrap_u[i] <= tex_csr_if.write_data[0 +: `TEX_WRAP_BITS];
|
||||
tex_wrap_v[i] <= tex_csr_if.write_data[`TEX_WRAP_BITS +: `TEX_WRAP_BITS];
|
||||
end
|
||||
`CSR_TEX_FILTER(i) : tex_filter[i] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0];
|
||||
`CSR_TEX_MIPOFF(i),
|
||||
`CSR_TEX_WIDTH(i),
|
||||
`CSR_TEX_HEIGHT(i):;
|
||||
default:
|
||||
assert(tex_csr_if.write_addr >= `CSR_TEX_BEGIN(0)
|
||||
&& tex_csr_if.write_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES));
|
||||
endcase
|
||||
end
|
||||
for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin
|
||||
wire [`TEX_LOD_BITS-1:0] mip_level = tex_csr_if.write_data[28 +: `TEX_LOD_BITS];
|
||||
always @(posedge clk) begin
|
||||
if (tex_csr_if.write_enable) begin
|
||||
case (tex_csr_if.write_addr)
|
||||
`CSR_TEX_ADDR(i) : tex_baddr[i] <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0];
|
||||
`CSR_TEX_FORMAT(i) : tex_format[i] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0];
|
||||
`CSR_TEX_WRAP(i) : begin
|
||||
tex_wrap_u[i] <= tex_csr_if.write_data[0 +: `TEX_WRAP_BITS];
|
||||
tex_wrap_v[i] <= tex_csr_if.write_data[`TEX_WRAP_BITS +: `TEX_WRAP_BITS];
|
||||
end
|
||||
`CSR_TEX_FILTER(i) : tex_filter[i] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0];
|
||||
`CSR_TEX_MIPOFF(i) : tex_mipoff[i][mip_level] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
|
||||
`CSR_TEX_WIDTH(i) : tex_width[i][mip_level] <= tex_csr_if.write_data[`TEX_WIDTH_BITS-1:0];
|
||||
`CSR_TEX_HEIGHT(i) : tex_height[i][mip_level] <= tex_csr_if.write_data[`TEX_HEIGHT_BITS-1:0];
|
||||
default:
|
||||
assert(tex_csr_if.write_addr >= `CSR_TEX_BEGIN(0)
|
||||
&& tex_csr_if.write_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin
|
||||
wire [`TEX_MIP_BITS-1:0] mip_waddr = tex_csr_if.write_data[24 +: `TEX_MIP_BITS];
|
||||
always @(posedge clk) begin
|
||||
if (tex_csr_if.write_enable && tex_csr_if.write_addr == `CSR_TEX_MIPOFF(i))
|
||||
tex_mipoff[mip_waddr] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
|
||||
|
||||
if (tex_csr_if.write_enable && tex_csr_if.write_addr == `CSR_TEX_WIDTH(i))
|
||||
tex_width[mip_waddr] <= tex_csr_if.write_data[`TEX_WIDTH_BITS-1:0];
|
||||
|
||||
if (tex_csr_if.write_enable && tex_csr_if.write_addr == `CSR_TEX_HEIGHT(i))
|
||||
tex_height[mip_waddr] <= tex_csr_if.write_data[`TEX_HEIGHT_BITS-1:0];
|
||||
end
|
||||
end
|
||||
|
||||
// mipmap attributes
|
||||
|
||||
wire [`NUM_THREADS-1:0][`TEX_MIPOFF_BITS-1:0] tex_mipoffs;
|
||||
|
@ -87,10 +66,11 @@ module VX_tex_unit #(
|
|||
wire [`NUM_THREADS-1:0][`TEX_HEIGHT_BITS-1:0] tex_heights;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
wire [`TEX_MIP_BITS-1:0] mip_raddr = {tex_req_if.unit[`NTEX_BITS-1:0], tex_req_if.lod[i][`TEX_LOD_BITS-1:0]};
|
||||
assign tex_mipoffs[i] = tex_mipoff[mip_raddr];
|
||||
assign tex_widths[i] = tex_width[mip_raddr];
|
||||
assign tex_heights[i] = tex_height[mip_raddr];
|
||||
wire [`NTEX_BITS-1:0] unit = tex_req_if.unit[`NTEX_BITS-1:0];
|
||||
wire [`TEX_LOD_BITS-1:0] mip_level = tex_req_if.lod[i][20+:`TEX_LOD_BITS];
|
||||
assign tex_mipoffs[i] = tex_mipoff[unit][mip_level];
|
||||
assign tex_widths[i] = tex_width[unit][mip_level];
|
||||
assign tex_heights[i] = tex_height[unit][mip_level];
|
||||
end
|
||||
|
||||
// address generation
|
||||
|
@ -237,13 +217,14 @@ module VX_tex_unit #(
|
|||
if (tex_csr_if.write_enable
|
||||
&& (tex_csr_if.write_addr >= `CSR_TEX_BEGIN(i)
|
||||
&& tex_csr_if.write_addr < `CSR_TEX_BEGIN(i+1))) begin
|
||||
$display("%t: core%0d-tex_csr: csr_tex%d_addr, csr_data=%0h", $time, CORE_ID, i, tex_baddr[i]);
|
||||
$display("%t: core%0d-tex_csr: csr_tex%d_width, csr_data=%0h", $time, CORE_ID, i, tex_width[i]);
|
||||
$display("%t: core%0d-tex_csr: csr_tex%d_height, csr_data=%0h", $time, CORE_ID, i, tex_height[i]);
|
||||
$display("%t: core%0d-tex_csr: csr_tex%d_format, csr_data=%0h", $time, CORE_ID, i, tex_format[i]);
|
||||
$display("%t: core%0d-tex_csr: csr_tex%d_wrap_u, csr_data=%0h", $time, CORE_ID, i, tex_wrap_u[i]);
|
||||
$display("%t: core%0d-tex_csr: csr_tex%d_wrap_v, csr_data=%0h", $time, CORE_ID, i, tex_wrap_v[i]);
|
||||
$display("%t: core%0d-tex_csr: csr_tex%d_filter, csr_data=%0h", $time, CORE_ID, i, tex_filter[i]);
|
||||
$display("%t: core%0d-tex_unit: tex%0d_addr=%0h", $time, CORE_ID, i, tex_baddr[i]);
|
||||
$display("%t: core%0d-tex_unit: tex%0d_format=%0h", $time, CORE_ID, i, tex_format[i]);
|
||||
$display("%t: core%0d-tex_unit: tex%0d_wrap_u=%0h", $time, CORE_ID, i, tex_wrap_u[i]);
|
||||
$display("%t: core%0d-tex_unit: tex%0d_wrap_v=%0h", $time, CORE_ID, i, tex_wrap_v[i]);
|
||||
$display("%t: core%0d-tex_unit: tex%0d_filter=%0h", $time, CORE_ID, i, tex_filter[i]);
|
||||
$display("%t: core%0d-tex_unit: tex%0d_mipoff[0]=%0h", $time, CORE_ID, i, tex_mipoff[i][0]);
|
||||
$display("%t: core%0d-tex_unit: tex%0d_width[0]=%0h", $time, CORE_ID, i, tex_width[i][0]);
|
||||
$display("%t: core%0d-tex_unit: tex%0d_height[0]=%0h", $time, CORE_ID, i, tex_height[i][0]);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -54,8 +54,11 @@ extern "C" {
|
|||
|
||||
// Texture load
|
||||
#define vx_tex(unit, u, v, l) ({ \
|
||||
register unsigned __r; \
|
||||
__asm__ __volatile__ (".insn r4 0x6b, 5, " __ASM_STR(unit) ", %0, %1, %2, %3" : "=r"(__r) : "r"(u), "r"(v), "r"(l)); \
|
||||
unsigned __r; \
|
||||
unsigned __u = u; \
|
||||
unsigned __v = v; \
|
||||
unsigned __l = l; \
|
||||
__asm__ __volatile__ (".insn r4 0x6b, 5, " __ASM_STR(unit) ", %0, %1, %2, %3" : "=r"(__r) : "r"(__u), "r"(__v), "r"(__l)); \
|
||||
__r; \
|
||||
})
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue