mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
tex_unit address generation complete
This commit is contained in:
parent
1431ef9bc0
commit
7ff5c082bc
4 changed files with 100 additions and 80 deletions
|
@ -43,52 +43,85 @@ module VX_tex_addr_gen #(
|
|||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_VAR (lod)
|
||||
|
||||
wire [`FIXED_FRAC-1:0] u[`NUM_THREADS-1:0][1:0];
|
||||
wire [`FIXED_FRAC-1:0] v[`NUM_THREADS-1:0][1:0];
|
||||
|
||||
// addressing mode
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
|
||||
// addressing mode
|
||||
|
||||
wire [31:0] u, v;
|
||||
wire [31:0] fu[1:0];
|
||||
wire [31:0] fv[1:0];
|
||||
|
||||
assign fu[0] = coord_u[i] - (filter ? (`FIXED_HALF >> log2_width) : 0);
|
||||
assign fv[0] = coord_v[i] - (filter ? (`FIXED_HALF >> log2_height) : 0);
|
||||
assign fu[1] = coord_u[i] + (filter ? (`FIXED_HALF >> log2_width) : 0);
|
||||
assign fv[1] = coord_v[i] + (filter ? (`FIXED_HALF >> log2_height) : 0);
|
||||
|
||||
VX_tex_wrap #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) tex_wrap_u (
|
||||
) tex_wrap_u0 (
|
||||
.wrap_i (wrap_u),
|
||||
.coord_i (coord_u[i]),
|
||||
.coord_o (u)
|
||||
.coord_i (fu[0]),
|
||||
.coord_o (u[i][0])
|
||||
);
|
||||
|
||||
VX_tex_wrap #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) tex_wrap_v (
|
||||
) tex_wrap_v0 (
|
||||
.wrap_i (wrap_v),
|
||||
.coord_i (coord_v[i]),
|
||||
.coord_o (v)
|
||||
.coord_i (fv[0]),
|
||||
.coord_o (v[i][0])
|
||||
);
|
||||
|
||||
// texel addresses generation
|
||||
VX_tex_wrap #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) tex_wrap_u1 (
|
||||
.wrap_i (wrap_u),
|
||||
.coord_i (fu[1]),
|
||||
.coord_o (u[i][1])
|
||||
);
|
||||
|
||||
wire [31:0] x_offset, y_offset;
|
||||
wire [31:0] addr0;
|
||||
VX_tex_wrap #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) tex_wrap_v1 (
|
||||
.wrap_i (wrap_v),
|
||||
.coord_i (fv[1]),
|
||||
.coord_o (v[i][1])
|
||||
);
|
||||
end
|
||||
|
||||
// addresses generation
|
||||
|
||||
assign x_offset = u >> (5'(`FIXED_FRAC) - log2_width);
|
||||
assign y_offset = v >> (5'(`FIXED_FRAC) - log2_height);
|
||||
assign addr0 = base_addr + (x_offset + (y_offset << log2_width)) << log2_stride;
|
||||
wire [31:0] addr [`NUM_THREADS-1:0][3:0];
|
||||
|
||||
wire [3:0] req_valids = 4'(valid_in);
|
||||
wire [3:0][31:0] req_address = {4{addr0}};
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
|
||||
wire [`FIXED_FRAC-1:0] x [1:0];
|
||||
wire [`FIXED_FRAC-1:0] y [1:0];
|
||||
|
||||
assign x[0] = u[i][0] >> ((`FIXED_FRAC) - log2_width);
|
||||
assign x[1] = u[i][1] >> ((`FIXED_FRAC) - log2_width);
|
||||
assign y[0] = v[i][0] >> ((`FIXED_FRAC) - log2_height);
|
||||
assign y[1] = v[i][1] >> ((`FIXED_FRAC) - log2_height);
|
||||
|
||||
assign addr [i][0] = base_addr + (x[0] + (y[0] << log2_width)) << log2_stride;
|
||||
assign addr [i][1] = base_addr + (x[1] + (y[0] << log2_width)) << log2_stride;
|
||||
assign addr [i][2] = base_addr + (x[0] + (y[1] << log2_width)) << log2_stride;
|
||||
assign addr [i][3] = base_addr + (x[1] + (y[1] << log2_width)) << log2_stride;
|
||||
end
|
||||
|
||||
wire stall_out = mem_req_valid && ~mem_req_ready;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 4 + 4 * 32 + REQ_TAG_WIDTH),
|
||||
.DATAW (1 + 4 + `NUM_THREADS * 4 * 32 + REQ_TAG_WIDTH),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall_out),
|
||||
.data_in ({req_valids, req_address, req_tag}),
|
||||
.data_out ({mem_req_valid, mem_req_addr, mem_req_tag})
|
||||
.data_in ({valid_in, req_tmask, filter, req_tag, addr}),
|
||||
.data_out ({mem_req_valid, mem_req_tmask, mem_req_filter, mem_req_tag, mem_req_addr})
|
||||
);
|
||||
|
||||
assign ready_in = ~stall_out;
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
`define FIXED_FRAC 20
|
||||
`define FIXED_INT (32 - `FIXED_FRAC)
|
||||
`define FIXED_ONE (1 << `FIXED_FRAC)
|
||||
`define FIXED_HALF (`FIXED_ONE >> 1)
|
||||
`define FIXED_MASK (`FIXED_ONE - 1)
|
||||
|
||||
`define CLAMP(x,lo,hi) ((x < lo) ? lo : ((x > hi) ? hi : x))
|
||||
|
@ -13,8 +14,8 @@
|
|||
`define TEX_ADDR_BITS 32
|
||||
`define TEX_FORMAT_BITS 3
|
||||
`define TEX_WRAP_BITS 2
|
||||
`define TEX_WIDTH_BITS 12
|
||||
`define TEX_HEIGHT_BITS 12
|
||||
`define TEX_WIDTH_BITS 4
|
||||
`define TEX_HEIGHT_BITS 4
|
||||
`define TEX_STRIDE_BITS 2
|
||||
`define TEX_FILTER_BITS 1
|
||||
|
||||
|
|
|
@ -1,35 +1,16 @@
|
|||
`include "VX_tex_define.vh"
|
||||
|
||||
/*
|
||||
switch(addressing_mode) {
|
||||
case undefined: return is_undefined;
|
||||
case clamp_to_edge: return intdowni(max(0, min(coord, coorddim - 1)));
|
||||
case clamp_to_border: return is_border;
|
||||
case repeat:
|
||||
tile = intdowni(coord / coorddim);
|
||||
return intdowni(coord - (tile * coorddim));
|
||||
case mirrored_repeat:
|
||||
mirrored_coord = (coord < 0) ? (-coord - 1) : coord;
|
||||
tile = intdowni(mirrored_coord / coorddim);
|
||||
mirrored_coord = intdowni(mirrored_coord - (tile * coorddim));
|
||||
if (tile & 1) {
|
||||
mirrored_coord = (coorddim - 1) - mirrored_coord;
|
||||
}
|
||||
return mirrored_coord;
|
||||
}
|
||||
*/
|
||||
|
||||
module VX_tex_wrap #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire [`TEX_WRAP_BITS-1:0] wrap_i,
|
||||
input wire [31:0] coord_i,
|
||||
input wire [`FIXED_FRAC-1:0] coord_o
|
||||
output wire [`FIXED_FRAC-1:0] coord_o
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
reg [31:0] coord_r;
|
||||
reg [`FIXED_FRAC-1:0] coord_r;
|
||||
|
||||
wire [31:0] clamp = `CLAMP(coord_i, 0, `FIXED_MASK);
|
||||
|
||||
|
|
|
@ -776,46 +776,51 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
|
|||
case FMSUB:
|
||||
case FMNMADD:
|
||||
case FMNMSUB: {
|
||||
// multiplicands are infinity and zero, then set FCSR
|
||||
if (fpBinIsZero(rsdata[0]) || fpBinIsZero(rsdata[1]) || fpBinIsInf(rsdata[0]) || fpBinIsInf(rsdata[1])) {
|
||||
core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x10, t, id_); // set NV bit
|
||||
core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x10, t, id_); // set NV bit
|
||||
}
|
||||
if (fpBinIsNan(rsdata[0]) || fpBinIsNan(rsdata[1]) || fpBinIsNan(rsdata[2])) {
|
||||
// if one of the operands is NaN, and the addend is not a quiet NaN, then set FCSR
|
||||
if ((fpBinIsNan(rsdata[0])==2) | (fpBinIsNan(rsdata[1])==2) | (fpBinIsNan(rsdata[1])==2)) {
|
||||
core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x10, t, id_); // set NV bit
|
||||
core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x10, t, id_); // set NV bit
|
||||
}
|
||||
rddata = 0x7fc00000; // canonical(quiet) NaN
|
||||
// select FP format
|
||||
if (core_->get_csr(CSR_FPMODE, t, id_) == 1) {
|
||||
// CODE
|
||||
} else {
|
||||
float rs1 = intregToFloat(rsdata[0]);
|
||||
float rs2 = intregToFloat(rsdata[1]);
|
||||
float rs3 = intregToFloat(rsdata[2]);
|
||||
float fpDest(0.0);
|
||||
feclearexcept(FE_ALL_EXCEPT);
|
||||
switch (opcode) {
|
||||
case FMADD:
|
||||
// rd = (rs1*rs2)+rs3
|
||||
fpDest = (rs1 * rs2) + rs3; break;
|
||||
case FMSUB:
|
||||
// rd = (rs1*rs2)-rs3
|
||||
fpDest = (rs1 * rs2) - rs3; break;
|
||||
case FMNMADD:
|
||||
// rd = -(rs1*rs2)-rs3
|
||||
fpDest = -1*(rs1 * rs2) - rs3; break;
|
||||
case FMNMSUB:
|
||||
// rd = -(rs1*rs2)+rs3
|
||||
fpDest = -1*(rs1 * rs2) + rs3; break;
|
||||
default:
|
||||
std::abort();
|
||||
break;
|
||||
}
|
||||
// multiplicands are infinity and zero, then set FCSR
|
||||
if (fpBinIsZero(rsdata[0]) || fpBinIsZero(rsdata[1]) || fpBinIsInf(rsdata[0]) || fpBinIsInf(rsdata[1])) {
|
||||
core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x10, t, id_); // set NV bit
|
||||
core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x10, t, id_); // set NV bit
|
||||
}
|
||||
if (fpBinIsNan(rsdata[0]) || fpBinIsNan(rsdata[1]) || fpBinIsNan(rsdata[2])) {
|
||||
// if one of the operands is NaN, and the addend is not a quiet NaN, then set FCSR
|
||||
if ((fpBinIsNan(rsdata[0])==2) | (fpBinIsNan(rsdata[1])==2) | (fpBinIsNan(rsdata[1])==2)) {
|
||||
core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x10, t, id_); // set NV bit
|
||||
core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x10, t, id_); // set NV bit
|
||||
}
|
||||
rddata = 0x7fc00000; // canonical(quiet) NaN
|
||||
} else {
|
||||
float rs1 = intregToFloat(rsdata[0]);
|
||||
float rs2 = intregToFloat(rsdata[1]);
|
||||
float rs3 = intregToFloat(rsdata[2]);
|
||||
float fpDest(0.0);
|
||||
feclearexcept(FE_ALL_EXCEPT);
|
||||
switch (opcode) {
|
||||
case FMADD:
|
||||
// rd = (rs1*rs2)+rs3
|
||||
fpDest = (rs1 * rs2) + rs3; break;
|
||||
case FMSUB:
|
||||
// rd = (rs1*rs2)-rs3
|
||||
fpDest = (rs1 * rs2) - rs3; break;
|
||||
case FMNMADD:
|
||||
// rd = -(rs1*rs2)-rs3
|
||||
fpDest = -1*(rs1 * rs2) - rs3; break;
|
||||
case FMNMSUB:
|
||||
// rd = -(rs1*rs2)+rs3
|
||||
fpDest = -1*(rs1 * rs2) + rs3; break;
|
||||
default:
|
||||
std::abort();
|
||||
break;
|
||||
}
|
||||
|
||||
// update fcsrs
|
||||
update_fcrs(core_, t, id_);
|
||||
// update fcsrs
|
||||
update_fcrs(core_, t, id_);
|
||||
|
||||
rddata = floatToBin(fpDest);
|
||||
rddata = floatToBin(fpDest);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue