tex_unit address generation complete

This commit is contained in:
Blaise Tine 2021-03-20 18:56:34 -04:00
parent 1431ef9bc0
commit 7ff5c082bc
4 changed files with 100 additions and 80 deletions

View file

@ -43,52 +43,85 @@ module VX_tex_addr_gen #(
`UNUSED_PARAM (CORE_ID)
`UNUSED_VAR (lod)
wire [`FIXED_FRAC-1:0] u[`NUM_THREADS-1:0][1:0];
wire [`FIXED_FRAC-1:0] v[`NUM_THREADS-1:0][1:0];
// addressing mode
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
// addressing mode
wire [31:0] u, v;
wire [31:0] fu[1:0];
wire [31:0] fv[1:0];
assign fu[0] = coord_u[i] - (filter ? (`FIXED_HALF >> log2_width) : 0);
assign fv[0] = coord_v[i] - (filter ? (`FIXED_HALF >> log2_height) : 0);
assign fu[1] = coord_u[i] + (filter ? (`FIXED_HALF >> log2_width) : 0);
assign fv[1] = coord_v[i] + (filter ? (`FIXED_HALF >> log2_height) : 0);
VX_tex_wrap #(
.CORE_ID (CORE_ID)
) tex_wrap_u (
) tex_wrap_u0 (
.wrap_i (wrap_u),
.coord_i (coord_u[i]),
.coord_o (u)
.coord_i (fu[0]),
.coord_o (u[i][0])
);
VX_tex_wrap #(
.CORE_ID (CORE_ID)
) tex_wrap_v (
) tex_wrap_v0 (
.wrap_i (wrap_v),
.coord_i (coord_v[i]),
.coord_o (v)
.coord_i (fv[0]),
.coord_o (v[i][0])
);
// texel addresses generation
VX_tex_wrap #(
.CORE_ID (CORE_ID)
) tex_wrap_u1 (
.wrap_i (wrap_u),
.coord_i (fu[1]),
.coord_o (u[i][1])
);
wire [31:0] x_offset, y_offset;
wire [31:0] addr0;
VX_tex_wrap #(
.CORE_ID (CORE_ID)
) tex_wrap_v1 (
.wrap_i (wrap_v),
.coord_i (fv[1]),
.coord_o (v[i][1])
);
end
// addresses generation
assign x_offset = u >> (5'(`FIXED_FRAC) - log2_width);
assign y_offset = v >> (5'(`FIXED_FRAC) - log2_height);
assign addr0 = base_addr + (x_offset + (y_offset << log2_width)) << log2_stride;
wire [31:0] addr [`NUM_THREADS-1:0][3:0];
wire [3:0] req_valids = 4'(valid_in);
wire [3:0][31:0] req_address = {4{addr0}};
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
wire [`FIXED_FRAC-1:0] x [1:0];
wire [`FIXED_FRAC-1:0] y [1:0];
assign x[0] = u[i][0] >> ((`FIXED_FRAC) - log2_width);
assign x[1] = u[i][1] >> ((`FIXED_FRAC) - log2_width);
assign y[0] = v[i][0] >> ((`FIXED_FRAC) - log2_height);
assign y[1] = v[i][1] >> ((`FIXED_FRAC) - log2_height);
assign addr [i][0] = base_addr + (x[0] + (y[0] << log2_width)) << log2_stride;
assign addr [i][1] = base_addr + (x[1] + (y[0] << log2_width)) << log2_stride;
assign addr [i][2] = base_addr + (x[0] + (y[1] << log2_width)) << log2_stride;
assign addr [i][3] = base_addr + (x[1] + (y[1] << log2_width)) << log2_stride;
end
wire stall_out = mem_req_valid && ~mem_req_ready;
VX_pipe_register #(
.DATAW (1 + 4 + 4 * 32 + REQ_TAG_WIDTH),
.DATAW (1 + 4 + `NUM_THREADS * 4 * 32 + REQ_TAG_WIDTH),
.RESETW (1)
) pipe_reg (
.clk (clk),
.reset (reset),
.enable (~stall_out),
.data_in ({req_valids, req_address, req_tag}),
.data_out ({mem_req_valid, mem_req_addr, mem_req_tag})
.data_in ({valid_in, req_tmask, filter, req_tag, addr}),
.data_out ({mem_req_valid, mem_req_tmask, mem_req_filter, mem_req_tag, mem_req_addr})
);
assign ready_in = ~stall_out;

View file

@ -6,6 +6,7 @@
// Number of low-order bits treated as the fractional part of a coordinate.
`define FIXED_FRAC 20
// Bits left over in a 32-bit word once the fractional bits are removed.
`define FIXED_INT (32 - `FIXED_FRAC)
// 1 << FIXED_FRAC: the encoding of 1.0 when FIXED_FRAC bits are fractional.
`define FIXED_ONE (1 << `FIXED_FRAC)
// Half of FIXED_ONE (the encoding of 0.5 under the same interpretation).
`define FIXED_HALF (`FIXED_ONE >> 1)
// All-ones mask covering the low FIXED_FRAC bits.
`define FIXED_MASK (`FIXED_ONE - 1)
// Bounds x to the range [lo, hi]: yields lo when x < lo, hi when x > hi, else x.
// NOTE(review): arguments are substituted textually and are not individually
// parenthesized, so compound expressions passed as x/lo/hi may bind unexpectedly.
`define CLAMP(x,lo,hi) ((x < lo) ? lo : ((x > hi) ? hi : x))
@ -13,8 +14,8 @@
`define TEX_ADDR_BITS 32
`define TEX_FORMAT_BITS 3
`define TEX_WRAP_BITS 2
`define TEX_WIDTH_BITS 12
`define TEX_HEIGHT_BITS 12
`define TEX_WIDTH_BITS 4
`define TEX_HEIGHT_BITS 4
`define TEX_STRIDE_BITS 2
`define TEX_FILTER_BITS 1

View file

@ -1,35 +1,16 @@
`include "VX_tex_define.vh"
/*
switch(addressing_mode) {
case undefined: return is_undefined;
case clamp_to_edge: return intdowni(max(0, min(coord, coorddim - 1)));
case clamp_to_border: return is_border;
case repeat:
tile = intdowni(coord / coorddim);
return intdowni(coord - (tile * coorddim));
case mirrored_repeat:
mirrored_coord = (coord < 0) ? (-coord - 1) : coord;
tile = intdowni(mirrored_coord / coorddim);
mirrored_coord = intdowni(mirrored_coord - (tile * coorddim));
if (tile & 1) {
mirrored_coord = (coorddim - 1) - mirrored_coord;
}
return mirrored_coord;
}
*/
module VX_tex_wrap #(
parameter CORE_ID = 0
) (
input wire [`TEX_WRAP_BITS-1:0] wrap_i,
input wire [31:0] coord_i,
input wire [`FIXED_FRAC-1:0] coord_o
output wire [`FIXED_FRAC-1:0] coord_o
);
`UNUSED_PARAM (CORE_ID)
reg [31:0] coord_r;
reg [`FIXED_FRAC-1:0] coord_r;
wire [31:0] clamp = `CLAMP(coord_i, 0, `FIXED_MASK);

View file

@ -776,46 +776,51 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
case FMSUB:
case FMNMADD:
case FMNMSUB: {
// if the multiplicands are infinity and zero, then set FCSR
if (fpBinIsZero(rsdata[0]) || fpBinIsZero(rsdata[1]) || fpBinIsInf(rsdata[0]) || fpBinIsInf(rsdata[1])) {
core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x10, t, id_); // set NV bit
core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x10, t, id_); // set NV bit
}
if (fpBinIsNan(rsdata[0]) || fpBinIsNan(rsdata[1]) || fpBinIsNan(rsdata[2])) {
// if one of the operands is NaN, and the addend is not a quiet NaN, then set FCSR
if ((fpBinIsNan(rsdata[0])==2) | (fpBinIsNan(rsdata[1])==2) | (fpBinIsNan(rsdata[1])==2)) {
core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x10, t, id_); // set NV bit
core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x10, t, id_); // set NV bit
}
rddata = 0x7fc00000; // canonical(quiet) NaN
// select FP format
if (core_->get_csr(CSR_FPMODE, t, id_) == 1) {
// CODE
} else {
float rs1 = intregToFloat(rsdata[0]);
float rs2 = intregToFloat(rsdata[1]);
float rs3 = intregToFloat(rsdata[2]);
float fpDest(0.0);
feclearexcept(FE_ALL_EXCEPT);
switch (opcode) {
case FMADD:
// rd = (rs1*rs2)+rs3
fpDest = (rs1 * rs2) + rs3; break;
case FMSUB:
// rd = (rs1*rs2)-rs3
fpDest = (rs1 * rs2) - rs3; break;
case FMNMADD:
// rd = -(rs1*rs2)-rs3
fpDest = -1*(rs1 * rs2) - rs3; break;
case FMNMSUB:
// rd = -(rs1*rs2)+rs3
fpDest = -1*(rs1 * rs2) + rs3; break;
default:
std::abort();
break;
}
// if the multiplicands are infinity and zero, then set FCSR
if (fpBinIsZero(rsdata[0]) || fpBinIsZero(rsdata[1]) || fpBinIsInf(rsdata[0]) || fpBinIsInf(rsdata[1])) {
core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x10, t, id_); // set NV bit
core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x10, t, id_); // set NV bit
}
if (fpBinIsNan(rsdata[0]) || fpBinIsNan(rsdata[1]) || fpBinIsNan(rsdata[2])) {
// if one of the operands is NaN, and the addend is not a quiet NaN, then set FCSR
if ((fpBinIsNan(rsdata[0])==2) | (fpBinIsNan(rsdata[1])==2) | (fpBinIsNan(rsdata[1])==2)) {
core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x10, t, id_); // set NV bit
core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x10, t, id_); // set NV bit
}
rddata = 0x7fc00000; // canonical(quiet) NaN
} else {
float rs1 = intregToFloat(rsdata[0]);
float rs2 = intregToFloat(rsdata[1]);
float rs3 = intregToFloat(rsdata[2]);
float fpDest(0.0);
feclearexcept(FE_ALL_EXCEPT);
switch (opcode) {
case FMADD:
// rd = (rs1*rs2)+rs3
fpDest = (rs1 * rs2) + rs3; break;
case FMSUB:
// rd = (rs1*rs2)-rs3
fpDest = (rs1 * rs2) - rs3; break;
case FMNMADD:
// rd = -(rs1*rs2)-rs3
fpDest = -1*(rs1 * rs2) - rs3; break;
case FMNMSUB:
// rd = -(rs1*rs2)+rs3
fpDest = -1*(rs1 * rs2) + rs3; break;
default:
std::abort();
break;
}
// update fcsrs
update_fcrs(core_, t, id_);
// update fcsrs
update_fcrs(core_, t, id_);
rddata = floatToBin(fpDest);
rddata = floatToBin(fpDest);
}
}
}
break;