tex_unit update

This commit is contained in:
Blaise Tine 2021-03-31 05:43:44 -04:00
parent 79fcdf7a28
commit 7b2f96bc6d
12 changed files with 687 additions and 651 deletions

Binary file not shown.

Binary file not shown.

View file

@ -52,8 +52,8 @@ int main() {
targ.karg = *arg;
targ.tile_width = arg->dst_width;
targ.tile_height = (arg->dst_height + arg->num_tasks - 1) / arg->num_tasks;
targ.deltaX = 1.0f / arg->dst_width;
targ.deltaY = 1.0f / arg->dst_height;
targ.deltaX = 1.0f / (((float)arg->src_width) / arg->dst_width);
targ.deltaY = 1.0f / (((float)arg->src_height) / arg->dst_height);
vx_spawn_tasks(arg->num_tasks, kernel_body, &targ);
}

File diff suppressed because it is too large Load diff

Binary file not shown.

View file

@ -124,7 +124,7 @@ int main(int argc, char *argv[]) {
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));
uint32_t num_tasks = max_cores * max_warps * max_threads / 4;
uint32_t num_tasks = max_cores * max_warps * max_threads;
std::cout << "number of tasks: " << std::dec << num_tasks << std::endl;
std::cout << "source buffer: width=" << src_width << ", heigth=" << src_height << ", size=" << src_bufsize << " bytes" << std::endl;
@ -170,8 +170,8 @@ int main(int argc, char *argv[]) {
RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
}
// upload source buffer0
std::cout << "upload source buffer0" << std::endl;
// upload source buffer
std::cout << "upload source buffer" << std::endl;
{
auto buf_ptr = (int8_t*)vx_host_ptr(buffer);
for (uint32_t i = 0; i < src_bufsize; ++i) {

View file

@ -26,9 +26,9 @@ extern "C" {
void dpi_utof(int a, int frm, int* result, int* fflags);
void dpi_fclss(int a, int* result);
void dpi_fsgnj(int a, int* result);
void dpi_fsgnjn(int a, int* result);
void dpi_fsgnjx(int a, int* result);
void dpi_fsgnj(int a, int b, int* result);
void dpi_fsgnjn(int a, int b, int* result);
void dpi_fsgnjx(int a, int b, int* result);
void dpi_flt(int a, int b, int* result, int* fflags);
void dpi_fle(int a, int b, int* result, int* fflags);
@ -244,21 +244,53 @@ void dpi_fmax(int a, int b, int* result, int* fflags) {
}
// Classifies the IEEE-754 single-precision bit pattern held in `a` and writes
// a one-hot class mask to `*result`. The bit positions follow the layout of
// the RISC-V FCLASS.S result:
//   bit 0: -infinity     bit 5: +subnormal
//   bit 1: -normal       bit 6: +normal
//   bit 2: -subnormal    bit 7: +infinity
//   bit 3: -0            bit 8: signaling NaN
//   bit 4: +0            bit 9: quiet NaN
//
// a      - raw 32-bit encoding of the float to classify.
// result - receives the class mask; exactly one bit is set.
void dpi_fclss(int a, int* result) {
  // Work on an unsigned copy so the shifts below never sign-extend.
  const uint32_t bits     = static_cast<uint32_t>(a);
  const bool     negative = (bits >> 31) != 0;
  const uint32_t expo     = (bits >> 23) & 0xFF;
  const uint32_t fraction = bits & 0x7FFFFF;

  int r = 0;
  if (expo == 0) {
    if (fraction == 0) {
      r = negative ? (1 << 3) : (1 << 4); // +/- 0
    } else {
      r = negative ? (1 << 2) : (1 << 5); // +/- subnormal
    }
  } else if (expo == 0xFF) {
    if (fraction == 0) {
      r = negative ? (1 << 0) : (1 << 7); // +/- infinity
    } else {
      // A NaN is quiet when the most significant fraction bit (bit 22) is
      // set, regardless of the sign bit or the remaining payload bits.
      r = (fraction & 0x00400000) ? (1 << 9)  // quiet NaN
                                  : (1 << 8); // signaling NaN
    }
  } else {
    r = negative ? (1 << 1) : (1 << 6); // +/- normal
  }
  *result = r;
}
void dpi_fsgnj(int a, int* result) {
// TODO
*result = 0;
// Sign injection: writes to `*result` the value formed from bits 30..0 of `a`
// combined with bit 31 (the sign bit) of `b`.
void dpi_fsgnj(int a, int b, int* result) {
  const int magnitude  = a & 0x7FFFFFFF;
  const int donor_sign = b & 0x80000000;
  *result = donor_sign | magnitude;
}
void dpi_fsgnjn(int a, int* result) {
// TODO
*result = 0;
// Negated sign injection: writes to `*result` the value formed from bits 30..0
// of `a` combined with the inverse of bit 31 (the sign bit) of `b`.
void dpi_fsgnjn(int a, int b, int* result) {
  const int magnitude     = a & 0x7FFFFFFF;
  const int inverted_sign = (~b) & 0x80000000;
  *result = inverted_sign | magnitude;
}
void dpi_fsgnjx(int a, int* result) {
// TODO
*result = 0;
// XOR sign injection: writes to `*result` the value formed from bits 30..0 of
// `a` with a sign bit equal to the XOR of the sign bits of `a` and `b`.
void dpi_fsgnjx(int a, int b, int* result) {
  const int magnitude  = a & 0x7FFFFFFF;
  // XOR-ing first and masking afterwards yields the same bit 31 as masking
  // each operand and then XOR-ing.
  const int xored_sign = (a ^ b) & 0x80000000;
  *result = xored_sign | magnitude;
}

View file

@ -18,9 +18,9 @@ import "DPI-C" context function void dpi_itof(input int a, input bit[2:0] frm, o
import "DPI-C" context function void dpi_utof(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fclss(input int a, output int result);
import "DPI-C" context function void dpi_fsgnj(input int a, output int result);
import "DPI-C" context function void dpi_fsgnjn(input int a, output int result);
import "DPI-C" context function void dpi_fsgnjx(input int a, output int result);
import "DPI-C" context function void dpi_fsgnj(input int a, input int b, output int result);
import "DPI-C" context function void dpi_fsgnjn(input int a, input int b, output int result);
import "DPI-C" context function void dpi_fsgnjx(input int a, input int b, output int result);
import "DPI-C" context function void dpi_flt(input int a, input int b, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fle(input int a, input int b, output int result, output bit[4:0] fflags);

View file

@ -78,7 +78,7 @@ module VX_lsu_unit #(
wire [`NUM_THREADS-1:0] rsp_rem_mask_n;
reg [`NUM_THREADS-1:0] req_sent_mask;
wire req_sent_all;
wire sent_all_ready;
wire [`LSUQ_ADDR_BITS-1:0] mbuf_waddr, mbuf_raddr;
wire mbuf_full;
@ -116,13 +116,13 @@ module VX_lsu_unit #(
.full (mbuf_full)
);
assign req_sent_all = (&(dcache_req_if.ready | req_sent_mask | ~req_tmask))
|| (req_is_dup & dcache_req_if.ready[0]);
assign sent_all_ready = (&(dcache_req_if.ready | req_sent_mask | ~req_tmask))
|| (req_is_dup & dcache_req_if.ready[0]);
always @(posedge clk) begin
if (reset || req_sent_all) begin
if (reset || sent_all_ready) begin
req_sent_mask <= 0;
end else if (!req_sent_all) begin
end else begin
req_sent_mask <= req_sent_mask | dcache_req_fire;
end
end
@ -193,11 +193,11 @@ module VX_lsu_unit #(
assign dcache_req_if.tag = {`NUM_THREADS{req_tag}};
`endif
assign ready_in = req_ready_dep && req_sent_all;
assign ready_in = req_ready_dep && sent_all_ready;
// send store commit
wire is_store_rsp = req_valid && ~req_wb && req_sent_all;
wire is_store_rsp = req_valid && ~req_wb && sent_all_ready;
assign st_commit_if.valid = is_store_rsp;
assign st_commit_if.wid = req_wid;

View file

@ -330,9 +330,9 @@ module VX_fpu_dpi #(
dpi_feq (dataa[i], datab[i], result_feq[i], fflags_feq[i]);
dpi_fmin (dataa[i], datab[i], result_fmin[i], fflags_fmin[i]);
dpi_fmax (dataa[i], datab[i], result_fmax[i], fflags_fmax[i]);
dpi_fsgnj (dataa[i], result_fsgnj[i]);
dpi_fsgnjn (dataa[i], result_fsgnjn[i]);
dpi_fsgnjx (dataa[i], result_fsgnjx[i]);
dpi_fsgnj (dataa[i], datab[i], result_fsgnj[i]);
dpi_fsgnjn (dataa[i], datab[i], result_fsgnjn[i]);
dpi_fsgnjx (dataa[i], datab[i], result_fsgnjx[i]);
result_fmv[i] = dataa[i];
end
end

View file

@ -99,7 +99,7 @@ module VX_tex_memory #(
///////////////////////////////////////////////////////////////////////////
wire req_texel_valid;
wire req_texel_sent, last_texel_sent;
wire sent_all_ready, last_texel_sent;
wire req_texel_dup;
wire [`NUM_THREADS-1:0][29:0] req_texel_addr;
reg [1:0] req_texel_idx;
@ -108,7 +108,7 @@ module VX_tex_memory #(
always @(posedge clk) begin
if (reset || last_texel_sent) begin
req_texel_idx <= 0;
end else if (req_texel_sent) begin
end else if (req_texel_valid && sent_all_ready) begin
req_texel_idx <= req_texel_idx + 1;
end
end
@ -126,7 +126,7 @@ module VX_tex_memory #(
assign req_texel_dup = q_dup_reqs[req_texel_idx];
wire is_last_texel = (req_texel_idx == (q_req_filter ? 3 : 0));
assign last_texel_sent = req_texel_sent && is_last_texel;
assign last_texel_sent = req_texel_valid && sent_all_ready && is_last_texel;
// DCache Request
@ -136,11 +136,11 @@ module VX_tex_memory #(
assign dcache_req_fire = dcache_req_if.valid & dcache_req_if.ready;
assign req_texel_sent = (&(dcache_req_if.ready | texel_sent_mask | ~q_req_tmask))
|| (req_texel_dup & dcache_req_if.ready[0]);
assign sent_all_ready = (&(dcache_req_if.ready | texel_sent_mask | ~q_req_tmask))
|| (req_texel_dup & dcache_req_if.ready[0]);
always @(posedge clk) begin
if (reset || req_texel_sent) begin
if (reset || sent_all_ready) begin
texel_sent_mask <= 0;
end else begin
texel_sent_mask <= texel_sent_mask | dcache_req_fire;

View file

@ -561,20 +561,18 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
// FSGNJ.S, FSGNJN.S, FSGNJX.S
case 0x10: {
bool fsign1 = rsdata[0] & 0x80000000;
bool fsign1 = (rsdata[0] >> 31);
uint32_t fdata1 = rsdata[0] & 0x7FFFFFFF;
bool fsign2 = rsdata[1] & 0x80000000;
bool fsign2 = (rsdata[1] >> 31);
switch (func3) {
case 0: // FSGNJ.S
rddata = (fsign2 << 31) | fdata1;
break;
case 1: // FSGNJN.S
fsign2 = !fsign2;
rddata = (fsign2 << 31) | fdata1;
rddata = (!fsign2 << 31) | fdata1;
break;
case 2: { // FSGNJX.S
bool sign = fsign1 ^ fsign2;
rddata = (sign << 31) | fdata1;
rddata = ((fsign1 ^ fsign2) << 31) | fdata1;
} break;
}
} break;