mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
tex_unit update
This commit is contained in:
parent
79fcdf7a28
commit
7b2f96bc6d
12 changed files with 687 additions and 651 deletions
Binary file not shown.
Binary file not shown.
|
@ -52,8 +52,8 @@ int main() {
|
|||
targ.karg = *arg;
|
||||
targ.tile_width = arg->dst_width;
|
||||
targ.tile_height = (arg->dst_height + arg->num_tasks - 1) / arg->num_tasks;
|
||||
targ.deltaX = 1.0f / arg->dst_width;
|
||||
targ.deltaY = 1.0f / arg->dst_height;
|
||||
targ.deltaX = 1.0f / (((float)arg->src_width) / arg->dst_width);
|
||||
targ.deltaY = 1.0f / (((float)arg->src_height) / arg->dst_height);
|
||||
|
||||
vx_spawn_tasks(arg->num_tasks, kernel_body, &targ);
|
||||
}
|
File diff suppressed because it is too large
Load diff
Binary file not shown.
|
@ -124,7 +124,7 @@ int main(int argc, char *argv[]) {
|
|||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));
|
||||
|
||||
uint32_t num_tasks = max_cores * max_warps * max_threads / 4;
|
||||
uint32_t num_tasks = max_cores * max_warps * max_threads;
|
||||
|
||||
std::cout << "number of tasks: " << std::dec << num_tasks << std::endl;
|
||||
std::cout << "source buffer: width=" << src_width << ", heigth=" << src_height << ", size=" << src_bufsize << " bytes" << std::endl;
|
||||
|
@ -170,8 +170,8 @@ int main(int argc, char *argv[]) {
|
|||
RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
|
||||
}
|
||||
|
||||
// upload source buffer0
|
||||
std::cout << "upload source buffer0" << std::endl;
|
||||
// upload source buffer
|
||||
std::cout << "upload source buffer" << std::endl;
|
||||
{
|
||||
auto buf_ptr = (int8_t*)vx_host_ptr(buffer);
|
||||
for (uint32_t i = 0; i < src_bufsize; ++i) {
|
||||
|
|
|
@ -26,9 +26,9 @@ extern "C" {
|
|||
void dpi_utof(int a, int frm, int* result, int* fflags);
|
||||
|
||||
void dpi_fclss(int a, int* result);
|
||||
void dpi_fsgnj(int a, int* result);
|
||||
void dpi_fsgnjn(int a, int* result);
|
||||
void dpi_fsgnjx(int a, int* result);
|
||||
void dpi_fsgnj(int a, int b, int* result);
|
||||
void dpi_fsgnjn(int a, int b, int* result);
|
||||
void dpi_fsgnjx(int a, int b, int* result);
|
||||
|
||||
void dpi_flt(int a, int b, int* result, int* fflags);
|
||||
void dpi_fle(int a, int b, int* result, int* fflags);
|
||||
|
@ -244,21 +244,53 @@ void dpi_fmax(int a, int b, int* result, int* fflags) {
|
|||
}
|
||||
|
||||
// Classifies the IEEE-754 single-precision bit pattern held in `a` and writes
// a one-hot class mask to `*result`, using the RISC-V FCLASS.S bit layout:
//   bit 0: -infinity     bit 5: +subnormal
//   bit 1: -normal       bit 6: +normal
//   bit 2: -subnormal    bit 7: +infinity
//   bit 3: -0            bit 8: signaling NaN
//   bit 4: +0            bit 9: quiet NaN
//
// a      - raw 32-bit encoding of the float operand.
// result - output; receives exactly one set bit from the table above.
void dpi_fclss(int a, int* result) {
  // Operate on an unsigned copy so the field extraction uses logical shifts.
  const uint32_t bits     = static_cast<uint32_t>(a);
  const bool     fsign    = (bits >> 31) != 0;
  const uint32_t expo     = (bits >> 23) & 0xFF;
  const uint32_t fraction = bits & 0x7FFFFF;

  int r = 0;

  if (expo == 0) {
    if (fraction == 0) {
      r = fsign ? (1 << 3) : (1 << 4); // +/- 0
    } else {
      r = fsign ? (1 << 2) : (1 << 5); // +/- subnormal
    }
  } else if (expo == 0xFF) {
    if (fraction == 0) {
      r = fsign ? (1 << 0) : (1 << 7); // +/- infinity
    } else if (fraction & 0x00400000) {
      // A NaN is quiet whenever the fraction MSB is set; the sign bit and
      // the remaining payload bits do not affect the classification.
      r = (1 << 9); // quiet NaN
    } else {
      r = (1 << 8); // signaling NaN
    }
  } else {
    r = fsign ? (1 << 1) : (1 << 6); // +/- normal
  }

  *result = r;
}
|
||||
|
||||
// Sign injection (RISC-V FSGNJ.S): writes to `*result` the value formed from
// bits [30:0] of `a` combined with the sign bit [31] of `b`.
//
// a      - raw 32-bit encoding supplying exponent and fraction.
// b      - raw 32-bit encoding supplying the sign bit.
// result - output; receives the combined bit pattern.
void dpi_fsgnj(int a, int b, int* result) {
  const uint32_t magnitude = static_cast<uint32_t>(a) & 0x7FFFFFFFu;
  const uint32_t sign      = static_cast<uint32_t>(b) & 0x80000000u;

  *result = static_cast<int>(sign | magnitude);
}
|
||||
|
||||
// Negated sign injection (RISC-V FSGNJN.S): writes to `*result` the value
// formed from bits [30:0] of `a` combined with the inverted sign bit of `b`.
//
// a      - raw 32-bit encoding supplying exponent and fraction.
// b      - raw 32-bit encoding whose sign bit is inverted and injected.
// result - output; receives the combined bit pattern.
void dpi_fsgnjn(int a, int b, int* result) {
  const uint32_t magnitude = static_cast<uint32_t>(a) & 0x7FFFFFFFu;
  const uint32_t sign      = ~static_cast<uint32_t>(b) & 0x80000000u;

  *result = static_cast<int>(sign | magnitude);
}
|
||||
|
||||
// XOR sign injection (RISC-V FSGNJX.S): writes to `*result` the value formed
// from bits [30:0] of `a` with a sign bit equal to sign(a) XOR sign(b).
//
// a      - raw 32-bit encoding supplying exponent, fraction and one sign.
// b      - raw 32-bit encoding supplying the other sign.
// result - output; receives the combined bit pattern.
void dpi_fsgnjx(int a, int b, int* result) {
  const uint32_t ua        = static_cast<uint32_t>(a);
  const uint32_t ub        = static_cast<uint32_t>(b);
  const uint32_t magnitude = ua & 0x7FFFFFFFu;
  const uint32_t sign      = (ua ^ ub) & 0x80000000u;

  *result = static_cast<int>(sign | magnitude);
}
|
|
@ -18,9 +18,9 @@ import "DPI-C" context function void dpi_itof(input int a, input bit[2:0] frm, o
|
|||
import "DPI-C" context function void dpi_utof(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
|
||||
import "DPI-C" context function void dpi_fclss(input int a, output int result);
|
||||
import "DPI-C" context function void dpi_fsgnj(input int a, output int result);
|
||||
import "DPI-C" context function void dpi_fsgnjn(input int a, output int result);
|
||||
import "DPI-C" context function void dpi_fsgnjx(input int a, output int result);
|
||||
import "DPI-C" context function void dpi_fsgnj(input int a, input int b, output int result);
|
||||
import "DPI-C" context function void dpi_fsgnjn(input int a, input int b, output int result);
|
||||
import "DPI-C" context function void dpi_fsgnjx(input int a, input int b, output int result);
|
||||
|
||||
import "DPI-C" context function void dpi_flt(input int a, input int b, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_fle(input int a, input int b, output int result, output bit[4:0] fflags);
|
||||
|
|
|
@ -78,7 +78,7 @@ module VX_lsu_unit #(
|
|||
wire [`NUM_THREADS-1:0] rsp_rem_mask_n;
|
||||
|
||||
reg [`NUM_THREADS-1:0] req_sent_mask;
|
||||
wire req_sent_all;
|
||||
wire sent_all_ready;
|
||||
|
||||
wire [`LSUQ_ADDR_BITS-1:0] mbuf_waddr, mbuf_raddr;
|
||||
wire mbuf_full;
|
||||
|
@ -116,13 +116,13 @@ module VX_lsu_unit #(
|
|||
.full (mbuf_full)
|
||||
);
|
||||
|
||||
assign req_sent_all = (&(dcache_req_if.ready | req_sent_mask | ~req_tmask))
|
||||
|| (req_is_dup & dcache_req_if.ready[0]);
|
||||
assign sent_all_ready = (&(dcache_req_if.ready | req_sent_mask | ~req_tmask))
|
||||
|| (req_is_dup & dcache_req_if.ready[0]);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset || req_sent_all) begin
|
||||
if (reset || sent_all_ready) begin
|
||||
req_sent_mask <= 0;
|
||||
end else if (!req_sent_all) begin
|
||||
end else begin
|
||||
req_sent_mask <= req_sent_mask | dcache_req_fire;
|
||||
end
|
||||
end
|
||||
|
@ -193,11 +193,11 @@ module VX_lsu_unit #(
|
|||
assign dcache_req_if.tag = {`NUM_THREADS{req_tag}};
|
||||
`endif
|
||||
|
||||
assign ready_in = req_ready_dep && req_sent_all;
|
||||
assign ready_in = req_ready_dep && sent_all_ready;
|
||||
|
||||
// send store commit
|
||||
|
||||
wire is_store_rsp = req_valid && ~req_wb && req_sent_all;
|
||||
wire is_store_rsp = req_valid && ~req_wb && sent_all_ready;
|
||||
|
||||
assign st_commit_if.valid = is_store_rsp;
|
||||
assign st_commit_if.wid = req_wid;
|
||||
|
|
|
@ -330,9 +330,9 @@ module VX_fpu_dpi #(
|
|||
dpi_feq (dataa[i], datab[i], result_feq[i], fflags_feq[i]);
|
||||
dpi_fmin (dataa[i], datab[i], result_fmin[i], fflags_fmin[i]);
|
||||
dpi_fmax (dataa[i], datab[i], result_fmax[i], fflags_fmax[i]);
|
||||
dpi_fsgnj (dataa[i], result_fsgnj[i]);
|
||||
dpi_fsgnjn (dataa[i], result_fsgnjn[i]);
|
||||
dpi_fsgnjx (dataa[i], result_fsgnjx[i]);
|
||||
dpi_fsgnj (dataa[i], datab[i], result_fsgnj[i]);
|
||||
dpi_fsgnjn (dataa[i], datab[i], result_fsgnjn[i]);
|
||||
dpi_fsgnjx (dataa[i], datab[i], result_fsgnjx[i]);
|
||||
result_fmv[i] = dataa[i];
|
||||
end
|
||||
end
|
||||
|
|
|
@ -99,7 +99,7 @@ module VX_tex_memory #(
|
|||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire req_texel_valid;
|
||||
wire req_texel_sent, last_texel_sent;
|
||||
wire sent_all_ready, last_texel_sent;
|
||||
wire req_texel_dup;
|
||||
wire [`NUM_THREADS-1:0][29:0] req_texel_addr;
|
||||
reg [1:0] req_texel_idx;
|
||||
|
@ -108,7 +108,7 @@ module VX_tex_memory #(
|
|||
always @(posedge clk) begin
|
||||
if (reset || last_texel_sent) begin
|
||||
req_texel_idx <= 0;
|
||||
end else if (req_texel_sent) begin
|
||||
end else if (req_texel_valid && sent_all_ready) begin
|
||||
req_texel_idx <= req_texel_idx + 1;
|
||||
end
|
||||
end
|
||||
|
@ -126,7 +126,7 @@ module VX_tex_memory #(
|
|||
assign req_texel_dup = q_dup_reqs[req_texel_idx];
|
||||
|
||||
wire is_last_texel = (req_texel_idx == (q_req_filter ? 3 : 0));
|
||||
assign last_texel_sent = req_texel_sent && is_last_texel;
|
||||
assign last_texel_sent = req_texel_valid && sent_all_ready && is_last_texel;
|
||||
|
||||
// DCache Request
|
||||
|
||||
|
@ -136,11 +136,11 @@ module VX_tex_memory #(
|
|||
|
||||
assign dcache_req_fire = dcache_req_if.valid & dcache_req_if.ready;
|
||||
|
||||
assign req_texel_sent = (&(dcache_req_if.ready | texel_sent_mask | ~q_req_tmask))
|
||||
|| (req_texel_dup & dcache_req_if.ready[0]);
|
||||
assign sent_all_ready = (&(dcache_req_if.ready | texel_sent_mask | ~q_req_tmask))
|
||||
|| (req_texel_dup & dcache_req_if.ready[0]);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset || req_texel_sent) begin
|
||||
if (reset || sent_all_ready) begin
|
||||
texel_sent_mask <= 0;
|
||||
end else begin
|
||||
texel_sent_mask <= texel_sent_mask | dcache_req_fire;
|
||||
|
|
|
@ -561,20 +561,18 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
|
|||
|
||||
// FSGNJ.S, FSGNJN.S, FSGNJX.S
|
||||
case 0x10: {
|
||||
bool fsign1 = rsdata[0] & 0x80000000;
|
||||
bool fsign1 = (rsdata[0] >> 31);
|
||||
uint32_t fdata1 = rsdata[0] & 0x7FFFFFFF;
|
||||
bool fsign2 = rsdata[1] & 0x80000000;
|
||||
bool fsign2 = (rsdata[1] >> 31);
|
||||
switch (func3) {
|
||||
case 0: // FSGNJ.S
|
||||
rddata = (fsign2 << 31) | fdata1;
|
||||
break;
|
||||
case 1: // FSGNJN.S
|
||||
fsign2 = !fsign2;
|
||||
rddata = (fsign2 << 31) | fdata1;
|
||||
rddata = (!fsign2 << 31) | fdata1;
|
||||
break;
|
||||
case 2: { // FSGNJX.S
|
||||
bool sign = fsign1 ^ fsign2;
|
||||
rddata = (sign << 31) | fdata1;
|
||||
rddata = ((fsign1 ^ fsign2) << 31) | fdata1;
|
||||
} break;
|
||||
}
|
||||
} break;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue