mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
minor updates
This commit is contained in:
parent
6a03882bd2
commit
b297c29a10
3 changed files with 42 additions and 45 deletions
|
@ -69,7 +69,7 @@ module VX_alu_muldiv #(
|
|||
wire mul_fire_in = mul_valid_in && mul_ready_in;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [`XLEN-1:0] mul_resultl, mul_resulth;
|
||||
reg [`XLEN-1:0] mul_resultl, mul_resulth;
|
||||
wire [`XLEN-1:0] mul_in1 = is_alu_w ? (execute_if.data.rs1_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs1_data[i];
|
||||
wire [`XLEN-1:0] mul_in2 = is_alu_w ? (execute_if.data.rs2_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs2_data[i];
|
||||
always @(*) begin
|
||||
|
@ -235,7 +235,7 @@ module VX_alu_muldiv #(
|
|||
wire div_fire_in = div_valid_in && div_ready_in;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [`XLEN-1:0] div_quotient, div_remainder;
|
||||
reg [`XLEN-1:0] div_quotient, div_remainder;
|
||||
always @(*) begin
|
||||
dpi_idiv (div_fire_in, is_signed_op, div_in1[i], div_in2[i], div_quotient, div_remainder);
|
||||
end
|
||||
|
|
|
@ -141,13 +141,13 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
begin : fma
|
||||
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] result_fma;
|
||||
wire [NUM_LANES-1:0][63:0] result_fadd;
|
||||
wire [NUM_LANES-1:0][63:0] result_fsub;
|
||||
wire [NUM_LANES-1:0][63:0] result_fmul;
|
||||
wire [NUM_LANES-1:0][63:0] result_fmadd;
|
||||
wire [NUM_LANES-1:0][63:0] result_fmsub;
|
||||
wire [NUM_LANES-1:0][63:0] result_fnmadd;
|
||||
wire [NUM_LANES-1:0][63:0] result_fnmsub;
|
||||
reg [NUM_LANES-1:0][63:0] result_fadd;
|
||||
reg [NUM_LANES-1:0][63:0] result_fsub;
|
||||
reg [NUM_LANES-1:0][63:0] result_fmul;
|
||||
reg [NUM_LANES-1:0][63:0] result_fmadd;
|
||||
reg [NUM_LANES-1:0][63:0] result_fmsub;
|
||||
reg [NUM_LANES-1:0][63:0] result_fnmadd;
|
||||
reg [NUM_LANES-1:0][63:0] result_fnmsub;
|
||||
|
||||
fflags_t [NUM_LANES-1:0] fflags_fma;
|
||||
fflags_t [NUM_LANES-1:0] fflags_fadd;
|
||||
|
@ -217,7 +217,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
begin : fdiv
|
||||
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] result_fdiv_r;
|
||||
wire [NUM_LANES-1:0][63:0] result_fdiv;
|
||||
reg [NUM_LANES-1:0][63:0] result_fdiv;
|
||||
fflags_t [NUM_LANES-1:0] fflags_fdiv;
|
||||
|
||||
wire fdiv_valid = (valid_in && core_select == FPU_DIVSQRT) && is_div;
|
||||
|
@ -256,7 +256,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
begin : fsqrt
|
||||
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] result_fsqrt_r;
|
||||
wire [NUM_LANES-1:0][63:0] result_fsqrt;
|
||||
reg [NUM_LANES-1:0][63:0] result_fsqrt;
|
||||
fflags_t [NUM_LANES-1:0] fflags_fsqrt;
|
||||
|
||||
wire fsqrt_valid = (valid_in && core_select == FPU_DIVSQRT) && ~is_div;
|
||||
|
@ -295,11 +295,11 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
begin : fcvt
|
||||
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] result_fcvt;
|
||||
wire [NUM_LANES-1:0][63:0] result_itof;
|
||||
wire [NUM_LANES-1:0][63:0] result_utof;
|
||||
wire [NUM_LANES-1:0][63:0] result_ftoi;
|
||||
wire [NUM_LANES-1:0][63:0] result_ftou;
|
||||
wire [NUM_LANES-1:0][63:0] result_f2f;
|
||||
reg [NUM_LANES-1:0][63:0] result_itof;
|
||||
reg [NUM_LANES-1:0][63:0] result_utof;
|
||||
reg [NUM_LANES-1:0][63:0] result_ftoi;
|
||||
reg [NUM_LANES-1:0][63:0] result_ftou;
|
||||
reg [NUM_LANES-1:0][63:0] result_f2f;
|
||||
|
||||
fflags_t [NUM_LANES-1:0] fflags_fcvt;
|
||||
fflags_t [NUM_LANES-1:0] fflags_itof;
|
||||
|
@ -359,15 +359,15 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
begin : fncp
|
||||
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] result_fncp;
|
||||
wire [NUM_LANES-1:0][63:0] result_fclss;
|
||||
wire [NUM_LANES-1:0][63:0] result_flt;
|
||||
wire [NUM_LANES-1:0][63:0] result_fle;
|
||||
wire [NUM_LANES-1:0][63:0] result_feq;
|
||||
wire [NUM_LANES-1:0][63:0] result_fmin;
|
||||
wire [NUM_LANES-1:0][63:0] result_fmax;
|
||||
wire [NUM_LANES-1:0][63:0] result_fsgnj;
|
||||
wire [NUM_LANES-1:0][63:0] result_fsgnjn;
|
||||
wire [NUM_LANES-1:0][63:0] result_fsgnjx;
|
||||
reg [NUM_LANES-1:0][63:0] result_fclss;
|
||||
reg [NUM_LANES-1:0][63:0] result_flt;
|
||||
reg [NUM_LANES-1:0][63:0] result_fle;
|
||||
reg [NUM_LANES-1:0][63:0] result_feq;
|
||||
reg [NUM_LANES-1:0][63:0] result_fmin;
|
||||
reg [NUM_LANES-1:0][63:0] result_fmax;
|
||||
reg [NUM_LANES-1:0][63:0] result_fsgnj;
|
||||
reg [NUM_LANES-1:0][63:0] result_fsgnjn;
|
||||
reg [NUM_LANES-1:0][63:0] result_fsgnjx;
|
||||
reg [NUM_LANES-1:0][63:0] result_fmvx;
|
||||
reg [NUM_LANES-1:0][63:0] result_fmvf;
|
||||
|
||||
|
|
|
@ -112,6 +112,7 @@ module VX_mem_coalescer #(
|
|||
reg [OUT_REQS-1:0] batch_valid_r, batch_valid_n;
|
||||
reg [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] seed_addr_r, seed_addr_n;
|
||||
reg [OUT_REQS-1:0][ATYPE_WIDTH-1:0] seed_atype_r, seed_atype_n;
|
||||
reg [NUM_REQS-1:0] addr_matches_r, addr_matches_n;
|
||||
reg [NUM_REQS-1:0] processed_mask_r, processed_mask_n;
|
||||
|
||||
wire [OUT_REQS-1:0][NUM_REQS_W-1:0] seed_idx;
|
||||
|
@ -137,6 +138,17 @@ module VX_mem_coalescer #(
|
|||
assign seed_idx[i] = NUM_REQS_W'(BATCH_SIZE * i) + NUM_REQS_W'(batch_idx);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < OUT_REQS; ++i) begin
|
||||
assign seed_addr_n[i] = in_addr_base[seed_idx[i]];
|
||||
assign seed_atype_n[i] = in_req_atype[seed_idx[i]];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < OUT_REQS; ++i) begin
|
||||
for (genvar j = 0; j < BATCH_SIZE; ++j) begin
|
||||
assign addr_matches_n[BATCH_SIZE * i + j] = (in_addr_base[BATCH_SIZE * i + j] == seed_addr_n[i]);
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state_r <= STATE_SETUP;
|
||||
|
@ -144,12 +156,13 @@ module VX_mem_coalescer #(
|
|||
out_req_valid_r <= 0;
|
||||
end else begin
|
||||
state_r <= state_n;
|
||||
out_req_valid_r <= out_req_valid_n;
|
||||
batch_valid_r <= batch_valid_n;
|
||||
seed_addr_r <= seed_addr_n;
|
||||
seed_atype_r <= seed_atype_n;
|
||||
out_req_rw_r <= out_req_rw_n;
|
||||
addr_matches_r <= addr_matches_n;
|
||||
out_req_valid_r <= out_req_valid_n;
|
||||
out_req_mask_r <= out_req_mask_n;
|
||||
out_req_rw_r <= out_req_rw_n;
|
||||
out_req_addr_r <= out_req_addr_n;
|
||||
out_req_atype_r <= out_req_atype_n;
|
||||
out_req_byteen_r <= out_req_byteen_n;
|
||||
|
@ -159,15 +172,7 @@ module VX_mem_coalescer #(
|
|||
end
|
||||
end
|
||||
|
||||
wire [NUM_REQS-1:0] addr_matches;
|
||||
|
||||
for (genvar i = 0; i < OUT_REQS; ++i) begin
|
||||
for (genvar j = 0; j < BATCH_SIZE; ++j) begin
|
||||
assign addr_matches[BATCH_SIZE * i + j] = (in_addr_base[BATCH_SIZE * i + j] == seed_addr_r[i]);
|
||||
end
|
||||
end
|
||||
|
||||
wire [NUM_REQS-1:0] current_pmask = in_req_mask & addr_matches;
|
||||
wire [NUM_REQS-1:0] current_pmask = in_req_mask & addr_matches_r;
|
||||
|
||||
reg [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] req_byteen_merged;
|
||||
reg [OUT_REQS-1:0][DATA_OUT_WIDTH-1:0] req_data_merged;
|
||||
|
@ -187,16 +192,13 @@ module VX_mem_coalescer #(
|
|||
|
||||
wire [OUT_REQS * BATCH_SIZE - 1:0] pending_mask;
|
||||
for (genvar i = 0; i < OUT_REQS * BATCH_SIZE; ++i) begin
|
||||
assign pending_mask[i] = in_req_mask[i] && ~addr_matches[i] && ~processed_mask_r[i];
|
||||
assign pending_mask[i] = in_req_mask[i] && ~addr_matches_r[i] && ~processed_mask_r[i];
|
||||
end
|
||||
wire batch_completed = ~(| pending_mask);
|
||||
|
||||
always @(*) begin
|
||||
state_n = state_r;
|
||||
|
||||
seed_addr_n = seed_addr_r;
|
||||
seed_atype_n = seed_atype_r;
|
||||
|
||||
out_req_valid_n = out_req_valid_r;
|
||||
out_req_mask_n = out_req_mask_r;
|
||||
out_req_rw_n = out_req_rw_r;
|
||||
|
@ -211,11 +213,6 @@ module VX_mem_coalescer #(
|
|||
|
||||
case (state_r)
|
||||
STATE_SETUP: begin
|
||||
// find the next seed address
|
||||
for (integer i = 0; i < OUT_REQS; ++i) begin
|
||||
seed_addr_n[i] = in_addr_base[seed_idx[i]];
|
||||
seed_atype_n[i] = in_req_atype[seed_idx[i]];
|
||||
end
|
||||
// wait for pending outgoing request to submit
|
||||
if (out_req_valid && out_req_ready) begin
|
||||
out_req_valid_n = 0;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue