hw unit tests fixes

This commit is contained in:
Blaise Tine 2023-11-05 18:51:31 -08:00
parent 1fd5a95f5a
commit d13c5f2986
32 changed files with 987 additions and 736 deletions

View file

@ -22,6 +22,7 @@ rm -f blackbox.*.cache
unittest()
{
make -C tests/unittest run
make -C hw/unittest
}
isa()

View file

@ -20,7 +20,6 @@
#include "svdpi.h"
#include "verilated_vpi.h"
#include "VX_config.h"
#include "uuid_gen.h"
@ -163,7 +162,7 @@ void dpi_idiv(bool enable, bool is_signed, iword_t a, iword_t b, iword_t* quotie
uword_t dividen = a;
uword_t divisor = b;
auto inf_neg = uword_t(1) << (XLEN-1);
auto inf_neg = uword_t(1) << (8 * sizeof(iword_t) - 1);
if (is_signed) {
if (b == 0) {

View file

@ -194,174 +194,3 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
`ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if[0]);
endmodule
///////////////////////////////////////////////////////////////////////////////
module VX_cache_cluster_top #(
parameter `STRING INSTANCE_ID = "",
parameter NUM_UNITS = 2,
parameter NUM_INPUTS = 4,
parameter TAG_SEL_IDX = 0,
// Number of Word requests per cycle
parameter NUM_REQS = 4,
// Size of cache in bytes
parameter CACHE_SIZE = 16384,
// Size of line inside a bank in bytes
parameter LINE_SIZE = 16,
// Number of banks
parameter NUM_BANKS = 4,
// Number of associative ways
parameter NUM_WAYS = 4,
// Size of a word in bytes
parameter WORD_SIZE = 4,
// Core Response Queue Size
parameter CRSQ_SIZE = 2,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 16,
// Memory Response Queue Size
parameter MRSQ_SIZE = 0,
// Memory Request Queue Size
parameter MREQ_SIZE = 4,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// Request debug identifier
parameter UUID_WIDTH = 0,
// core request tag size
parameter TAG_WIDTH = 16,
// enable bypass for non-cacheable addresses
parameter NC_ENABLE = 1,
// Core response output register
parameter CORE_OUT_REG = 2,
// Memory request output register
parameter MEM_OUT_REG = 2,
parameter NUM_CACHES = `UP(NUM_UNITS),
parameter PASSTHRU = (NUM_UNITS == 0),
parameter ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES),
parameter MEM_TAG_WIDTH = PASSTHRU ? (NC_ENABLE ? `CACHE_NC_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
`CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH)) :
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS))
) (
input wire clk,
input wire reset,
// Core request
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_req_valid,
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_req_rw,
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen,
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr,
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data,
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag,
output wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_req_ready,
// Core response
output wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_rsp_valid,
output wire [NUM_INPUTS-1:0][NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_data,
output wire [NUM_INPUTS-1:0][NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag,
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_rsp_ready,
// Memory request
output wire mem_req_valid,
output wire mem_req_rw,
output wire [LINE_SIZE-1:0] mem_req_byteen,
output wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr,
output wire [`CS_LINE_WIDTH-1:0] mem_req_data,
output wire [MEM_TAG_WIDTH-1:0] mem_req_tag,
input wire mem_req_ready,
// Memory response
input wire mem_rsp_valid,
input wire [`CS_LINE_WIDTH-1:0] mem_rsp_data,
input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag,
output wire mem_rsp_ready
);
VX_mem_bus_if #(
.DATA_SIZE (WORD_SIZE),
.TAG_WIDTH (TAG_WIDTH)
) core_bus_if[NUM_INPUTS * NUM_REQS]();
VX_mem_bus_if #(
.DATA_SIZE (LINE_SIZE),
.TAG_WIDTH (MEM_TAG_WIDTH)
) mem_bus_if();
// Core request
for (genvar i = 0; i < NUM_INPUTS; ++i) begin
for (genvar r = 0; r < NUM_REQS; ++r) begin
assign core_bus_if[i * NUM_REQS + r].req_valid = core_req_valid[i][r];
assign core_bus_if[i * NUM_REQS + r].req_data.rw = core_req_rw[i][r];
assign core_bus_if[i * NUM_REQS + r].req_data.byteen = core_req_byteen[i][r];
assign core_bus_if[i * NUM_REQS + r].req_data.addr = core_req_addr[i][r];
assign core_bus_if[i * NUM_REQS + r].req_data.data = core_req_data[i][r];
assign core_bus_if[i * NUM_REQS + r].req_data.tag = core_req_tag[i][r];
assign core_req_ready[i][r] = core_bus_if[i * NUM_REQS + r].req_ready;
end
end
// Core response
for (genvar i = 0; i < NUM_INPUTS; ++i) begin
for (genvar r = 0; r < NUM_REQS; ++r) begin
assign core_rsp_valid[i][r] = core_bus_if[i * NUM_REQS + r].rsp_valid;
assign core_rsp_data[i][r] = core_bus_if[i * NUM_REQS + r].rsp_data.data;
assign core_rsp_tag[i][r] = core_bus_if[i * NUM_REQS + r].rsp_data.tag;
assign core_bus_if[i * NUM_REQS + r].rsp_ready = core_rsp_ready[i][r];
end
end
// Memory request
assign mem_req_valid = mem_bus_if.req_valid;
assign mem_req_rw = mem_bus_if.req_data.rw;
assign mem_req_byteen = mem_bus_if.req_data.byteen;
assign mem_req_addr = mem_bus_if.req_data.addr;
assign mem_req_data = mem_bus_if.req_data.data;
assign mem_req_tag = mem_bus_if.req_data.tag;
assign mem_bus_if.req_ready = mem_req_ready;
// Memory response
assign mem_bus_if.rsp_valid = mem_rsp_valid;
assign mem_bus_if.rsp_data.data = mem_rsp_data;
assign mem_bus_if.rsp_data.tag = mem_rsp_tag;
assign mem_rsp_ready = mem_bus_if.rsp_ready;
VX_cache_cluster #(
.INSTANCE_ID (INSTANCE_ID),
.NUM_UNITS (NUM_UNITS),
.NUM_INPUTS (NUM_INPUTS),
.TAG_SEL_IDX (TAG_SEL_IDX),
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_WAYS (NUM_WAYS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS),
.CRSQ_SIZE (CRSQ_SIZE),
.MSHR_SIZE (MSHR_SIZE),
.MRSQ_SIZE (MRSQ_SIZE),
.MREQ_SIZE (MREQ_SIZE),
.TAG_WIDTH (TAG_WIDTH),
.UUID_WIDTH (UUID_WIDTH),
.WRITE_ENABLE (WRITE_ENABLE),
.CORE_OUT_REG (CORE_OUT_REG),
.MEM_OUT_REG (MEM_OUT_REG)
) cache (
`ifdef PERF_ENABLE
.cache_perf (perf_icache),
`endif
.clk (clk),
.reset (reset),
.core_bus_if (core_bus_if),
.mem_bus_if (mem_bus_if)
);
endmodule

190
hw/rtl/cache/VX_cache_cluster_top.sv vendored Normal file
View file

@ -0,0 +1,190 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_cache_define.vh"
module VX_cache_cluster_top import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "",
parameter NUM_UNITS = 2,
parameter NUM_INPUTS = 4,
parameter TAG_SEL_IDX = 0,
// Number of Word requests per cycle
parameter NUM_REQS = 4,
// Size of cache in bytes
parameter CACHE_SIZE = 16384,
// Size of line inside a bank in bytes
parameter LINE_SIZE = 16,
// Number of banks
parameter NUM_BANKS = 4,
// Number of associative ways
parameter NUM_WAYS = 4,
// Size of a word in bytes
parameter WORD_SIZE = 4,
// Core Response Queue Size
parameter CRSQ_SIZE = 2,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 16,
// Memory Response Queue Size
parameter MRSQ_SIZE = 0,
// Memory Request Queue Size
parameter MREQ_SIZE = 4,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// Request debug identifier
parameter UUID_WIDTH = 0,
// core request tag size
parameter TAG_WIDTH = UUID_WIDTH + 16,
// enable bypass for non-cacheable addresses
parameter NC_ENABLE = 1,
// Core response output register
parameter CORE_OUT_REG = 2,
// Memory request output register
parameter MEM_OUT_REG = 2,
parameter NUM_CACHES = `UP(NUM_UNITS),
parameter PASSTHRU = (NUM_UNITS == 0),
parameter ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES),
parameter MEM_TAG_WIDTH = PASSTHRU ? (NC_ENABLE ? `CACHE_NC_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
`CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH)) :
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS)),
parameter MEM_TAG_X_WIDTH = MEM_TAG_WIDTH + `ARB_SEL_BITS(NUM_CACHES, 1)
) (
input wire clk,
input wire reset,
// PERF
`ifdef PERF_ENABLE
output cache_perf_t cache_perf,
`endif
// Core request
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_req_valid,
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_req_rw,
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen,
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr,
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data,
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag,
output wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_req_ready,
// Core response
output wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_rsp_valid,
output wire [NUM_INPUTS-1:0][NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_data,
output wire [NUM_INPUTS-1:0][NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag,
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_rsp_ready,
// Memory request
output wire mem_req_valid,
output wire mem_req_rw,
output wire [LINE_SIZE-1:0] mem_req_byteen,
output wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr,
output wire [`CS_LINE_WIDTH-1:0] mem_req_data,
output wire [MEM_TAG_X_WIDTH-1:0] mem_req_tag,
input wire mem_req_ready,
// Memory response
input wire mem_rsp_valid,
input wire [`CS_LINE_WIDTH-1:0] mem_rsp_data,
input wire [MEM_TAG_X_WIDTH-1:0] mem_rsp_tag,
output wire mem_rsp_ready
);
VX_mem_bus_if #(
.DATA_SIZE (WORD_SIZE),
.TAG_WIDTH (TAG_WIDTH)
) core_bus_if[NUM_INPUTS * NUM_REQS]();
VX_mem_bus_if #(
.DATA_SIZE (LINE_SIZE),
.TAG_WIDTH (MEM_TAG_X_WIDTH)
) mem_bus_if();
// Core request
for (genvar i = 0; i < NUM_INPUTS; ++i) begin
for (genvar r = 0; r < NUM_REQS; ++r) begin
assign core_bus_if[i * NUM_REQS + r].req_valid = core_req_valid[i][r];
assign core_bus_if[i * NUM_REQS + r].req_data.rw = core_req_rw[i][r];
assign core_bus_if[i * NUM_REQS + r].req_data.byteen = core_req_byteen[i][r];
assign core_bus_if[i * NUM_REQS + r].req_data.addr = core_req_addr[i][r];
assign core_bus_if[i * NUM_REQS + r].req_data.data = core_req_data[i][r];
assign core_bus_if[i * NUM_REQS + r].req_data.tag = core_req_tag[i][r];
assign core_req_ready[i][r] = core_bus_if[i * NUM_REQS + r].req_ready;
end
end
// Core response
for (genvar i = 0; i < NUM_INPUTS; ++i) begin
for (genvar r = 0; r < NUM_REQS; ++r) begin
assign core_rsp_valid[i][r] = core_bus_if[i * NUM_REQS + r].rsp_valid;
assign core_rsp_data[i][r] = core_bus_if[i * NUM_REQS + r].rsp_data.data;
assign core_rsp_tag[i][r] = core_bus_if[i * NUM_REQS + r].rsp_data.tag;
assign core_bus_if[i * NUM_REQS + r].rsp_ready = core_rsp_ready[i][r];
end
end
// Memory request
assign mem_req_valid = mem_bus_if.req_valid;
assign mem_req_rw = mem_bus_if.req_data.rw;
assign mem_req_byteen = mem_bus_if.req_data.byteen;
assign mem_req_addr = mem_bus_if.req_data.addr;
assign mem_req_data = mem_bus_if.req_data.data;
assign mem_req_tag = mem_bus_if.req_data.tag;
assign mem_bus_if.req_ready = mem_req_ready;
// Memory response
assign mem_bus_if.rsp_valid = mem_rsp_valid;
assign mem_bus_if.rsp_data.data = mem_rsp_data;
assign mem_bus_if.rsp_data.tag = mem_rsp_tag;
assign mem_rsp_ready = mem_bus_if.rsp_ready;
VX_cache_cluster #(
.INSTANCE_ID (INSTANCE_ID),
.NUM_UNITS (NUM_UNITS),
.NUM_INPUTS (NUM_INPUTS),
.TAG_SEL_IDX (TAG_SEL_IDX),
.NUM_REQS (NUM_REQS),
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_WAYS (NUM_WAYS),
.WORD_SIZE (WORD_SIZE),
.CRSQ_SIZE (CRSQ_SIZE),
.MSHR_SIZE (MSHR_SIZE),
.MRSQ_SIZE (MRSQ_SIZE),
.MREQ_SIZE (MREQ_SIZE),
.WRITE_ENABLE (WRITE_ENABLE),
.UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (TAG_WIDTH),
.NC_ENABLE (NC_ENABLE),
.CORE_OUT_REG (CORE_OUT_REG),
.MEM_OUT_REG (MEM_OUT_REG)
) cache (
`ifdef PERF_ENABLE
.cache_perf (cache_perf),
`endif
.clk (clk),
.reset (reset),
.core_bus_if (core_bus_if),
.mem_bus_if (mem_bus_if)
);
endmodule

168
hw/rtl/cache/VX_cache_top.sv vendored Normal file
View file

@ -0,0 +1,168 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_cache_define.vh"
module VX_cache_top #(
parameter `STRING INSTANCE_ID = "",
// Number of Word requests per cycle
parameter NUM_REQS = 4,
// Size of cache in bytes
parameter CACHE_SIZE = 16384,
// Size of line inside a bank in bytes
parameter LINE_SIZE = 16,
// Number of banks
parameter NUM_BANKS = 4,
// Number of associative ways
parameter NUM_WAYS = 4,
// Size of a word in bytes
parameter WORD_SIZE = 4,
// Core Response Queue Size
parameter CRSQ_SIZE = 2,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 16,
// Memory Response Queue Size
parameter MRSQ_SIZE = 0,
// Memory Request Queue Size
parameter MREQ_SIZE = 4,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// Request debug identifier
parameter UUID_WIDTH = 0,
// core request tag size
parameter TAG_WIDTH = 16,
// Core response output register
parameter CORE_OUT_REG = 2,
// Memory request output register
parameter MEM_OUT_REG = 2,
parameter MEM_TAG_WIDTH = `CLOG2(MSHR_SIZE) + `CLOG2(NUM_BANKS)
) (
input wire clk,
input wire reset,
// PERF
`ifdef PERF_ENABLE
output cache_perf_t cache_perf,
`endif
// Core request
input wire [NUM_REQS-1:0] core_req_valid,
input wire [NUM_REQS-1:0] core_req_rw,
input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen,
input wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr,
input wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data,
input wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag,
output wire [NUM_REQS-1:0] core_req_ready,
// Core response
output wire [NUM_REQS-1:0] core_rsp_valid,
output wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_data,
output wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag,
input wire [NUM_REQS-1:0] core_rsp_ready,
// Memory request
output wire mem_req_valid,
output wire mem_req_rw,
output wire [LINE_SIZE-1:0] mem_req_byteen,
output wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr,
output wire [`CS_LINE_WIDTH-1:0] mem_req_data,
output wire [MEM_TAG_WIDTH-1:0] mem_req_tag,
input wire mem_req_ready,
// Memory response
input wire mem_rsp_valid,
input wire [`CS_LINE_WIDTH-1:0] mem_rsp_data,
input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag,
output wire mem_rsp_ready
);
VX_mem_bus_if #(
.DATA_SIZE (WORD_SIZE),
.TAG_WIDTH (TAG_WIDTH)
) core_bus_if[NUM_REQS]();
VX_mem_bus_if #(
.DATA_SIZE (LINE_SIZE),
.TAG_WIDTH (MEM_TAG_WIDTH)
) mem_bus_if();
// Core request
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_bus_if[i].req_valid = core_req_valid[i];
assign core_bus_if[i].req_data.rw = core_req_rw[i];
assign core_bus_if[i].req_data.byteen = core_req_byteen[i];
assign core_bus_if[i].req_data.addr = core_req_addr[i];
assign core_bus_if[i].req_data.data = core_req_data[i];
assign core_bus_if[i].req_data.tag = core_req_tag[i];
assign core_req_ready[i] = core_bus_if[i].req_ready;
end
// Core response
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_valid[i] = core_bus_if[i].rsp_valid;
assign core_rsp_data[i] = core_bus_if[i].rsp_data.data;
assign core_rsp_tag[i] = core_bus_if[i].rsp_data.tag;
assign core_bus_if[i].rsp_ready = core_rsp_ready[i];
end
// Memory request
assign mem_req_valid = mem_bus_if.req_valid;
assign mem_req_rw = mem_bus_if.req_data.rw;
assign mem_req_byteen = mem_bus_if.req_data.byteen;
assign mem_req_addr = mem_bus_if.req_data.addr;
assign mem_req_data = mem_bus_if.req_data.data;
assign mem_req_tag = mem_bus_if.req_data.tag;
assign mem_bus_if.req_ready = mem_req_ready;
// Memory response
assign mem_bus_if.rsp_valid = mem_rsp_valid;
assign mem_bus_if.rsp_data.data = mem_rsp_data;
assign mem_bus_if.rsp_data.tag = mem_rsp_tag;
assign mem_rsp_ready = mem_bus_if.rsp_ready;
VX_cache #(
.INSTANCE_ID (INSTANCE_ID),
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_WAYS (NUM_WAYS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS),
.CRSQ_SIZE (CRSQ_SIZE),
.MSHR_SIZE (MSHR_SIZE),
.MRSQ_SIZE (MRSQ_SIZE),
.MREQ_SIZE (MREQ_SIZE),
.TAG_WIDTH (TAG_WIDTH),
.UUID_WIDTH (UUID_WIDTH),
.WRITE_ENABLE (WRITE_ENABLE),
.CORE_OUT_REG (CORE_OUT_REG),
.MEM_OUT_REG (MEM_OUT_REG)
) cache (
`ifdef PERF_ENABLE
.cache_perf (cache_perf),
`endif
.clk (clk),
.reset (reset),
.core_bus_if (core_bus_if),
.mem_bus_if (mem_bus_if)
);
endmodule

View file

@ -334,157 +334,3 @@ module VX_core import VX_gpu_pkg::*; #(
`endif
endmodule
///////////////////////////////////////////////////////////////////////////////
module VX_core_top
import VX_gpu_pkg::*;
#(
parameter CORE_ID = 0
) (
// Clock
input wire clk,
input wire reset,
input wire dcr_write_valid,
input wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_write_addr,
input wire [`VX_DCR_DATA_WIDTH-1:0] dcr_write_data,
output wire [DCACHE_NUM_REQS-1:0] dcache_req_valid,
output wire [DCACHE_NUM_REQS-1:0] dcache_req_rw,
output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE-1:0] dcache_req_byteen,
output wire [DCACHE_NUM_REQS-1:0][DCACHE_ADDR_WIDTH-1:0] dcache_req_addr,
output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] dcache_req_data,
output wire [DCACHE_NUM_REQS-1:0][DCACHE_NOSM_TAG_WIDTH-1:0] dcache_req_tag,
input wire [DCACHE_NUM_REQS-1:0] dcache_req_ready,
input wire [DCACHE_NUM_REQS-1:0] dcache_rsp_valid,
input wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] dcache_rsp_data,
input wire [DCACHE_NUM_REQS-1:0][DCACHE_NOSM_TAG_WIDTH-1:0] dcache_rsp_tag,
output wire [DCACHE_NUM_REQS-1:0] dcache_rsp_ready,
output wire icache_req_valid,
output wire icache_req_rw,
output wire [ICACHE_WORD_SIZE-1:0] icache_req_byteen,
output wire [ICACHE_ADDR_WIDTH-1:0] icache_req_addr,
output wire [ICACHE_WORD_SIZE*8-1:0] icache_req_data,
output wire [ICACHE_TAG_WIDTH-1:0] icache_req_tag,
input wire icache_req_ready,
input wire icache_rsp_valid,
input wire [ICACHE_WORD_SIZE*8-1:0] icache_rsp_data,
input wire [ICACHE_TAG_WIDTH-1:0] icache_rsp_tag,
output wire icache_rsp_ready,
`ifdef GBAR_ENABLE
output wire gbar_req_valid,
output wire [`NB_WIDTH-1:0] gbar_req_id,
output wire [`NC_WIDTH-1:0] gbar_req_size_m1,
output wire [`NC_WIDTH-1:0] gbar_req_core_id,
input wire gbar_req_ready,
input wire gbar_rsp_valid,
input wire [`NB_WIDTH-1:0] gbar_rsp_id,
`endif
// simulation helper signals
output wire sim_ebreak,
output wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value,
// Status
output wire busy
);
`ifdef GBAR_ENABLE
VX_gbar_bus_if gbar_bus_if();
assign gbar_req_valid = gbar_bus_if.req_valid;
assign gbar_req_id = gbar_bus_if.req_id;
assign gbar_req_size_m1 = gbar_bus_if.req_size_m1;
assign gbar_req_core_id = gbar_bus_if.req_core_id;
assign gbar_bus_if.req_ready = gbar_req_ready;
assign gbar_bus_if.rsp_valid = gbar_rsp_valid;
assign gbar_bus_if.rsp_id = gbar_rsp_id;
`endif
VX_dcr_bus_if dcr_bus_if();
assign dcr_bus_if.write_valid = dcr_write_valid;
assign dcr_bus_if.write_addr = dcr_write_addr;
assign dcr_bus_if.write_data = dcr_write_data;
VX_mem_bus_if #(
.DATA_SIZE (DCACHE_WORD_SIZE),
.TAG_WIDTH (DCACHE_NOSM_TAG_WIDTH)
) dcache_bus_if[DCACHE_NUM_REQS]();
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
assign dcache_req_valid[i] = dcache_bus_if[i].req_valid;
assign dcache_req_rw[i] = dcache_bus_if[i].req_data.rw;
assign dcache_req_byteen[i] = dcache_bus_if[i].req_data.byteen;
assign dcache_req_addr[i] = dcache_bus_if[i].req_data.addr;
assign dcache_req_data[i] = dcache_bus_if[i].req_data.data;
assign dcache_req_tag[i] = dcache_bus_if[i].req_data.tag;
assign dcache_bus_if[i].req_ready = dcache_req_ready[i];
assign dcache_bus_if[i].rsp_valid = dcache_rsp_valid[i];
assign dcache_bus_if[i].rsp_data.tag = dcache_rsp_tag[i];
assign dcache_bus_if[i].rsp_data.data = dcache_rsp_data[i];
assign dcache_rsp_ready[i] = dcache_bus_if[i].rsp_ready;
end
VX_mem_bus_if #(
.DATA_SIZE (ICACHE_WORD_SIZE),
.TAG_WIDTH (ICACHE_TAG_WIDTH)
) icache_bus_if();
assign icache_req_valid = icache_bus_if.req_valid;
assign icache_req_rw = icache_bus_if.req_data.rw;
assign icache_req_byteen = icache_bus_if.req_data.byteen;
assign icache_req_addr = icache_bus_if.req_data.addr;
assign icache_req_data = icache_bus_if.req_data.data;
assign icache_req_tag = icache_bus_if.req_data.tag;
assign icache_bus_if.req_ready = icache_req_ready;
assign icache_bus_if.rsp_valid = icache_rsp_valid;
assign icache_bus_if.rsp_data.tag = icache_rsp_tag;
assign icache_bus_if.rsp_data.data = icache_rsp_data;
assign icache_rsp_ready = icache_bus_if.rsp_ready;
`ifdef PERF_ENABLE
VX_mem_perf_if mem_perf_if();
`endif
`ifdef SCOPE
wire [0:0] scope_reset_w = 1'b0;
wire [0:0] scope_bus_in_w = 1'b0;
wire [0:0] scope_bus_out_w;
`UNUSED_VAR (scope_bus_out_w)
`endif
VX_core #(
.CORE_ID (0)
) core (
`SCOPE_IO_BIND (0)
.clk (clk),
.reset (reset),
`ifdef PERF_ENABLE
.mem_perf_if (mem_perf_if),
`endif
.dcr_bus_if (dcr_bus_if),
.dcache_bus_if (dcache_bus_if),
.icache_bus_if (icache_bus_if),
`ifdef GBAR_ENABLE
.gbar_bus_if (gbar_bus_if),
`endif
.sim_ebreak (sim_ebreak),
.sim_wb_value (sim_wb_value),
.busy (busy)
);
endmodule

168
hw/rtl/core/VX_core_top.sv Normal file
View file

@ -0,0 +1,168 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
`ifdef EXT_F_ENABLE
`include "VX_fpu_define.vh"
`endif
module VX_core_top import VX_gpu_pkg::*; #(
parameter CORE_ID = 0
) (
// Clock
input wire clk,
input wire reset,
input wire dcr_write_valid,
input wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_write_addr,
input wire [`VX_DCR_DATA_WIDTH-1:0] dcr_write_data,
output wire [DCACHE_NUM_REQS-1:0] dcache_req_valid,
output wire [DCACHE_NUM_REQS-1:0] dcache_req_rw,
output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE-1:0] dcache_req_byteen,
output wire [DCACHE_NUM_REQS-1:0][DCACHE_ADDR_WIDTH-1:0] dcache_req_addr,
output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] dcache_req_data,
output wire [DCACHE_NUM_REQS-1:0][DCACHE_NOSM_TAG_WIDTH-1:0] dcache_req_tag,
input wire [DCACHE_NUM_REQS-1:0] dcache_req_ready,
input wire [DCACHE_NUM_REQS-1:0] dcache_rsp_valid,
input wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] dcache_rsp_data,
input wire [DCACHE_NUM_REQS-1:0][DCACHE_NOSM_TAG_WIDTH-1:0] dcache_rsp_tag,
output wire [DCACHE_NUM_REQS-1:0] dcache_rsp_ready,
output wire icache_req_valid,
output wire icache_req_rw,
output wire [ICACHE_WORD_SIZE-1:0] icache_req_byteen,
output wire [ICACHE_ADDR_WIDTH-1:0] icache_req_addr,
output wire [ICACHE_WORD_SIZE*8-1:0] icache_req_data,
output wire [ICACHE_TAG_WIDTH-1:0] icache_req_tag,
input wire icache_req_ready,
input wire icache_rsp_valid,
input wire [ICACHE_WORD_SIZE*8-1:0] icache_rsp_data,
input wire [ICACHE_TAG_WIDTH-1:0] icache_rsp_tag,
output wire icache_rsp_ready,
`ifdef GBAR_ENABLE
output wire gbar_req_valid,
output wire [`NB_WIDTH-1:0] gbar_req_id,
output wire [`NC_WIDTH-1:0] gbar_req_size_m1,
output wire [`NC_WIDTH-1:0] gbar_req_core_id,
input wire gbar_req_ready,
input wire gbar_rsp_valid,
input wire [`NB_WIDTH-1:0] gbar_rsp_id,
`endif
// simulation helper signals
output wire sim_ebreak,
output wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value,
// Status
output wire busy
);
`ifdef GBAR_ENABLE
VX_gbar_bus_if gbar_bus_if();
assign gbar_req_valid = gbar_bus_if.req_valid;
assign gbar_req_id = gbar_bus_if.req_id;
assign gbar_req_size_m1 = gbar_bus_if.req_size_m1;
assign gbar_req_core_id = gbar_bus_if.req_core_id;
assign gbar_bus_if.req_ready = gbar_req_ready;
assign gbar_bus_if.rsp_valid = gbar_rsp_valid;
assign gbar_bus_if.rsp_id = gbar_rsp_id;
`endif
VX_dcr_bus_if dcr_bus_if();
assign dcr_bus_if.write_valid = dcr_write_valid;
assign dcr_bus_if.write_addr = dcr_write_addr;
assign dcr_bus_if.write_data = dcr_write_data;
VX_mem_bus_if #(
.DATA_SIZE (DCACHE_WORD_SIZE),
.TAG_WIDTH (DCACHE_NOSM_TAG_WIDTH)
) dcache_bus_if[DCACHE_NUM_REQS]();
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
assign dcache_req_valid[i] = dcache_bus_if[i].req_valid;
assign dcache_req_rw[i] = dcache_bus_if[i].req_data.rw;
assign dcache_req_byteen[i] = dcache_bus_if[i].req_data.byteen;
assign dcache_req_addr[i] = dcache_bus_if[i].req_data.addr;
assign dcache_req_data[i] = dcache_bus_if[i].req_data.data;
assign dcache_req_tag[i] = dcache_bus_if[i].req_data.tag;
assign dcache_bus_if[i].req_ready = dcache_req_ready[i];
assign dcache_bus_if[i].rsp_valid = dcache_rsp_valid[i];
assign dcache_bus_if[i].rsp_data.tag = dcache_rsp_tag[i];
assign dcache_bus_if[i].rsp_data.data = dcache_rsp_data[i];
assign dcache_rsp_ready[i] = dcache_bus_if[i].rsp_ready;
end
VX_mem_bus_if #(
.DATA_SIZE (ICACHE_WORD_SIZE),
.TAG_WIDTH (ICACHE_TAG_WIDTH)
) icache_bus_if();
assign icache_req_valid = icache_bus_if.req_valid;
assign icache_req_rw = icache_bus_if.req_data.rw;
assign icache_req_byteen = icache_bus_if.req_data.byteen;
assign icache_req_addr = icache_bus_if.req_data.addr;
assign icache_req_data = icache_bus_if.req_data.data;
assign icache_req_tag = icache_bus_if.req_data.tag;
assign icache_bus_if.req_ready = icache_req_ready;
assign icache_bus_if.rsp_valid = icache_rsp_valid;
assign icache_bus_if.rsp_data.tag = icache_rsp_tag;
assign icache_bus_if.rsp_data.data = icache_rsp_data;
assign icache_rsp_ready = icache_bus_if.rsp_ready;
`ifdef PERF_ENABLE
VX_mem_perf_if mem_perf_if();
`endif
`ifdef SCOPE
wire [0:0] scope_reset_w = 1'b0;
wire [0:0] scope_bus_in_w = 1'b0;
wire [0:0] scope_bus_out_w;
`UNUSED_VAR (scope_bus_out_w)
`endif
VX_core #(
.CORE_ID (CORE_ID)
) core (
`SCOPE_IO_BIND (0)
.clk (clk),
.reset (reset),
`ifdef PERF_ENABLE
.mem_perf_if (mem_perf_if),
`endif
.dcr_bus_if (dcr_bus_if),
.dcache_bus_if (dcache_bus_if),
.icache_bus_if (icache_bus_if),
`ifdef GBAR_ENABLE
.gbar_bus_if (gbar_bus_if),
`endif
.sim_ebreak (sim_ebreak),
.sim_wb_value (sim_wb_value),
.busy (busy)
);
endmodule

View file

@ -16,14 +16,14 @@
`TRACING_OFF
module VX_mem_scheduler #(
parameter `STRING INSTANCE_ID = "",
parameter NUM_REQS = 4,
parameter NUM_BANKS = 4,
parameter NUM_REQS = 1,
parameter NUM_BANKS = 1,
parameter ADDR_WIDTH = 32,
parameter DATA_WIDTH = 32,
parameter TAG_WIDTH = 32,
parameter TAG_WIDTH = 8,
parameter MEM_TAG_ID = 0, // upper section of the tag sent to the memory interface
parameter UUID_WIDTH = 0, // upper section of the mem_tag_id containing the UUID
parameter QUEUE_SIZE = 16,
parameter QUEUE_SIZE = 8,
parameter RSP_PARTIAL = 0,
parameter CORE_OUT_REG = 0,
parameter MEM_OUT_REG = 0,

Binary file not shown.

View file

@ -1,45 +0,0 @@
PARAMS += -DCACHE_SIZE=4096 -DCACHE_WORD_SIZE=4 -DCACHE_LINE_SIZE=16 -DCACHE_NUM_BANKS=4 -DMRVQ_SIZE=16 -DDFPQ_SIZE=16 -DSNRQ_SIZE=16 -DCWBQ_SIZE=4 -DDWBQ_SIZE=4 -DFQQ_SIZE=4
TOP = VX_cache
# control RTL debug tracing states
DBG_TRACE_FLAGS = -DDBG_TRACE_CORE_PIPELINE \
-DDBG_TRACE_CORE_ICACHE \
-DDBG_TRACE_CORE_DCACHE \
-DDBG_TRACE_CORE_MEM \
-DDBG_TRACE_CACHE_BANK \
-DDBG_TRACE_CACHE_SNP \
-DDBG_TRACE_CACHE_MSHR \
-DDBG_TRACE_CACHE_TAG \
-DDBG_TRACE_CACHE_DATA \
-DDBG_TRACE_AFU
#DBG_PRINT=$(DBG_TRACE_FLAGS)
INCLUDE = -I../../rtl/ -I../../rtl/cache -I../../rtl/libs
SRCS = cachesim.cpp testbench.cpp
all: build
CF += -std=c++11 -fms-extensions -I../..
CF += $(PARAMS)
VF += --language 1800-2009 --assert -Wall --trace #-Wpedantic
VF += -Wno-DECLFILENAME
VF += --x-initial unique
VF += -exe $(SRCS) $(INCLUDE)
VF += $(PARAMS)
gen:
verilator $(VF) -cc $(TOP).sv -CFLAGS '$(CF)' --exe $(SRCS)
build: gen
(cd obj_dir && make -j -f V$(TOP).mk)
run: build
(cd obj_dir && ./V$(TOP))
clean:
rm -rf obj_dir

View file

@ -1,94 +0,0 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <array>
#include <cstdint>
#include "verilated.h"
#ifdef VM_TRACE
#include <verilated_vcd_c.h> // Trace file format header
#endif
template <typename T>
class vl_simulator {
private:
T top_;
#ifdef VM_TRACE
VerilatedVcdC tfp_;
#endif
public:
vl_simulator() {
top_.clk = 0;
top_.reset = 0;
#ifdef VM_TRACE
Verilated::traceEverOn(true);
top_.trace(&tfp_, 99);
tfp_.open("trace.vcd");
#endif
}
~vl_simulator() {
#ifdef VM_TRACE
tfp_.close();
#endif
top_.final();
}
uint64_t reset(uint64_t ticks) {
top_.reset = 1;
ticks = this->step(ticks, 2);
top_.reset = 0;
return ticks;
}
uint64_t step(uint64_t ticks, uint32_t count = 1) {
while (count--) {
top_.eval();
#ifdef VM_TRACE
tfp_.dump(ticks);
#endif
top_.clk = !top_.clk;
++ticks;
}
return ticks;
}
T* operator->() {
return &top_;
}
};
template <typename... Args>
void vl_setw(uint32_t* sig, Args&&... args) {
std::array<uint32_t, sizeof... (Args)> arr{static_cast<uint32_t>(std::forward<Args>(args))...};
for (size_t i = 0; i < sizeof... (Args); ++i) {
sig[i] = arr[i];
}
}
template <typename... Args>
int vl_cmpw(const uint32_t* sig, Args&&... args) {
std::array<uint32_t, sizeof... (Args)> arr{static_cast<uint32_t>(std::forward<Args>(args))...};
for (size_t i = 0; i < sizeof... (Args); ++i) {
if (sig[i] < arr[i])
return -1;
if (sig[i] > arr[i])
return 1;
}
return 0;
}

View file

@ -1,30 +0,0 @@
TOP = VX_fifo_queue
PARAMS ?=
INCLUDE = -I../../rtl/ -I../../rtl/libs
SRCS = main.cpp
all: build
CF += -std=c++11 -fms-extensions -I../..
VF += $(PARAMS)
VF += --language 1800-2009 --assert -Wall --trace
VF += -Wno-DECLFILENAME
VF += --x-initial unique
VF += -exe $(SRCS) $(INCLUDE)
VF += $(PARAMS)
gen:
verilator $(VF) -cc $(TOP).v -CFLAGS '$(CF)' --exe $(SRCS)
build: gen
(cd obj_dir && make -j -f V$(TOP).mk)
run: build
(cd obj_dir && ./V$(TOP))
clean:
rm -rf obj_dir

View file

@ -1,77 +0,0 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`timescale 1ns/1ns
`include "VX_fifo_queue.v"
`define check(x, y) if ((x == y) !== 1) if ((x == y) === 0) $error("x=%h, expected=%h", x, y); else $warning("x=%h, expected=%h", x, y)
module testbench();
reg clk;
reg reset;
reg[3:0] data_in;
reg push;
reg pop;
wire[3:0] data_out;
wire full;
wire empty;
VX_fifo_queue #(
.DATAW (4),
.DEPTH (4)
) dut (
.clk(clk),
.reset(reset),
.data_in(data_in),
.push(push),
.pop(pop),
.data_out(data_out),
.empty(empty),
.full(full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (alm_full),
`UNUSED_VAR (size)
);
always begin
#1 clk = !clk;
end
initial begin
$monitor ("%d: clk=%b rst=%b push=%b, pop=%b, din=%h, empty=%b, full=%b, dout=%h",
$time, clk, reset, push, pop, data_in, empty, full, data_out);
#0 clk=0; reset=1; pop=0; push=0;
#2 reset=0; data_in=4'ha; pop=0; push=1;
#2 `check(full, 0); `check(data_out, 4'ha); `check(empty, 0);
#0 data_in=4'hb;
#2 `check(full, 0); `check(data_out, 4'ha); `check(empty, 0);
#0 data_in=4'hc;
#2 `check(full, 0); `check(data_out, 4'ha); `check(empty, 0);
#0 data_in=4'hd;
#2 `check(full, 1); `check(data_out, 4'ha); `check(empty, 0);
#0 push=0; pop=1;
#2 `check(full, 0); `check(data_out, 4'hb); `check(empty, 0);
#2 `check(full, 0); `check(data_out, 4'hc); `check(empty, 0);
#2 `check(full, 0); `check(data_out, 4'hd); `check(empty, 0);
#2 `check(full, 0); `check(data_out, 4'ha); `check(empty, 1);
#0 data_in=4'he; push=1; pop=0;
#2 `check(full, 0); `check(data_out, 4'he); `check(empty, 0);
#0 data_in=4'hf; pop=1;
#2 `check(full, 0); `check(data_out, 4'hf); `check(empty, 0);
#0 push=0;
#2 `check(full, 0); `check(data_out, 4'hc); `check(empty, 1);
#1 $finish;
end
endmodule

View file

@ -1,65 +0,0 @@
#---------------------------------------------------------
# Makefile to compile and test the memory stream unit
#---------------------------------------------------------
TOP = VX_mem_scheduler
PARAMS += -GNUM_REQS=4 -GADDRW=8 -GDATAW=8 -GTAGW=8 -GWORD_SIZE=1 -GQUEUE_SIZE=4
ifdef RSP_PARTIAL
PARAMS += -GRSP_PARTIAL=$(RSP_PARTIAL)
endif
ifdef DUPLICATE_ADDR
PARAMS += -GDUPLICATE_ADDR=$(DUPLICATE_ADDR)
endif
RTL_DIR = ../../../rtl
DPI_DIR = ../../../dpi
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(DPI_DIR)
VERILATOR_ROOT = /opt/verilator
VERILATOR ?= $(VERILATOR_ROOT)/bin/verilator
VL_FLAGS += --exe --cc $(TOP).sv --top-module $(TOP)
VL_FLAGS += --language 1800-2009 --assert -Wall
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += --x-initial unique --x-assign unique
VL_FLAGS += --trace
VL_FLAGS += $(RTL_INCLUDE)
VL_FLAGS += $(PARAMS)
SRCS += memsim.cpp ram.cpp $(DPI_DIR)/util_dpi.cpp
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds -Wno-maybe-uninitialized
CXXFLAGS += -I../../../../dpi -I../../../../
default: run
gen: $(SRCS)
@echo
@echo "### VERILATE ###"
$(VERILATOR) $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)'
build: gen
@echo
@echo "### BUILD ###"
$(MAKE) -C obj_dir -j 4 -f V$(TOP).mk
run: build
@echo
@echo "### RUN ###"
obj_dir/V$(TOP)
waves: trace.vcd
@echo
@echo "### TRACE ###"
gtkwave -o trace.vcd
clean:
@echo
@echo "### CLEAN ###"
-rm -rf obj_dir *.vcd *.log
#---------------------------------------------------------

17
hw/unittest/Makefile Normal file
View file

@ -0,0 +1,17 @@
all:
$(MAKE) -C cache
$(MAKE) -C generic_queue
$(MAKE) -C mem_streamer
$(MAKE) -C top_modules
run:
$(MAKE) -C cache run
$(MAKE) -C generic_queue run
$(MAKE) -C mem_streamer run
$(MAKE) -C top_modules run
clean:
$(MAKE) -C cache clean
$(MAKE) -C generic_queue clean
$(MAKE) -C mem_streamer clean
$(MAKE) -C top_modules clean

77
hw/unittest/cache/Makefile vendored Normal file
View file

@ -0,0 +1,77 @@
DESTDIR ?= .
RTL_DIR = ../../rtl
DPI_DIR = ../../dpi
CONFIGS +=
PARAMS +=
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
CXXFLAGS += -I../../.. -I../../common -I../../../../sim/common
CXXFLAGS += $(CONFIGS)
LDFLAGS +=
# control RTL debug tracing states
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_BANK
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_MSHR
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_TAG
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_DATA
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache
SRCS = cachesim.cpp testbench.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp
TOP = VX_cache_top
VL_FLAGS = --exe
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += --x-initial unique --x-assign unique
VL_FLAGS += -DSIMULATION
VL_FLAGS += $(CONFIGS)
VL_FLAGS += $(PARAMS)
VL_FLAGS += $(RTL_INCLUDE)
VL_FLAGS += $(RTL_PKGS)
VL_FLAGS += --cc $(TOP) --top-module $(TOP)
# Enable Verilator multithreaded simulation
THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())')
VL_FLAGS += -j $(THREADS)
#VL_FLAGS += --threads $(THREADS)
# Debugigng
ifdef DEBUG
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
CXXFLAGS += -g -O0 $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CXXFLAGS += -O2 -DNDEBUG
endif
# Enable perf counters
ifdef PERF
VL_FLAGS += -DPERF_ENABLE
CXXFLAGS += -DPERF_ENABLE
endif
PROJECT = cache
all: $(DESTDIR)/$(PROJECT)
$(DESTDIR)/$(PROJECT): $(SRCS)
verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -o ../$@
run: $(DESTDIR)/$(PROJECT)
$(DESTDIR)/$(PROJECT)
waves: trace.vcd
gtkwave -o trace.vcd
clean:
rm -rf obj_dir $(DESTDIR)/$(PROJECT)

View file

@ -16,36 +16,58 @@
#include <iomanip>
#include <iostream>
#include <vector>
#include <bitset>
#include <bitset>
uint64_t timestamp = 0;
#ifndef TRACE_START_TIME
#define TRACE_START_TIME 0ull
#endif
#ifndef TRACE_STOP_TIME
#define TRACE_STOP_TIME -1ull
#endif
static uint64_t timestamp = 0;
static bool trace_enabled = false;
static uint64_t trace_start_time = TRACE_START_TIME;
static uint64_t trace_stop_time = TRACE_STOP_TIME;
double sc_time_stamp() {
return timestamp;
}
bool sim_trace_enabled() {
if (timestamp >= trace_start_time
&& timestamp < trace_stop_time)
return true;
return trace_enabled;
}
void sim_trace_enable(bool enable) {
trace_enabled = enable;
}
CacheSim::CacheSim() {
// force random values for uninitialized signals
Verilated::randReset(2);
ram_ = nullptr;
cache_ = new VVX_cache();
cache_ = new VVX_cache_top();
mem_rsp_active_ = false;
snp_req_active_ = false;
//#ifdef VCD_OUTPUT
#ifdef VCD_OUTPUT
Verilated::traceEverOn(true);
trace_ = new VerilatedVcdC;
cache_->trace(trace_, 99);
trace_->open("trace.vcd");
//#endif
#endif
}
CacheSim::~CacheSim() {
//#ifdef VCD_OUTPUT
#ifdef VCD_OUTPUT
trace_->close();
//#endif
#endif
delete cache_;
//need to delete the req and rsp vectors
}
@ -88,9 +110,9 @@ void CacheSim::step() {
void CacheSim::eval() {
cache_->eval();
//#ifdef VCD_OUTPUT
#ifdef VCD_OUTPUT
trace_->dump(timestamp);
//#endif
#endif
++timestamp;
}
@ -128,7 +150,6 @@ void CacheSim::clear_req(){
cache_->core_req_valid = 0;
}
void CacheSim::send_req(core_req_t *req){
core_req_vec_.push(req);
unsigned int *data = new unsigned int[4];
@ -221,7 +242,7 @@ void CacheSim::eval_mem_bus() {
cache_->mem_rsp_valid = 1;
//copy data from the rsp queue to the cache module
memcpy((uint8_t*)cache_->mem_rsp_data, mem_rsp_vec_[dequeue_index].data, MEM_BLOCK_SIZE);
memcpy(cache_->mem_rsp_data.data(), mem_rsp_vec_[dequeue_index].data, MEM_BLOCK_SIZE);
cache_->mem_rsp_tag = mem_rsp_vec_[dequeue_index].tag;
free(mem_rsp_vec_[dequeue_index].data); //take data out of the queue
@ -249,7 +270,7 @@ void CacheSim::eval_mem_bus() {
if (cache_->mem_req_rw) { //write = 1
uint64_t byteen = cache_->mem_req_byteen;
uint64_t base_addr = (cache_->mem_req_addr * MEM_BLOCK_SIZE);
uint8_t* data = (uint8_t*)(cache_->mem_req_data);
uint8_t* data = reinterpret_cast<uint8_t*>(cache_->mem_req_data.data());
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
(*ram_)[base_addr + i] = data[i];
@ -330,4 +351,3 @@ void CacheSim::get_mem_rsp(){
std::cout << std::hex << "mem_rsp_tag: " << cache_->mem_rsp_tag << std::endl;
std::cout << std::hex << "mem_rsp_ready: " << cache_->mem_rsp_ready << std::endl;
}

View file

@ -13,15 +13,15 @@
#pragma once
#include "VVX_cache.h"
#include "VVX_cache__Syms.h"
#include "VVX_cache_top.h"
#include "VVX_cache_top__Syms.h"
#include "verilated.h"
//#ifdef VCD_OUTPUT
#ifdef VCD_OUTPUT
#include <verilated_vcd_c.h>
//#endif
#endif
//#include <VX_config.h>
#include <VX_config.h>
#include "ram.h"
#include <ostream>
#include <vector>
@ -97,9 +97,9 @@ private:
uint32_t snp_req_size_;
uint32_t pending_snp_reqs_;
VVX_cache *cache_;
VVX_cache_top *cache_;
RAM *ram_;
//#ifdef VCD_OUTPUT
#ifdef VCD_OUTPUT
VerilatedVcdC *trace_;
//#endif
#endif
};

View file

@ -54,6 +54,8 @@ int REQ_RSP(CacheSim *sim){ //verified
int check = sim->assert_equal(data, write->tag);
if (check == 4) return 1;
return 0;
}
int HIT_1(CacheSim *sim){
@ -242,7 +244,5 @@ int main(int argc, char **argv)
std::cout << "FAILED" << std::endl;
}
return 0;
}

View file

@ -17,7 +17,7 @@
#include <cstdint>
#include "verilated.h"
#ifdef VM_TRACE
#ifdef VCD_OUTPUT
#include <verilated_vcd_c.h> // Trace file format header
#endif
@ -26,7 +26,7 @@ class vl_simulator {
private:
T top_;
#ifdef VM_TRACE
#ifdef VCD_OUTPUT
VerilatedVcdC tfp_;
#endif
@ -35,7 +35,7 @@ public:
vl_simulator() {
top_.clk = 0;
top_.reset = 0;
#ifdef VM_TRACE
#ifdef VCD_OUTPUT
Verilated::traceEverOn(true);
top_.trace(&tfp_, 99);
tfp_.open("trace.vcd");
@ -43,7 +43,7 @@ public:
}
~vl_simulator() {
#ifdef VM_TRACE
#ifdef VCD_OUTPUT
tfp_.close();
#endif
top_.final();
@ -59,7 +59,7 @@ public:
uint64_t step(uint64_t ticks, uint32_t count = 1) {
while (count--) {
top_.eval();
#ifdef VM_TRACE
#ifdef VCD_OUTPUT
tfp_.dump(ticks);
#endif
top_.clk = !top_.clk;

View file

@ -0,0 +1,65 @@
DESTDIR ?= .
RTL_DIR = ../../rtl
DPI_DIR = ../../dpi
CONFIGS +=
PARAMS +=
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
CXXFLAGS += -I../../.. -I../../common -I../../../../sim/common
CXXFLAGS += $(CONFIGS)
LDFLAGS +=
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
RTL_PKGS +=
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs
SRCS = main.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp
TOP = VX_fifo_queue
VL_FLAGS = --exe
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += --x-initial unique --x-assign unique
VL_FLAGS += -DSIMULATION
VL_FLAGS += $(CONFIGS)
VL_FLAGS += $(PARAMS)
VL_FLAGS += $(RTL_INCLUDE)
VL_FLAGS += $(RTL_PKGS)
VL_FLAGS += --cc $(TOP) --top-module $(TOP)
# Enable Verilator multithreaded simulation
THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())')
VL_FLAGS += -j $(THREADS)
#VL_FLAGS += --threads $(THREADS)
# Debugigng
ifdef DEBUG
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
CXXFLAGS += -g -O0 $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CXXFLAGS += -O2 -DNDEBUG
endif
PROJECT = generic_queue
all: $(DESTDIR)/$(PROJECT)
$(DESTDIR)/$(PROJECT): $(SRCS)
verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -o ../$@
run: $(DESTDIR)/$(PROJECT)
$(DESTDIR)/$(PROJECT)
waves: trace.vcd
gtkwave -o trace.vcd
clean:
rm -rf obj_dir $(DESTDIR)/$(PROJECT)

View file

@ -17,6 +17,14 @@
#define MAX_TICKS 20
#ifndef TRACE_START_TIME
#define TRACE_START_TIME 0ull
#endif
#ifndef TRACE_STOP_TIME
#define TRACE_STOP_TIME -1ull
#endif
#define CHECK(x) \
do { \
if (x) \
@ -25,10 +33,24 @@
std::abort(); \
} while (false)
uint64_t ticks = 0;
static uint64_t timestamp = 0;
static bool trace_enabled = false;
static uint64_t trace_start_time = TRACE_START_TIME;
static uint64_t trace_stop_time = TRACE_STOP_TIME;
double sc_time_stamp() {
return ticks;
return timestamp;
}
bool sim_trace_enabled() {
if (timestamp >= trace_start_time
&& timestamp < trace_stop_time)
return true;
return trace_enabled;
}
void sim_trace_enable(bool enable) {
trace_enabled = enable;
}
using Device = VVX_fifo_queue;
@ -40,14 +62,14 @@ int main(int argc, char **argv) {
vl_simulator<Device> sim;
// run test
ticks = sim.reset(0);
while (ticks < MAX_TICKS) {
switch (ticks) {
timestamp = sim.reset(0);
while (timestamp < MAX_TICKS) {
switch (timestamp) {
case 0:
// initial values
sim->pop = 0;
sim->push = 0;
ticks = sim.step(ticks, 2);
timestamp = sim.step(timestamp, 2);
break;
case 2:
// Verify outputs
@ -96,11 +118,11 @@ int main(int argc, char **argv) {
}
// advance clock
ticks = sim.step(ticks, 2);
timestamp = sim.step(timestamp, 2);
}
std::cout << "PASSED!" << std::endl;
std::cout << "Simulation time: " << std::dec << ticks/2 << " cycles" << std::endl;
std::cout << "Simulation time: " << std::dec << timestamp/2 << " cycles" << std::endl;
return 0;
}

View file

@ -0,0 +1,65 @@
DESTDIR ?= .
RTL_DIR = ../../rtl
DPI_DIR = ../../dpi
CONFIGS +=
PARAMS +=
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
CXXFLAGS += -I../../.. -I../../common -I../../../../sim/common
CXXFLAGS += $(CONFIGS)
LDFLAGS +=
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
RTL_PKGS +=
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs
SRCS = memsim.cpp ram.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp
TOP = VX_mem_scheduler
VL_FLAGS = --exe
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += --x-initial unique --x-assign unique
VL_FLAGS += -DSIMULATION
VL_FLAGS += $(CONFIGS)
VL_FLAGS += $(PARAMS)
VL_FLAGS += $(RTL_INCLUDE)
VL_FLAGS += $(RTL_PKGS)
VL_FLAGS += --cc $(TOP) --top-module $(TOP)
# Enable Verilator multithreaded simulation
THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())')
VL_FLAGS += -j $(THREADS)
#VL_FLAGS += --threads $(THREADS)
# Debugigng
ifdef DEBUG
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
CXXFLAGS += -g -O0 $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CXXFLAGS += -O2 -DNDEBUG
endif
PROJECT = mem_streamer
all: $(DESTDIR)/$(PROJECT)
$(DESTDIR)/$(PROJECT): $(SRCS)
verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -o ../$@
run: $(DESTDIR)/$(PROJECT)
$(DESTDIR)/$(PROJECT)
waves: trace.vcd
gtkwave -o trace.vcd
clean:
rm -rf obj_dir $(DESTDIR)/$(PROJECT)

View file

@ -15,6 +15,14 @@
#include "memsim.h"
#include "ram.h"
#ifndef TRACE_START_TIME
#define TRACE_START_TIME 0ull
#endif
#ifndef TRACE_STOP_TIME
#define TRACE_STOP_TIME -1ull
#endif
static bool trace_enabled = false;
static uint64_t trace_start_time = 0;
static uint64_t trace_stop_time = -1ull;
@ -35,15 +43,11 @@ void sim_trace_enable (bool enable) {
trace_enabled = enable;
}
//////////////////////////////////////////////////////
int generate_rand (int min, int max) {
int range = max - min + 1;
return rand() % range + min;
}
//////////////////////////////////////////////////////
int generate_rand_mask (int mask) {
int result = 0;
int m = mask;
@ -56,34 +60,34 @@ int generate_rand_mask (int mask) {
return result;
}
//////////////////////////////////////////////////////
MemSim::MemSim() {
msu_ = new VVX_mem_streamer();
msu_ = new VVX_mem_scheduler();
// Enable tracing
Verilated::traceEverOn(true);
trace_ = new VerilatedVcdC;
msu_->trace(trace_, 99);
trace_->open("trace.vcd");
#ifdef VCD_OUTPUT
Verilated::traceEverOn(true);
trace_ = new VerilatedVcdC;
cache_->trace(trace_, 99);
race_->open("trace.vcd");
#endif
}
//////////////////////////////////////////////////////
MemSim::~MemSim() {
#ifdef VCD_OUTPUT
trace_->close();
#endif
delete msu_;
}
//////////////////////////////////////////////////////
void MemSim::eval() {
msu_->eval();
#ifdef VCD_OUTPUT
trace_->dump(timestamp++);
#endif
}
//////////////////////////////////////////////////////
void MemSim::step() {
msu_->clk = 0;
this->eval();
@ -92,19 +96,14 @@ void MemSim::step() {
this->eval();
}
//////////////////////////////////////////////////////
void MemSim::reset() {
msu_->reset = 1;
this->step();
msu_->reset = 0;
this->step();
}
//////////////////////////////////////////////////////
void MemSim::attach_core() {
if (msu_->req_ready) {
msu_->req_valid = generate_rand(0, 1);
@ -118,8 +117,6 @@ void MemSim::attach_core() {
msu_->rsp_ready = true;
}
//////////////////////////////////////////////////////
void MemSim::attach_ram (RAM *ram) {
req_t req;
@ -137,7 +134,6 @@ void MemSim::attach_ram (RAM *ram) {
rsp = ram->schedule_rsp();
msu_->mem_rsp_valid = rsp.valid;
msu_->mem_rsp_mask = rsp.mask;
msu_->mem_rsp_data = rsp.data;
msu_->mem_rsp_tag = rsp.tag;
rsp.ready = msu_->mem_rsp_ready;
@ -146,8 +142,6 @@ void MemSim::attach_ram (RAM *ram) {
ram->halt_rsp(rsp);
}
//////////////////////////////////////////////////////
void MemSim::run(RAM *ram) {
this->reset();
@ -160,8 +154,6 @@ void MemSim::run(RAM *ram) {
}
}
//////////////////////////////////////////////////////
int main (int argc, char** argv, char** env) {
Verilated::commandArgs(argc, argv);
@ -172,5 +164,3 @@ int main (int argc, char** argv, char** env) {
return 0;
}
//////////////////////////////////////////////////////

View file

@ -18,8 +18,8 @@
#include <vector>
#include <verilated.h>
#include <verilated_vcd_c.h>
#include "VVX_mem_streamer.h"
#include "VVX_mem_streamer__Syms.h"
#include "VVX_mem_scheduler.h"
#include "VVX_mem_scheduler__Syms.h"
#include "ram.h"
#define SIM_TIME 5000
@ -28,20 +28,22 @@ int generate_rand (int min, int max);
int generate_rand_mask (int mask);
class MemSim {
private:
VVX_mem_streamer *msu_;
VerilatedVcdC *trace_;
public:
MemSim();
virtual ~MemSim();
void eval();
void step();
void reset();
void run(RAM *ram);
void attach_core();
void attach_ram(RAM *ram);
private:
VVX_mem_scheduler *msu_;
#ifdef VCD_OUTPUT
VerilatedVcdC *trace_;
#endif
public:
MemSim();
virtual ~MemSim();
void eval();
void step();
void reset();
void run(RAM *ram);
void attach_core();
void attach_ram(RAM *ram);
};

View file

@ -21,8 +21,6 @@ RAM::RAM() {
is_rsp_stall_ = false;
}
//////////////////////////////////////////////////////
bool RAM::check_duplicate_req(req_t req) {
for(int i = 0; i < ram_.size(); i++) {
if (ram_[i].addr == req.addr) {
@ -33,8 +31,6 @@ bool RAM::check_duplicate_req(req_t req) {
return false;
}
//////////////////////////////////////////////////////
int RAM::simulate_cycle_delay() {
std::cout<<"RAM: # entries: "<<ram_.size()<<std::endl;
@ -57,8 +53,6 @@ int RAM::simulate_cycle_delay() {
return dequeue_index;
}
//////////////////////////////////////////////////////
void RAM::insert_req(req_t req) {
if ( !(this->check_duplicate_req(req)) && req.valid && !req.rw) {
req_t r;
@ -78,15 +72,11 @@ void RAM::insert_req(req_t req) {
}
}
//////////////////////////////////////////////////////
uint8_t RAM::is_ready() {
// return generate_rand(0b1000, 0b1111);
return 0b1111;
}
//////////////////////////////////////////////////////
rsp_t RAM::schedule_rsp() {
rsp_t rsp;
int dequeue_index = this->simulate_cycle_delay();
@ -124,8 +114,6 @@ rsp_t RAM::schedule_rsp() {
return rsp;
}
//////////////////////////////////////////////////////
// Schedule response for only one cycle
void RAM::halt_rsp(rsp_t rsp) {
if (is_rsp_active_ && rsp.valid && rsp.ready) {
@ -133,5 +121,3 @@ void RAM::halt_rsp(rsp_t rsp) {
is_rsp_active_ = false;
}
}
//////////////////////////////////////////////////////

View file

@ -0,0 +1,68 @@
DESTDIR ?= .
RTL_DIR = ../../rtl
DPI_DIR = ../../dpi
CONFIGS +=
PARAMS +=
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
CXXFLAGS += -I../../.. -I../../common -I../../../../sim/common
CXXFLAGS += $(CONFIGS)
LDFLAGS +=
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(RTL_DIR)/fpu -I$(RTL_DIR)/core
SRCS = main.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp
VL_FLAGS = --exe
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += --x-initial unique --x-assign unique
VL_FLAGS += -DSIMULATION
VL_FLAGS += $(CONFIGS)
VL_FLAGS += $(PARAMS)
VL_FLAGS += $(RTL_INCLUDE)
VL_FLAGS += $(RTL_PKGS)
# Enable Verilator multithreaded simulation
THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())')
VL_FLAGS += -j $(THREADS)
#VL_FLAGS += --threads $(THREADS)
# Debugigng
ifdef DEBUG
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
CXXFLAGS += -g -O0 $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CXXFLAGS += -O2 -DNDEBUG
endif
# Enable perf counters
ifdef PERF
VL_FLAGS += -DPERF_ENABLE
CXXFLAGS += -DPERF_ENABLE
endif
PROJECT = top_modules
all: build
build: $(SRCS)
verilator --build $(VL_FLAGS) --cc VX_cache_cluster_top --top-module VX_cache_cluster_top $^ -CFLAGS '$(CXXFLAGS)'
verilator --build $(VL_FLAGS) --cc VX_cache_top --top-module VX_cache_top $^ -CFLAGS '$(CXXFLAGS)'
verilator --build $(VL_FLAGS) --cc VX_core_top --top-module VX_core_top $^ -CFLAGS '$(CXXFLAGS)'
run:
waves:
clean:
rm -rf obj_dir

View file

@ -0,0 +1,49 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "vl_simulator.h"
#ifndef TRACE_START_TIME
#define TRACE_START_TIME 0ull
#endif
#ifndef TRACE_STOP_TIME
#define TRACE_STOP_TIME -1ull
#endif
static uint64_t timestamp = 0;
static bool trace_enabled = false;
static uint64_t trace_start_time = TRACE_START_TIME;
static uint64_t trace_stop_time = TRACE_STOP_TIME;
double sc_time_stamp() {
return timestamp;
}
bool sim_trace_enabled() {
if (timestamp >= trace_start_time
&& timestamp < trace_stop_time)
return true;
return trace_enabled;
}
void sim_trace_enable(bool enable) {
trace_enabled = enable;
}
int main(int argc, char **argv) {
// Initialize Verilators variables
Verilated::commandArgs(argc, argv);
return 0;
}