adding dogfood unit test

2025-04-23 13:27:29 -04:00 · 2020-08-07 12:13:34 -04:00 · 2020-08-07 12:13:34 -04:00 · e336d401ea
commit e336d401ea
parent aef5743846
19 changed files with 1267 additions and 2180 deletions
--- a/driver/tests/basic/Makefile
+++ b/driver/tests/basic/Makefile
@ -1,6 +1,8 @@
 RISCV_TOOLCHAIN_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
 VORTEX_RT_PATH ?= $(wildcard ../../../runtime)

+OPTS ?= -n256
+
 VX_CC  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
 VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
 VX_DP  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
@ -38,16 +40,16 @@ $(PROJECT): $(SRCS)
 	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@

 run-fpga: $(PROJECT)
-	LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 256
+	LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

 run-ase: $(PROJECT)
-	ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 256
+	ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

 run-rtlsim: $(PROJECT)
-	LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 256
+	LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

 run-simx: $(PROJECT)
-	LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 256
+	LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

 .depend: $(SRCS)
 	$(CXX) $(CXXFLAGS) -MM $^ > .depend;
--- a/driver/tests/basic/basic.cpp
+++ b/driver/tests/basic/basic.cpp
@ -14,6 +14,8 @@
     exit(-1);                                                  \
   } while (false)

+///////////////////////////////////////////////////////////////////////////////
+
 const char* kernel_file = "kernel.bin";
 int test = -1;
 uint32_t count = 0;
--- a/driver/tests/demo/Makefile
+++ b/driver/tests/demo/Makefile
@ -1,6 +1,8 @@
 RISCV_TOOLCHAIN_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
 VORTEX_RT_PATH ?= $(wildcard ../../../runtime)

+OPTS ?= -n64
+
 VX_CC  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
 VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
 VX_DP  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
@ -36,16 +38,16 @@ $(PROJECT): $(SRCS)
 	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@

 run-fpga: $(PROJECT)
-	LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 64
+	LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

 run-ase: $(PROJECT)
-	ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 64
+	ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

 run-rtlsim: $(PROJECT)
-	LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 64
+	LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
 	
 run-simx: $(PROJECT)
-	LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 64
+	LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

 .depend: $(SRCS)
 	$(CXX) $(CXXFLAGS) -MM $^ > .depend;
--- a/driver/tests/demo/demo.cpp
+++ b/driver/tests/demo/demo.cpp
@ -14,6 +14,8 @@
     exit(-1);                                                  \
   } while (false)

+///////////////////////////////////////////////////////////////////////////////
+
 const char* kernel_file = "kernel.bin";
 uint32_t count = 0;

--- a/driver/tests/dogfood/Makefile
+++ b/driver/tests/dogfood/Makefile
@ -0,0 +1,87 @
+# Build and run the "dogfood" driver test: a host program ($(PROJECT)) linked
+# with -L../../stub -lvortex, plus a RISC-V kernel image built from $(VX_SRCS).
+#
+# Overridable from the command line or the environment:
+#   RISCV_TOOLCHAIN_PATH  root of the riscv32-unknown-elf toolchain
+#   VORTEX_RT_PATH        Vortex runtime (headers, linker script, libvortexrt.a)
+#   OPTS                  arguments passed to the test program by run-* targets
+RISCV_TOOLCHAIN_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
+VORTEX_RT_PATH ?= $(wildcard ../../../runtime)
+
+OPTS ?= -n64
+
+VX_CC  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
+VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
+VX_DP  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
+VX_CP  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
+
+# Kernel flags: rv32imf/ilp32, freestanding, static link driven by the runtime's linker script.
+VX_CFLAGS += -march=rv32imf -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -ffreestanding -nostartfiles -Wl,--gc-sections
+VX_CFLAGS += -I$(VORTEX_RT_PATH)/include
+
+VX_LDFLAGS += $(VORTEX_RT_PATH)/libvortexrt.a
+VX_LDFLAGS += -lm
+
+VX_SRCS = kernel.c
+
+CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
+
+CXXFLAGS += -I../../include
+
+PROJECT = dogfood
+
+SRCS = dogfood.cpp
+
+# None of these names is a file this Makefile produces; declaring them phony
+# keeps a stray file called e.g. "clean" from turning the target into a no-op.
+.PHONY: all run-fpga run-ase run-rtlsim run-simx clean clean-all
+
+# Remove a target whose recipe failed, so a truncated kernel.dump or .depend
+# (both written through shell redirection) is never mistaken for up to date.
+.DELETE_ON_ERROR:
+
+all: $(PROJECT) kernel.bin kernel.dump
+
+# Disassembly listing of the kernel ELF (objdump -D).
+kernel.dump: kernel.elf
+	$(VX_DP) -D $< > $@
+
+# Raw binary image extracted from the kernel ELF.
+kernel.bin: kernel.elf
+	$(VX_CP) -O binary $< $@
+
+kernel.elf: $(VX_SRCS)
+	$(VX_CC) $(VX_CFLAGS) $^ $(VX_LDFLAGS) -o $@
+
+$(PROJECT): $(SRCS)
+	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@
+
+# Each run-* target launches the test with a different directory placed first
+# on LD_LIBRARY_PATH (presumably selecting the driver backend -- confirm).
+run-fpga: $(PROJECT)
+	LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
+
+run-ase: $(PROJECT)
+	ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
+
+run-rtlsim: $(PROJECT)
+	LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
+
+run-simx: $(PROJECT)
+	LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
+
+# Header dependencies of the host sources, emitted by the compiler's -MM mode.
+.depend: $(SRCS)
+	$(CXX) $(CXXFLAGS) -MM $^ > $@
+
+clean:
+	rm -rf $(PROJECT) *.o .depend
+
+clean-all:
+	rm -rf $(PROJECT) *.o *.elf *.bin *.dump .depend
+
+# Skip .depend for cleaning goals: including it would make regenerate the
+# file (running the compiler) only for the recipe to delete it again.
+ifeq ($(filter clean clean-all,$(MAKECMDGOALS)),)
+    -include .depend
+endif
--- a/driver/tests/dogfood/Memcpy/hw/rtl/_hdr
+++ b/driver/tests/dogfood/Memcpy/hw/rtl/_hdr
@ -1,603 +0,0 @@
-//
-// Copyright (c) 2017, Intel Corporation
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// Redistributions of source code must retain the above copyright notice, this
-// list of conditions and the following disclaimer.
-//
-// Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// Neither the name of the Intel Corporation nor the names of its contributors
-// may be used to endorse or promote products derived from this software
-// without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-
-
-// Read from the memory locations first and then write to the memory locations
-
-`include "platform_if.vh"
-`include "afu_json_info.vh"
-
-
-module ccip_std_afu
-   (
-    // CCI-P Clocks and Resets
-    input           logic             pClk,              // 400MHz - CCI-P clock domain. Primary interface clock
-    input           logic             pClkDiv2,          // 200MHz - CCI-P clock domain.
-    input           logic             pClkDiv4,          // 100MHz - CCI-P clock domain.
-    input           logic             uClk_usr,          // User clock domain. Refer to clock programming guide  ** Currently provides fixed 300MHz clock **
-    input           logic             uClk_usrDiv2,      // User clock domain. Half the programmed frequency  ** Currently provides fixed 150MHz clock **
-    input           logic             pck_cp2af_softReset,      // CCI-P ACTIVE HIGH Soft Reset
-    input           logic [1:0]       pck_cp2af_pwrState,       // CCI-P AFU Power State
-    input           logic             pck_cp2af_error,          // CCI-P Protocol Error Detected
-
-    // Interface structures
-    input           t_if_ccip_Rx      pck_cp2af_sRx,        // CCI-P Rx Port
-    output          t_if_ccip_Tx      pck_af2cp_sTx         // CCI-P Tx Port
-    );
-
-
-    //
-    // Run the entire design at the standard CCI-P frequency (400 MHz).
-    //
-    logic clk;
-    assign clk = pClk;
-
-    logic reset;
-    assign reset = pck_cp2af_softReset;
-
-    logic [511:0] wr_data;
-    logic [511:0] rd_data;
-
-    logic get_write_addr;
-    logic do_update;
-    logic rd_end_of_list;
-    logic rd_needed;
-    logic wr_needed;
-    logic [15:0] cnt_list_length;
-
-    // =========================================================================
-    //
-    //   Register requests.
-    //
-    // =========================================================================
-
-    //
-    // The incoming pck_cp2af_sRx and outgoing pck_af2cp_sTx must both be
-    // registered.  Here we register pck_cp2af_sRx and assign it to sRx.
-    // We also assign pck_af2cp_sTx to sTx here but don't register it.
-    // The code below never uses combinational logic to write sTx.
-    //
-
-    t_if_ccip_Rx sRx;
-    always_ff @(posedge clk)
-    begin
-        sRx <= pck_cp2af_sRx;
-    end
-
-    t_if_ccip_Tx sTx;
-    assign pck_af2cp_sTx = sTx;
-
-
-    // =========================================================================
-    //
-    //   CSR (MMIO) handling.
-    //
-    // =========================================================================
-
-    // The AFU ID is a unique ID for a given program.  Here we generated
-    // one with the "uuidgen" program and stored it in the AFU's JSON file.
-    // ASE and synthesis setup scripts automatically invoke afu_json_mgr
-    // to extract the UUID into afu_json_info.vh.
-    logic [127:0] afu_id = `AFU_ACCEL_UUID;
-
-    //
-    // A valid AFU must implement a device feature list, starting at MMIO
-    // address 0.  Every entry in the feature list begins with 5 64-bit
-    // words: a device feature header, two AFU UUID words and two reserved
-    // words.
-    //
-
-    // Is a CSR read request active this cycle?
-    logic is_csr_read;
-    assign is_csr_read = sRx.c0.mmioRdValid;
-
-    // Is a CSR write request active this cycle?
-    logic is_csr_write;
-    assign is_csr_write = sRx.c0.mmioWrValid;
-
-    // The MMIO request header is overlayed on the normal c0 memory read
-    // response data structure.  Cast the c0Rx header to an MMIO request
-    // header.
-    t_ccip_c0_ReqMmioHdr mmio_req_hdr;
-    assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(sRx.c0.hdr);
-
-
-    //
-    // Implement the device feature list by responding to MMIO reads.
-    //
-
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            sTx.c2.mmioRdValid <= 1'b0;
-        end
-        else
-        begin
-            // Always respond with something for every read request
-            sTx.c2.mmioRdValid <= is_csr_read;
-
-            // The unique transaction ID matches responses to requests
-            sTx.c2.hdr.tid <= mmio_req_hdr.tid;
-
-            // Addresses are of 32-bit objects in MMIO space.  Addresses
-            // of 64-bit objects are thus multiples of 2.
-            case (mmio_req_hdr.address)
-              0: // AFU DFH (device feature header)
-                begin
-                    // Here we define a trivial feature list.  In this
-                    // example, our AFU is the only entry in this list.
-                    sTx.c2.data <= t_ccip_mmioData'(0);
-                    // Feature type is AFU
-                    sTx.c2.data[63:60] <= 4'h1;
-                    // End of list (last entry in list)
-                    sTx.c2.data[40] <= 1'b1;
-                end
-
-              // AFU_ID_L
-              2: sTx.c2.data <= afu_id[63:0];
-
-              // AFU_ID_H
-              4: sTx.c2.data <= afu_id[127:64];
-
-              // DFH_RSVD0
-              6: sTx.c2.data <= t_ccip_mmioData'(0);
-
-              // DFH_RSVD1
-              8: sTx.c2.data <= t_ccip_mmioData'(0);
-
-              default: sTx.c2.data <= t_ccip_mmioData'(0);
-            endcase
-        end
-    end
-
-
-    //
-    // CSR write handling.  Host software must tell the AFU the memory address
-    // to which it should be writing.  The address is set by writing a CSR.
-    //
-
-    // We use MMIO address 0 to set the memory address.  The read and
-    // write MMIO spaces are logically separate so we are free to use
-    // whatever we like.  This may not be good practice for cleanly
-    // organizing the MMIO address space, but it is legal.
-    logic is_mem_addr_csr_write;
-    assign is_mem_addr_csr_write = get_write_addr && is_csr_write &&
-                                   (mmio_req_hdr.address == t_ccip_mmioAddr'(0));
-
-    // Memory address to which this AFU will write.
-    t_ccip_clAddr write_mem_addr;
-
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            get_write_addr <= 1'b1;
-        end
-	else if (is_mem_addr_csr_write)
-        begin
-            write_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
-	    get_write_addr <= 1'b0;
-        end
-    end
-    
-
-    // We use MMIO address 0 to set the memory address for reading data.
-    logic is_mem_addr_csr_read;
-    assign is_mem_addr_csr_read = !get_write_addr && is_csr_write &&
-                                   (mmio_req_hdr.address == t_ccip_mmioAddr'(0));
-
-    // Memory address from which this AFU will read.
-    logic start_read;
-    t_ccip_clAddr read_mem_addr;
-
-    //logic start_traversal = 'b0;
-    //t_ccip_clAddr start_traversal_addr;
-
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-	    start_read <= 1'b0;
-        end
-        else if (is_mem_addr_csr_read)
-        begin
-            read_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
-	    start_read <= 'b1;
-        end
-    end
-
-
-    // =========================================================================
-    //
-    //   Main AFU logic
-    //
-    // =========================================================================
-
-    //
-    // States in our simple example.
-    //
-    //typedef enum logic [0:0]
-    typedef enum logic [1:0]
-    {
-	STATE_IDLE,
-        STATE_READ,
-        STATE_UPDATE,
-        STATE_WRITE
-    }
-    t_state;
-
-    t_state state;
-
-    //
-    // State machine
-    //
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            state <= STATE_IDLE;
-	    rd_end_of_list <= 1'b0;
-        end
-        else
-        begin
-            case (state)
-              STATE_IDLE:
-                begin
-                    // Traversal begins when CSR 1 is written
-                    if (start_read)
-                    begin
-                        state <= STATE_READ;
-                        $display("AFU starting traversal at 0x%x", t_ccip_clAddr'(read_mem_addr));
-                    end
-                end
-
-              STATE_READ:
-                begin
-                    if (rd_needed)
-                    begin
-		    // Read data from the address and update address
-		    	state <= STATE_UPDATE;
-		    	start_read <= 'b0;
-                    	$display("AFU reading data and pointing to next read address...");
-                    end
-                end
-
-              STATE_UPDATE:
-                begin
-		    // Update the read value to be written back
-                    if (do_update)
-		    begin
-		    	state <= STATE_WRITE;
-                    	$display("AFU performing comutations on the read values...");
-		    end
-                end
-
-              STATE_WRITE:
-                begin
-		    // Write the updated value to the address
-		    // Point to new address after that
-		    // if done then point to IDLE; else read new values 
-                    if (rd_end_of_list)
-		    begin
-			state <= STATE_IDLE;
-			$display("AFU done...");
-		    end
-                    else
-		    begin
-			if (wr_needed)
-		    	begin
-			    state <= STATE_READ;
-			    $display("AFU reading again from read address...");
-		    	end
-		    end
-                end
-            endcase
-        end
-    end
-
-
-    // =========================================================================
-    //
-    //   Read logic.
-    //
-    // =========================================================================
-
-    //
-    // READ REQUEST
-    //
-
-    // Did a write response just arrive
-    logic addr_next_valid;
-
-    // Next read address
-    t_ccip_clAddr addr_next;
-
-    always_ff @(posedge clk)
-    begin
-	// Next read address is valid when we have got the write response back
-	// and channel is not full
-        //addr_next_valid <= sRx.c0TxAlmFull; 
-        addr_next_valid <= sRx.c1.rspValid;
-
-        // Next address is current address plus address length
-	// Apurve 
-        //addr_next <= addr_next + addr_size;
-        addr_next <= addr_next + 0;
-
-        // End of list reached if we have read 10 times
-        rd_end_of_list <= (cnt_list_length == 'h10);
-    end
-
-    //
-    // Since back pressure may prevent an immediate read request, we must
-    // record whether a read is needed and hold it until the request can
-    // be sent to the FIU.
-    //
-    t_ccip_clAddr rd_addr;
-
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            rd_needed <= 1'b0;
-        end
-        else
-        begin
-            // If reads are allowed this cycle then we can safely clear
-            // any previously requested reads.  This simple AFU has only
-            // one read in flight at a time since it is walking a pointer
-            // chain.
-            if (rd_needed)
-            begin
-                rd_needed <= sRx.c0TxAlmFull;
-            end
-            else
-            begin
-                // Need a read under two conditions:
-                //   - Starting a new walk
-                //   - A read response just arrived from a line containing
-                //     a next pointer.
-                rd_needed <= (start_read || (addr_next_valid && ! rd_end_of_list));
-                rd_addr <= (start_read ? read_mem_addr : addr_next);
-            end
-        end
-    end
-
-    //
-    // Emit read requests to the FIU.
-    //
-
-    // Read header defines the request to the FIU
-    t_cci_c0_ReqMemHdr rd_hdr;
-
-    always_comb
-    begin
-        rd_hdr = t_cci_c0_ReqMemHdr'(0);
-
-        // Read request type
-        rd_hdr.req_type = eREQ_RDLINE_I;
-        // Virtual address (MPF virtual addressing is enabled)
-        rd_hdr.address = rd_addr;
-        // Let the FIU pick the channel
-        rd_hdr.vc_sel = eVC_VA;
-        // Read 4 lines (the size of an entry in the list)
-        rd_hdr.cl_len = eCL_LEN_4;
-    end
-
-    // Send read requests to the FIU
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            sTx.c0.valid <= 1'b0;
-            cnt_list_length <= 0;
-        end
-        else
-        begin
-            // Generate a read request when needed and the FIU isn't full
-            sTx.c0.valid <= (rd_needed && ! sRx.c0TxAlmFull);
-            sTx.c0.hdr <= rd_hdr;
-
-            if (rd_needed && ! sRx.c0TxAlmFull)
-            begin
-                cnt_list_length <= cnt_list_length + 1;
-                //$display("  Reading from VA 0x%x", clAddrToByteAddr(rd_addr));
-                $display("Incrementing read count...");
-            end
-        end
-    end
-
-    //
-    // READ RESPONSE HANDLING
-    //
-
-    //
-    // Receive data (read responses).
-    //
-    always_ff @(posedge clk)
-    begin
-	if (reset)
-	begin
-            do_update <= 1'b0;
-        end
-	else
-	begin
-	    if (state == STATE_READ)
-	    begin
-                rd_data <= sRx.c0.data;
-                do_update <= 1'b1;
-            end
-	    if (state == STATE_UPDATE)
-	    begin
-	        // Update the read data and put it in the write data to be written
-                wr_data <= rd_data + 1;
-                do_update <= 1'b0;
-            end
-        end
-    end
-
-
-    // =========================================================================
-    //
-    //   Write logic.
-    //
-    // =========================================================================
-
-
-    //
-    // WRITE REQUEST
-    //
-
-    // Did a write response just arrive
-    logic wr_addr_next_valid;
-
-    // Next write address
-    t_ccip_clAddr wr_addr_next;
-
-    always_ff @(posedge clk)
-    begin
-        // Next write address is valid when we have got the read response back
-        // and channel is not full
-        //wr_addr_next_valid <= sRx.c1TxAlmFull; 
-        wr_addr_next_valid <= sRx.c0.rspValid;
-
-        // Next address is current address plus address length
-        // Apurve 
-        //wr_addr_next <= wr_addr_next + addr_size;
-        wr_addr_next <= wr_addr_next + 0;
-    end
-
-    //
-    // Since back pressure may prevent an immediate write request, we must
-    // record whether a write is needed and hold it until the request can
-    // be sent to the FIU.
-    //
-    t_ccip_clAddr wr_addr;
-
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            wr_needed <= 1'b0;
-        end
-        else
-        begin
-            // If writes are allowed this cycle then we can safely clear
-            // any previously requested writes.  This simple AFU has only
-            // one write in flight at a time since it is walking a pointer
-            // chain.
-            if (wr_needed)
-            begin
-                wr_needed <= sRx.c1TxAlmFull;
-            end
-            else
-            begin
-                // Need a write under two conditions:
-                //   - Starting a new walk
-                //   - A write response just arrived from a line containing
-                //     a next pointer.
-                //wr_needed <= (start_write || (wr_addr_next_valid && ! rd_end_of_list));
-                wr_needed <= (start_write || wr_addr_next_valid);
-                wr_addr <= (start_write ? write_mem_addr : wr_addr_next);
-            end
-        end
-    end
-
-    //
-    // Emit write requests to the FIU.
-    //
-
-    // Write header defines the request to the FIU
-    t_ccip_c1_ReqMemHdr wr_hdr;
-
-    always_comb
-    begin
-        wr_hdr = t_cci_c1_ReqMemHdr'(0);
-
-        // Write request type
-        wr_hdr.req_type = eREQ_RDLINE_I;
-        // Virtual address (MPF virtual addressing is enabled)
-        wr_hdr.address = wr_addr;
-        // Let the FIU pick the channel
-        wr_hdr.vc_sel = eVC_VA;
-        // Write 4 lines (the size of an entry in the list)
-        wr_hdr.cl_len = eCL_LEN_4;
-        // Start of packet is true (single line write)
-        wr_hdr.sop = 1'b1;
-    end
-
-    // Send write requests to the FIU
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            sTx.c1.valid <= 1'b0;
-            //cnt_list_length <= 0;
-        end
-        else
-        begin
-            // Generate a write request when needed and the FIU isn't full
-            sTx.c1.valid <= (wr_needed && ! sRx.c1TxAlmFull);
-            sTx.c1.hdr <= wr_hdr;
-	    sTx.c1.data = t_ccip_clData'(wr_data);
-
-            //if (wr_needed && ! sRx.c1TxAlmFull)
-            //begin
-            //    cnt_list_length <= cnt_list_length + 1;
-            //    //$display("  Writing from VA 0x%x", clAddrToByteAddr(rd_addr));
-            //    $display("Incrementing write count...");
-            //end
-        end
-    end
-
-    //
-    // WRITE RESPONSE HANDLING
-    //
-
-    // Apurve: Check if a signal is to be sent to read to start reading in case
-    // write response does not work
-    //
-    // Send data (write requests).
-    //
-    //always_ff @(posedge clk)
-    //begin
-    //    if (state == STATE_WRITE)
-    //    begin
-    //        rd_data <= sRx.c0.data;
-    //    end
-    //    if (state == STATE_UPDATE)
-    //    begin
-    //        // Update the write data and put it in the write data to be written
-    //        wr_data <= rd_data + 1;
-    //    end
-    //end
-
-endmodule
--- a/driver/tests/dogfood/Memcpy/hw/rtl/cci_hello.json
+++ b/driver/tests/dogfood/Memcpy/hw/rtl/cci_hello.json
@ -1,18 +0,0 @@
-{
-   "version": 1,
-   "afu-image": {
-      "power": 0,
-      "afu-top-interface":
-         {
-            "name": "ccip_std_afu"
-         },
-      "accelerator-clusters":
-         [
-            {
-               "name": "cci_hello",
-               "total-contexts": 1,
-               "accelerator-type-uuid": "c6aa954a-9b91-4a37-abc1-1d9f0709dcc3"
-            }
-         ]
-   }
-}
--- a/driver/tests/dogfood/Memcpy/hw/rtl/cci_hello_afu.sv
+++ b/driver/tests/dogfood/Memcpy/hw/rtl/cci_hello_afu.sv
@ -1,653 +0,0 @@
-//
-// Copyright (c) 2017, Intel Corporation
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// Redistributions of source code must retain the above copyright notice, this
-// list of conditions and the following disclaimer.
-//
-// Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// Neither the name of the Intel Corporation nor the names of its contributors
-// may be used to endorse or promote products derived from this software
-// without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-
-
-// Read from the memory locations first and then write to the memory locations
-
-`include "platform_if.vh"
-`include "afu_json_info.vh"
-
-
-module ccip_std_afu
-   (
-    // CCI-P Clocks and Resets
-    input           logic             pClk,              // 400MHz - CCI-P clock domain. Primary interface clock
-    input           logic             pClkDiv2,          // 200MHz - CCI-P clock domain.
-    input           logic             pClkDiv4,          // 100MHz - CCI-P clock domain.
-    input           logic             uClk_usr,          // User clock domain. Refer to clock programming guide  ** Currently provides fixed 300MHz clock **
-    input           logic             uClk_usrDiv2,      // User clock domain. Half the programmed frequency  ** Currently provides fixed 150MHz clock **
-    input           logic             pck_cp2af_softReset,      // CCI-P ACTIVE HIGH Soft Reset
-    input           logic [1:0]       pck_cp2af_pwrState,       // CCI-P AFU Power State
-    input           logic             pck_cp2af_error,          // CCI-P Protocol Error Detected
-
-    // Interface structures
-    input           t_if_ccip_Rx      pck_cp2af_sRx,        // CCI-P Rx Port
-    output          t_if_ccip_Tx      pck_af2cp_sTx         // CCI-P Tx Port
-    );
-
-
-    //
-    // Run the entire design at the standard CCI-P frequency (400 MHz).
-    //
-    logic clk;
-    assign clk = pClk;
-
-    logic reset;
-    assign reset = pck_cp2af_softReset;
-
-    logic [511:0] wr_data;
-    logic [511:0] rd_data;
-
-    logic do_update;
-    logic start_read;
-    logic start_write;
-    logic wr_addr_next_valid;
-    logic addr_next_valid;
-    logic rd_end_of_list;
-    logic rd_needed;
-    logic wr_needed;
-    logic read_req;
-    logic write_req;
-    logic [15:0] cnt_list_length;
-    t_ccip_clAddr rd_addr;
-    t_ccip_clAddr wr_addr;
-    t_ccip_clAddr addr_next;
-    t_ccip_clAddr wr_addr_next;
-
-    // =========================================================================
-    //
-    //   Register requests.
-    //
-    // =========================================================================
-
-    //
-    // The incoming pck_cp2af_sRx and outgoing pck_af2cp_sTx must both be
-    // registered.  Here we register pck_cp2af_sRx and assign it to sRx.
-    // We also assign pck_af2cp_sTx to sTx here but don't register it.
-    // The code below never uses combinational logic to write sTx.
-    //
-
-    t_if_ccip_Rx sRx; // registered copy of the pck_cp2af_sRx input port
-    always_ff @(posedge clk)
-    begin
-        sRx <= pck_cp2af_sRx; // sampled on every clock edge; this register has no reset term
-    end
-
-    t_if_ccip_Tx sTx;
-    assign pck_af2cp_sTx = sTx;
-
-
-    // =========================================================================
-    //
-    //   CSR (MMIO) handling.
-    //
-    // =========================================================================
-
-    // The AFU ID is a unique ID for a given program.  Here we generated
-    // one with the "uuidgen" program and stored it in the AFU's JSON file.
-    // ASE and synthesis setup scripts automatically invoke afu_json_mgr
-    // to extract the UUID into afu_json_info.vh.
-    logic [127:0] afu_id = `AFU_ACCEL_UUID;
-
-    //
-    // A valid AFU must implement a device feature list, starting at MMIO
-    // address 0.  Every entry in the feature list begins with 5 64-bit
-    // words: a device feature header, two AFU UUID words and two reserved
-    // words.
-    //
-
-    // Is a CSR read request active this cycle?
-    logic is_csr_read;
-    assign is_csr_read = sRx.c0.mmioRdValid;
-
-    // Is a CSR write request active this cycle?
-    logic is_csr_write;
-    assign is_csr_write = sRx.c0.mmioWrValid;
-
-    // The MMIO request header is overlayed on the normal c0 memory read
-    // response data structure.  Cast the c0Rx header to an MMIO request
-    // header.
-    t_ccip_c0_ReqMmioHdr mmio_req_hdr;
-    assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(sRx.c0.hdr);
-
-
-    //
-    // Implement the device feature list by responding to MMIO reads.
-    //
-
-    always_ff @(posedge clk) // builds the channel-2 MMIO read response from the registered request
-    begin
-        if (reset)
-        begin
-            sTx.c2.mmioRdValid <= 1'b0; // only the valid bit is reset; tid and data are not
-        end
-        else
-        begin
-            // Always respond with something for every read request
-            sTx.c2.mmioRdValid <= is_csr_read;
-
-            // The unique transaction ID matches responses to requests
-            sTx.c2.hdr.tid <= mmio_req_hdr.tid;
-
-            // Addresses are of 32-bit objects in MMIO space.  Addresses
-            // of 64-bit objects are thus multiples of 2.
-            case (mmio_req_hdr.address)
-              0: // AFU DFH (device feature header)
-                begin
-                    // Here we define a trivial feature list.  In this
-                    // example, our AFU is the only entry in this list.
-                    sTx.c2.data <= t_ccip_mmioData'(0); // zero fill; the two bit-field assignments below override it
-                    // Feature type is AFU
-                    sTx.c2.data[63:60] <= 4'h1;
-                    // End of list (last entry in list)
-                    sTx.c2.data[40] <= 1'b1;
-                end
-
-              // AFU_ID_L
-              2: sTx.c2.data <= afu_id[63:0];
-
-              // AFU_ID_H
-              4: sTx.c2.data <= afu_id[127:64];
-
-              // DFH_RSVD0
-              6: sTx.c2.data <= t_ccip_mmioData'(0);
-
-              // DFH_RSVD1
-              8: sTx.c2.data <= t_ccip_mmioData'(0);
-
-	      // Debug CSR: returns the current start_read flag (added by apurve to check fpgaReadMMIO)
-              10: sTx.c2.data <= t_ccip_mmioData'(start_read);
-
-              default: sTx.c2.data <= t_ccip_mmioData'(0); // unmapped addresses read back as zero
-            endcase
-        end
-    end
-
-
-    //
-    // CSR write handling.  Host software must tell the AFU the memory address
-    // to which it should be writing.  The address is set by writing a CSR.
-    //
-
-    // We use MMIO address 0 to set the memory address.  The read and
-    // write MMIO spaces are logically separate so we are free to use
-    // whatever we like.  This may not be good practice for cleanly
-    // organizing the MMIO address space, but it is legal.
-    logic is_mem_addr_csr_write;
-    assign is_mem_addr_csr_write = is_csr_write &&
-                                   (mmio_req_hdr.address == t_ccip_mmioAddr'(0));
-
-    // Memory address to which this AFU will write.
-    t_ccip_clAddr write_mem_addr;
-
-    always_ff @(posedge clk) // latches the write base address from the CSR write and raises start_write
-    begin
-        if (reset)
-        begin
-	    start_write <= 1'b0; // write_mem_addr itself is not reset
-        end
-	else if (is_mem_addr_csr_write)
-        begin
-            write_mem_addr <= t_ccip_clAddr'(sRx.c0.data); // MMIO write data is cast to a cache-line address
-	    start_write <= 1'b1; // NOTE(review): start_write is also cleared by the state-machine always_ff; always_ff variables should have a single driver
-            //$display("Write mem address is 0x%x", t_ccip_clAddr'(write_mem_addr));
-        end
-    end
-    
-
-    // We use MMIO address 2 (32-bit word index, i.e. byte offset 8) to set the memory address for reading data.
-    logic is_mem_addr_csr_read;
-    assign is_mem_addr_csr_read = is_csr_write &&
-                                   (mmio_req_hdr.address == t_ccip_mmioAddr'(2));
-
-    // Memory address from which this AFU will read.
-    t_ccip_clAddr read_mem_addr;
-
-    //logic start_traversal = 'b0;
-    //t_ccip_clAddr start_traversal_addr;
-
-    always_ff @(posedge clk) // latches the read base address from the CSR write and raises start_read
-    begin
-        if (reset)
-        begin
-	    start_read <= 1'b0; // read_mem_addr itself is not reset
-        end
-        else if (is_mem_addr_csr_read)
-        begin
-            read_mem_addr <= t_ccip_clAddr'(sRx.c0.data); // MMIO write data is cast to a cache-line address
-	    start_read <= 1'b1; // NOTE(review): start_read is also cleared by the read-response always_ff; always_ff variables should have a single driver
-            //$display("Read mem address is 0x%x", t_ccip_clAddr'(read_mem_addr));
-        end
-    end
-
-
-    // =========================================================================
-    //
-    //   Main AFU logic
-    //
-    // =========================================================================
-
-    //
-    // States in our simple example.
-    //
-    //typedef enum logic [0:0]
-    typedef enum logic [1:0] // four states, so two bits are needed
-    {
-	STATE_IDLE,   // waiting for start_read
-        STATE_READ,   // read issued; waiting until do_update is set and rd_needed is clear
-        STATE_UPDATE, // waiting for do_update to clear
-        STATE_WRITE   // waiting for wr_needed to clear, or for rd_end_of_list
-    }
-    t_state;
-
-    t_state state; // current state; reset to STATE_IDLE by the state machine
-
-    //
-    // State machine
-    //
-    always_ff @(posedge clk) // sequences IDLE -> READ -> UPDATE -> WRITE -> (READ or IDLE)
-    begin
-        if (reset)
-        begin
-            state <= STATE_IDLE;
-	    rd_end_of_list <= 1'b0; // NOTE(review): rd_end_of_list is also assigned by the next-read-address always_ff
-        end
-        else
-        begin
-            case (state)
-              STATE_IDLE:
-                begin
-                    // Traversal begins once start_read is set, i.e. after the CSR at MMIO address 2 is written
-                    if (start_read)
-                    begin
-                        state <= STATE_READ;
-                        $display("AFU starting traversal at 0x%x", t_ccip_clAddr'(read_mem_addr));
-                    end
-                end
-
-              STATE_READ:
-                begin
-                    $display("AFU in READ..."); // printed on every clock spent in this state
-                    $display("do_update is %d...",do_update);
-                    $display("addr_next_valid is %d...",addr_next_valid);
-                    $display("rd_needed is %d...",rd_needed);
-                    if (!rd_needed && do_update) // read data captured and no read still pending
-                    begin
-		    	state <= STATE_UPDATE;
-                        $display("AFU moving to UPDATE...");
-                    end
-                end
-
-              STATE_UPDATE:
-                begin
-		    // wr_data is computed by the read-response block, which also clears do_update; wait for that here
-                    $display("AFU in UPDATE...");
-                    if (!do_update)
-		    begin
-		    	state <= STATE_WRITE;
-			wr_needed <= 1'b1; // NOTE(review): wr_needed is also driven by the write-needed always_ff
-                        $display("AFU moving to WRITE...");
-		    end
-                end
-
-              STATE_WRITE:
-                begin
-		    // The write request itself is issued by the write-request block.
-		    // Here we only wait: end of list returns to IDLE,
-		    // otherwise go back to READ once wr_needed has cleared.
-                    $display("AFU in WRITE...");
-                    if (rd_end_of_list)
-		    begin
-			state <= STATE_IDLE;
-			$display("AFU done...");
-		    end
-                    else if (!wr_needed)
-		    begin
-			state <= STATE_READ;
-			$display("AFU moving to READ from WRITE...");
-		    	start_write <= 1'b0; // NOTE(review): start_write is also driven by the write-address CSR always_ff
-			write_req <= 1'b0; // NOTE(review): write_req is also driven by the write-request always_ff
-		    end
-                end
-            endcase
-        end
-    end
-
-
-    // =========================================================================
-    //
-    //   Read logic.
-    //
-    // =========================================================================
-
-    //
-    // READ REQUEST
-    //
-
-    // Did a write response just arrive
-
-    // Next read address
-
-    always_ff @(posedge clk) // no reset branch: these registers only change on a write response
-    begin
-	// Next read address is valid when we have got the write response back
-	if (sRx.c1.rspValid)
-    	begin
-            addr_next_valid <= sRx.c1.rspValid; // always 1 here; cleared by the read-request always_ff (NOTE(review): second driver)
-
-	    //if (state == STATE_READ && !rd_needed)
-    	    //begin
-                // Apurve: Next address is current address plus address length
-                //addr_next <= addr_next + addr_size;
-            addr_next <= (addr_next_valid ? rd_addr + 0 : rd_addr); // "+ 0" is a no-op: both branches yield rd_addr
-
-                // End of list reached once cnt_list_length equals 5
-            rd_end_of_list <= (cnt_list_length == 'h5); // NOTE(review): also reset by the state-machine always_ff
-    	    //end
-    	end	
-    end
-
-    //
-    // Since back pressure may prevent an immediate read request, we must
-    // record whether a read is needed and hold it until the request can
-    // be sent to the FIU.
-    //
-
-    always_ff @(posedge clk) // tracks whether a read is outstanding and selects its address
-    begin
-        if (reset)
-        begin
-            rd_needed <= 1'b0; // rd_addr is not reset
-        end
-        else
-        begin
-            // Once a read is needed, hold rd_needed until the read response
-            // (sRx.c0.rspValid) arrives.  The earlier variants that cleared
-            // it based on sRx.c0TxAlmFull are kept below, commented out,
-            // for reference.
-            if (rd_needed)
-            begin
-                //rd_needed <= sRx.c0TxAlmFull;
-                //rd_needed <= (!sRx.c0TxAlmFull && !sRx.c0.rspValid);
-                rd_needed <= !sRx.c0.rspValid;
-            end
-            else if (state == STATE_READ)
-            begin
-                // Need a read under two conditions:
-                //   - Starting a new walk (start_read)
-                //   - A next address is valid (addr_next_valid, set on a write
-                //     response), the list is not finished and c0 is not almost full.
-                rd_needed <= (start_read || (!sRx.c0TxAlmFull && (addr_next_valid && ! rd_end_of_list)));
-                rd_addr <= (start_read ? read_mem_addr : addr_next); // first read uses the CSR-supplied address
-            	//$display("rd_addr is 0x%x",  t_ccip_clAddr'(rd_addr));
-            	//$display("read mem addr is 0x%x",  t_ccip_clAddr'(read_mem_addr));
-            	//$display("start read is %d", start_read);
-            end
-        end
-    end
-
-    //
-    // Emit read requests to the FIU.
-    //
-
-    // Read header defines the request to the FIU
-    t_ccip_c0_ReqMemHdr rd_hdr; // combinational read-request header
-
-    always_comb
-    begin
-        rd_hdr = t_ccip_c0_ReqMemHdr'(0); // every field defaults to zero; only address is overridden below
-
-        // Read request type (No intention to cache) -- left at the zero default
-        //rd_hdr.req_type = 4'h0;
-
-        // Request address.  NOTE(review): original comment says MPF virtual addressing is enabled; no MPF instance is visible in this module -- confirm
-        rd_hdr.address = rd_addr;
-
-        // Read over channel VA -- left at the zero default
-        //rd_hdr.vc_sel = 2'h0;
-
-        // Read one cache line (64 bytes) -- left at the zero default
-        //rd_hdr.cl_len = 2'h0;
-    end
-
-    // Send read requests to the FIU
-    always_ff @(posedge clk) // registers the channel-0 (read) request and counts issued reads
-    begin
-        if (reset)
-        begin
-            sTx.c0.valid <= 1'b0;
-            cnt_list_length <= 0;
-	    read_req <= 1'b0; // NOTE(review): read_req is also cleared by the read-response always_ff
-        end
-        else
-        begin
-            // Generate a read request when needed and the FIU isn't full
-	    if (state == STATE_READ) // sTx.c0.valid is only updated while in STATE_READ
-            begin
-            	sTx.c0.valid <= (rd_needed && !sRx.c0TxAlmFull && !read_req); // read_req blocks a repeat request
-
-            	if (rd_needed && !sRx.c0TxAlmFull && !read_req)
-            	begin
-	    	    sTx.c0.hdr <= rd_hdr;
-            	    cnt_list_length <= cnt_list_length + 1;
-		    read_req <= 1'b1;
-            	    $display("Incrementing read count...%d",cnt_list_length); // prints the count before the increment takes effect
-            	    $display("Read address is 0x%x...",rd_hdr.address);
-		    addr_next_valid <= 1'b0; // NOTE(review): addr_next_valid is also set by the next-read-address always_ff
-		    // Apurve: Add something to stop read once this section has been accessed
-		    //rd_needed <= 1'b0; 
-            	end
-            end
-        end
-    end
-
-    //
-    // READ RESPONSE HANDLING
-    //
-
-    //
-    // Receive data (read responses).
-    //
-    always_ff @(posedge clk) // captures read data, then produces the write data while in STATE_UPDATE
-    begin
-        if (reset)
-        begin
-            do_update <= 1'b0; // rd_data and wr_data are not reset
-        end
-        else
-        begin
-            if (!do_update && sRx.c0.rspValid)
-            begin
-                rd_data <= sRx.c0.data;
-                do_update <= 1'b1;
-                $display("rd data is %d...",sRx.c0.data); // log the value being captured, not the stale register
-            end
-
-            if ((state == STATE_UPDATE) && (do_update == 1'b1))
-            begin
-                // Update the read data and put it in the write data to be written
-                wr_data <= rd_data + 2;
-                do_update <= 1'b0;
-                read_req <= 1'b0; // NOTE(review): read_req is also driven by the read-request always_ff
-                $display("write data is %d...",rd_data + 2); // log the value being stored, not the stale register
-
-                // First read done. Next reads should be from the updated addresses
-                start_read <= 1'b0; // NOTE(review): start_read is also driven by the read-address CSR always_ff
-            end
-        end
-    end
-
-
-    // =========================================================================
-    //
-    //   Write logic.
-    //
-    // =========================================================================
-
-
-    //
-    // WRITE REQUEST
-    //
-
-    // Did a write response just arrive
-
-    // Next write address
-
-    always_ff @(posedge clk) // no reset branch: these registers only change on a read response
-    begin
-	if (sRx.c0.rspValid)
-    	begin
-            // Next write address is valid when we have got the read response back
-            wr_addr_next_valid <= sRx.c0.rspValid; // always 1 here; cleared by the write-request always_ff (NOTE(review): second driver)
-            //wr_addr_next_valid <= (!start_write && sRx.c0.rspValid);
-
-	    //if (state == STATE_WRITE && !wr_needed)
-	    //begin
-                // Apurve: Next address is current address plus address length
-                //wr_addr_next <= wr_addr + 0;
-                wr_addr_next <= (wr_addr_next_valid ? wr_addr + 0 : wr_addr); // "+ 0" is a no-op: both branches yield wr_addr
-	    //end
-	end
-    end
-
-    //
-    // Since back pressure may prevent an immediate write request, we must
-    // record whether a write is needed and hold it until the request can
-    // be sent to the FIU.
-    //
-
-    always_ff @(posedge clk) // tracks whether a write is outstanding and selects its address
-    begin
-        if (reset)
-        begin
-            wr_needed <= 1'b0; // wr_addr is not reset
-        end
-        else
-        begin
-            // Once a write is needed, hold wr_needed until the write response
-            // (sRx.c1.rspValid) arrives.  The earlier variants that cleared
-            // it based on sRx.c1TxAlmFull are kept below, commented out,
-            // for reference.
-            if (wr_needed)
-            begin
-                //wr_needed <= sRx.c1TxAlmFull;
-                //wr_needed <= (!sRx.c1TxAlmFull && !sRx.c1.rspValid);
-                wr_needed <= !sRx.c1.rspValid;
-            end
-            else // unlike the rd_needed logic, this branch is not qualified by the FSM state
-            begin
-                // Need a write under two conditions:
-                //   - Starting a new walk (start_write)
-                //   - A next write address is valid (wr_addr_next_valid, set on
-                //     a read response) and c1 is not almost full.
-                wr_needed <= (start_write || (!sRx.c1TxAlmFull && wr_addr_next_valid)); // NOTE(review): wr_needed is also set by the state-machine always_ff
-                wr_addr <= (start_write ? write_mem_addr : wr_addr_next); // first write uses the CSR-supplied address
-            	//$display("Write mem address later is 0x%x", t_ccip_clAddr'(write_mem_addr));
-            end
-        end
-    end
-
-    //
-    // Emit write requests to the FIU.
-    //
-
-    // Write header defines the request to the FIU
-    t_ccip_c1_ReqMemHdr wr_hdr; // combinational write-request header
-
-    always_comb
-    begin
-        wr_hdr = t_ccip_c1_ReqMemHdr'(0); // every field defaults to zero; address and sop are overridden below
-
-        // Write request type -- left at the zero default
-        //wr_hdr.req_type = 4'h0;
-
-        // Request address.  NOTE(review): original comment says MPF virtual addressing is enabled; no MPF instance is visible in this module -- confirm
-        wr_hdr.address = wr_addr;
-
-        // Channel select -- left at the zero default (the commented-out value was 2'h2)
-        //wr_hdr.vc_sel = 2'h2;
-
-        // Write 1 cache line (64 bytes) -- left at the zero default
-        //wr_hdr.cl_len = 2'h0;
-
-        // Start of packet is true (single line write)
-        wr_hdr.sop = 1'b1;
-    end
-
-    // Send write requests to the FIU
-    always_ff @(posedge clk) // registers the channel-1 (write) request driven to the FIU
-    begin
-        if (reset)
-        begin
-            sTx.c1.valid <= 1'b0;
-            write_req <= 1'b0; // NOTE(review): write_req is also cleared by the state-machine always_ff
-        end
-        else
-        begin
-            // Generate a write request when needed and the FIU isn't full
-	    if (state == STATE_WRITE) // sTx.c1.valid is only updated while in STATE_WRITE
-            begin
-            	sTx.c1.valid <= (wr_needed && !sRx.c1TxAlmFull && !write_req); // write_req blocks a repeat request
-		if (wr_needed && !sRx.c1TxAlmFull && !write_req)
-		begin
-            	    sTx.c1.hdr <= wr_hdr;
-	    	    sTx.c1.data <= t_ccip_clData'(wr_data);
-		    write_req <= 1'b1;
-		    wr_addr_next_valid <= 1'b0; // NOTE(review): wr_addr_next_valid is also set by the next-write-address always_ff
-		    $display("Write address is 0x%x...", wr_hdr.address);
-            	end
-            end
-        end
-    end
-
-
-    //
-    // WRITE RESPONSE HANDLING
-    //
-
-    // Apurve: Check if a signal is to be sent to read to start reading in case
-    // write response does not work
-    //
-    // Send data (write requests).
-    //
-    //always_ff @(posedge clk)
-    //begin
-    //    if (state == STATE_WRITE)
-    //    begin
-    //        rd_data <= sRx.c0.data;
-    //    end
-    //    if (state == STATE_UPDATE)
-    //    begin
-    //        // Update the write data and put it in the write data to be written
-    //        wr_data <= rd_data + 1;
-    //    end
-    //end
-
-endmodule
--- a/driver/tests/dogfood/Memcpy/hw/rtl/cci_hello_afu_working.sv
+++ b/driver/tests/dogfood/Memcpy/hw/rtl/cci_hello_afu_working.sv
@ -1,621 +0,0 @@
-//
-// Copyright (c) 2017, Intel Corporation
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// Redistributions of source code must retain the above copyright notice, this
-// list of conditions and the following disclaimer.
-//
-// Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// Neither the name of the Intel Corporation nor the names of its contributors
-// may be used to endorse or promote products derived from this software
-// without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-
-
-// Read from the memory locations first and then write to the memory locations
-
-`include "platform_if.vh"
-`include "afu_json_info.vh"
-
-
-module ccip_std_afu
-   (
-    // CCI-P Clocks and Resets
-    input           logic             pClk,              // 400MHz - CCI-P clock domain. Primary interface clock
-    input           logic             pClkDiv2,          // 200MHz - CCI-P clock domain.
-    input           logic             pClkDiv4,          // 100MHz - CCI-P clock domain.
-    input           logic             uClk_usr,          // User clock domain. Refer to clock programming guide  ** Currently provides fixed 300MHz clock **
-    input           logic             uClk_usrDiv2,      // User clock domain. Half the programmed frequency  ** Currently provides fixed 150MHz clock **
-    input           logic             pck_cp2af_softReset,      // CCI-P ACTIVE HIGH Soft Reset
-    input           logic [1:0]       pck_cp2af_pwrState,       // CCI-P AFU Power State
-    input           logic             pck_cp2af_error,          // CCI-P Protocol Error Detected
-
-    // Interface structures
-    input           t_if_ccip_Rx      pck_cp2af_sRx,        // CCI-P Rx Port
-    output          t_if_ccip_Tx      pck_af2cp_sTx         // CCI-P Tx Port
-    );
-
-
-    //
-    // Run the entire design at the standard CCI-P frequency (400 MHz).
-    //
-    logic clk;
-    assign clk = pClk;
-
-    logic reset;
-    assign reset = pck_cp2af_softReset;
-
-    logic [511:0] wr_data;
-    logic [511:0] rd_data;
-
-    logic do_update;
-    logic start_read;
-    logic start_write;
-    logic wr_addr_next_valid;
-    logic addr_next_valid;
-    logic rd_end_of_list;
-    logic rd_needed;
-    logic wr_needed;
-    logic [15:0] cnt_list_length;
-    t_ccip_clAddr rd_addr;
-    t_ccip_clAddr wr_addr;
-    t_ccip_clAddr addr_next;
-    t_ccip_clAddr wr_addr_next;
-
-    // =========================================================================
-    //
-    //   Register requests.
-    //
-    // =========================================================================
-
-    //
-    // The incoming pck_cp2af_sRx and outgoing pck_af2cp_sTx must both be
-    // registered.  Here we register pck_cp2af_sRx and assign it to sRx.
-    // We also assign pck_af2cp_sTx to sTx here but don't register it.
-    // The code below never uses combinational logic to write sTx.
-    //
-
-    t_if_ccip_Rx sRx; // registered copy of the pck_cp2af_sRx input port
-    always_ff @(posedge clk)
-    begin
-        sRx <= pck_cp2af_sRx; // sampled on every clock edge; this register has no reset term
-    end
-
-    t_if_ccip_Tx sTx;
-    assign pck_af2cp_sTx = sTx;
-
-
-    // =========================================================================
-    //
-    //   CSR (MMIO) handling.
-    //
-    // =========================================================================
-
-    // The AFU ID is a unique ID for a given program.  Here we generated
-    // one with the "uuidgen" program and stored it in the AFU's JSON file.
-    // ASE and synthesis setup scripts automatically invoke afu_json_mgr
-    // to extract the UUID into afu_json_info.vh.
-    logic [127:0] afu_id = `AFU_ACCEL_UUID;
-
-    //
-    // A valid AFU must implement a device feature list, starting at MMIO
-    // address 0.  Every entry in the feature list begins with 5 64-bit
-    // words: a device feature header, two AFU UUID words and two reserved
-    // words.
-    //
-
-    // Is a CSR read request active this cycle?
-    logic is_csr_read;
-    assign is_csr_read = sRx.c0.mmioRdValid;
-
-    // Is a CSR write request active this cycle?
-    logic is_csr_write;
-    assign is_csr_write = sRx.c0.mmioWrValid;
-
-    // The MMIO request header is overlayed on the normal c0 memory read
-    // response data structure.  Cast the c0Rx header to an MMIO request
-    // header.
-    t_ccip_c0_ReqMmioHdr mmio_req_hdr;
-    assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(sRx.c0.hdr);
-
-
-    //
-    // Implement the device feature list by responding to MMIO reads.
-    //
-
-    always_ff @(posedge clk) // builds the channel-2 MMIO read response from the registered request
-    begin
-        if (reset)
-        begin
-            sTx.c2.mmioRdValid <= 1'b0; // only the valid bit is reset; tid and data are not
-        end
-        else
-        begin
-            // Always respond with something for every read request
-            sTx.c2.mmioRdValid <= is_csr_read;
-
-            // The unique transaction ID matches responses to requests
-            sTx.c2.hdr.tid <= mmio_req_hdr.tid;
-
-            // Addresses are of 32-bit objects in MMIO space.  Addresses
-            // of 64-bit objects are thus multiples of 2.
-            case (mmio_req_hdr.address)
-              0: // AFU DFH (device feature header)
-                begin
-                    // Here we define a trivial feature list.  In this
-                    // example, our AFU is the only entry in this list.
-                    sTx.c2.data <= t_ccip_mmioData'(0); // zero fill; the two bit-field assignments below override it
-                    // Feature type is AFU
-                    sTx.c2.data[63:60] <= 4'h1;
-                    // End of list (last entry in list)
-                    sTx.c2.data[40] <= 1'b1;
-                end
-
-              // AFU_ID_L
-              2: sTx.c2.data <= afu_id[63:0];
-
-              // AFU_ID_H
-              4: sTx.c2.data <= afu_id[127:64];
-
-              // DFH_RSVD0
-              6: sTx.c2.data <= t_ccip_mmioData'(0);
-
-              // DFH_RSVD1
-              8: sTx.c2.data <= t_ccip_mmioData'(0);
-
-	      // Debug CSR: returns the current start_read flag (added by apurve to check fpgaReadMMIO)
-              10: sTx.c2.data <= t_ccip_mmioData'(start_read);
-
-              default: sTx.c2.data <= t_ccip_mmioData'(0); // unmapped addresses read back as zero
-            endcase
-        end
-    end
-
-
-    //
-    // CSR write handling.  Host software must tell the AFU the memory address
-    // to which it should be writing.  The address is set by writing a CSR.
-    //
-
-    // We use MMIO address 0 to set the memory address.  The read and
-    // write MMIO spaces are logically separate so we are free to use
-    // whatever we like.  This may not be good practice for cleanly
-    // organizing the MMIO address space, but it is legal.
-    logic is_mem_addr_csr_write;
-    assign is_mem_addr_csr_write = is_csr_write &&
-                                   (mmio_req_hdr.address == t_ccip_mmioAddr'(0));
-
-    // Memory address to which this AFU will write.
-    t_ccip_clAddr write_mem_addr;
-
-    always_ff @(posedge clk) // latches the write base address from the CSR write and raises start_write
-    begin
-        if (reset)
-        begin
-	    start_write <= 1'b0; // write_mem_addr itself is not reset
-        end
-	else if (is_mem_addr_csr_write)
-        begin
-            write_mem_addr <= t_ccip_clAddr'(sRx.c0.data); // MMIO write data is cast to a cache-line address
-	    start_write <= 1'b1; // NOTE(review): start_write is also cleared by the state-machine always_ff; always_ff variables should have a single driver
-            //$display("Write mem address is 0x%x", t_ccip_clAddr'(write_mem_addr));
-        end
-    end
-    
-
-    // We use MMIO address 2 (32-bit word index, i.e. byte offset 8) to set the memory address for reading data.
-    logic is_mem_addr_csr_read;
-    assign is_mem_addr_csr_read = is_csr_write &&
-                                   (mmio_req_hdr.address == t_ccip_mmioAddr'(2));
-
-    // Memory address from which this AFU will read.
-    t_ccip_clAddr read_mem_addr;
-
-    //logic start_traversal = 'b0;
-    //t_ccip_clAddr start_traversal_addr;
-
-    always_ff @(posedge clk) // latches the read base address from the CSR write and raises start_read
-    begin
-        if (reset)
-        begin
-	    start_read <= 1'b0; // read_mem_addr itself is not reset
-        end
-        else if (is_mem_addr_csr_read)
-        begin
-            read_mem_addr <= t_ccip_clAddr'(sRx.c0.data); // MMIO write data is cast to a cache-line address
-	    start_read <= 1'b1; // nothing in this block clears it again outside reset
-            //$display("Read mem address is 0x%x", t_ccip_clAddr'(read_mem_addr));
-        end
-    end
-
-
-    // =========================================================================
-    //
-    //   Main AFU logic
-    //
-    // =========================================================================
-
-    //
-    // States in our simple example.
-    //
-    //typedef enum logic [0:0]
-    typedef enum logic [1:0] // four states, so two bits are needed
-    {
-	STATE_IDLE,   // waiting for start_read
-        STATE_READ,   // waiting until do_update is set and rd_needed is clear
-        STATE_UPDATE, // waiting for do_update to clear
-        STATE_WRITE   // waiting for wr_needed to clear, or for rd_end_of_list
-    }
-    t_state;
-
-    t_state state; // current state; reset to STATE_IDLE by the state machine
-
-    //
-    // State machine
-    //
-    always_ff @(posedge clk) // sequences IDLE -> READ -> UPDATE -> WRITE -> (READ or IDLE)
-    begin
-        if (reset)
-        begin
-            state <= STATE_IDLE;
-	    rd_end_of_list <= 1'b0; // NOTE(review): rd_end_of_list is also assigned every cycle by the next-read-address always_ff
-        end
-        else
-        begin
-            case (state)
-              STATE_IDLE:
-                begin
-                    // Traversal begins once start_read is set, i.e. after the CSR at MMIO address 2 is written
-                    if (start_read)
-                    begin
-                        state <= STATE_READ;
-                        $display("AFU starting traversal at 0x%x", t_ccip_clAddr'(read_mem_addr));
-                    end
-                end
-
-              STATE_READ:
-                begin
-                    $display("AFU in READ..."); // printed on every clock spent in this state
-                    if (!rd_needed && do_update) // do_update set and no read still pending
-                    begin
-		    	state <= STATE_UPDATE;
-                        $display("AFU moving to UPDATE...");
-                    end
-                end
-
-              STATE_UPDATE:
-                begin
-		    // No data is modified in this block; it only waits for do_update to be cleared elsewhere
-                    $display("AFU in UPDATE...");
-                    if (!do_update)
-		    begin
-		    	state <= STATE_WRITE;
-			wr_needed <= 1'b1; // NOTE(review): presumably also driven by separate write-needed logic -- confirm single driver
-                        $display("AFU moving to WRITE...");
-		    end
-                end
-
-              STATE_WRITE:
-                begin
-		    // No write is issued in this block.
-		    // Here we only wait: end of list returns to IDLE,
-		    // otherwise go back to READ once wr_needed has cleared.
-                    $display("AFU in WRITE...");
-                    if (rd_end_of_list)
-		    begin
-			state <= STATE_IDLE;
-			$display("AFU done...");
-		    end
-                    else if (!wr_needed)
-		    begin
-			state <= STATE_READ;
-			$display("AFU moving to READ from WRITE...");
-		    	start_write <= 1'b0; // NOTE(review): start_write is also driven by the write-address CSR always_ff
-		    end
-                end
-            endcase
-        end
-    end
-
-
-    // =========================================================================
-    //
-    //   Read logic.
-    //
-    // =========================================================================
-
-    //
-    // READ REQUEST
-    //
-
-    // Did a write response just arrive
-
-    // Next read address
-
-    always_ff @(posedge clk)
-    begin
-	// Next read address is valid when we have got the write response back
-        addr_next_valid <= sRx.c1.rspValid;
-
-        // Apurve: Next address is current address plus address length
-        //addr_next <= addr_next + addr_size;
-        addr_next <= rd_addr + 0;
-
-        // End of list reached if we have read 5 times
-        rd_end_of_list <= (cnt_list_length == 'h5);
-    end
-
-    //
-    // Since back pressure may prevent an immediate read request, we must
-    // record whether a read is needed and hold it until the request can
-    // be sent to the FIU.
-    //
-
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            rd_needed <= 1'b0;
-        end
-        else
-        begin
-            // If reads are allowed this cycle then we can safely clear
-            // any previously requested reads.  This simple AFU has only
-            // one read in flight at a time since it is walking a pointer
-            // chain.
-            if (rd_needed)
-            begin
-                rd_needed <= sRx.c0TxAlmFull;
-            end
-            else
-            begin
-                // Need a read under two conditions:
-                //   - Starting a new walk
-                //   - A read response just arrived from a line containing
-                //     a next pointer.
-                rd_needed <= (start_read || (!sRx.c0TxAlmFull && (addr_next_valid && ! rd_end_of_list)));
-                rd_addr <= (start_read ? read_mem_addr : addr_next);
-            	//$display("rd_addr is 0x%x",  t_ccip_clAddr'(rd_addr));
-            	//$display("read mem addr is 0x%x",  t_ccip_clAddr'(read_mem_addr));
-            	//$display("start read is %d", start_read);
-            end
-        end
-    end
-
-    //
-    // Emit read requests to the FIU.
-    //
-
-    // Read header defines the request to the FIU
-    t_ccip_c0_ReqMemHdr rd_hdr;
-
-    always_comb
-    begin
-        rd_hdr = t_ccip_c0_ReqMemHdr'(0);
-
-        // Read request type (No intention to cache)
-        //rd_hdr.req_type = 4'h0;
-
-        // Virtual address (MPF virtual addressing is enabled)
-        rd_hdr.address = rd_addr;
-
-        // Read over channel VA 
-        //rd_hdr.vc_sel = 2'h0;
-
-        // Read one cache line (64 bytes) 
-        //rd_hdr.cl_len = 2'h0;
-    end
-
-    // Send read requests to the FIU
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            sTx.c0.valid <= 1'b0;
-            cnt_list_length <= 0;
-        end
-        else
-        begin
-            // Generate a read request when needed and the FIU isn't full
-	    if (state == STATE_READ)
-            begin
-            	sTx.c0.valid <= (rd_needed && !sRx.c0TxAlmFull);
-
-            	if (rd_needed && !sRx.c0TxAlmFull)
-            	begin
-	    	    sTx.c0.hdr <= rd_hdr;
-            	    cnt_list_length <= cnt_list_length + 1;
-            	    $display("Incrementing read count...%d",cnt_list_length);
-            	    $display("Read address is 0x%x...",rd_hdr.address);
-		    // Apurve: Add something to stop read once this section has been accessed
-            	end
-            end
-        end
-    end
-
-    //
-    // READ RESPONSE HANDLING
-    //
-
-    //
-    // Receive data (read responses).
-    //
-    always_ff @(posedge clk)
-    begin
-	if (reset)
-	begin
-            do_update <= 1'b0;
-        end
-	else
-	begin
-	    if (sRx.c0.rspValid)
-	    begin
-                rd_data <= sRx.c0.data;
-                do_update <= 1'b1;
-	        //$display("rd data is %d...",rd_data);
-            end
-
-	    if (state == STATE_UPDATE)
-	    begin
-	        // Update the read data and put it in the write data to be written
-                wr_data <= rd_data + 2;
-                do_update <= 1'b0;
-	        $display("write data is %d...",wr_data);
-
-		// First read done. Next reads should be from the updated addresses
-		start_read <= 1'b0; 
-            end
-        end
-    end
-
-
-    // =========================================================================
-    //
-    //   Write logic.
-    //
-    // =========================================================================
-
-
-    //
-    // WRITE REQUEST
-    //
-
-    // Did a write response just arrive
-
-    // Next write address
-
-    always_ff @(posedge clk)
-    begin
-        // Next write address is valid when we have got the read response back
-        wr_addr_next_valid <= sRx.c0.rspValid;
-
-        // Apurve: Next address is current address plus address length
-        wr_addr_next <= wr_addr + 0;
-
-    end
-
-    //
-    // Since back pressure may prevent an immediate write request, we must
-    // record whether a write is needed and hold it until the request can
-    // be sent to the FIU.
-    //
-
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            wr_needed <= 1'b0;
-        end
-        else
-        begin
-            // If writes are allowed this cycle then we can safely clear
-            // any previously requested writes.  This simple AFU has only
-            // one write in flight at a time since it is walking a pointer
-            // chain.
-            if (wr_needed)
-            begin
-                wr_needed <= sRx.c1TxAlmFull;
-            end
-            else
-            begin
-                // Need a write under two conditions:
-                //   - Starting a new walk
-                //   - A write response just arrived from a line containing
-                //     a next pointer.
-                wr_needed <= (start_write || (!sRx.c1TxAlmFull && wr_addr_next_valid));
-                wr_addr <= (start_write ? write_mem_addr : wr_addr_next);
-            	//$display("Write mem address later is 0x%x", t_ccip_clAddr'(write_mem_addr));
-            end
-        end
-    end
-
-    //
-    // Emit write requests to the FIU.
-    //
-
-    // Write header defines the request to the FIU
-    t_ccip_c1_ReqMemHdr wr_hdr;
-
-    always_comb
-    begin
-        wr_hdr = t_ccip_c1_ReqMemHdr'(0);
-
-        // Write request type
-        //wr_hdr.req_type = 4'h0;
-
-        // Virtual address (MPF virtual addressing is enabled)
-        wr_hdr.address = wr_addr;
-
-        // Let the FIU pick the channel
-        //wr_hdr.vc_sel = 2'h2;
-
-        // Write 1 cache line (64 bytes) 
-        //wr_hdr.cl_len = 2'h0;
-
-        // Start of packet is true (single line write)
-        wr_hdr.sop = 1'b1;
-    end
-
-    // Send write requests to the FIU
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            sTx.c1.valid <= 1'b0;
-        end
-        else
-        begin
-            // Generate a write request when needed and the FIU isn't full
-	    if (state == STATE_WRITE)
-            begin
-            	sTx.c1.valid <= (wr_needed && !sRx.c1TxAlmFull);
-		if (wr_needed && !sRx.c1TxAlmFull)
-		begin
-            	    sTx.c1.hdr <= wr_hdr;
-	    	    sTx.c1.data <= t_ccip_clData'(wr_data);
-            	end
-            end
-        end
-    end
-
-
-    //
-    // WRITE RESPONSE HANDLING
-    //
-
-    // Apurve: Check if a signal is to be sent to read to start reading in case
-    // write response does not work
-    //
-    // Send data (write requests).
-    //
-    //always_ff @(posedge clk)
-    //begin
-    //    if (state == STATE_WRITE)
-    //    begin
-    //        rd_data <= sRx.c0.data;
-    //    end
-    //    if (state == STATE_UPDATE)
-    //    begin
-    //        // Update the write data and put it in the write data to be written
-    //        wr_data <= rd_data + 1;
-    //    end
-    //end
-
-endmodule
--- a/driver/tests/dogfood/Memcpy/hw/rtl/sources.txt
+++ b/driver/tests/dogfood/Memcpy/hw/rtl/sources.txt
@ -1,2 +0,0 @@
-cci_hello.json
-cci_hello_afu.sv
--- a/driver/tests/dogfood/Memcpy/hw/sim/setup_ase
+++ b/driver/tests/dogfood/Memcpy/hw/sim/setup_ase
@ -1,11 +0,0 @@
-#!/bin/sh
-
-##
-## Setup ASE environment using ../rtl/sources.txt.
-##
-
-# Absolute path to this script
-SCRIPT=$(readlink -f "$0")
-SCRIPT_PATH=$(dirname "$SCRIPT")
-
-afu_sim_setup --sources="${SCRIPT_PATH}/../rtl/sources.txt" $@
--- a/driver/tests/dogfood/Memcpy/sw/Makefile
+++ b/driver/tests/dogfood/Memcpy/sw/Makefile
@ -1,41 +0,0 @@
-include ../../common/sw/common_include.mk
-
-# Primary test name
-TEST = cci_hello
-
-# Build directory
-OBJDIR = obj
-CFLAGS += -I./$(OBJDIR)
-CPPFLAGS += -I./$(OBJDIR)
-
-# Files and folders
-SRCS = $(TEST).c
-OBJS = $(addprefix $(OBJDIR)/,$(patsubst %.c,%.o,$(SRCS)))
-
-# Targets (build only $(TEST)_ase by default)
-all: $(TEST) $(TEST)_ase
-
-# AFU info from JSON file, including AFU UUID
-AFU_JSON_INFO = $(OBJDIR)/afu_json_info.h
-
-$(AFU_JSON_INFO): ../hw/rtl/$(TEST).json | objdir
-	afu_json_mgr json-info --afu-json=$^ --c-hdr=$@
-
-$(OBJS): $(AFU_JSON_INFO)
-
-$(TEST): $(OBJS)
-	$(CC) -o $@ $^ $(LDFLAGS) $(FPGA_LIBS)
-
-$(TEST)_ase: $(OBJS)
-	$(CC) -o $@ $^ $(LDFLAGS) $(ASE_LIBS)
-
-$(OBJDIR)/%.o: %.c | objdir
-	$(CC) $(CFLAGS) -c $< -o $@
-
-clean:
-	rm -rf $(TEST) $(TEST)_ase $(OBJDIR)
-
-objdir:
-	@mkdir -p $(OBJDIR)
-
-.PHONY: all clean
--- a/driver/tests/dogfood/Memcpy/sw/cci_hello.c
+++ b/driver/tests/dogfood/Memcpy/sw/cci_hello.c
@ -1,210 +0,0 @@
-//
-// Copyright (c) 2017, Intel Corporation
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// Redistributions of source code must retain the above copyright notice, this
-// list of conditions and the following disclaimer.
-//
-// Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// Neither the name of the Intel Corporation nor the names of its contributors
-// may be used to endorse or promote products derived from this software
-// without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <assert.h>
-#include <uuid/uuid.h>
-
-#include <opae/fpga.h>
-
-// State from the AFU's JSON file, extracted using OPAE's afu_json_mgr script
-#include "afu_json_info.h"
-
-#define CACHELINE_BYTES 64
-#define CL(x) ((x) * CACHELINE_BYTES)
-
-
-//
-// Search for an accelerator matching the requested UUID and connect to it.
-//
-static fpga_handle connect_to_accel(const char *accel_uuid)
-{
-    fpga_properties filter = NULL;
-    fpga_guid guid;
-    fpga_token accel_token;
-    uint32_t num_matches;
-    fpga_handle accel_handle;
-    fpga_result r;
-
-    // Don't print verbose messages in ASE by default
-    //setenv("ASE_LOG", "0", 0);
-
-    // Set up a filter that will search for an accelerator
-    fpgaGetProperties(NULL, &filter);
-    fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR);
-
-    // Add the desired UUID to the filter
-    uuid_parse(accel_uuid, guid);
-    fpgaPropertiesSetGUID(filter, guid);
-
-    // Do the search across the available FPGA contexts
-    num_matches = 1;
-    fpgaEnumerate(&filter, 1, &accel_token, 1, &num_matches);
-
-    // Not needed anymore
-    fpgaDestroyProperties(&filter);
-
-    if (num_matches < 1)
-    {
-        fprintf(stderr, "Accelerator %s not found!\n", accel_uuid);
-        return 0;
-    }
-
-    // Open accelerator
-    r = fpgaOpen(accel_token, &accel_handle, 0);
-    assert(FPGA_OK == r);
-
-    // Done with token
-    fpgaDestroyToken(&accel_token);
-
-    return accel_handle;
-}
-
-
-//
-// Allocate a buffer in I/O memory, shared with the FPGA.
-//
-static volatile void* alloc_buffer(fpga_handle accel_handle,
-                                   ssize_t size,
-                                   uint64_t *wsid,
-                                   uint64_t *io_addr)
-{
-    fpga_result r;
-    volatile void* buf;
-
-    r = fpgaPrepareBuffer(accel_handle, size, (void*)&buf, wsid, 0);
-    if (FPGA_OK != r) return NULL;
-
-    // Get the physical address of the buffer in the accelerator
-    r = fpgaGetIOAddress(accel_handle, *wsid, io_addr);
-    assert(FPGA_OK == r);
-
-    return buf;
-}
-
-
-int main(int argc, char *argv[])
-{
-    fpga_handle accel_handle;
-    volatile char *buf;
-    volatile char *buf_r;
-    uint64_t wsid1;
-    uint64_t wsid2;
-    uint64_t buf_pa;
-    uint64_t ret_buf_pa;
-    uint64_t buf_rpa;
-    uint64_t ret_buf_rpa;
-    fpga_result r;
-
-    // Find and connect to the accelerator
-    accel_handle = connect_to_accel(AFU_ACCEL_UUID);
-
-    // Allocate a single page memory buffer for write
-    buf = (volatile char*)alloc_buffer(accel_handle, 4 * getpagesize(),
-                                       &wsid1, &buf_pa);
-    // Allocate a single page memory buffer for read
-    buf_r = (volatile char*)alloc_buffer(accel_handle, 4 * getpagesize(),
-                                       &wsid2, &buf_rpa);
-    assert(NULL != buf);
-
-    //// Set the low byte of the shared buffer to 0.  The FPGA will write
-    //// a non-zero value to it.
-    //buf[0] = 0;
-
-    // Set the low byte of the shared buffer buf_r to 0.  The FPGA will read
-    // the values and write to buf address 
-    buf[0] = 5;
-    buf_r[0] = 5;
-
-    // Tell the accelerator the address of the buffer using cache line
-    // addresses.  The accelerator will respond by writing to the buffer.
-    r = fpgaWriteMMIO64(accel_handle, 0, 0, buf_pa / CL(1));
-    printf("Write address is %08lx\n", buf_pa);
-    printf("Write address div 64 is %08lx\n", buf_pa/ CL(1));
-    assert(FPGA_OK == r);
-
-    // Wait for response from FPGA. Check using fpgaReadMMIO
-    //r = fpgaReadMMIO64(accel_handle, 0, 0, &ret_buf_pa);
-    //printf("Returned write is %08lx\n", ret_buf_pa);
-    //assert(FPGA_OK == r);
-
-///////////////////// Added to check fpgaRead
-    // Wait for response from FPGA. Check using fpgaReadMMIO
-    r = fpgaReadMMIO64(accel_handle, 0, 5 * sizeof(uint64_t), &ret_buf_rpa);
-    printf("Returned read at 10 is %08lx\n", ret_buf_rpa);
-    assert(FPGA_OK == r);
-///////////////////////////////////////////////
-
-
-    // Tell the accelerator the address of the buffer using cache line
-    // addresses.  The accelerator will read from the buffer.
-    // Write the address to MMIO 1
-    r = fpgaWriteMMIO64(accel_handle, 0, sizeof(uint64_t), buf_rpa / CL(1));
-    printf("Read address is %08lx\n", buf_rpa);
-    printf("Read address div64 is %08lx\n", buf_rpa / CL(1));
-    assert(FPGA_OK == r);
-
-    // Wait for response from FPGA. Check using fpgaReadMMIO
-    //r = fpgaReadMMIO64(accel_handle, 0, sizeof(uint64_t), &ret_buf_rpa);
-    //printf("Returned write is %08lx\n", ret_buf_rpa);
-    //assert(FPGA_OK == r);
-
-
-
-
-
-
-
-
-    // Update this
-    // Spin, waiting for the value in memory to change to something non-zero.
-    while (5 == buf[0])
-    {
-        // A well-behaved program would use _mm_pause(), nanosleep() or
-        // equivalent to save power here.
-    };
-
-    // Print the string written by the FPGA
-    printf("%d\n", buf[0]);
-
-    do {
-        //printf("%d\n", buf[0]);
-    } while (10 != buf[0]);
-
-    // Done
-    fpgaReleaseBuffer(accel_handle, wsid1);
-    fpgaReleaseBuffer(accel_handle, wsid2);
-    fpgaClose(accel_handle);
-
-    return 0;
-}
--- a/driver/tests/dogfood/Memcpy/sw/obj/afu_json_info.h
+++ b/driver/tests/dogfood/Memcpy/sw/obj/afu_json_info.h
@ -1,13 +0,0 @@
-//
-// Generated by afu_json_mgr from ../hw/rtl/cci_hello.json
-//
-
-#ifndef __AFU_JSON_INFO__
-#define __AFU_JSON_INFO__
-
-#define AFU_ACCEL_NAME "cci_hello"
-#define AFU_ACCEL_UUID "C6AA954A-9B91-4A37-ABC1-1D9F0709DCC3"
-#define AFU_IMAGE_POWER 0
-#define AFU_TOP_IFC "ccip_std_afu"
-
-#endif // __AFU_JSON_INFO__
--- a/driver/tests/dogfood/Memcpy/sw/obj/cci_hello.o
+++ b/driver/tests/dogfood/Memcpy/sw/obj/cci_hello.o
--- a/driver/tests/dogfood/common.h
+++ b/driver/tests/dogfood/common.h
@ -0,0 +1,14 @@
+#ifndef _COMMON_H_
+#define _COMMON_H_
+
+// Definitions shared by the host driver (dogfood.cpp) and the device kernel
+// (kernel.c); both include this header, so it must stay valid C and C++.
+
+#include <stdint.h>  // uint32_t; included here so the header is self-contained
+
+// Device address the host copies the kernel_arg_t block to before each run.
+#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
+
+// Argument block describing one test run.
+struct kernel_arg_t {
+  uint32_t testid;    // id of the test being run
+  uint32_t count;     // number of elements each thread processes
+  uint32_t src0_ptr;  // device address of the first source buffer
+  uint32_t src1_ptr;  // device address of the second source buffer
+  uint32_t dst_ptr;   // device address of the destination buffer
+};
+
+#endif
--- a/driver/tests/dogfood/dogfood.cpp
+++ b/driver/tests/dogfood/dogfood.cpp
@ -0,0 +1,264 @@
+#include <iostream>
+#include <vector>
+#include <unistd.h>
+#include <string.h>
+#include <vortex.h>
+#include "testcases.h"
+#include "common.h"
+
+// Evaluates a runtime call exactly once. When the result is non-zero it
+// prints the failing expression together with its return code, releases the
+// device resources through cleanup(), and terminates with exit(-1).
+// cleanup() must be declared before the point where the macro is expanded.
+#define RT_CHECK(_expr)                                         \
+   do {                                                         \
+     int _ret = (_expr);                                        \
+     if (0 == _ret)                                             \
+       break;                                                   \
+     printf("Error: '%s' returned %d!\n", #_expr, (int)_ret);   \
+     cleanup();                                                 \
+     exit(-1);                                                  \
+   } while (false)
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Registry of the available dogfood test cases. A test's id is its index in
+// registration order; names and test objects are kept in two parallel
+// vectors. The manager owns the registered ITestCase objects and deletes
+// them when it is destroyed.
+class TestMngr {
+public:
+  TestMngr() {
+    this->add_test("iadd", new Test_IADD());
+    this->add_test("imul", new Test_IMUL());
+    this->add_test("idiv", new Test_IDIV());
+    this->add_test("idiv-mul", new Test_IDIV_MUL());
+    this->add_test("fadd", new Test_FADD());
+    this->add_test("fsub", new Test_FSUB());
+    this->add_test("fmul", new Test_FMUL());
+    this->add_test("fmadd", new Test_FMADD());
+    this->add_test("fmsub", new Test_FMSUB());
+    this->add_test("fnmadd", new Test_FNMADD());
+    this->add_test("fnmsub", new Test_FNMSUB());
+    this->add_test("fnmadd-madd", new Test_FNMADD_MADD());
+    this->add_test("fdiv", new Test_FDIV());
+    this->add_test("fdiv2", new Test_FDIV2());
+    this->add_test("fsqrt", new Test_FSQRT());
+    this->add_test("ftoi", new Test_FTOI());
+    this->add_test("ftou", new Test_FTOU());
+    this->add_test("itof", new Test_ITOF());  // name now matches the test class
+    this->add_test("utof", new Test_UTOF());
+  }
+
+  ~TestMngr() {
+    for (ITestCase* test : _tests) {
+      delete test;
+    }
+  }
+
+  // The manager holds owning raw pointers, so a copy would delete every
+  // registered test twice.
+  TestMngr(const TestMngr&) = delete;
+  TestMngr& operator=(const TestMngr&) = delete;
+
+  // Returns the name registered under testid. std::vector::at throws
+  // std::out_of_range when testid is not a valid index.
+  const std::string& get_name(int testid) const {
+    return _names.at(testid);
+  }
+
+  // Returns the test case registered under testid (still owned by the
+  // manager). std::vector::at throws std::out_of_range on an invalid index.
+  ITestCase* get_test(int testid) const {
+    return _tests.at(testid);
+  }
+
+  // Appends a test; the manager takes ownership of `test`.
+  void add_test(const char* name, ITestCase* test) {
+    _names.push_back(name);
+    _tests.push_back(test);
+  }
+
+  // Number of registered tests.
+  size_t size() const {
+    return _tests.size();
+  }
+
+private:
+  std::vector<std::string> _names;  // _names[i] is the name of _tests[i]
+  std::vector<ITestCase*> _tests;   // owned; released in the destructor
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Test registry. Declared first because testid_e below reads its size
+// during static initialization.
+TestMngr testMngr;
+// Kernel image uploaded to the device; overridden by -k.
+const char* kernel_file = "kernel.bin";
+// Value of -n; main() falls back to 1 when this is left at 0.
+int count    = 0;
+// First and last (inclusive) test ids to run; overridden by -s and -e.
+int testid_s = 0;
+int testid_e = (testMngr.size() - 1);
+
+// Runtime handles. They are file-level so cleanup() can release whatever has
+// been acquired so far; nullptr means "not acquired".
+// Note: src1_buf and src2_buf are the host-side buffers uploaded to
+// kernel_arg.src0_ptr and kernel_arg.src1_ptr respectively.
+vx_device_h device   = nullptr;
+vx_buffer_h arg_buf  = nullptr;
+vx_buffer_h src1_buf = nullptr;
+vx_buffer_h src2_buf = nullptr;
+vx_buffer_h dst_buf  = nullptr;
+
+// Writes the two-line command-line synopsis to stdout.
+static void show_usage() {
+  std::cout << "Vortex Driver Test." << std::endl
+            << "Usage: [-s:testid] [-e:testid] [-k: kernel] [-n words] [-h: help]" << std::endl;
+}
+
+// Parses the command line with getopt() and stores the results in the
+// file-level settings:
+//   -n <count>   -> count
+//   -s <testid>  -> testid_s
+//   -e <testid>  -> testid_e
+//   -k <file>    -> kernel_file
+// 'h' and '?' print the usage text and exit with status 0; any other value
+// returned by getopt() prints the usage text and exits with status -1.
+static void parse_args(int argc, char **argv) {
+  for (;;) {
+    const int opt = getopt(argc, argv, "n:s:e:k:h?");
+    if (opt == -1) {
+      return;
+    }
+
+    if (opt == 'n') {
+      count = atoi(optarg);
+    } else if (opt == 's') {
+      testid_s = atoi(optarg);
+    } else if (opt == 'e') {
+      testid_e = atoi(optarg);
+    } else if (opt == 'k') {
+      kernel_file = optarg;
+    } else if (opt == 'h' || opt == '?') {
+      show_usage();
+      exit(0);
+    } else {
+      show_usage();
+      exit(-1);
+    }
+  }
+}
+
+// Releases every buffer that has been allocated (argument, both sources,
+// destination, in that order) and then closes the device if it is open.
+// Handles that are still nullptr are skipped.
+void cleanup() {
+  vx_buffer_h buffers[] = {arg_buf, src1_buf, src2_buf, dst_buf};
+  for (vx_buffer_h buffer : buffers) {
+    if (buffer) {
+      vx_buf_release(buffer);
+    }
+  }
+
+  if (device) {
+    vx_dev_close(device);
+  }
+}
+
+// Runs tests testid_s..testid_e (inclusive) on the device. The kernel is
+// uploaded and all device/shared buffers are allocated once; then, for each
+// test, the argument block and inputs are uploaded, the device is started,
+// and the downloaded result is checked by the test case.
+// Returns 0 when every test passes. Exits with status 1 on the first
+// verification failure and with -1 on a runtime error (RT_CHECK) or an
+// out-of-range test id.
+int main(int argc, char *argv[]) {
+  size_t value;
+  kernel_arg_t kernel_arg;
+
+  // parse command arguments
+  parse_args(argc, argv);
+
+  // fall back to one element per thread when -n is missing, zero or negative
+  if (count < 1) {
+    count = 1;
+  }
+
+  // reject ids outside the registry before touching the device; the lookups
+  // in TestMngr use std::vector::at and would otherwise throw
+  const int num_tests = (int)testMngr.size();
+  if (testid_s < 0 || testid_e >= num_tests) {
+    std::cout << "Error: test ids must be in the range 0 - " << (num_tests - 1) << "!" << std::endl;
+    exit(-1);
+  }
+
+  std::cout << "test ids: " << testid_s << " - " << testid_e << std::endl;
+  std::cout << "workitem size: " << count << std::endl;
+  std::cout << "using kernel: " << kernel_file << std::endl;
+
+  // open device connection
+  std::cout << "open device connection" << std::endl;
+  RT_CHECK(vx_dev_open(&device));
+
+  unsigned max_cores, max_warps, max_threads;
+  RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
+  RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
+  RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));
+
+  // one element per (core, warp, thread), repeated `count` times
+  int num_points = count * max_cores * max_warps * max_threads;
+  size_t buf_size = num_points * sizeof(uint32_t);
+
+  std::cout << "number of points: " << num_points << std::endl;
+  std::cout << "buffer size: " << buf_size << " bytes" << std::endl;
+
+  // upload program
+  std::cout << "upload kernel" << std::endl;
+  RT_CHECK(vx_upload_kernel_file(device, kernel_file));
+
+  // allocate device memory
+  std::cout << "allocate device memory" << std::endl;
+
+  RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
+  kernel_arg.src0_ptr = value;
+  RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
+  kernel_arg.src1_ptr = value;
+  RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
+  kernel_arg.dst_ptr = value;
+
+  kernel_arg.count = count;
+
+  // std::hex is sticky: switch back to decimal so the test ids and error
+  // counts printed later are not shown in hexadecimal
+  std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::dec << std::endl;
+  std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::dec << std::endl;
+  std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::dec << std::endl;
+
+  // allocate shared memory
+  std::cout << "allocate shared memory" << std::endl;
+  RT_CHECK(vx_alloc_shared_mem(device, sizeof(kernel_arg_t), &arg_buf));
+  RT_CHECK(vx_alloc_shared_mem(device, buf_size, &src1_buf));
+  RT_CHECK(vx_alloc_shared_mem(device, buf_size, &src2_buf));
+  RT_CHECK(vx_alloc_shared_mem(device, buf_size, &dst_buf));
+
+  for (int t = testid_s; t <= testid_e; ++t) {
+    const auto& name = testMngr.get_name(t);
+    auto test = testMngr.get_test(t);
+
+    std::cout << "Test" << t << ": " << name << std::endl;
+
+    // upload kernel argument
+    std::cout << "upload kernel argument" << std::endl;
+    kernel_arg.testid = t;
+    memcpy((void*)vx_host_ptr(arg_buf), &kernel_arg, sizeof(kernel_arg_t));
+    RT_CHECK(vx_copy_to_dev(arg_buf, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
+
+    // get test arguments
+    std::cout << "get test arguments" << std::endl;
+    test->setup(num_points, (void*)vx_host_ptr(src1_buf), (void*)vx_host_ptr(src2_buf));
+
+    // upload source buffer0
+    std::cout << "upload source buffer0" << std::endl;
+    RT_CHECK(vx_copy_to_dev(src1_buf, kernel_arg.src0_ptr, buf_size, 0));
+
+    // upload source buffer1
+    std::cout << "upload source buffer1" << std::endl;
+    RT_CHECK(vx_copy_to_dev(src2_buf, kernel_arg.src1_ptr, buf_size, 0));
+
+    // clear destination buffer with a sentinel so stale results are detectable
+    std::cout << "clear destination buffer" << std::endl;
+    for (int i = 0; i < num_points; ++i) {
+      ((uint32_t*)vx_host_ptr(dst_buf))[i] = 0xdeadbeef;
+    }
+    RT_CHECK(vx_copy_to_dev(dst_buf, kernel_arg.dst_ptr, buf_size, 0));
+
+    // start device
+    std::cout << "start device" << std::endl;
+    RT_CHECK(vx_start(device));
+
+    // wait for completion
+    std::cout << "wait for completion" << std::endl;
+    RT_CHECK(vx_ready_wait(device, -1));
+
+    // flush the destination buffer caches
+    std::cout << "flush the destination buffer caches" << std::endl;
+    RT_CHECK(vx_flush_caches(device, kernel_arg.dst_ptr, buf_size));
+
+    // download destination buffer
+    std::cout << "download destination buffer" << std::endl;
+    RT_CHECK(vx_copy_from_dev(dst_buf, kernel_arg.dst_ptr, buf_size, 0));
+
+    // verify destination
+    std::cout << "verify test result" << std::endl;
+    int errors = test->verify(num_points,
+                              (void*)vx_host_ptr(dst_buf),
+                              (void*)vx_host_ptr(src1_buf),
+                              (void*)vx_host_ptr(src2_buf));
+    if (errors != 0) {
+      std::cout << "found " << errors << " errors!" << std::endl;
+      std::cout << "FAILED!" << std::endl << std::flush;
+      cleanup();
+      exit(1);
+    }
+    std::cout << "PASSED!" << std::endl << std::flush;
+  }
+
+  // cleanup
+  std::cout << "cleanup" << std::endl;
+  cleanup();
+
+  return 0;
+}
--- a/driver/tests/dogfood/kernel.c
+++ b/driver/tests/dogfood/kernel.c
@ -0,0 +1,354 @@
+#include <stdint.h>
+#include <math.h>
+#include <vx_intrinsics.h>
+#include <vx_spawn.h>
+#include "common.h"
+
+// Signature shared by the test kernels in this file: each takes an opaque
+// pointer and casts it to struct kernel_arg_t*.
+typedef void (*PFN_Kernel)(void* arg);
+
+// 32-bit integer addition: dst[i] = src0[i] + src1[i].
+// Each thread handles `count` consecutive elements starting at index
+// vx_thread_gid() * count.
+void kernel_iadd(void* arg) {
+	const struct kernel_arg_t* params = (const struct kernel_arg_t*)arg;
+	const uint32_t n   = params->count;
+	const int32_t* lhs = (const int32_t*)params->src0_ptr;
+	const int32_t* rhs = (const int32_t*)params->src1_ptr;
+	int32_t* out       = (int32_t*)params->dst_ptr;
+	const uint32_t first = vx_thread_gid() * n;
+	const uint32_t last  = first + n;
+
+	for (uint32_t idx = first; idx != last; ++idx) {
+		const int32_t x = lhs[idx];
+		const int32_t y = rhs[idx];
+		out[idx] = x + y;
+	}
+}
+
+// 32-bit integer multiplication: dst[i] = src0[i] * src1[i].
+// Each thread handles `count` consecutive elements starting at index
+// vx_thread_gid() * count.
+void kernel_imul(void* arg) {
+	const struct kernel_arg_t* params = (const struct kernel_arg_t*)arg;
+	const uint32_t n   = params->count;
+	const int32_t* lhs = (const int32_t*)params->src0_ptr;
+	const int32_t* rhs = (const int32_t*)params->src1_ptr;
+	int32_t* out       = (int32_t*)params->dst_ptr;
+	const uint32_t first = vx_thread_gid() * n;
+	const uint32_t last  = first + n;
+
+	for (uint32_t idx = first; idx != last; ++idx) {
+		const int32_t x = lhs[idx];
+		const int32_t y = rhs[idx];
+		out[idx] = x * y;
+	}
+}
+
+// 32-bit signed integer division: dst[i] = src0[i] / src1[i].
+// Each thread handles `count` consecutive elements starting at index
+// vx_thread_gid() * count. No zero-divisor check is performed here.
+void kernel_idiv(void* arg) {
+	const struct kernel_arg_t* params = (const struct kernel_arg_t*)arg;
+	const uint32_t n   = params->count;
+	const int32_t* lhs = (const int32_t*)params->src0_ptr;
+	const int32_t* rhs = (const int32_t*)params->src1_ptr;
+	int32_t* out       = (int32_t*)params->dst_ptr;
+	const uint32_t first = vx_thread_gid() * n;
+	const uint32_t last  = first + n;
+
+	for (uint32_t idx = first; idx != last; ++idx) {
+		const int32_t x = lhs[idx];
+		const int32_t y = rhs[idx];
+		out[idx] = x / y;
+	}
+}
+
+// Combined integer divide and multiply:
+// dst[i] = (src0[i] / src1[i]) + (src0[i] * src1[i]).
+// Each thread handles `count` consecutive elements starting at index
+// vx_thread_gid() * count. No zero-divisor check is performed here.
+void kernel_idiv_mul(void* arg) {
+	const struct kernel_arg_t* params = (const struct kernel_arg_t*)arg;
+	const uint32_t n   = params->count;
+	const int32_t* lhs = (const int32_t*)params->src0_ptr;
+	const int32_t* rhs = (const int32_t*)params->src1_ptr;
+	int32_t* out       = (int32_t*)params->dst_ptr;
+	const uint32_t first = vx_thread_gid() * n;
+	const uint32_t last  = first + n;
+
+	for (uint32_t idx = first; idx != last; ++idx) {
+		const int32_t x = lhs[idx];
+		const int32_t y = rhs[idx];
+		const int32_t quotient = x / y;
+		const int32_t product  = x * y;
+		out[idx] = quotient + product;
+	}
+}
+
+// Single-precision addition: dst[i] = src0[i] + src1[i].
+// Each thread handles `count` consecutive elements starting at index
+// vx_thread_gid() * count.
+void kernel_fadd(void* arg) {
+	const struct kernel_arg_t* params = (const struct kernel_arg_t*)arg;
+	const uint32_t n = params->count;
+	const float* lhs = (const float*)params->src0_ptr;
+	const float* rhs = (const float*)params->src1_ptr;
+	float* out       = (float*)params->dst_ptr;
+	const uint32_t first = vx_thread_gid() * n;
+	const uint32_t last  = first + n;
+
+	for (uint32_t idx = first; idx != last; ++idx) {
+		const float x = lhs[idx];
+		const float y = rhs[idx];
+		out[idx] = x + y;
+	}
+}
+
+// Single-precision subtraction: dst[i] = src0[i] - src1[i].
+// Each thread handles `count` consecutive elements starting at index
+// vx_thread_gid() * count.
+void kernel_fsub(void* arg) {
+	const struct kernel_arg_t* params = (const struct kernel_arg_t*)arg;
+	const uint32_t n = params->count;
+	const float* lhs = (const float*)params->src0_ptr;
+	const float* rhs = (const float*)params->src1_ptr;
+	float* out       = (float*)params->dst_ptr;
+	const uint32_t first = vx_thread_gid() * n;
+	const uint32_t last  = first + n;
+
+	for (uint32_t idx = first; idx != last; ++idx) {
+		const float x = lhs[idx];
+		const float y = rhs[idx];
+		out[idx] = x - y;
+	}
+}
+
+// Single-precision multiplication: dst[i] = src0[i] * src1[i].
+// Each thread handles `count` consecutive elements starting at index
+// vx_thread_gid() * count.
+void kernel_fmul(void* arg) {
+	const struct kernel_arg_t* params = (const struct kernel_arg_t*)arg;
+	const uint32_t n = params->count;
+	const float* lhs = (const float*)params->src0_ptr;
+	const float* rhs = (const float*)params->src1_ptr;
+	float* out       = (float*)params->dst_ptr;
+	const uint32_t first = vx_thread_gid() * n;
+	const uint32_t last  = first + n;
+
+	for (uint32_t idx = first; idx != last; ++idx) {
+		const float x = lhs[idx];
+		const float y = rhs[idx];
+		out[idx] = x * y;
+	}
+}
+
+// Multiply-add pattern: dst[i] = src0[i] * 0.5f + src1[i].
+// Each thread handles `count` consecutive elements starting at index
+// vx_thread_gid() * count. The arithmetic is kept as a single expression.
+void kernel_fmadd(void* arg) {
+	const struct kernel_arg_t* params = (const struct kernel_arg_t*)arg;
+	const uint32_t n = params->count;
+	const float* lhs = (const float*)params->src0_ptr;
+	const float* rhs = (const float*)params->src1_ptr;
+	float* out       = (float*)params->dst_ptr;
+	const uint32_t first = vx_thread_gid() * n;
+	const uint32_t last  = first + n;
+
+	for (uint32_t idx = first; idx != last; ++idx) {
+		const float x = lhs[idx];
+		const float y = rhs[idx];
+		out[idx] = x * 0.5f + y;
+	}
+}
+
+// Multiply-subtract pattern: dst[i] = src0[i] * 0.5f - src1[i].
+// Each thread handles `count` consecutive elements starting at index
+// vx_thread_gid() * count. The arithmetic is kept as a single expression.
+void kernel_fmsub(void* arg) {
+	const struct kernel_arg_t* params = (const struct kernel_arg_t*)arg;
+	const uint32_t n = params->count;
+	const float* lhs = (const float*)params->src0_ptr;
+	const float* rhs = (const float*)params->src1_ptr;
+	float* out       = (float*)params->dst_ptr;
+	const uint32_t first = vx_thread_gid() * n;
+	const uint32_t last  = first + n;
+
+	for (uint32_t idx = first; idx != last; ++idx) {
+		const float x = lhs[idx];
+		const float y = rhs[idx];
+		out[idx] = x * 0.5f - y;
+	}
+}
+
+// Negated multiply-add pattern: dst[i] = -src0[i] * 0.5f - src1[i].
+// Each thread handles `count` consecutive elements starting at index
+// vx_thread_gid() * count. The arithmetic is kept as a single expression.
+void kernel_fnmadd(void* arg) {
+	const struct kernel_arg_t* params = (const struct kernel_arg_t*)arg;
+	const uint32_t n = params->count;
+	const float* lhs = (const float*)params->src0_ptr;
+	const float* rhs = (const float*)params->src1_ptr;
+	float* out       = (float*)params->dst_ptr;
+	const uint32_t first = vx_thread_gid() * n;
+	const uint32_t last  = first + n;
+
+	for (uint32_t idx = first; idx != last; ++idx) {
+		const float x = lhs[idx];
+		const float y = rhs[idx];
+		out[idx] = -x * 0.5f - y;
+	}
+}
+
+// Negated multiply-subtract pattern: dst[i] = -src0[i] * 0.5f + src1[i].
+// Each thread handles `count` consecutive elements starting at index
+// vx_thread_gid() * count. The arithmetic is kept as a single expression.
+void kernel_fnmsub(void* arg) {
+	const struct kernel_arg_t* params = (const struct kernel_arg_t*)arg;
+	const uint32_t n = params->count;
+	const float* lhs = (const float*)params->src0_ptr;
+	const float* rhs = (const float*)params->src1_ptr;
+	float* out       = (float*)params->dst_ptr;
+	const uint32_t first = vx_thread_gid() * n;
+	const uint32_t last  = first + n;
+
+	for (uint32_t idx = first; idx != last; ++idx) {
+		const float x = lhs[idx];
+		const float y = rhs[idx];
+		out[idx] = -x * 0.5f + y;
+	}
+}
+
+// Sum of a negated multiply-add and a multiply-add on the same operands:
+// dst[i] = (-src0[i] * 0.25f - src1[i]) + (src0[i] * 0.25f + src1[i]).
+// Each thread handles `count` consecutive elements starting at index
+// vx_thread_gid() * count.
+void kernel_fnmadd_madd(void* arg) {
+	const struct kernel_arg_t* params = (const struct kernel_arg_t*)arg;
+	const uint32_t n = params->count;
+	const float* lhs = (const float*)params->src0_ptr;
+	const float* rhs = (const float*)params->src1_ptr;
+	float* out       = (float*)params->dst_ptr;
+	const uint32_t first = vx_thread_gid() * n;
+	const uint32_t last  = first + n;
+
+	for (uint32_t idx = first; idx != last; ++idx) {
+		const float x = lhs[idx];
+		const float y = rhs[idx];
+		const float negated = -x * 0.25f - y;
+		const float plain   = x * 0.25f + y;
+		out[idx] = negated + plain;
+	}
+}
+
+// Float division test kernel: for every element of the calling thread's
+// slice, stores src0 / src1 into dst.
+// arg points to a struct kernel_arg_t supplying count and the three buffers.
+void kernel_fdiv(void* arg) {
+	struct kernel_arg_t* params = (struct kernel_arg_t*)arg;
+	uint32_t n = params->count;
+	float* numer = (float*)params->src0_ptr;
+	float* denom = (float*)params->src1_ptr;
+	float* out   = (float*)params->dst_ptr;
+	// The slice is the n consecutive elements starting at gid * n.
+	uint32_t first = vx_thread_gid() * n;
+	uint32_t last  = first + n;
+
+	for (uint32_t idx = first; idx != last; ++idx) {
+		float x = numer[idx];
+		float y = denom[idx];
+		out[idx] = x / y;
+	}
+}
+
+// Double float-division test kernel: for every element of the calling
+// thread's slice, stores (src0 / src1) + (src1 / src0) into dst.
+// arg points to a struct kernel_arg_t supplying count and the three buffers.
+void kernel_fdiv2(void* arg) {
+	struct kernel_arg_t* params = (struct kernel_arg_t*)arg;
+	uint32_t n = params->count;
+	float* lhs = (float*)params->src0_ptr;
+	float* rhs = (float*)params->src1_ptr;
+	float* out = (float*)params->dst_ptr;
+	// The slice is the n consecutive elements starting at gid * n.
+	uint32_t first = vx_thread_gid() * n;
+	uint32_t last  = first + n;
+
+	for (uint32_t idx = first; idx != last; ++idx) {
+		float x = lhs[idx];
+		float y = rhs[idx];
+		float forward = x / y;
+		float inverse = y / x;
+		float total = forward + inverse;
+		out[idx] = total;
+	}
+}
+
+// Square-root test kernel: for every element of the calling thread's slice,
+// stores sqrtf(src0) + src1 into dst.
+// arg points to a struct kernel_arg_t supplying count and the three buffers.
+void kernel_fsqrt(void* arg) {
+	struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
+	uint32_t count  = _arg->count;
+	float* src0_ptr = (float*)_arg->src0_ptr;
+	float* src1_ptr = (float*)_arg->src1_ptr;
+	float* dst_ptr  = (float*)_arg->dst_ptr;
+	// Each thread owns `count` consecutive elements starting at gid * count.
+	uint32_t offset = vx_thread_gid() * count;
+
+	for (uint32_t i = 0; i < count; ++i) {
+		float a = src0_ptr[offset+i];
+		float b = src1_ptr[offset+i];
+		// sqrtf keeps the whole expression in single precision. In C,
+		// sqrt() takes and returns double, which would promote both the
+		// square root and the addition to double before the final
+		// narrowing store, so the result could round differently from a
+		// single-precision reference.
+		float c = sqrtf(a) + b;
+		dst_ptr[offset+i] = c;
+	}
+}
+
+// Float-to-signed-integer test kernel: for every element of the calling
+// thread's slice, stores (int32_t)(src0 + src1) into dst.
+// arg points to a struct kernel_arg_t supplying count and the three buffers;
+// dst is written as int32_t.
+void kernel_ftoi(void* arg) {
+	struct kernel_arg_t* params = (struct kernel_arg_t*)arg;
+	uint32_t n = params->count;
+	float* lhs = (float*)params->src0_ptr;
+	float* rhs = (float*)params->src1_ptr;
+	int32_t* out = (int32_t*)params->dst_ptr;
+	// The slice is the n consecutive elements starting at gid * n.
+	uint32_t first = vx_thread_gid() * n;
+	uint32_t last  = first + n;
+
+	for (uint32_t idx = first; idx != last; ++idx) {
+		float x = lhs[idx];
+		float y = rhs[idx];
+		float sum = x + y;
+		out[idx] = (int32_t)sum;
+	}
+}
+
+// Float-to-unsigned-integer test kernel: for every element of the calling
+// thread's slice, stores (uint32_t)(src0 + src1) into dst.
+// arg points to a struct kernel_arg_t supplying count and the three buffers;
+// dst is written as uint32_t.
+void kernel_ftou(void* arg) {
+	struct kernel_arg_t* params = (struct kernel_arg_t*)arg;
+	uint32_t n = params->count;
+	float* lhs = (float*)params->src0_ptr;
+	float* rhs = (float*)params->src1_ptr;
+	uint32_t* out = (uint32_t*)params->dst_ptr;
+	// The slice is the n consecutive elements starting at gid * n.
+	uint32_t first = vx_thread_gid() * n;
+	uint32_t last  = first + n;
+
+	for (uint32_t idx = first; idx != last; ++idx) {
+		float x = lhs[idx];
+		float y = rhs[idx];
+		float sum = x + y;
+		out[idx] = (uint32_t)sum;
+	}
+}
+
+// Signed-integer-to-float test kernel: for every element of the calling
+// thread's slice, stores (float)(src0 + src1) into dst, with the sum taken
+// in int32_t.
+// arg points to a struct kernel_arg_t supplying count and the three buffers.
+void kernel_itof(void* arg) {
+	struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
+	uint32_t count  = _arg->count;
+	// The inputs hold int32_t values (Test_ITOF::setup in testcases.h writes
+	// them as int32_t), so they must be read as integers. Reading them
+	// through float* would reinterpret the integer bit patterns as floats.
+	int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
+	int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
+	float* dst_ptr  = (float*)_arg->dst_ptr;
+	// Each thread owns `count` consecutive elements starting at gid * count.
+	uint32_t offset = vx_thread_gid() * count;
+
+	for (uint32_t i = 0; i < count; ++i) {
+		int32_t a = src0_ptr[offset+i];
+		int32_t b = src1_ptr[offset+i];
+		int32_t c = a + b;
+		float d = (float)c;
+		dst_ptr[offset+i] = d;
+	}
+}
+
+// Unsigned-integer-to-float test kernel: for every element of the calling
+// thread's slice, stores (float)(src0 + src1) into dst, with the sum taken
+// in uint32_t (wrapping on overflow).
+// arg points to a struct kernel_arg_t supplying count and the three buffers.
+void kernel_utof(void* arg) {
+	struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
+	uint32_t count  = _arg->count;
+	// The inputs hold uint32_t values (Test_UTOF::setup in testcases.h writes
+	// them as uint32_t), so they must be read as integers. Reading them
+	// through float* would reinterpret the integer bit patterns as floats.
+	uint32_t* src0_ptr = (uint32_t*)_arg->src0_ptr;
+	uint32_t* src1_ptr = (uint32_t*)_arg->src1_ptr;
+	float* dst_ptr  = (float*)_arg->dst_ptr;
+	// Each thread owns `count` consecutive elements starting at gid * count.
+	uint32_t offset = vx_thread_gid() * count;
+
+	for (uint32_t i = 0; i < count; ++i) {
+		uint32_t a = src0_ptr[offset+i];
+		uint32_t b = src1_ptr[offset+i];
+		uint32_t c = a + b;
+		float d = (float)c;
+		dst_ptr[offset+i] = d;
+	}
+}
+
+// Table of test kernels. main() selects an entry with arg->testid, so the
+// position of each kernel in this list is its test id.
+// NOTE(review): the host side presumably numbers its test cases in this same
+// order; confirm before reordering or inserting entries.
+static const PFN_Kernel sc_tests[] = {
+	kernel_iadd,         // 0
+	kernel_imul,         // 1
+	kernel_idiv,         // 2
+	kernel_idiv_mul,     // 3
+	kernel_fadd,         // 4
+	kernel_fsub,         // 5
+	kernel_fmul,         // 6
+	kernel_fmadd,        // 7
+	kernel_fmsub,        // 8
+	kernel_fnmadd,	     // 9
+	kernel_fnmsub,       // 10
+	kernel_fnmadd_madd,  // 11
+	kernel_fdiv,         // 12
+	kernel_fdiv2,        // 13
+	kernel_fsqrt,        // 14
+	kernel_ftoi,         // 15
+	kernel_ftou,         // 16
+	kernel_itof,         // 17
+	kernel_utof,         // 18
+};
+
+// Program entry point: reads the argument block located at
+// KERNEL_ARG_DEV_MEM_ADDR and launches the kernel selected by arg->testid
+// via vx_spawn_warps, using the warp and thread counts reported by
+// vx_num_warps() and vx_num_threads().
+void main() {
+	struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
+	const uint32_t num_tests = sizeof(sc_tests) / sizeof(sc_tests[0]);
+	// Refuse ids outside the table instead of calling through whatever lies
+	// past its end. The unsigned cast also rejects negative ids, should
+	// testid be a signed field.
+	if ((uint32_t)arg->testid >= num_tests)
+		return;
+	int num_warps = vx_num_warps();
+	int num_threads = vx_num_threads();
+	vx_spawn_warps(num_warps, num_threads, sc_tests[arg->testid], arg);
+}
--- a/driver/tests/dogfood/testcases.h
+++ b/driver/tests/dogfood/testcases.h
@ -0,0 +1,555 @@
+#pragma once
+
+#include <iostream>
+#include <math.h>
+
+// Abstract interface implemented by every test case in this header.
+class ITestCase {
+public:
+  ITestCase() = default;
+  virtual ~ITestCase() = default;
+
+  // Fills the two input buffers, src1 and src2, with n elements each.
+  virtual void setup(int n, void* src1, void* src2) = 0;
+
+  // Checks the n results in dst against the inputs src1 and src2. The
+  // implementations in this header return the number of mismatches.
+  virtual int verify(int n, void* dst, const void* src1, const void* src2) = 0;
+};
+
+// Test case for 32-bit signed integer addition: dst[i] == src1[i] + src2[i].
+class Test_IADD : public ITestCase {
+public:
+
+  // Fills src1 with n/2 + i and src2 with n/2 - i, as int32_t.
+  void setup(int n, void* src1, void* src2) override {
+    auto a = (int32_t*)src1;
+    auto b = (int32_t*)src2;
+    for (int i = 0; i < n; ++i) {
+      a[i] = n/2 + i;
+      b[i] = n/2 - i;
+    }
+  }
+
+  // Returns how many of the n int32_t results in dst differ from the sum.
+  int verify(int n, void* dst, const void* src1, const void* src2) override {
+    int errors = 0;
+    auto a = (const int32_t*)src1;
+    auto b = (const int32_t*)src2;
+    auto c = (const int32_t*)dst;
+    for (int i = 0; i < n; ++i) {
+      auto ref = a[i] + b[i];
+      if (c[i] != ref) {
+        // Values are streamed in decimal, so no "0x" prefix is printed.
+        std::cout << "error at value " << i << ": actual " << c[i] << ", expected " << ref << std::endl;
+        ++errors;
+      }
+    }
+    return errors;
+  }
+};
+
+// Test case for 32-bit signed integer multiplication:
+// dst[i] == src1[i] * src2[i].
+class Test_IMUL : public ITestCase {
+public:
+
+  // Fills src1 with n/2 + i and src2 with n/2 - i, as int32_t.
+  void setup(int n, void* src1, void* src2) override {
+    auto a = (int32_t*)src1;
+    auto b = (int32_t*)src2;
+    for (int i = 0; i < n; ++i) {
+      a[i] = n/2 + i;
+      b[i] = n/2 - i;
+    }
+  }
+
+  // Returns how many of the n int32_t results in dst differ from the product.
+  int verify(int n, void* dst, const void* src1, const void* src2) override {
+    int errors = 0;
+    auto a = (const int32_t*)src1;
+    auto b = (const int32_t*)src2;
+    auto c = (const int32_t*)dst;
+    for (int i = 0; i < n; ++i) {
+      auto ref = a[i] * b[i];
+      if (c[i] != ref) {
+        // Values are streamed in decimal, so no "0x" prefix is printed.
+        std::cout << "error at value " << i << ": actual " << c[i] << ", expected " << ref << std::endl;
+        ++errors;
+      }
+    }
+    return errors;
+  }
+};
+
+// Test case for 32-bit signed integer division: dst[i] == src1[i] / src2[i].
+class Test_IDIV : public ITestCase {
+public:
+
+  // Fills src1 with n/2 - i and src2 with n/2 + i, as int32_t. The divisor
+  // is forced to be non-zero.
+  void setup(int n, void* src1, void* src2) override {
+    auto a = (int32_t*)src1;
+    auto b = (int32_t*)src2;
+    for (int i = 0; i < n; ++i) {
+      int divisor = n/2 + i;
+      a[i] = n/2 - i;
+      // n/2 + i is zero only when n == 1. Substitute 1 there so the
+      // reference division in verify() cannot divide by zero.
+      b[i] = (divisor != 0) ? divisor : 1;
+    }
+  }
+
+  // Returns how many of the n int32_t results in dst differ from the quotient.
+  int verify(int n, void* dst, const void* src1, const void* src2) override {
+    int errors = 0;
+    auto a = (const int32_t*)src1;
+    auto b = (const int32_t*)src2;
+    auto c = (const int32_t*)dst;
+    for (int i = 0; i < n; ++i) {
+      auto ref = a[i] / b[i];
+      if (c[i] != ref) {
+        // Values are streamed in decimal, so no "0x" prefix is printed.
+        std::cout << "error at value " << i << ": actual " << c[i] << ", expected " << ref << std::endl;
+        ++errors;
+      }
+    }
+    return errors;
+  }
+};
+
+// Test case combining 32-bit signed integer division and multiplication:
+// dst[i] == src1[i] / src2[i] + src1[i] * src2[i].
+class Test_IDIV_MUL : public ITestCase {
+public:
+
+  // Fills src1 with n/2 - i and src2 with n/2 + i, as int32_t. The divisor
+  // is forced to be non-zero.
+  void setup(int n, void* src1, void* src2) override {
+    auto a = (int32_t*)src1;
+    auto b = (int32_t*)src2;
+    for (int i = 0; i < n; ++i) {
+      int divisor = n/2 + i;
+      a[i] = n/2 - i;
+      // n/2 + i is zero only when n == 1. Substitute 1 there so the
+      // reference division in verify() cannot divide by zero.
+      b[i] = (divisor != 0) ? divisor : 1;
+    }
+  }
+
+  // Returns how many of the n int32_t results in dst differ from
+  // quotient + product.
+  int verify(int n, void* dst, const void* src1, const void* src2) override {
+    int errors = 0;
+    auto a = (const int32_t*)src1;
+    auto b = (const int32_t*)src2;
+    auto c = (const int32_t*)dst;
+    for (int i = 0; i < n; ++i) {
+      auto x = a[i] / b[i];
+      auto y = a[i] * b[i];
+      auto ref = x + y;
+      if (c[i] != ref) {
+        // Values are streamed in decimal, so no "0x" prefix is printed.
+        std::cout << "error at value " << i << ": actual " << c[i] << ", expected " << ref << std::endl;
+        ++errors;
+      }
+    }
+    return errors;
+  }
+};
+
+// Test case for single-precision addition: dst[i] == src1[i] + src2[i].
+class Test_FADD : public ITestCase {
+public:
+
+  // Fills src1 with (n + i) * 0.125f and src2 with (n - i) * 0.125f.
+  void setup(int n, void* src1, void* src2) override {
+    auto a = (float*)src1;
+    auto b = (float*)src2;
+    for (int i = 0; i < n; ++i) {
+      a[i] = (n + i) * 0.125f;
+      b[i] = (n - i) * 0.125f;
+    }
+  }
+
+  // Returns how many of the n float results in dst differ (exact compare)
+  // from the sum.
+  int verify(int n, void* dst, const void* src1, const void* src2) override {
+    int errors = 0;
+    auto a = (const float*)src1;
+    auto b = (const float*)src2;
+    auto c = (const float*)dst;
+    for (int i = 0; i < n; ++i) {
+      auto ref = a[i] + b[i];
+      if (c[i] != ref) {
+        // Floats are streamed in decimal, so no "0x" prefix is printed.
+        std::cout << "error at value " << i << ": actual " << c[i] << ", expected " << ref << std::endl;
+        ++errors;
+      }
+    }
+    return errors;
+  }
+};
+
+// Test case for single-precision subtraction: dst[i] == src1[i] - src2[i].
+class Test_FSUB : public ITestCase {
+public:
+
+  // Fills src1 with (n + i) * 0.125f and src2 with (n - i) * 0.125f.
+  void setup(int n, void* src1, void* src2) override {
+    auto a = (float*)src1;
+    auto b = (float*)src2;
+    for (int i = 0; i < n; ++i) {
+      a[i] = (n + i) * 0.125f;
+      b[i] = (n - i) * 0.125f;
+    }
+  }
+
+  // Returns how many of the n float results in dst differ (exact compare)
+  // from the difference.
+  int verify(int n, void* dst, const void* src1, const void* src2) override {
+    int errors = 0;
+    auto a = (const float*)src1;
+    auto b = (const float*)src2;
+    auto c = (const float*)dst;
+    for (int i = 0; i < n; ++i) {
+      auto ref = a[i] - b[i];
+      if (c[i] != ref) {
+        // Floats are streamed in decimal, so no "0x" prefix is printed.
+        std::cout << "error at value " << i << ": actual " << c[i] << ", expected " << ref << std::endl;
+        ++errors;
+      }
+    }
+    return errors;
+  }
+};
+
+// Test case for single-precision multiplication: dst[i] == src1[i] * src2[i].
+class Test_FMUL : public ITestCase {
+public:
+
+  // Fills src1 with (n + i) * 0.125f and src2 with (n - i) * 0.125f.
+  void setup(int n, void* src1, void* src2) override {
+    auto a = (float*)src1;
+    auto b = (float*)src2;
+    for (int i = 0; i < n; ++i) {
+      a[i] = (n + i) * 0.125f;
+      b[i] = (n - i) * 0.125f;
+    }
+  }
+
+  // Returns how many of the n float results in dst differ (exact compare)
+  // from the product.
+  int verify(int n, void* dst, const void* src1, const void* src2) override {
+    int errors = 0;
+    auto a = (const float*)src1;
+    auto b = (const float*)src2;
+    auto c = (const float*)dst;
+    for (int i = 0; i < n; ++i) {
+      auto ref = a[i] * b[i];
+      if (c[i] != ref) {
+        // Floats are streamed in decimal, so no "0x" prefix is printed.
+        std::cout << "error at value " << i << ": actual " << c[i] << ", expected " << ref << std::endl;
+        ++errors;
+      }
+    }
+    return errors;
+  }
+};
+
+// Test case for single-precision multiply-add:
+// dst[i] == src1[i] * 0.5f + src2[i].
+class Test_FMADD : public ITestCase {
+public:
+
+  // Fills src1 with (n + i) * 0.125f and src2 with (n - i) * 0.125f.
+  void setup(int n, void* src1, void* src2) override {
+    auto a = (float*)src1;
+    auto b = (float*)src2;
+    for (int i = 0; i < n; ++i) {
+      a[i] = (n + i) * 0.125f;
+      b[i] = (n - i) * 0.125f;
+    }
+  }
+
+  // Returns how many of the n float results in dst differ (exact compare)
+  // from the reference expression.
+  int verify(int n, void* dst, const void* src1, const void* src2) override {
+    int errors = 0;
+    auto a = (const float*)src1;
+    auto b = (const float*)src2;
+    auto c = (const float*)dst;
+    for (int i = 0; i < n; ++i) {
+      auto ref = a[i] * 0.5f + b[i];
+      if (c[i] != ref) {
+        // Floats are streamed in decimal, so no "0x" prefix is printed.
+        std::cout << "error at value " << i << ": actual " << c[i] << ", expected " << ref << std::endl;
+        ++errors;
+      }
+    }
+    return errors;
+  }
+};
+
+// Test case for single-precision multiply-subtract:
+// dst[i] == src1[i] * 0.5f - src2[i].
+class Test_FMSUB : public ITestCase {
+public:
+
+  // Fills src1 with (n + i) * 0.125f and src2 with (n - i) * 0.125f.
+  void setup(int n, void* src1, void* src2) override {
+    auto a = (float*)src1;
+    auto b = (float*)src2;
+    for (int i = 0; i < n; ++i) {
+      a[i] = (n + i) * 0.125f;
+      b[i] = (n - i) * 0.125f;
+    }
+  }
+
+  // Returns how many of the n float results in dst differ (exact compare)
+  // from the reference expression.
+  int verify(int n, void* dst, const void* src1, const void* src2) override {
+    int errors = 0;
+    auto a = (const float*)src1;
+    auto b = (const float*)src2;
+    auto c = (const float*)dst;
+    for (int i = 0; i < n; ++i) {
+      auto ref = a[i] * 0.5f - b[i];
+      if (c[i] != ref) {
+        // Floats are streamed in decimal, so no "0x" prefix is printed.
+        std::cout << "error at value " << i << ": actual " << c[i] << ", expected " << ref << std::endl;
+        ++errors;
+      }
+    }
+    return errors;
+  }
+};
+
+// Test case for single-precision negated multiply-add:
+// dst[i] == -src1[i] * 0.5f - src2[i].
+class Test_FNMADD : public ITestCase {
+public:
+
+  // Fills src1 with (n + i) * 0.125f and src2 with (n - i) * 0.125f.
+  void setup(int n, void* src1, void* src2) override {
+    auto a = (float*)src1;
+    auto b = (float*)src2;
+    for (int i = 0; i < n; ++i) {
+      a[i] = (n + i) * 0.125f;
+      b[i] = (n - i) * 0.125f;
+    }
+  }
+
+  // Returns how many of the n float results in dst differ (exact compare)
+  // from the reference expression.
+  int verify(int n, void* dst, const void* src1, const void* src2) override {
+    int errors = 0;
+    auto a = (const float*)src1;
+    auto b = (const float*)src2;
+    auto c = (const float*)dst;
+    for (int i = 0; i < n; ++i) {
+      auto ref = -a[i] * 0.5f - b[i];
+      if (c[i] != ref) {
+        // Floats are streamed in decimal, so no "0x" prefix is printed.
+        std::cout << "error at value " << i << ": actual " << c[i] << ", expected " << ref << std::endl;
+        ++errors;
+      }
+    }
+    return errors;
+  }
+};
+
+// Test case for single-precision negated multiply-subtract:
+// dst[i] == -src1[i] * 0.5f + src2[i].
+class Test_FNMSUB : public ITestCase {
+public:
+
+  // Fills src1 with (n + i) * 0.125f and src2 with (n - i) * 0.125f.
+  void setup(int n, void* src1, void* src2) override {
+    auto a = (float*)src1;
+    auto b = (float*)src2;
+    for (int i = 0; i < n; ++i) {
+      a[i] = (n + i) * 0.125f;
+      b[i] = (n - i) * 0.125f;
+    }
+  }
+
+  // Returns how many of the n float results in dst differ (exact compare)
+  // from the reference expression.
+  int verify(int n, void* dst, const void* src1, const void* src2) override {
+    int errors = 0;
+    auto a = (const float*)src1;
+    auto b = (const float*)src2;
+    auto c = (const float*)dst;
+    for (int i = 0; i < n; ++i) {
+      auto ref = -a[i] * 0.5f + b[i];
+      if (c[i] != ref) {
+        // Floats are streamed in decimal, so no "0x" prefix is printed.
+        std::cout << "error at value " << i << ": actual " << c[i] << ", expected " << ref << std::endl;
+        ++errors;
+      }
+    }
+    return errors;
+  }
+};
+
+// Test case for the combined negated-multiply-add / multiply-add expression:
+// dst[i] == (-src1[i] * 0.25f - src2[i]) + (src1[i] * 0.25f + src2[i]).
+class Test_FNMADD_MADD : public ITestCase {
+public:
+
+  // Fills src1 with (n + i) * 0.125f and src2 with (n - i) * 0.125f.
+  void setup(int n, void* src1, void* src2) override {
+    auto a = (float*)src1;
+    auto b = (float*)src2;
+    for (int i = 0; i < n; ++i) {
+      a[i] = (n + i) * 0.125f;
+      b[i] = (n - i) * 0.125f;
+    }
+  }
+
+  // Returns how many of the n float results in dst differ (exact compare)
+  // from the reference expression.
+  int verify(int n, void* dst, const void* src1, const void* src2) override {
+    int errors = 0;
+    auto a = (const float*)src1;
+    auto b = (const float*)src2;
+    auto c = (const float*)dst;
+    for (int i = 0; i < n; ++i) {
+      // The 0.25f factor mirrors kernel_fnmadd_madd, so the reference
+      // evaluates the same expression as the kernel under test.
+      auto x = -a[i] * 0.25f - b[i];
+      auto y =  a[i] * 0.25f + b[i];
+      auto ref = x + y;
+      if (c[i] != ref) {
+        // Floats are streamed in decimal, so no "0x" prefix is printed.
+        std::cout << "error at value " << i << ": actual " << c[i] << ", expected " << ref << std::endl;
+        ++errors;
+      }
+    }
+    return errors;
+  }
+};
+
+// Test case for single-precision division: dst[i] == src1[i] / src2[i].
+class Test_FDIV : public ITestCase {
+public:
+
+  // Fills src1 with (n - i) * 0.125f and src2 with (n + i) * 0.125f.
+  void setup(int n, void* src1, void* src2) override {
+    auto a = (float*)src1;
+    auto b = (float*)src2;
+    for (int i = 0; i < n; ++i) {
+      a[i] = (n - i) * 0.125f;
+      b[i] = (n + i) * 0.125f;
+    }
+  }
+
+  // Returns how many of the n float results in dst differ (exact compare)
+  // from the quotient.
+  int verify(int n, void* dst, const void* src1, const void* src2) override {
+    int errors = 0;
+    auto a = (const float*)src1;
+    auto b = (const float*)src2;
+    auto c = (const float*)dst;
+    for (int i = 0; i < n; ++i) {
+      auto ref = a[i] / b[i];
+      if (c[i] != ref) {
+        // Floats are streamed in decimal, so no "0x" prefix is printed.
+        std::cout << "error at value " << i << ": actual " << c[i] << ", expected " << ref << std::endl;
+        ++errors;
+      }
+    }
+    return errors;
+  }
+};
+
+// Test case for two single-precision divisions:
+// dst[i] == src1[i] / src2[i] + src2[i] / src1[i].
+class Test_FDIV2 : public ITestCase {
+public:
+
+  // Fills src1 with (n - i) * 0.125f and src2 with (n + i) * 0.125f.
+  void setup(int n, void* src1, void* src2) override {
+    auto a = (float*)src1;
+    auto b = (float*)src2;
+    for (int i = 0; i < n; ++i) {
+      a[i] = (n - i) * 0.125f;
+      b[i] = (n + i) * 0.125f;
+    }
+  }
+
+  // Returns how many of the n float results in dst differ (exact compare)
+  // from the reference expression.
+  int verify(int n, void* dst, const void* src1, const void* src2) override {
+    int errors = 0;
+    auto a = (const float*)src1;
+    auto b = (const float*)src2;
+    auto c = (const float*)dst;
+    for (int i = 0; i < n; ++i) {
+      auto x = a[i] / b[i];
+      auto y = b[i] / a[i];
+      auto ref = x + y;
+      if (c[i] != ref) {
+        // Floats are streamed in decimal, so no "0x" prefix is printed.
+        std::cout << "error at value " << i << ": actual " << c[i] << ", expected " << ref << std::endl;
+        ++errors;
+      }
+    }
+    return errors;
+  }
+};
+
+// Test case for single-precision square root:
+// dst[i] == sqrtf(src1[i]) + src2[i].
+class Test_FSQRT : public ITestCase {
+public:
+
+  // Fills src1 with (n + i) * 0.125f and src2 with (n - i) * 0.125f.
+  void setup(int n, void* src1, void* src2) override {
+    auto a = (float*)src1;
+    auto b = (float*)src2;
+    for (int i = 0; i < n; ++i) {
+      a[i] = (n + i) * 0.125f;
+      b[i] = (n - i) * 0.125f;
+    }
+  }
+
+  // Returns how many of the n float results in dst differ (exact compare)
+  // from the reference expression.
+  int verify(int n, void* dst, const void* src1, const void* src2) override {
+    int errors = 0;
+    auto a = (const float*)src1;
+    auto b = (const float*)src2;
+    auto c = (const float*)dst;
+    for (int i = 0; i < n; ++i) {
+      // sqrtf pins the reference to single precision. Plain sqrt() may
+      // resolve to the double overload depending on the library headers,
+      // which would make `ref` a double.
+      float ref = sqrtf(a[i]) + b[i];
+      if (c[i] != ref) {
+        // Floats are streamed in decimal, so no "0x" prefix is printed.
+        std::cout << "error at value " << i << ": actual " << c[i] << ", expected " << ref << std::endl;
+        ++errors;
+      }
+    }
+    return errors;
+  }
+};
+
+// Test case for float-to-signed-integer conversion:
+// dst[i] == (int32_t)(src1[i] + src2[i]).
+class Test_FTOI : public ITestCase {
+public:
+
+  // Fills src1 with (n + i) * 0.5f and src2 with (n - i) * 0.5f.
+  void setup(int n, void* src1, void* src2) override {
+    auto a = (float*)src1;
+    auto b = (float*)src2;
+    for (int i = 0; i < n; ++i) {
+      a[i] = (n + i) * 0.5f;
+      b[i] = (n - i) * 0.5f;
+    }
+  }
+
+  // Returns how many of the n int32_t results in dst differ from the
+  // converted sum.
+  int verify(int n, void* dst, const void* src1, const void* src2) override {
+    int errors = 0;
+    auto a = (const float*)src1;
+    auto b = (const float*)src2;
+    // The result of this test is an integer, so dst must be read as int32_t.
+    // Reading it as float would compare reinterpreted bit patterns.
+    auto c = (const int32_t*)dst;
+    for (int i = 0; i < n; ++i) {
+      auto x = a[i] + b[i];
+      auto ref = (int32_t)x;
+      if (c[i] != ref) {
+        // Values are streamed in decimal, so no "0x" prefix is printed.
+        std::cout << "error at value " << i << ": actual " << c[i] << ", expected " << ref << std::endl;
+        ++errors;
+      }
+    }
+    return errors;
+  }
+};
+
+// Test case for float-to-unsigned-integer conversion:
+// dst[i] == (uint32_t)(src1[i] + src2[i]).
+class Test_FTOU : public ITestCase {
+public:
+
+  // Fills src1 with (n + i) * 0.5f and src2 with (n - i) * 0.5f.
+  void setup(int n, void* src1, void* src2) override {
+    auto a = (float*)src1;
+    auto b = (float*)src2;
+    for (int i = 0; i < n; ++i) {
+      a[i] = (n + i) * 0.5f;
+      b[i] = (n - i) * 0.5f;
+    }
+  }
+
+  // Returns how many of the n uint32_t results in dst differ from the
+  // converted sum.
+  int verify(int n, void* dst, const void* src1, const void* src2) override {
+    int errors = 0;
+    auto a = (const float*)src1;
+    auto b = (const float*)src2;
+    // The result of this test is an integer, so dst must be read as uint32_t.
+    // Reading it as float would compare reinterpreted bit patterns.
+    auto c = (const uint32_t*)dst;
+    for (int i = 0; i < n; ++i) {
+      auto x = a[i] + b[i];
+      auto ref = (uint32_t)x;
+      if (c[i] != ref) {
+        // Values are streamed in decimal, so no "0x" prefix is printed.
+        std::cout << "error at value " << i << ": actual " << c[i] << ", expected " << ref << std::endl;
+        ++errors;
+      }
+    }
+    return errors;
+  }
+};
+
+// Test case for signed-integer-to-float conversion:
+// dst[i] == (float)(src1[i] + src2[i]), with int32_t inputs.
+class Test_ITOF : public ITestCase {
+public:
+
+  // Fills src1 with n/2 + i and src2 with n/2 - i, as int32_t.
+  void setup(int n, void* src1, void* src2) override {
+    auto a = (int32_t*)src1;
+    auto b = (int32_t*)src2;
+    for (int i = 0; i < n; ++i) {
+      a[i] = n/2 + i;
+      b[i] = n/2 - i;
+    }
+  }
+
+  // Returns how many of the n float results in dst differ (exact compare)
+  // from the converted integer sum.
+  int verify(int n, void* dst, const void* src1, const void* src2) override {
+    int errors = 0;
+    auto a = (const int32_t*)src1;
+    auto b = (const int32_t*)src2;
+    auto c = (const float*)dst;
+    for (int i = 0; i < n; ++i) {
+      auto x = a[i] + b[i];
+      auto ref = (float)x;
+      if (c[i] != ref) {
+        // Floats are streamed in decimal, so no "0x" prefix is printed.
+        std::cout << "error at value " << i << ": actual " << c[i] << ", expected " << ref << std::endl;
+        ++errors;
+      }
+    }
+    return errors;
+  }
+};
+
+// Test case for unsigned-integer-to-float conversion:
+// dst[i] == (float)(src1[i] + src2[i]), with uint32_t inputs.
+class Test_UTOF : public ITestCase {
+public:
+
+  // Fills src1 with n/2 + i and src2 with n/2 - i, stored as uint32_t.
+  // Negative values of n/2 - i wrap on conversion to unsigned.
+  void setup(int n, void* src1, void* src2) override {
+    auto a = (uint32_t*)src1;
+    auto b = (uint32_t*)src2;
+    for (int i = 0; i < n; ++i) {
+      a[i] = n/2 + i;
+      b[i] = n/2 - i;
+    }
+  }
+
+  // Returns how many of the n float results in dst differ (exact compare)
+  // from the converted unsigned sum.
+  int verify(int n, void* dst, const void* src1, const void* src2) override {
+    int errors = 0;
+    auto a = (const uint32_t*)src1;
+    auto b = (const uint32_t*)src2;
+    auto c = (const float*)dst;
+    for (int i = 0; i < n; ++i) {
+      auto x = a[i] + b[i];
+      auto ref = (float)x;
+      if (c[i] != ref) {
+        // Floats are streamed in decimal, so no "0x" prefix is printed.
+        std::cout << "error at value " << i << ": actual " << c[i] << ", expected " << ref << std::endl;
+        ++errors;
+      }
+    }
+    return errors;
+  }
+};