Minor update

2025-04-23 21:39:10 -04:00 · 2021-12-15 17:21:38 -05:00 · 2021-12-15 17:21:38 -05:00 · f93303bac7
commit f93303bac7
parent 71acf4eadb
5 changed files with 66 additions and 114 deletions
--- a/hw/dpi/float_dpi.cpp
+++ b/hw/dpi/float_dpi.cpp
@@ -41,131 +41,131 @@ extern "C" {
 void dpi_fadd(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
  if (!enable) 
    return;
-  *result = rv_fadd(a, b, (*frm & 0x7), fflags);
+  *result = rv_fadd_s(a, b, (*frm & 0x7), fflags);
 }

 void dpi_fsub(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
  if (!enable) 
    return;
-  *result = rv_fsub(a, b, (*frm & 0x7), fflags);
+  *result = rv_fsub_s(a, b, (*frm & 0x7), fflags);
 }

 void dpi_fmul(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
  if (!enable) 
    return;
-  *result = rv_fmul(a, b, (*frm & 0x7), fflags);
+  *result = rv_fmul_s(a, b, (*frm & 0x7), fflags);
 }

 void dpi_fmadd(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
  if (!enable) 
    return;
-  *result = rv_fmadd(a, b, c, (*frm & 0x7), fflags);
+  *result = rv_fmadd_s(a, b, c, (*frm & 0x7), fflags);
 }

 void dpi_fmsub(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
  if (!enable) 
    return;
-  *result = rv_fmsub(a, b, c, (*frm & 0x7), fflags);
+  *result = rv_fmsub_s(a, b, c, (*frm & 0x7), fflags);
 }

 void dpi_fnmadd(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
  if (!enable) 
    return;
-  *result = rv_fnmadd(a, b, c, (*frm & 0x7), fflags);
+  *result = rv_fnmadd_s(a, b, c, (*frm & 0x7), fflags);
 }

 void dpi_fnmsub(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
  if (!enable) 
    return;
-  *result = rv_fnmsub(a, b, c, (*frm & 0x7), fflags);
+  *result = rv_fnmsub_s(a, b, c, (*frm & 0x7), fflags);
 }

 void dpi_fdiv(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
  if (!enable) 
    return;
-  *result = rv_fdiv(a, b, (*frm & 0x7), fflags);
+  *result = rv_fdiv_s(a, b, (*frm & 0x7), fflags);
 }

 void dpi_fsqrt(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
  if (!enable) 
    return;
-  *result = rv_fsqrt(a, (*frm & 0x7), fflags);
+  *result = rv_fsqrt_s(a, (*frm & 0x7), fflags);
 }

 void dpi_ftoi(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
  if (!enable) 
    return;
-  *result = rv_ftoi(a, (*frm & 0x7), fflags);
+  *result = rv_ftoi_s(a, (*frm & 0x7), fflags);
 }

 void dpi_ftou(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
  if (!enable) 
    return;
-  *result = rv_ftou(a, (*frm & 0x7), fflags);
+  *result = rv_ftou_s(a, (*frm & 0x7), fflags);
 }

 void dpi_itof(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
  if (!enable) 
    return;
-  *result = rv_itof(a, (*frm & 0x7), fflags);
+  *result = rv_itof_s(a, (*frm & 0x7), fflags);
 }

 void dpi_utof(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
  if (!enable) 
    return;
-  *result = rv_utof(a, (*frm & 0x7), fflags);
+  *result = rv_utof_s(a, (*frm & 0x7), fflags);
 }

 void dpi_flt(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
  if (!enable) 
    return;
-  *result = rv_flt(a, b, fflags);
+  *result = rv_flt_s(a, b, fflags);
 }

 void dpi_fle(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
  if (!enable) 
    return;
-  *result = rv_fle(a, b, fflags);
+  *result = rv_fle_s(a, b, fflags);
 }

 void dpi_feq(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
  if (!enable) 
    return;
-  *result = rv_feq(a, b, fflags);
+  *result = rv_feq_s(a, b, fflags);
 }

 void dpi_fmin(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
  if (!enable) 
    return;
-  *result = rv_fmin(a, b, fflags);
+  *result = rv_fmin_s(a, b, fflags);
 }

 void dpi_fmax(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
  if (!enable) 
    return;
-  *result = rv_fmax(a, b, fflags);
+  *result = rv_fmax_s(a, b, fflags);
 }

 void dpi_fclss(bool enable, int a, int* result) {
  if (!enable) 
    return;
-  *result = rv_fclss(a);
+  *result = rv_fclss_s(a);
 }

 void dpi_fsgnj(bool enable, int a, int b, int* result) {
  if (!enable) 
    return;
-  *result = rv_fsgnj(a, b);
+  *result = rv_fsgnj_s(a, b);
 }

 void dpi_fsgnjn(bool enable, int a, int b, int* result) {
  if (!enable) 
    return;
-  *result = rv_fsgnjn(a, b);
+  *result = rv_fsgnjn_s(a, b);
 }

 void dpi_fsgnjx(bool enable, int a, int b, int* result) {
  if (!enable) 
    return;
-  *result = rv_fsgnjx(a, b);
+  *result = rv_fsgnjx_s(a, b);
 }
--- a/miscs/docker/vortex/Dockerfile
+++ b/miscs/docker/vortex/Dockerfile
@@ -1,48 +0,0 @@
-# Dockerfile for setting up the vortex development environment
-
-FROM ubuntu:18.04
-
-# Install dependencies
-RUN apt update && apt install -y \
-    git build-essential g++ libfl2 \
-    libfl-dev zlibc zlib1g zlib1g-dev \
-    ccache libgoogle-perftools-dev numactl perl-doc \
-    python3 device-tree-compiler gdb
-
-# Download vortex-toolchain-prebuilt
-RUN git clone https://github.com/SantoshSrivatsan24/vortex-toolchain-prebuilt.git /tmp/vortex-toolchain-prebuilt
-
-# Copy riscv-gnu-toolchain
-RUN cd /tmp/vortex-toolchain-prebuilt/riscv-gnu-toolchain/ubuntu/bionic; \
-    cat riscv-gnu-toolchain.tar.bz2.part* > riscv-gnu-toolchain.tar.bz2; \
-    tar -xf riscv-gnu-toolchain.tar.bz2 -C /opt/; 
-
-# Copy riscv64-gnu-toolchain
-RUN cd /tmp/vortex-toolchain-prebuilt/riscv64-gnu-toolchain/ubuntu/bionic; \
-    cat riscv64-gnu-toolchain.tar.bz2.part* > riscv64-gnu-toolchain.tar.bz2; \
-    tar -xf riscv64-gnu-toolchain.tar.bz2 -C /opt/; 
-
-# Copy llvm-riscv
-RUN cd /tmp/vortex-toolchain-prebuilt/llvm-riscv/ubuntu/bionic; \
-    cat llvm-riscv.tar.bz2.part* > llvm-riscv.tar.bz2; \
-    tar -xf llvm-riscv.tar.bz2 -C /opt/; 
-
-# Copy pocl
-RUN cd /tmp/vortex-toolchain-prebuilt/pocl/ubuntu/bionic; \
-    tar -xf pocl.tar.bz2 -C /opt/; 
-
-# Copy verilator
-RUN cd /tmp/vortex-toolchain-prebuilt/verilator/ubuntu/bionic; \
-    tar -xf verilator.tar.bz2 -C /opt/; 
-
-# Set environment variables
-ENV RISCV_TOOLCHAIN_PATH=/opt/riscv-gnu-toolchain
-ENV RISCV64_TOOLCHAIN_PATH=/opt/riscv64-gnu-toolchain
-ENV VERILATOR_ROOT=/opt/verilator
-ENV PATH=$PATH:/${RISCV_TOOLCHAIN_PATH}/bin:${RISCV64_TOOLCHAIN_PATH}/bin:${RISCV64_TOOLCHAIN_PATH}/riscv64-unknown-elf/bin:${VERILATOR_ROOT}/bin
-
-# Cleanup
-RUN rm -rf /tmp/vortex-toolchain-prebuilt
-
-# Set working directory
-WORKDIR /home/vortex
--- a/sim/common/bitmanip.h
+++ b/sim/common/bitmanip.h
@@ -82,8 +82,8 @@ inline uint64_t sext64(uint64_t word, uint64_t width) {
  assert(width > 1);
  assert(width <= 64);
  uint64_t unity = 1;
-  uint64_t mask = (unity << width) - 0x1;
-  return ((word >> (width - 0x1)) & 0x1) ? (word | ~mask) : word;
+  uint64_t mask = (unity << width) - 1;
+  return ((word >> (width - 1)) & 0x1) ? (word | ~mask) : word;
 }

 inline __uint128_t sext128(__uint128_t word, uint32_t width) {
--- a/sim/simx/execute.cpp
+++ b/sim/simx/execute.cpp
@@ -742,7 +742,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
      uint32_t fflags = 0;
      switch (func7) {
      case 0x00: // RV32F: FADD.S
-        rddata[t] = rv_fadd(rsdata[t][0], rsdata[t][1], frm, &fflags);
+        rddata[t] = rv_fadd_s(rsdata[t][0], rsdata[t][1], frm, &fflags);
        trace->fpu.type = FpuType::FMA;
        trace->used_fregs.set(rsrc0);
        trace->used_fregs.set(rsrc1);
@@ -754,7 +754,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
        trace->used_fregs.set(rsrc1);
        break;
      case 0x04: // RV32F: FSUB.S
-        rddata[t] = rv_fsub(rsdata[t][0], rsdata[t][1], frm, &fflags);
+        rddata[t] = rv_fsub_s(rsdata[t][0], rsdata[t][1], frm, &fflags);
        trace->fpu.type = FpuType::FMA;
        trace->used_fregs.set(rsrc0);
        trace->used_fregs.set(rsrc1);
@@ -766,7 +766,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
        trace->used_fregs.set(rsrc1);
        break;
      case 0x08: // RV32F: FMUL.S
-        rddata[t] = rv_fmul(rsdata[t][0], rsdata[t][1], frm, &fflags);
+        rddata[t] = rv_fmul_s(rsdata[t][0], rsdata[t][1], frm, &fflags);
        trace->fpu.type = FpuType::FMA;
        trace->used_fregs.set(rsrc0);
        trace->used_fregs.set(rsrc1);
@@ -778,7 +778,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
        trace->used_fregs.set(rsrc1);
        break;
      case 0x0c: // RV32F: FDIV.S
-        rddata[t] = rv_fdiv(rsdata[t][0], rsdata[t][1], frm, &fflags);
+        rddata[t] = rv_fdiv_s(rsdata[t][0], rsdata[t][1], frm, &fflags);
        trace->fpu.type = FpuType::FDIV;
        trace->used_fregs.set(rsrc0);
        trace->used_fregs.set(rsrc1);
@@ -790,7 +790,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
        trace->used_fregs.set(rsrc1);
        break;
      case 0x2c: // RV32F: FSQRT.S
-        rddata[t] = rv_fsqrt(rsdata[t][0], frm, &fflags);
+        rddata[t] = rv_fsqrt_s(rsdata[t][0], frm, &fflags);
        trace->fpu.type = FpuType::FSQRT;
        trace->used_fregs.set(rsrc0);
        break;
@@ -802,13 +802,13 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
      case 0x10:
        switch (func3) {            
        case 0: // RV32F: FSGNJ.S
-          rddata[t] = rv_fsgnj(rsdata[t][0], rsdata[t][1]);
+          rddata[t] = rv_fsgnj_s(rsdata[t][0], rsdata[t][1]);
          break;          
        case 1: // RV32F: FSGNJN.S
-          rddata[t] = rv_fsgnjn(rsdata[t][0], rsdata[t][1]);
+          rddata[t] = rv_fsgnjn_s(rsdata[t][0], rsdata[t][1]);
          break;          
        case 2: // RV32F: FSGNJX.S
-          rddata[t] = rv_fsgnjx(rsdata[t][0], rsdata[t][1]);
+          rddata[t] = rv_fsgnjx_s(rsdata[t][0], rsdata[t][1]);
          break;
        }
      case 0x11:
@@ -830,10 +830,10 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
      case 0x14:              
        if (func3) {
          // RV32F: FMAX.S
-          rddata[t] = rv_fmax(rsdata[t][0], rsdata[t][1], &fflags);
+          rddata[t] = rv_fmax_s(rsdata[t][0], rsdata[t][1], &fflags);
        } else {
          // RV32F: FMIN.S
-          rddata[t] = rv_fmin(rsdata[t][0], rsdata[t][1], &fflags);
+          rddata[t] = rv_fmin_s(rsdata[t][0], rsdata[t][1], &fflags);
        }
        trace->fpu.type = FpuType::FNCP;
        trace->used_fregs.set(rsrc0);
@@ -855,19 +855,19 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
        switch(rsrc1) {
          case 0: 
            // RV32F: FCVT.W.S
-            rddata[t] = sext64(rv_ftoi(rsdata[t][0], frm, &fflags), 32);
+            rddata[t] = sext64(rv_ftoi_s(rsdata[t][0], frm, &fflags), 32);
            break;
          case 1:
            // RV32F: FCVT.WU.S
-            rddata[t] = sext64(rv_ftou(rsdata[t][0], frm, &fflags), 32);
+            rddata[t] = sext64(rv_ftou_s(rsdata[t][0], frm, &fflags), 32);
            break;
          case 2:
            // RV64F: FCVT.L.S
-            rddata[t] = rv_ftol(rsdata[t][0], frm, &fflags);
+            rddata[t] = rv_ftol_s(rsdata[t][0], frm, &fflags);
            break;
          case 3:
            // RV64F: FCVT.LU.S
-            rddata[t] = rv_ftolu(rsdata[t][0], frm, &fflags);
+            rddata[t] = rv_ftolu_s(rsdata[t][0], frm, &fflags);
            break;
        }
        trace->fpu.type = FpuType::FCVT;
@@ -898,7 +898,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
      case 0x70:      
        if (func3) {
          // RV32F: FCLASS.S
-          rddata[t] = rv_fclss(rsdata[t][0]);
+          rddata[t] = rv_fclss_s(rsdata[t][0]);
        } else {          
          // RV32F: FMV.X.W
          rddata[t] = rsdata[t][0];
@@ -908,7 +908,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
        break;
      case 0x71:      
        if (func3) {
-          // RV32D: FCLASS.S
+          // RV32D: FCLASS.D
          rddata[t] = rv_fclss_d(rsdata[t][0]);
        } else {          
          // RV64D: FMV.X.D
@@ -921,15 +921,15 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
        switch(func3) {              
        case 0:
          // RV32F: FLE.S
-          rddata[t] = rv_fle(rsdata[t][0], rsdata[t][1], &fflags);    
+          rddata[t] = rv_fle_s(rsdata[t][0], rsdata[t][1], &fflags);    
          break;              
        case 1:
          // RV32F: FLT.S
-          rddata[t] = rv_flt(rsdata[t][0], rsdata[t][1], &fflags);
+          rddata[t] = rv_flt_s(rsdata[t][0], rsdata[t][1], &fflags);
          break;              
        case 2:
          // RV32F: FEQ.S
-          rddata[t] = rv_feq(rsdata[t][0], rsdata[t][1], &fflags);
+          rddata[t] = rv_feq_s(rsdata[t][0], rsdata[t][1], &fflags);
          break;
        } 
        trace->fpu.type = FpuType::FNCP;
@@ -959,19 +959,19 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
        switch(rsrc1) {
          case 0: 
            // RV32F: FCVT.S.W
-            rddata[t] = rv_itof(rsdata[t][0], frm, &fflags);
+            rddata[t] = rv_itof_s(rsdata[t][0], frm, &fflags);
            break;
          case 1:
            // RV32F: FCVT.S.WU
-            rddata[t] = rv_utof(rsdata[t][0], frm, &fflags);
+            rddata[t] = rv_utof_s(rsdata[t][0], frm, &fflags);
            break;
          case 2:
            // RV64F: FCVT.S.L
-            rddata[t] = rv_ltof(rsdata[t][0], frm, &fflags);
+            rddata[t] = rv_ltof_s(rsdata[t][0], frm, &fflags);
            break;
          case 3:
            // RV64F: FCVT.S.LU
-            rddata[t] = rv_lutof(rsdata[t][0], frm, &fflags);
+            rddata[t] = rv_lutof_s(rsdata[t][0], frm, &fflags);
            break;
        }
        trace->fpu.type = FpuType::FCVT;
@@ -1030,7 +1030,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
          rddata[t] = rv_fmadd_d(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags);
        else
          // RV32F: FMADD.S
-          rddata[t] = rv_fmadd(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags);
+          rddata[t] = rv_fmadd_s(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags);
        break;
      case FMSUB:
        if (func2)
@@ -1038,7 +1038,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
          rddata[t] = rv_fmsub_d(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags);
        else 
          // RV32F: FMSUB.S
-          rddata[t] = rv_fmsub(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags);
+          rddata[t] = rv_fmsub_s(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags);
        break;
      case FMNMADD:
        if (func2)
@@ -1046,7 +1046,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
          rddata[t] = rv_fnmadd_d(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags);
        else
          // RV32F: FNMADD.S
-          rddata[t] = rv_fnmadd(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags);
+          rddata[t] = rv_fnmadd_s(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags);
        break; 
      case FMNMSUB:
        if (func2)
@@ -1054,7 +1054,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
          rddata[t] = rv_fnmsub_d(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags);
        else
          // RV32F: FNMSUB.S
-          rddata[t] = rv_fnmsub(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags);
+          rddata[t] = rv_fnmsub_s(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags);
        break;
      default:
        break;
--- a/tests/riscv/isa/ramulator.ddr4.log
+++ b/tests/riscv/isa/ramulator.ddr4.log
@@ -1,19 +1,19 @@
               ramulator.active_cycles_0                  76                                      # Total active cycles for level _0
                 ramulator.busy_cycles_0                  76                                      # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0
            ramulator.serving_requests_0                  76                                      # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0
-    ramulator.average_serving_requests_0            0.056130                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _0
+    ramulator.average_serving_requests_0            0.053901                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _0
             ramulator.active_cycles_0_0                  76                                      # Total active cycles for level _0_0
               ramulator.busy_cycles_0_0                  76                                      # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0
          ramulator.serving_requests_0_0                  76                                      # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0
-  ramulator.average_serving_requests_0_0            0.056130                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0
+  ramulator.average_serving_requests_0_0            0.053901                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0
           ramulator.active_cycles_0_0_0                  76                                      # Total active cycles for level _0_0_0
             ramulator.busy_cycles_0_0_0                  76                                      # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_0
        ramulator.serving_requests_0_0_0                  76                                      # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0
-ramulator.average_serving_requests_0_0_0            0.056130                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0
+ramulator.average_serving_requests_0_0_0            0.053901                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0
         ramulator.active_cycles_0_0_0_0                  76                                      # Total active cycles for level _0_0_0_0
           ramulator.busy_cycles_0_0_0_0                  76                                      # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_0_0
      ramulator.serving_requests_0_0_0_0                  76                                      # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0_0
-ramulator.average_serving_requests_0_0_0_0            0.056130                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0_0
+ramulator.average_serving_requests_0_0_0_0            0.053901                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0_0
         ramulator.active_cycles_0_0_0_1                   0                                      # Total active cycles for level _0_0_0_1
           ramulator.busy_cycles_0_0_0_1                   0                                      # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_0_1
      ramulator.serving_requests_0_0_0_1                   0                                      # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0_1
@@ -106,9 +106,9 @@ ramulator.write_row_conflicts_channel_0_core                   0
      ramulator.useless_activates_0_core                   0                                      # Number of useless activations. E.g, ACT -> PRE w/o RD or WR
            ramulator.read_latency_avg_0           26.333333                                      # The average memory latency cycles (in memory time domain) per request for all read requests in this channel
            ramulator.read_latency_sum_0                  79                                      # The memory latency cycles (in memory time domain) sum for all read requests in this channel
-        ramulator.req_queue_length_avg_0            0.046529                                      # Average of read and write queue length per memory cycle per channel.
+        ramulator.req_queue_length_avg_0            0.044681                                      # Average of read and write queue length per memory cycle per channel.
        ramulator.req_queue_length_sum_0                  63                                      # Sum of read and write queue length per memory cycle per channel.
-   ramulator.read_req_queue_length_avg_0            0.046529                                      # Read queue length average per memory cycle per channel.
+   ramulator.read_req_queue_length_avg_0            0.044681                                      # Read queue length average per memory cycle per channel.
   ramulator.read_req_queue_length_sum_0                  63                                      # Read queue length sum per memory cycle per channel.
  ramulator.write_req_queue_length_avg_0            0.000000                                      # Write queue length average per memory cycle per channel.
  ramulator.write_req_queue_length_sum_0                   0                                      # Write queue length sum per memory cycle per channel.
@@ -127,19 +127,19 @@ ramulator.write_row_conflicts_channel_0_core                   0
               ramulator.active_cycles_1                  76                                      # Total active cycles for level _1
                 ramulator.busy_cycles_1                  76                                      # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1
            ramulator.serving_requests_1                  76                                      # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1
-    ramulator.average_serving_requests_1            0.056130                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _1
+    ramulator.average_serving_requests_1            0.053901                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _1
             ramulator.active_cycles_1_0                  76                                      # Total active cycles for level _1_0
               ramulator.busy_cycles_1_0                  76                                      # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0
          ramulator.serving_requests_1_0                  76                                      # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0
-  ramulator.average_serving_requests_1_0            0.056130                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0
+  ramulator.average_serving_requests_1_0            0.053901                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0
           ramulator.active_cycles_1_0_0                  76                                      # Total active cycles for level _1_0_0
             ramulator.busy_cycles_1_0_0                  76                                      # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_0
        ramulator.serving_requests_1_0_0                  76                                      # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0
-ramulator.average_serving_requests_1_0_0            0.056130                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0
+ramulator.average_serving_requests_1_0_0            0.053901                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0
         ramulator.active_cycles_1_0_0_0                  76                                      # Total active cycles for level _1_0_0_0
           ramulator.busy_cycles_1_0_0_0                  76                                      # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_0_0
      ramulator.serving_requests_1_0_0_0                  76                                      # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0_0
-ramulator.average_serving_requests_1_0_0_0            0.056130                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0_0
+ramulator.average_serving_requests_1_0_0_0            0.053901                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0_0
         ramulator.active_cycles_1_0_0_1                   0                                      # Total active cycles for level _1_0_0_1
           ramulator.busy_cycles_1_0_0_1                   0                                      # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_0_1
      ramulator.serving_requests_1_0_0_1                   0                                      # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0_1
@@ -232,9 +232,9 @@ ramulator.write_row_conflicts_channel_1_core                   0
      ramulator.useless_activates_1_core                   0                                      # Number of useless activations. E.g, ACT -> PRE w/o RD or WR
            ramulator.read_latency_avg_1           26.333333                                      # The average memory latency cycles (in memory time domain) per request for all read requests in this channel
            ramulator.read_latency_sum_1                  79                                      # The memory latency cycles (in memory time domain) sum for all read requests in this channel
-        ramulator.req_queue_length_avg_1            0.046529                                      # Average of read and write queue length per memory cycle per channel.
+        ramulator.req_queue_length_avg_1            0.044681                                      # Average of read and write queue length per memory cycle per channel.
        ramulator.req_queue_length_sum_1                  63                                      # Sum of read and write queue length per memory cycle per channel.
-   ramulator.read_req_queue_length_avg_1            0.046529                                      # Read queue length average per memory cycle per channel.
+   ramulator.read_req_queue_length_avg_1            0.044681                                      # Read queue length average per memory cycle per channel.
   ramulator.read_req_queue_length_sum_1                  63                                      # Read queue length sum per memory cycle per channel.
  ramulator.write_req_queue_length_avg_1            0.000000                                      # Write queue length average per memory cycle per channel.
  ramulator.write_req_queue_length_sum_1                   0                                      # Write queue length sum per memory cycle per channel.
@@ -251,7 +251,7 @@ ramulator.write_row_conflicts_channel_1_core                   0
        ramulator.record_write_conflicts                 0.0                                      # record write conflict for this core when it reaches request limit or to the end
                                     [0]                 0.0                                      # 
                 ramulator.dram_capacity          8589934592                                      # Number of bytes in simulated DRAM
-                   ramulator.dram_cycles                1354                                      # Number of DRAM cycles simulated
+                   ramulator.dram_cycles                1410                                      # Number of DRAM cycles simulated
             ramulator.incoming_requests                   6                                      # Number of incoming requests to DRAM
                 ramulator.read_requests                   6                                      # Number of incoming read requests to DRAM per core
                                     [0]                 6.0                                      # 
@@ -269,8 +269,8 @@ ramulator.incoming_read_reqs_per_channel                 6.0
          ramulator.in_queue_req_num_sum                 126                                      # Sum of read/write queue length
     ramulator.in_queue_read_req_num_sum                 126                                      # Sum of read queue length
    ramulator.in_queue_write_req_num_sum                   0                                      # Sum of write queue length
-          ramulator.in_queue_req_num_avg            0.093058                                      # Average of read/write queue length per memory cycle
-     ramulator.in_queue_read_req_num_avg            0.093058                                      # Average of read queue length per memory cycle
+          ramulator.in_queue_req_num_avg            0.089362                                      # Average of read/write queue length per memory cycle
+     ramulator.in_queue_read_req_num_avg            0.089362                                      # Average of read queue length per memory cycle
    ramulator.in_queue_write_req_num_avg            0.000000                                      # Average of write queue length per memory cycle
          ramulator.record_read_requests                 0.0                                      # record read requests for this core when it reaches request limit or to the end
                                     [0]                 0.0                                      #