tabs cleanup

2025-06-27 17:01:10 -04:00 · 2024-03-13 23:19:54 -07:00 · 2024-03-13 23:19:54 -07:00 · 07c063031f
commit 07c063031f
parent a8f2bb30da
11 changed files with 1112 additions and 1112 deletions
--- a/sim/simx/cache_cluster.h
+++ b/sim/simx/cache_cluster.h
@@ -19,88 +19,88 @@ namespace vortex {

 class CacheCluster : public SimObject<CacheCluster> {
 public:
-    std::vector<std::vector<SimPort<MemReq>>> CoreReqPorts;
-    std::vector<std::vector<SimPort<MemRsp>>> CoreRspPorts;
-    SimPort<MemReq> MemReqPort;
-    SimPort<MemRsp> MemRspPort;
+	std::vector<std::vector<SimPort<MemReq>>> CoreReqPorts;
+	std::vector<std::vector<SimPort<MemRsp>>> CoreRspPorts;
+	SimPort<MemReq> MemReqPort;
+	SimPort<MemRsp> MemRspPort;

-    CacheCluster(const SimContext& ctx, 
-                 const char* name, 
-                 uint32_t num_units, 
-                 uint32_t num_caches, 
-                 uint32_t num_requests,
-                 const CacheSim::Config& config) 
-        : SimObject(ctx, name)        
-        , CoreReqPorts(num_units, std::vector<SimPort<MemReq>>(num_requests, this))
-        , CoreRspPorts(num_units, std::vector<SimPort<MemRsp>>(num_requests, this))
-        , MemReqPort(this)
-        , MemRspPort(this)
-        , caches_(MAX(num_caches, 0x1)) {
+	CacheCluster(const SimContext& ctx, 
+							const char* name, 
+							uint32_t num_units, 
+							uint32_t num_caches, 
+							uint32_t num_requests,
+							const CacheSim::Config& config) 
+		: SimObject(ctx, name)
+		, CoreReqPorts(num_units, std::vector<SimPort<MemReq>>(num_requests, this))
+		, CoreRspPorts(num_units, std::vector<SimPort<MemRsp>>(num_requests, this))
+		, MemReqPort(this)
+		, MemRspPort(this)
+		, caches_(MAX(num_caches, 0x1)) {

-        CacheSim::Config config2(config);
-        if (0 == num_caches) {
-            num_caches = 1;
-            config2.bypass = true;
-        }
+		CacheSim::Config config2(config);
+		if (0 == num_caches) {
+			num_caches = 1;
+			config2.bypass = true;
+		}

-        char sname[100];
-        
-        std::vector<MemSwitch::Ptr> unit_arbs(num_units);
-        for (uint32_t u = 0; u < num_units; ++u) {
-            snprintf(sname, 100, "%s-unit-arb-%d", name, u);
-            unit_arbs.at(u) = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_requests, config.num_inputs);
-            for (uint32_t i = 0; i < num_requests; ++i) {
-                this->CoreReqPorts.at(u).at(i).bind(&unit_arbs.at(u)->ReqIn.at(i));
-                unit_arbs.at(u)->RspIn.at(i).bind(&this->CoreRspPorts.at(u).at(i));
-            }
-        }
+		char sname[100];
+		
+		std::vector<MemSwitch::Ptr> unit_arbs(num_units);
+		for (uint32_t u = 0; u < num_units; ++u) {
+			snprintf(sname, 100, "%s-unit-arb-%d", name, u);
+			unit_arbs.at(u) = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_requests, config.num_inputs);
+			for (uint32_t i = 0; i < num_requests; ++i) {
+				this->CoreReqPorts.at(u).at(i).bind(&unit_arbs.at(u)->ReqIn.at(i));
+				unit_arbs.at(u)->RspIn.at(i).bind(&this->CoreRspPorts.at(u).at(i));
+			}
+		}

-        std::vector<MemSwitch::Ptr> mem_arbs(config.num_inputs);
-        for (uint32_t i = 0; i < config.num_inputs; ++i) {
-            snprintf(sname, 100, "%s-mem-arb-%d", name, i);
-            mem_arbs.at(i) = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_units, num_caches);
-            for (uint32_t u = 0; u < num_units; ++u) {              
-                unit_arbs.at(u)->ReqOut.at(i).bind(&mem_arbs.at(i)->ReqIn.at(u));
-                mem_arbs.at(i)->RspIn.at(u).bind(&unit_arbs.at(u)->RspOut.at(i));
-            }            
-        }
+		std::vector<MemSwitch::Ptr> mem_arbs(config.num_inputs);
+		for (uint32_t i = 0; i < config.num_inputs; ++i) {
+			snprintf(sname, 100, "%s-mem-arb-%d", name, i);
+			mem_arbs.at(i) = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_units, num_caches);
+			for (uint32_t u = 0; u < num_units; ++u) {
+				unit_arbs.at(u)->ReqOut.at(i).bind(&mem_arbs.at(i)->ReqIn.at(u));
+				mem_arbs.at(i)->RspIn.at(u).bind(&unit_arbs.at(u)->RspOut.at(i));
+			}
+		}

-        snprintf(sname, 100, "%s-cache-arb", name);
-        auto cache_arb = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_caches, 1);
+		snprintf(sname, 100, "%s-cache-arb", name);
+		auto cache_arb = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_caches, 1);

-        for (uint32_t i = 0; i < num_caches; ++i) {
-            snprintf(sname, 100, "%s-cache%d", name, i);
-            caches_.at(i) = CacheSim::Create(sname, config2);
+		for (uint32_t i = 0; i < num_caches; ++i) {
+			snprintf(sname, 100, "%s-cache%d", name, i);
+			caches_.at(i) = CacheSim::Create(sname, config2);

-            for (uint32_t j = 0; j < config.num_inputs; ++j) {
-                mem_arbs.at(j)->ReqOut.at(i).bind(&caches_.at(i)->CoreReqPorts.at(j));
-                caches_.at(i)->CoreRspPorts.at(j).bind(&mem_arbs.at(j)->RspOut.at(i));
-            }
+			for (uint32_t j = 0; j < config.num_inputs; ++j) {
+				mem_arbs.at(j)->ReqOut.at(i).bind(&caches_.at(i)->CoreReqPorts.at(j));
+				caches_.at(i)->CoreRspPorts.at(j).bind(&mem_arbs.at(j)->RspOut.at(i));
+			}

-            caches_.at(i)->MemReqPort.bind(&cache_arb->ReqIn.at(i));
-            cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPort);
-        }
+			caches_.at(i)->MemReqPort.bind(&cache_arb->ReqIn.at(i));
+			cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPort);
+		}

-        cache_arb->ReqOut.at(0).bind(&this->MemReqPort);
-        this->MemRspPort.bind(&cache_arb->RspOut.at(0));
-    }
+		cache_arb->ReqOut.at(0).bind(&this->MemReqPort);
+		this->MemRspPort.bind(&cache_arb->RspOut.at(0));
+	}

-    ~CacheCluster() {}
+	~CacheCluster() {}

-    void reset() {}
-    
-    void tick() {}
+	void reset() {}
+	
+	void tick() {}
+
+	CacheSim::PerfStats perf_stats() const {
+		CacheSim::PerfStats perf;
+		for (auto cache : caches_) {
+			perf += cache->perf_stats();
+		} 
+		return perf;
+	}

-    CacheSim::PerfStats perf_stats() const {
-        CacheSim::PerfStats perf;
-        for (auto cache : caches_) {
-            perf += cache->perf_stats();
-        }   
-        return perf;
-    }
-    
 private:
-    std::vector<CacheSim::Ptr> caches_;
+  std::vector<CacheSim::Ptr> caches_;
 };

 }
--- a/sim/simx/cache_sim.cpp
+++ b/sim/simx/cache_sim.cpp
--- a/sim/simx/cache_sim.h
+++ b/sim/simx/cache_sim.h
@@ -20,76 +20,76 @@ namespace vortex {

 class CacheSim : public SimObject<CacheSim> {
 public:
-    struct Config {
-        bool    bypass;         // cache bypass
-        uint8_t C;              // log2 cache size
-        uint8_t L;              // log2 line size
-        uint8_t W;              // log2 word size
-        uint8_t A;              // log2 associativity
-        uint8_t B;              // log2 number of banks
-        uint8_t addr_width;     // word address bits        
-        uint8_t ports_per_bank; // number of ports per bank
-        uint8_t num_inputs;     // number of inputs
-        bool    write_through;  // is write-through
-        bool    write_reponse;  // enable write response
-        uint16_t mshr_size;     // MSHR buffer size
-        uint8_t latency;        // pipeline latency
-    };
-    
-    struct PerfStats {
-        uint64_t reads;
-        uint64_t writes;
-        uint64_t read_misses;
-        uint64_t write_misses;
-        uint64_t evictions;
-        uint64_t pipeline_stalls;
-        uint64_t bank_stalls;
-        uint64_t mshr_stalls;
-        uint64_t mem_latency;
+	struct Config {
+		bool    bypass;         // cache bypass
+		uint8_t C;              // log2 cache size
+		uint8_t L;              // log2 line size
+		uint8_t W;              // log2 word size
+		uint8_t A;              // log2 associativity
+		uint8_t B;              // log2 number of banks
+		uint8_t addr_width;     // word address bits
+		uint8_t ports_per_bank; // number of ports per bank
+		uint8_t num_inputs;     // number of inputs
+		bool    write_through;  // is write-through
+		bool    write_reponse;  // enable write response
+		uint16_t mshr_size;     // MSHR buffer size
+		uint8_t latency;        // pipeline latency
+	};
+	
+	struct PerfStats {
+		uint64_t reads;
+		uint64_t writes;
+		uint64_t read_misses;
+		uint64_t write_misses;
+		uint64_t evictions;
+		uint64_t pipeline_stalls;
+		uint64_t bank_stalls;
+		uint64_t mshr_stalls;
+		uint64_t mem_latency;

-        PerfStats() 
-            : reads(0)
-            , writes(0)
-            , read_misses(0)
-            , write_misses(0)
-            , evictions(0)
-            , pipeline_stalls(0)
-            , bank_stalls(0)
-            , mshr_stalls(0)
-            , mem_latency(0)
-        {}
+		PerfStats() 
+			: reads(0)
+			, writes(0)
+			, read_misses(0)
+			, write_misses(0)
+			, evictions(0)
+			, pipeline_stalls(0)
+			, bank_stalls(0)
+			, mshr_stalls(0)
+			, mem_latency(0)
+		{}

-        PerfStats& operator+=(const PerfStats& rhs) {
-            this->reads += rhs.reads;
-            this->writes += rhs.writes;
-            this->read_misses += rhs.read_misses;
-            this->write_misses += rhs.write_misses;
-            this->evictions += rhs.evictions;
-            this->pipeline_stalls += rhs.pipeline_stalls;
-            this->bank_stalls += rhs.bank_stalls;
-            this->mshr_stalls += rhs.mshr_stalls;
-            this->mem_latency += rhs.mem_latency;
-            return *this;
-        }
-    };
+		PerfStats& operator+=(const PerfStats& rhs) {
+			this->reads += rhs.reads;
+			this->writes += rhs.writes;
+			this->read_misses += rhs.read_misses;
+			this->write_misses += rhs.write_misses;
+			this->evictions += rhs.evictions;
+			this->pipeline_stalls += rhs.pipeline_stalls;
+			this->bank_stalls += rhs.bank_stalls;
+			this->mshr_stalls += rhs.mshr_stalls;
+			this->mem_latency += rhs.mem_latency;
+			return *this;
+		}
+	};

-    std::vector<SimPort<MemReq>> CoreReqPorts;
-    std::vector<SimPort<MemRsp>> CoreRspPorts;
-    SimPort<MemReq>              MemReqPort;
-    SimPort<MemRsp>              MemRspPort;
+	std::vector<SimPort<MemReq>> CoreReqPorts;
+	std::vector<SimPort<MemRsp>> CoreRspPorts;
+	SimPort<MemReq>              MemReqPort;
+	SimPort<MemRsp>              MemRspPort;

-    CacheSim(const SimContext& ctx, const char* name, const Config& config);
-    ~CacheSim();
+	CacheSim(const SimContext& ctx, const char* name, const Config& config);
+	~CacheSim();

-    void reset();
-    
-    void tick();
+	void reset();
+	
+	void tick();

-    const PerfStats& perf_stats() const;
-    
+	const PerfStats& perf_stats() const;
+	
 private:
-    class Impl;
-    Impl* impl_;
+	class Impl;
+	Impl* impl_;
 };

 }
--- a/sim/simx/dcrs.h
+++ b/sim/simx/dcrs.h
@@ -21,25 +21,25 @@ namespace vortex {

 class BaseDCRS {
 public:
-    uint32_t read(uint32_t addr) const {
-        uint32_t state = VX_DCR_BASE_STATE(addr);
-        return states_.at(state);
-    }
+  uint32_t read(uint32_t addr) const {
+    uint32_t state = VX_DCR_BASE_STATE(addr);
+    return states_.at(state);
+  }

-    void write(uint32_t addr, uint32_t value) {
-        uint32_t state = VX_DCR_BASE_STATE(addr);
-        states_.at(state) = value;
-    }
+	void write(uint32_t addr, uint32_t value) {
+		uint32_t state = VX_DCR_BASE_STATE(addr);
+		states_.at(state) = value;
+	}

-private:    
-    std::array<uint32_t, VX_DCR_BASE_STATE_COUNT> states_;
+private:
+  std::array<uint32_t, VX_DCR_BASE_STATE_COUNT> states_;
 };

 class DCRS {
 public:
-    void write(uint32_t addr, uint32_t value);
-    
-    BaseDCRS base_dcrs;
+  void write(uint32_t addr, uint32_t value);
+
+  BaseDCRS base_dcrs;
 };

 }
--- a/sim/simx/dispatcher.h
+++ b/sim/simx/dispatcher.h
@@ -21,120 +21,120 @@ namespace vortex {

 class Dispatcher : public SimObject<Dispatcher> {
 public:
-    std::vector<SimPort<instr_trace_t*>> Outputs;
+	std::vector<SimPort<instr_trace_t*>> Outputs;

-    Dispatcher(const SimContext& ctx, const Arch& arch, uint32_t buf_size, uint32_t block_size, uint32_t num_lanes) 
-        : SimObject<Dispatcher>(ctx, "Dispatcher") 
-        , Outputs(ISSUE_WIDTH, this)
-        , Inputs_(ISSUE_WIDTH, this)
-        , arch_(arch)
-        , queues_(ISSUE_WIDTH, std::queue<instr_trace_t*>())
-        , buf_size_(buf_size)        
-        , block_size_(block_size)        
-        , num_lanes_(num_lanes)        
-        , batch_count_(ISSUE_WIDTH / block_size)
-        , pid_count_(arch.num_threads() / num_lanes)
-        , batch_idx_(0)
-        , start_p_(block_size, 0)
-    {}
-    
-    virtual ~Dispatcher() {}
+	Dispatcher(const SimContext& ctx, const Arch& arch, uint32_t buf_size, uint32_t block_size, uint32_t num_lanes) 
+		: SimObject<Dispatcher>(ctx, "Dispatcher") 
+		, Outputs(ISSUE_WIDTH, this)
+		, Inputs_(ISSUE_WIDTH, this)
+		, arch_(arch)
+		, queues_(ISSUE_WIDTH, std::queue<instr_trace_t*>())
+		, buf_size_(buf_size)
+		, block_size_(block_size)
+		, num_lanes_(num_lanes)
+		, batch_count_(ISSUE_WIDTH / block_size)
+		, pid_count_(arch.num_threads() / num_lanes)
+		, batch_idx_(0)
+		, start_p_(block_size, 0)
+	{}
+	
+	virtual ~Dispatcher() {}

-    virtual void reset() {
-        batch_idx_ = 0;
-        for (uint32_t b = 0; b < block_size_; ++b) {
-            start_p_.at(b) = 0;
-        }
-    }
+	virtual void reset() {
+		batch_idx_ = 0;
+		for (uint32_t b = 0; b < block_size_; ++b) {
+			start_p_.at(b) = 0;
+		}
+	}

-    virtual void tick() {
-        for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
-            auto& queue = queues_.at(i);
-            if (queue.empty())
-                continue;
-            auto trace = queue.front();
-            Inputs_.at(i).push(trace, 1);
-            queue.pop();
-        }
+	virtual void tick() {
+		for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
+			auto& queue = queues_.at(i);
+			if (queue.empty())
+				continue;
+			auto trace = queue.front();
+			Inputs_.at(i).push(trace, 1);
+			queue.pop();
+		}

-        uint32_t block_sent = 0;
-        for (uint32_t b = 0; b < block_size_; ++b) {
-            uint32_t i = batch_idx_ * block_size_ + b;
-            auto& input = Inputs_.at(i);            
-            if (input.empty()) {
-                ++block_sent;
-                continue;
-            }
-            auto& output = Outputs.at(i);
-            auto trace = input.front();
-            auto new_trace = trace;
-            if (pid_count_ != 1) {
-                auto start_p = start_p_.at(b);
-                if (start_p == -1) {
-                    ++block_sent;
-                    continue;       
-                }             
-                int start(-1), end(-1);
-                for (uint32_t j = start_p * num_lanes_, n = arch_.num_threads(); j < n; ++j) {
-                    if (!trace->tmask.test(j))
-                        continue;
-                    if (start == -1)
-                        start = j;
-                    end = j;
-                }                
-                start /= num_lanes_;
-                end /= num_lanes_;                
-                if (start != end) {
-                    new_trace = new instr_trace_t(*trace);
-                    new_trace->eop = false;
-                    start_p_.at(b) = start + 1;
-                } else {
-                    start_p_.at(b) = -1;
-                    input.pop();
-                    ++block_sent;
-                }
-                new_trace->pid = start;
-                new_trace->sop = (0 == start_p);
-                ThreadMask tmask;
-                for (int j = start * num_lanes_, n = j + num_lanes_; j < n; ++j) {
-                    tmask[j] = trace->tmask[j];
-                }
-                new_trace->tmask = tmask;                
-            } else {
-                new_trace->pid = 0;
-                input.pop();
-                ++block_sent;
-            }
-            DT(3, "pipeline-dispatch: " << *new_trace);
-            output.push(new_trace, 1);
-        }
-        if (block_sent == block_size_) {
-            batch_idx_ = (batch_idx_ + 1) % batch_count_;
-            for (uint32_t b = 0; b < block_size_; ++b) {
-                start_p_.at(b) = 0;
-            }
-        }
-    };
+		uint32_t block_sent = 0;
+		for (uint32_t b = 0; b < block_size_; ++b) {
+			uint32_t i = batch_idx_ * block_size_ + b;
+			auto& input = Inputs_.at(i);
+			if (input.empty()) {
+				++block_sent;
+				continue;
+			}
+			auto& output = Outputs.at(i);
+			auto trace = input.front();
+			auto new_trace = trace;
+			if (pid_count_ != 1) {
+				auto start_p = start_p_.at(b);
+				if (start_p == -1) {
+					++block_sent;
+					continue; 
+				} 
+				int start(-1), end(-1);
+				for (uint32_t j = start_p * num_lanes_, n = arch_.num_threads(); j < n; ++j) {
+					if (!trace->tmask.test(j))
+						continue;
+					if (start == -1)
+						start = j;
+					end = j;
+				}
+				start /= num_lanes_;
+				end /= num_lanes_;
+				if (start != end) {
+					new_trace = new instr_trace_t(*trace);
+					new_trace->eop = false;
+					start_p_.at(b) = start + 1;
+				} else {
+					start_p_.at(b) = -1;
+					input.pop();
+					++block_sent;
+				}
+				new_trace->pid = start;
+				new_trace->sop = (0 == start_p);
+				ThreadMask tmask;
+				for (int j = start * num_lanes_, n = j + num_lanes_; j < n; ++j) {
+					tmask[j] = trace->tmask[j];
+				}
+				new_trace->tmask = tmask;
+			} else {
+				new_trace->pid = 0;
+				input.pop();
+				++block_sent;
+			}
+			DT(3, "pipeline-dispatch: " << *new_trace);
+			output.push(new_trace, 1);
+		}
+		if (block_sent == block_size_) {
+			batch_idx_ = (batch_idx_ + 1) % batch_count_;
+			for (uint32_t b = 0; b < block_size_; ++b) {
+				start_p_.at(b) = 0;
+			}
+		}
+	};

-    bool push(uint32_t issue_index, instr_trace_t* trace) {
-        auto& queue = queues_.at(issue_index);
-        if (queue.size() >= buf_size_)
-            return false;
-        queue.push(trace);        
-        return true;
-    }
+	bool push(uint32_t issue_index, instr_trace_t* trace) {
+		auto& queue = queues_.at(issue_index);
+		if (queue.size() >= buf_size_)
+			return false;
+		queue.push(trace);
+		return true;
+	}

 private:
-    std::vector<SimPort<instr_trace_t*>> Inputs_;
-    const Arch& arch_;
-    std::vector<std::queue<instr_trace_t*>> queues_;
-    uint32_t buf_size_;
-    uint32_t block_size_;
-    uint32_t num_lanes_;
-    uint32_t batch_count_;
-    uint32_t pid_count_;
-    uint32_t batch_idx_;
-    std::vector<int> start_p_;
+	std::vector<SimPort<instr_trace_t*>> Inputs_;
+	const Arch& arch_;
+	std::vector<std::queue<instr_trace_t*>> queues_;
+	uint32_t buf_size_;
+	uint32_t block_size_;
+	uint32_t num_lanes_;
+	uint32_t batch_count_;
+	uint32_t pid_count_;
+	uint32_t batch_idx_;
+	std::vector<int> start_p_;
 };

 }
--- a/sim/simx/ibuffer.h
+++ b/sim/simx/ibuffer.h
@@ -19,39 +19,39 @@
 namespace vortex {

 class IBuffer {
-public:    
-    IBuffer(uint32_t size) 
-        : capacity_(size)
-    {}
+public:
+	IBuffer(uint32_t size) 
+		: capacity_(size)
+	{}

-    bool empty() const {
-        return entries_.empty();
-    }
-    
-    bool full() const {
-        return (entries_.size() == capacity_);
-    }
+	bool empty() const {
+		return entries_.empty();
+	}
+	
+	bool full() const {
+		return (entries_.size() == capacity_);
+	}

-    instr_trace_t* top() const {
-        return entries_.front();
-    }
+	instr_trace_t* top() const {
+		return entries_.front();
+	}

-    void push(instr_trace_t* trace) {
-        entries_.emplace(trace);
-    }
+	void push(instr_trace_t* trace) {
+		entries_.emplace(trace);
+	}

-    void pop() {
-        return entries_.pop();
-    }
+	void pop() {
+		return entries_.pop();
+	}

-    void clear() {
-        std::queue<instr_trace_t*> empty;
-        std::swap(entries_, empty );
-    }
+	void clear() {
+		std::queue<instr_trace_t*> empty;
+		std::swap(entries_, empty );
+	}

 private:
-    std::queue<instr_trace_t*> entries_;
-    uint32_t capacity_;
+	std::queue<instr_trace_t*> entries_;
+	uint32_t capacity_;
 };

 }
--- a/sim/simx/local_mem.cpp
+++ b/sim/simx/local_mem.cpp
@@ -21,118 +21,118 @@ using namespace vortex;

 class LocalMem::Impl {
 protected:
-    LocalMem* simobject_;
-    Config    config_;
-    RAM       ram_;
-    uint32_t  bank_sel_addr_start_;
-    uint32_t  bank_sel_addr_end_;
-    PerfStats perf_stats_;
+	LocalMem* simobject_;
+	Config    config_;
+	RAM       ram_;
+	uint32_t  bank_sel_addr_start_;
+	uint32_t  bank_sel_addr_end_;
+	PerfStats perf_stats_;

-    uint64_t to_local_addr(uint64_t addr) {
-        uint32_t total_lines = config_.capacity / config_.line_size;        
-        uint32_t line_bits = log2ceil(total_lines);
-        uint32_t offset = bit_getw(addr, 0, line_bits-1);
-        return offset;
-    }
+	uint64_t to_local_addr(uint64_t addr) {
+		uint32_t total_lines = config_.capacity / config_.line_size;
+		uint32_t line_bits = log2ceil(total_lines);
+		uint32_t offset = bit_getw(addr, 0, line_bits-1);
+		return offset;
+	}

 public:
-    Impl(LocalMem* simobject, const Config& config) 
-        : simobject_(simobject)
-        , config_(config)
-        , ram_(config.capacity)
-        , bank_sel_addr_start_(0)
-        , bank_sel_addr_end_(0 + log2ceil(config.num_banks)-1)
-    {}    
-    
-    virtual ~Impl() {}
+	Impl(LocalMem* simobject, const Config& config) 
+		: simobject_(simobject)
+		, config_(config)
+		, ram_(config.capacity)
+		, bank_sel_addr_start_(0)
+		, bank_sel_addr_end_(0 + log2ceil(config.num_banks)-1)
+	{}
+	
+	virtual ~Impl() {}

-    void reset() {
-        perf_stats_ = PerfStats();
-    }
+	void reset() {
+		perf_stats_ = PerfStats();
+	}

-    void read(void* data, uint64_t addr, uint32_t size) {
-        auto s_addr = to_local_addr(addr);        
-        DPH(3, "Local Mem addr=0x" << std::hex << s_addr << std::endl);
-        ram_.read(data, s_addr, size);
-    }
+	void read(void* data, uint64_t addr, uint32_t size) {
+		auto s_addr = to_local_addr(addr);
+		DPH(3, "Local Mem addr=0x" << std::hex << s_addr << std::endl);
+		ram_.read(data, s_addr, size);
+	}

-    void write(const void* data, uint64_t addr, uint32_t size) {
-        auto s_addr = to_local_addr(addr);        
-        DPH(3, "Local Mem addr=0x" << std::hex << s_addr << std::endl);
-        ram_.write(data, s_addr, size);
-    }
+	void write(const void* data, uint64_t addr, uint32_t size) {
+		auto s_addr = to_local_addr(addr);
+		DPH(3, "Local Mem addr=0x" << std::hex << s_addr << std::endl);
+		ram_.write(data, s_addr, size);
+	}

-    void tick() {
-        std::vector<bool> in_used_banks(config_.num_banks);
-        for (uint32_t req_id = 0; req_id < config_.num_reqs; ++req_id) {
-            auto& core_req_port = simobject_->Inputs.at(req_id);            
-            if (core_req_port.empty())
-                continue;
+	void tick() {
+		std::vector<bool> in_used_banks(config_.num_banks);
+		for (uint32_t req_id = 0; req_id < config_.num_reqs; ++req_id) {
+			auto& core_req_port = simobject_->Inputs.at(req_id);
+			if (core_req_port.empty())
+				continue;

-            auto& core_req = core_req_port.front();
+			auto& core_req = core_req_port.front();

-            uint32_t bank_id = 0;
-            if (bank_sel_addr_start_ <= bank_sel_addr_end_) {
-                bank_id = (uint32_t)bit_getw(core_req.addr, bank_sel_addr_start_, bank_sel_addr_end_);
-            }
+			uint32_t bank_id = 0;
+			if (bank_sel_addr_start_ <= bank_sel_addr_end_) {
+				bank_id = (uint32_t)bit_getw(core_req.addr, bank_sel_addr_start_, bank_sel_addr_end_);
+			}

-            // bank conflict check
-            if (in_used_banks.at(bank_id)) {
-                ++perf_stats_.bank_stalls;
-                continue;
-            }
+			// bank conflict check
+			if (in_used_banks.at(bank_id)) {
+				++perf_stats_.bank_stalls;
+				continue;
+			}

-            in_used_banks.at(bank_id) = true;
+			in_used_banks.at(bank_id) = true;

-            if (!core_req.write || config_.write_reponse) {
-                // send response
-                MemRsp core_rsp{core_req.tag, core_req.cid};
-                simobject_->Outputs.at(req_id).push(core_rsp, 1);
-            }
+			if (!core_req.write || config_.write_reponse) {
+				// send response
+				MemRsp core_rsp{core_req.tag, core_req.cid};
+				simobject_->Outputs.at(req_id).push(core_rsp, 1);
+			}

-            // update perf counters
-            perf_stats_.reads += !core_req.write;            
-            perf_stats_.writes += core_req.write;
+			// update perf counters
+			perf_stats_.reads += !core_req.write;
+			perf_stats_.writes += core_req.write;

-            // remove input
-            core_req_port.pop();
-        }
-    }
+			// remove input
+			core_req_port.pop();
+		}
+	}

-    const PerfStats& perf_stats() const { 
-        return perf_stats_; 
-    }
+	const PerfStats& perf_stats() const { 
+		return perf_stats_; 
+	}
 };

 ///////////////////////////////////////////////////////////////////////////////

 LocalMem::LocalMem(const SimContext& ctx, const char* name, const Config& config) 
-    : SimObject<LocalMem>(ctx, name)   
-    , Inputs(config.num_reqs, this)
-    , Outputs(config.num_reqs, this)
-    , impl_(new Impl(this, config))
+	: SimObject<LocalMem>(ctx, name) 
+	, Inputs(config.num_reqs, this)
+	, Outputs(config.num_reqs, this)
+	, impl_(new Impl(this, config))
 {}

 LocalMem::~LocalMem() {
-    delete impl_;
+  delete impl_;
 }

 void LocalMem::reset() {
-    impl_->reset();
+  impl_->reset();
 }

 void LocalMem::read(void* data, uint64_t addr, uint32_t size) {
-    impl_->read(data, addr, size);
+  impl_->read(data, addr, size);
 }

 void LocalMem::write(const void* data, uint64_t addr, uint32_t size) {
-    impl_->write(data, addr, size);
+  impl_->write(data, addr, size);
 }

 void LocalMem::tick() {
-    impl_->tick();
+  impl_->tick();
 }

 const LocalMem::PerfStats& LocalMem::perf_stats() const {
-    return impl_->perf_stats();
+  return impl_->perf_stats();
 }
--- a/sim/simx/mem_sim.cpp
+++ b/sim/simx/mem_sim.cpp
@@ -32,105 +32,105 @@ using namespace vortex;

 class MemSim::Impl {
 private:
-    MemSim* simobject_;
-    Config config_;
-    PerfStats perf_stats_;
-    ramulator::Gem5Wrapper* dram_;
+	MemSim* simobject_;
+	Config config_;
+	PerfStats perf_stats_;
+	ramulator::Gem5Wrapper* dram_;

 public:

-    Impl(MemSim* simobject, const Config& config) 
-        : simobject_(simobject)
-        , config_(config)
-    {
-        ramulator::Config ram_config;
-        ram_config.add("standard", "DDR4");
-        ram_config.add("channels", std::to_string(config.channels));
-        ram_config.add("ranks", "1");
-        ram_config.add("speed", "DDR4_2400R");
-        ram_config.add("org", "DDR4_4Gb_x8");
-        ram_config.add("mapping", "defaultmapping");
-        ram_config.set_core_num(config.num_cores);
-        dram_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE);
-        Stats::statlist.output("ramulator.ddr4.log");
-    }
+	Impl(MemSim* simobject, const Config& config) 
+		: simobject_(simobject)
+		, config_(config)
+	{
+		ramulator::Config ram_config;
+		ram_config.add("standard", "DDR4");
+		ram_config.add("channels", std::to_string(config.channels));
+		ram_config.add("ranks", "1");
+		ram_config.add("speed", "DDR4_2400R");
+		ram_config.add("org", "DDR4_4Gb_x8");
+		ram_config.add("mapping", "defaultmapping");
+		ram_config.set_core_num(config.num_cores);
+		dram_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE);
+		Stats::statlist.output("ramulator.ddr4.log");
+	}

-    ~Impl() {
-        dram_->finish();
-        Stats::statlist.printall();
-        delete dram_;
-    }
+	~Impl() {
+		dram_->finish();
+		Stats::statlist.printall();
+		delete dram_;
+	}

-    const PerfStats& perf_stats() const {
-        return perf_stats_;
-    }
+	const PerfStats& perf_stats() const {
+		return perf_stats_;
+	}

-    void dram_callback(ramulator::Request& req, uint32_t tag, uint64_t uuid) {
-        if (req.type == ramulator::Request::Type::WRITE)
-            return;
-        MemRsp mem_rsp{tag, (uint32_t)req.coreid, uuid};
-        simobject_->MemRspPort.push(mem_rsp, 1);
-        DT(3, simobject_->name() << "-" << mem_rsp);
-    }
+	void dram_callback(ramulator::Request& req, uint32_t tag, uint64_t uuid) {
+		if (req.type == ramulator::Request::Type::WRITE)
+			return;
+		MemRsp mem_rsp{tag, (uint32_t)req.coreid, uuid};
+		simobject_->MemRspPort.push(mem_rsp, 1);
+		DT(3, simobject_->name() << "-" << mem_rsp);
+	}

-    void reset() {
-        perf_stats_ = PerfStats();
-    }
+	void reset() {
+		perf_stats_ = PerfStats();
+	}

-    void tick() {
-        if (MEM_CYCLE_RATIO > 0) { 
-            auto cycle = SimPlatform::instance().cycles();
-            if ((cycle % MEM_CYCLE_RATIO) == 0)
-                dram_->tick();
-        } else {
-            for (int i = MEM_CYCLE_RATIO; i <= 0; ++i)
-                dram_->tick();            
-        }
-              
-        if (simobject_->MemReqPort.empty())
-            return;
-        
-        auto& mem_req = simobject_->MemReqPort.front();
+	void tick() {
+		if (MEM_CYCLE_RATIO > 0) {
+			auto cycle = SimPlatform::instance().cycles();
+			if ((cycle % MEM_CYCLE_RATIO) == 0)
+				dram_->tick();
+		} else {
+			for (int i = MEM_CYCLE_RATIO; i <= 0; ++i)
+				dram_->tick();
+		}
+					
+		if (simobject_->MemReqPort.empty())
+			return;
+		
+		auto& mem_req = simobject_->MemReqPort.front();

-        ramulator::Request dram_req( 
-            mem_req.addr,
-            mem_req.write ? ramulator::Request::Type::WRITE : ramulator::Request::Type::READ,
-            std::bind(&Impl::dram_callback, this, placeholders::_1, mem_req.tag, mem_req.uuid),
-            mem_req.cid
-        );
+		ramulator::Request dram_req( 
+			mem_req.addr,
+			mem_req.write ? ramulator::Request::Type::WRITE : ramulator::Request::Type::READ,
+			std::bind(&Impl::dram_callback, this, placeholders::_1, mem_req.tag, mem_req.uuid),
+			mem_req.cid
+		);

-        if (!dram_->send(dram_req))
-            return;
-        
-        if (mem_req.write) {
-            ++perf_stats_.writes;
-        } else {
-            ++perf_stats_.reads;
-        }
-        
-        DT(3, simobject_->name() << "-" << mem_req);
+		if (!dram_->send(dram_req))
+			return;
+		
+		if (mem_req.write) {
+			++perf_stats_.writes;
+		} else {
+			++perf_stats_.reads;
+		}
+		
+		DT(3, simobject_->name() << "-" << mem_req);

-        simobject_->MemReqPort.pop();        
-    }
+		simobject_->MemReqPort.pop();
+	}
 };

 ///////////////////////////////////////////////////////////////////////////////

 MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config) 
-    : SimObject<MemSim>(ctx, name)
-    , MemReqPort(this) 
-    , MemRspPort(this)
-    , impl_(new Impl(this, config))
+	: SimObject<MemSim>(ctx, name)
+	, MemReqPort(this) 
+	, MemRspPort(this)
+	, impl_(new Impl(this, config))
 {}

 MemSim::~MemSim() {
-    delete impl_;
+  delete impl_;
 }

 void MemSim::reset() {
-    impl_->reset();
+  impl_->reset();
 }

 void MemSim::tick() {
-    impl_->tick();
+  impl_->tick();
 }
--- a/sim/simx/mem_sim.h
+++ b/sim/simx/mem_sim.h
@@ -20,36 +20,36 @@ namespace vortex {

 class MemSim : public SimObject<MemSim>{
 public:
-    struct Config {        
-        uint32_t channels;      
-        uint32_t num_cores;
-    };
+	struct Config {
+		uint32_t channels;
+		uint32_t num_cores;
+	};

-    struct PerfStats {
-        uint64_t reads;
-        uint64_t writes;
+	struct PerfStats {
+		uint64_t reads;
+		uint64_t writes;

-        PerfStats() 
-            : reads(0)
-            , writes(0)
-        {}
-    };
+		PerfStats() 
+			: reads(0)
+			, writes(0)
+		{}
+	};

-    SimPort<MemReq> MemReqPort;
-    SimPort<MemRsp> MemRspPort;
+	SimPort<MemReq> MemReqPort;
+	SimPort<MemRsp> MemRspPort;

-    MemSim(const SimContext& ctx, const char* name, const Config& config);
-    ~MemSim();
+	MemSim(const SimContext& ctx, const char* name, const Config& config);
+	~MemSim();

-    void reset();
+	void reset();

-    void tick();
+	void tick();

-    const PerfStats& perf_stats() const;
-    
+	const PerfStats& perf_stats() const;
+	
 private:
-    class Impl;
-    Impl* impl_;
+	class Impl;
+	Impl* impl_;
 };

 };
--- a/sim/simx/operand.h
+++ b/sim/simx/operand.h
@ -23,37 +23,37 @@ public:
    SimPort<instr_trace_t*> Output;

    Operand(const SimContext& ctx) 
-        : SimObject<Operand>(ctx, "Operand") 
-        , Input(this)
-        , Output(this)
+			: SimObject<Operand>(ctx, "Operand") 
+			, Input(this)
+			, Output(this)
    {}
-    
+
    virtual ~Operand() {}

    virtual void reset() {}

    virtual void tick() {
-        if (Input.empty())
-            return;
-        auto trace = Input.front();
+			if (Input.empty())
+				return;
+			auto trace = Input.front();

-        int delay = 1;
-        for (int i = 0; i < MAX_NUM_REGS; ++i) {
-            bool is_iregs = trace->used_iregs.test(i);
-            bool is_fregs = trace->used_fregs.test(i);
-            bool is_vregs = trace->used_vregs.test(i);
-            if (is_iregs || is_fregs || is_vregs) {
-                if (is_iregs && i == 0)
-                    continue;
-                ++delay;
-            }
-        }
+			int delay = 1;
+			for (int i = 0; i < MAX_NUM_REGS; ++i) {
+				bool is_iregs = trace->used_iregs.test(i);
+				bool is_fregs = trace->used_fregs.test(i);
+				bool is_vregs = trace->used_vregs.test(i);
+				if (is_iregs || is_fregs || is_vregs) {
+					if (is_iregs && i == 0)
+						continue;
+					++delay;
+				}
+			}

-        Output.push(trace, delay);
-        
-        DT(3, "pipeline-operands: " << *trace);
+			Output.push(trace, delay);
+			
+			DT(3, "pipeline-operands: " << *trace);

-        Input.pop();
+			Input.pop();
    };
 };

--- a/sim/simx/scoreboard.h
+++ b/sim/simx/scoreboard.h
@ -22,98 +22,98 @@ namespace vortex {
 class Scoreboard {
 public:

-    struct reg_use_t {
-        RegType  reg_type;
-        uint32_t reg_id; 
-        FUType  fu_type;
-        SfuType  sfu_type;        
-        uint64_t uuid;
-    };
-        
-    Scoreboard(const Arch &arch) 
-        : in_use_iregs_(arch.num_warps())
-        , in_use_fregs_(arch.num_warps())
-    {
-        this->clear();
-    }
+	struct reg_use_t {
+		RegType  reg_type;
+		uint32_t reg_id; 
+		FUType  fu_type;
+		SfuType  sfu_type;
+		uint64_t uuid;
+	};
+			
+	Scoreboard(const Arch &arch) 
+		: in_use_iregs_(arch.num_warps())
+		, in_use_fregs_(arch.num_warps())
+	{
+		this->clear();
+	}

-    void clear() {
-        for (uint32_t i = 0, n = in_use_iregs_.size(); i < n; ++i) {
-            in_use_iregs_.at(i).reset();
-            in_use_fregs_.at(i).reset();
-        }
-        owners_.clear();
-    }
+	void clear() {
+		for (uint32_t i = 0, n = in_use_iregs_.size(); i < n; ++i) {
+			in_use_iregs_.at(i).reset();
+			in_use_fregs_.at(i).reset();
+		}
+		owners_.clear();
+	}

-    bool in_use(instr_trace_t* trace) const {
-        return (trace->used_iregs & in_use_iregs_.at(trace->wid)) != 0 
-            || (trace->used_fregs & in_use_fregs_.at(trace->wid)) != 0;
-    }
+	bool in_use(instr_trace_t* trace) const {
+		return (trace->used_iregs & in_use_iregs_.at(trace->wid)) != 0 
+				|| (trace->used_fregs & in_use_fregs_.at(trace->wid)) != 0;
+	}

-    std::vector<reg_use_t> get_uses(instr_trace_t* trace) const {
-        std::vector<reg_use_t> out;  
-        
-        auto used_iregs = trace->used_iregs & in_use_iregs_.at(trace->wid);
-        auto used_fregs = trace->used_fregs & in_use_fregs_.at(trace->wid);
+	std::vector<reg_use_t> get_uses(instr_trace_t* trace) const {
+		std::vector<reg_use_t> out;
+		
+		auto used_iregs = trace->used_iregs & in_use_iregs_.at(trace->wid);
+		auto used_fregs = trace->used_fregs & in_use_fregs_.at(trace->wid);

-        for (uint32_t r = 0; r < MAX_NUM_REGS; ++r) {
-            if (used_iregs.test(r)) {
-                uint32_t tag = (r << 16) | (trace->wid << 4) | (int)RegType::Integer;
-                auto owner = owners_.at(tag);
-                out.push_back({RegType::Integer, r, owner->fu_type, owner->sfu_type, owner->uuid});
-            }
-        }
+		for (uint32_t r = 0; r < MAX_NUM_REGS; ++r) {
+			if (used_iregs.test(r)) {
+				uint32_t tag = (r << 16) | (trace->wid << 4) | (int)RegType::Integer;
+				auto owner = owners_.at(tag);
+				out.push_back({RegType::Integer, r, owner->fu_type, owner->sfu_type, owner->uuid});
+			}
+		}

-        for (uint32_t r = 0; r < MAX_NUM_REGS; ++r) {
-            if (used_fregs.test(r)) {
-                uint32_t tag = (r << 16) | (trace->wid << 4) | (int)RegType::Float;
-                auto owner = owners_.at(tag);
-                out.push_back({RegType::Float, r, owner->fu_type, owner->sfu_type, owner->uuid});
-            }
-        }
+		for (uint32_t r = 0; r < MAX_NUM_REGS; ++r) {
+			if (used_fregs.test(r)) {
+				uint32_t tag = (r << 16) | (trace->wid << 4) | (int)RegType::Float;
+				auto owner = owners_.at(tag);
+				out.push_back({RegType::Float, r, owner->fu_type, owner->sfu_type, owner->uuid});
+			}
+		}

-        return out;
-    }
-    
-    void reserve(instr_trace_t* trace) {
-        assert(trace->wb);  
-        switch (trace->rdest_type) {
-        case RegType::Integer:            
-            in_use_iregs_.at(trace->wid).set(trace->rdest);
-            break;
-        case RegType::Float:
-            in_use_fregs_.at(trace->wid).set(trace->rdest);
-            break;
-        default: 
-            assert(false);
-        }      
-        uint32_t tag = (trace->rdest << 16) | (trace->wid << 4) | (int)trace->rdest_type;
-        assert(owners_.count(tag) == 0);
-        owners_[tag] = trace;
-        assert((int)trace->fu_type < 5);
-    }
+		return out;
+	}
+	
+	void reserve(instr_trace_t* trace) {
+		assert(trace->wb);
+		switch (trace->rdest_type) {
+		case RegType::Integer:
+			in_use_iregs_.at(trace->wid).set(trace->rdest);
+			break;
+		case RegType::Float:
+			in_use_fregs_.at(trace->wid).set(trace->rdest);
+			break;
+		default: 
+			assert(false);
+		}
+		uint32_t tag = (trace->rdest << 16) | (trace->wid << 4) | (int)trace->rdest_type;
+		assert(owners_.count(tag) == 0);
+		owners_[tag] = trace;
+		assert((int)trace->fu_type < 5);
+	}

-    void release(instr_trace_t* trace) {
-        assert(trace->wb);      
-        switch (trace->rdest_type) {
-        case RegType::Integer:
-            in_use_iregs_.at(trace->wid).reset(trace->rdest);
-            break;
-        case RegType::Float:
-            in_use_fregs_.at(trace->wid).reset(trace->rdest);
-            break;
-        default: 
-            assert(false);
-        }      
-        uint32_t tag = (trace->rdest << 16) | (trace->wid << 4) | (int)trace->rdest_type;
-        owners_.erase(tag);
-    }
+	void release(instr_trace_t* trace) {
+		assert(trace->wb);
+		switch (trace->rdest_type) {
+		case RegType::Integer:
+			in_use_iregs_.at(trace->wid).reset(trace->rdest);
+			break;
+		case RegType::Float:
+			in_use_fregs_.at(trace->wid).reset(trace->rdest);
+			break;
+		default: 
+			assert(false);
+		}
+		uint32_t tag = (trace->rdest << 16) | (trace->wid << 4) | (int)trace->rdest_type;
+		owners_.erase(tag);
+	}

 private:

-    std::vector<RegMask> in_use_iregs_;
-    std::vector<RegMask> in_use_fregs_;
-    std::unordered_map<uint32_t, instr_trace_t*> owners_;
+	std::vector<RegMask> in_use_iregs_;
+	std::vector<RegMask> in_use_fregs_;
+	std::unordered_map<uint32_t, instr_trace_t*> owners_;
 };

 }