~

3700081a · songxinkai · a33653ee · 3700081a · 3700081a · 3700081a
Commit 3700081a authored Nov 22, 2019 by songxinkai
15 changed files
--- a/.gitignore
+++ b/.gitignore
 bazel-*
 *.swp
 log
+*.cmdtrace
--- a/build.sh
+++ b/build.sh
 #!/bin/bash
-bazel build --copt="-DRAMULATOR" //ramulator:main
-bazel build --copt="-DRAMULATOR" //ramulator:testram
-bazel build --copt="-DRAMULATOR" //ramulator:testram_break
+#bazel build --copt="-DRAMULATOR" //ramulator:main
+#bazel build --copt="-DRAMULATOR" //ramulator:testram
+#bazel build --copt="-DRAMULATOR" //ramulator:testram_break
+#bazel build --copt="-DRAMULATOR" //ramulator:read_bin_edge
+bazel build --copt="-g" --copt="-O0" --copt="-fpic" --strip=never --copt="-DRAMULATOR" //ramulator:testram_break && \
+bazel build --copt="-g" --copt="-O0" --copt="-fpic" --strip=never --copt="-DRAMULATOR" //ramulator:read_bin_edge
--- a/configs/DDR4-config.cfg
+++ b/configs/DDR4-config.cfg
@@ -5,10 +5,11 @@
 standard = DDR4
 channels = 1
 ranks = 1
+ #speed = DDR4_3200
 speed = DDR4_2400R
 org = DDR4_4Gb_x8
 # record_cmd_trace: (default is off): on, off
- record_cmd_trace = off
+ record_cmd_trace = on
 # print_cmd_trace: (default is off): on, off
 print_cmd_trace = off


--- a/ramulator/BUILD
+++ b/ramulator/BUILD
 cc_binary(
+  name="read_bin_edge",
+  srcs=[
+    "read_bin_edge.cpp",
+  ],
+  deps=[
+    "ram",
+  ],
+  copts=[
+    "-O3",
+    "-std=c++11",
+    "-g",
+    "-Wall",
+    ],
+)
+
+cc_binary(
  name="testram_break",
  srcs=[
    "testram_break.cpp",
@@ -167,6 +183,7 @@ cc_library(
    ":SpeedyController",
    ":Statistics",
    ":GDDR5",
+    ":DDR4",
    ":HBM",
    ":LPDDR3",
    ":LPDDR4",

--- a/ramulator/Controller.h
+++ b/ramulator/Controller.h
@@ -84,7 +84,7 @@ public:
    Queue readq;  // queue for read requests
    Queue writeq;  // queue for write requests
    Queue actq; // read and write requests for which activate was issued are moved to 
-                   // actq, which has higher priority than readq and writeq.
+                   // actq, which ***has higher priority*** than readq and writeq.
                   // This is an optimization
                   // for avoiding useless activations (i.e., PRECHARGE
                   // after ACTIVATE w/o READ of WRITE command)

--- a/ramulator/DRAM.h
+++ b/ramulator/DRAM.h
@@ -248,6 +248,7 @@ template <typename T>
 typename T::Command DRAM<T>::decode(typename T::Command cmd, const int* addr)
 {
    int child_id = addr[int(level)+1];
+    //if (level == T::Level::Rank) cout << "decode bank id: " << child_id << endl;
    if (prereq[int(cmd)]) {
        typename T::Command prereq_cmd = prereq[int(cmd)](this, cmd, child_id);
        if (prereq_cmd != T::Command::MAX)
@@ -405,6 +406,7 @@ void DRAM<T>::update_timing(typename T::Command cmd, const int* addr, long clk)

 }

+// Update the number of requests it serves currently
 template <typename T>
 void DRAM<T>::update_serving_requests(const int* addr, int delta, long clk) {
  assert(id == addr[int(level)]);

--- a/ramulator/Memory.h
+++ b/ramulator/Memory.h
@@ -8,6 +8,7 @@
 #include "SpeedyController.h"
 #include "Statistics.h"
 #include "GDDR5.h"
+#include "DDR4.h"
 #include "HBM.h"
 #include "LPDDR3.h"
 #include "LPDDR4.h"
@@ -46,6 +47,8 @@ public:
 template <class T, template<typename> class Controller = Controller >
 class Memory : public MemoryBase
 {
+public:
+  double get_num_dram_cycles(){return num_dram_cycles.value();}
 protected:
  ScalarStat dram_capacity;
  ScalarStat num_dram_cycles;
@@ -79,6 +82,7 @@ public:
        RoBaRaCoCh,
        MAX,
    } type = Type::RoBaRaCoCh;
+    //} type = Type::ChRaBaRoCo;

    enum class Translation {
      None,
@@ -137,7 +141,7 @@ public:
        max_address = spec->channel_width / 8;

        for (unsigned int lev = 0; lev < addr_bits.size(); lev++) {
-          addr_bits[lev] = calc_log2(sz[lev]);
+          addr_bits[lev] = calc_log2(sz[lev]); // org_entry.count
            max_address *= sz[lev];
        }

@@ -333,6 +337,8 @@ public:
        }

        if(ctrls[req.addr_vec[0]]->enqueue(req)) {
+            // cout << "req bank id: " << req.addr_vec[int(T::Level::Bank)] 
+            //     << ", bank group: " << req.addr_vec[int(T::Level::Bank) - 1] << endl;
            // tally stats here to avoid double counting for requests that aren't enqueued
            ++num_incoming_requests;
            if (req.type == Request::Type::READ) {

--- a/ramulator/Scheduler.h
+++ b/ramulator/Scheduler.h
@@ -216,7 +216,8 @@ public:

    enum class Type {
        Closed, ClosedAP, Opened, Timeout, MAX
-    } type = Type::Opened;
+    //} type = Type::Opened;
+    } type = Type::Closed;

    int timeout = 50;


--- a/ramulator/ram.cpp
+++ b/ramulator/ram.cpp
@@ -4,7 +4,6 @@
 template<typename T>
 Ram<T>::Ram(const std::string& config_file){
  clk_ = 0;
-  frequency_ = 1;
  Config configs(config_file);
  const std::string& standard = configs["standard"];
  assert(standard != "" || "DRAM standard should be specified.");
@@ -95,7 +94,7 @@ void Ram<T>::InitMemory(const Config& configs, T* spec){
  // Loop: memory_->tick()
 template<typename T>
 void Ram<T>::WaitUntil(const double time){
-  long ram_time = long(std::ceil(time * frequency_));
+  long ram_time = long(std::ceil(time / memory_->clk_ns()));
  while (clk_ < ram_time){
    memory_->tick();
    clk_ ++;
@@ -105,7 +104,7 @@ void Ram<T>::WaitUntil(const double time){
 // Loop: memory_->tick()
 template<typename T>
 void Ram<T>::WaitFor(const double time){
-  long ram_time = long(std::ceil(time * this->frequency_));
+  long ram_time = long(std::ceil(time / memory_->clk_ns()));
  for (long i = 0; i < ram_time; ++i){
    memory_->tick();
    clk_ ++;
@@ -126,7 +125,7 @@ bool Ram<T>::AccessCommand(const long req_addr, const Request::Type req_type, in
    });
  Request req(req_addr_param, req_type_param, callback, 0);
  bool res = memory_->send(req);
-  cout << "request id " << message << ", send result: " << res << ", memory_->clk_ns(): " << memory_->clk_ns() << endl;
+  // cout << "request_addr: " << req_addr << ", memory_->get_num_dram_cycles(): " << memory_->get_num_dram_cycles() << endl;
  memory_->tick();
  clk_++;
  return res;
@@ -140,6 +139,7 @@ bool Ram<T>::AccessCommand(const long req_addr, const Request::Type req_type, st
  auto clk = this->clk_;
  Request req(req_addr_param, req_type_param, callback, 0);
  bool res = memory_->send(req);
+  // cout << "request_addr: " << req_addr << ", memory_->get_num_dram_cycles(): " << memory_->get_num_dram_cycles() << endl;
  memory_->tick();
  clk_++;
  return res;
@@ -164,29 +164,48 @@ double Ram<T>::AccessAndWaitUntilReturn(const long req_addr, const Request::Type

 template<typename T>
 double Ram<T>::ConvertRamCycle2Realtime(const long cycles){
-  return double(cycles) / frequency_;
+  return double(cycles) * memory_->clk_ns();
 }

 template<typename T>
 long Ram<T>::ConvertRealtime2RamCycle(const double realtime){
-  return long(double(realtime) * frequency_);
+  return long(double(realtime) / memory_->clk_ns());
 }

 template<typename T>
-void Ram<T>::SetFrequency(const double r){
-  this->frequency_ = r;
+double Ram<T>::GetClock(){
+  return this->clk_;
 }

 template<typename T>
-double Ram<T>::GetClockRealtime(){
-  return double(this->clk_) * frequency_;
+double Ram<T>::GetMemoryClock(){
+  return this->memory_->get_num_dram_cycles();
 }

 template<typename T>
-long Ram<T>::GetClockRam(){
-  return this->clk_;
+double Ram<T>::GetClockNs(){
+  return this->clk_ * memory_->clk_ns();
 }

+template<typename T>
+double Ram<T>::GetClockUs(){
+  return GetClockNs() / 1000.;
+}
+
+template<typename T>
+double Ram<T>::GetClockMs(){
+  return GetClockUs() / 1000.;
+}
+
+template<typename T>
+double Ram<T>::GetClockS(){
+  return GetClockMs() / 1000.;
+}
+
+template<typename T>
+void Ram<T>::ResetClock(){
+  this->clk_ = 0;
+}

 template<typename T>
 void Ram<T>::PrintCtrlsQueuesSize(){
@@ -204,12 +223,12 @@ template class Ram<ramulator::DDR4>;
 template class Ram<ramulator::GDDR5>;
 template class Ram<ramulator::LPDDR3>;
 template class Ram<ramulator::LPDDR4>;
-//template class Ram<ramulator::DSARP>;
 template class Ram<ramulator::WideIO>;
-//template class Ram<ramulator::WideIO2>;
 template class Ram<ramulator::HBM>;
-//template class Ram<ramulator::SALP>;
 template class Ram<ramulator::ALDRAM>;
-//template class Ram<ramulator::TLDRAM>;
 template class Ram<ramulator::STTMRAM>;
 template class Ram<ramulator::PCM>;
+//template class Ram<ramulator::TLDRAM>;
+//template class Ram<ramulator::SALP>;
+//template class Ram<ramulator::WideIO2>;
+//template class Ram<ramulator::DSARP>;
--- a/ramulator/ram.h
+++ b/ramulator/ram.h
@@ -43,14 +43,18 @@ public:
  double AccessAndWaitUntilReturn(const long req_addr, const Request::Type req_type);
  double ConvertRamCycle2Realtime(const long ram_cycle);
  long ConvertRealtime2RamCycle(const double realtime);
-  void SetFrequency(const double r);
-  double GetClockRealtime(); // unit: ns
-  long GetClockRam(); // unit: ram cycle
+  double GetClock(); // unit: ram cycle
+  double GetMemoryClock(); // unit: ram cycle
+  double GetClockNs();
+  double GetClockUs();
+  double GetClockMs();
+  double GetClockS();
+  void ResetClock();
  const std::shared_ptr<Memory<T, Controller> >& GetMemory(){ return memory_; }
  void PrintCtrlsQueuesSize();
+  double clk_ns(){return memory_->clk_ns();}
 protected:
  void InitMemory(const Config& configs, T* spec);
-  double frequency_; // unit: Ghz
  long clk_;
  std::shared_ptr<Memory<T, Controller> > memory_;
 };
--- a/ramulator/read_bin_edge.cpp
+++ b/ramulator/read_bin_edge.cpp
+#include "ram.h"
+
+#include <assert.h>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <queue>
+#include <string>
+#include <vector>
+
+using namespace ramulator;
+using namespace std;
+
+#define CHANNEL_NUM 6
+
+// Replays a binary edge file as DRAM read requests spread over CHANNEL_NUM
+// independent Ram<DDR4> instances and prints the simulated time of each one.
+//
+// argv[1]: ramulator config file, handed to every Ram<DDR4> instance.
+// argv[2]: binary edge file read as consecutive records of four ints. Only
+//          buf[1] (used as the vertex id to fetch) and buf[2] (records where
+//          it is 0 are skipped) are interpreted here.
+// Returns 0 on success or after printing the usage text, 1 when the edge file
+// cannot be opened or contains a negative vertex id.
+int main(int argc, const char *argv[]){
+  if (argc < 3) {
+      printf("Usage: %s <configs-file> <bin-edges-file>\n"
+          "Example: %s ramulator-configs.cfg edges.bin\n",
+          argv[0], argv[0]);
+      return 0;
+  }
+  // Open the input first so that a bad path fails fast instead of looping on
+  // a stream that can never reach eof.
+  ifstream fedge(argv[2], ios::in|ios::binary);
+  if (!fedge.is_open()) {
+    cerr << "Cannot open edge file: " << argv[2] << endl;
+    return 1;
+  }
+  // One simulator per channel; unique_ptr releases them on every return path.
+  vector<unique_ptr<Ram<DDR4> > > ram;
+  ram.reserve(CHANNEL_NUM);
+  for (int c = 0; c < CHANNEL_NUM; ++c){
+    ram.emplace_back(new Ram<DDR4>(argv[1]));
+  }
+  const int vertex_unit_len = 16;
+  int request_num = 0;  // requests sent whose callback has not run yet
+  int buf[4];           // one edge record
+  long i = 0;
+  long total_access = 0;
+  // Loop on the result of read() rather than on eof(): eof() only becomes
+  // true after a read has already failed, which would process the last
+  // record a second time. A truncated trailing record is ignored.
+  while (fedge.read(reinterpret_cast<char*>(buf), sizeof(buf))){
+    if (buf[2] == 0) {
+      continue;
+    }
+    if (buf[1] < 0) {
+      // A negative id would give a negative channel index and address.
+      cerr << "Negative vertex id " << buf[1] << " in " << argv[2] << endl;
+      return 1;
+    }
+    ++total_access;
+    if (i % 1000000 == 0){
+      cout << "edge_id: " << i << ", request_num: " << request_num
+        << ", clk_ns: " << ram[i%CHANNEL_NUM]->GetClockNs()
+        << ", clk_us: " << ram[i%CHANNEL_NUM]->GetClockUs()
+        << ", clk_ms: " << ram[i%CHANNEL_NUM]->GetClockMs()
+        << ", clk_s: " << ram[i%CHANNEL_NUM]->GetClockS()
+        << endl;
+    }
+    i ++;
+    request_num ++;
+    // Widen before multiplying so that large vertex ids cannot overflow int.
+    const long addr = static_cast<long>(buf[1]) * vertex_unit_len * 4;
+    Ram<DDR4>& channel = *ram[buf[1] % CHANNEL_NUM];
+    // Retry until the request is accepted by the selected channel.
+    while (!channel.AccessCommand(addr, Request::Type::READ,
+      [&request_num](Request& req){
+        request_num --;
+        // The last completion brings the counter to exactly 0, so only a
+        // negative value signals a bookkeeping error.
+        assert(request_num >= 0);
+      }
+    )){}
+  }
+  fedge.close();
+  // Drain: advance every channel until all outstanding reads have completed.
+  while (request_num > 0){
+    for (int c = 0; c < CHANNEL_NUM; ++c){
+      ram[c]->WaitFor(1);
+    }
+  }
+  cout << "total_data: " << total_access << endl;
+  for (int c = 0; c < CHANNEL_NUM; ++c){
+    cout << "Ram[" << c << "]"
+      << ", total time clk_ms: " << ram[c]->GetClockMs()
+      << endl;
+  }
+  return 0;
+}
+
+/*
+  const int vertex_unit_len = 16;
+  int request_num = 0;
+  int row_size = 1024*128;
+  int* buf = new int[4];
+  ifstream fedge(argv[2], ios::in|ios::binary);
+  long i = 0;
+  long j = 0;
+  vector<queue<int> > req_q;
+  const int bank_num = 1; // DDR4 has 16 banks per channel
+  req_q.resize(bank_num);
+  int current_bank_id = 0;
+  while(!fedge.eof()){
+    fedge.read((char*)buf, 4*sizeof(int));
+    if (buf[2] == 0) {
+      continue;
+    }else {
+      ++j;
+    }
+    long addr = 0 + buf[1] * vertex_unit_len * 4;
+    int bank_id = int(addr / 8192) % bank_num;
+    req_q[bank_id].push(buf[1]);
+    request_num ++;
+  }
+  fedge.close();
+  for (int bank_id = 0; bank_id < bank_num; ++bank_id){
+    cout << "bank_id: " << bank_id << ", req_num: " << req_q[bank_id].size() << endl;
+  }
+  bool finish = false;
+  while(!finish){
+    if (i % 1000000 == 0){
+      cout << "edge_id: " << i << ", request_num: " << request_num 
+        << ", clk_ns: " << ram[i%CHANNEL_NUM]->GetClockNs() 
+        << ", clk_us: " << ram[i%CHANNEL_NUM]->GetClockUs() 
+        << ", clk_ms: " << ram[i%CHANNEL_NUM]->GetClockMs() 
+        << ", clk_s: " << ram[i%CHANNEL_NUM]->GetClockS() 
+        << endl;
+    }
+    i ++;
+    bool all_empty = true;
+    for (int bank_id = 0; bank_id < bank_num; ++bank_id){
+      //cout << req_q[bank_id].size() << ", ";
+      if (req_q[bank_id].size() != 0){
+        all_empty = false;
+      }else{
+        continue;
+      }
+      int vertex_id = req_q[bank_id].front();
+      req_q[bank_id].pop();
+      long addr = 0 + vertex_id * vertex_unit_len * 4;
+      while (!ram[(vertex_id-1)%CHANNEL_NUM]->AccessCommand(addr, Request::Type::READ, 
+        [&request_num](Request& req){
+          request_num --;
+          assert(request_num > 0);
+        }
+      )){}
+    }
+    finish = all_empty;
+  }
+*/
--- a/ramulator/testram_break.cpp
+++ b/ramulator/testram_break.cpp
@@ -13,36 +13,29 @@ int main(int argc, const char *argv[]){
            "Example: %s ramulator-configs.cfg 16\n", argv[0], argv[0]);
        return 0;
    }
-  Ram<ramulator::HBM> ram(argv[1]);
-  ram.SetFrequency(1.0);
-  ram.PrintCtrlsQueuesSize();
+  Ram<ramulator::DDR4> ram(argv[1]);
  int addr_interval = stoi(argv[2]);
-  for (int i = 0; i < 32; ++i){
-    while (!ram.AccessCommand(long(0 + i * addr_interval), ramulator::Request::Type::READ, i)){}
-    //ram.WaitFor(0);
-    //ram.PrintCtrlsQueuesSize();
+  long request_num = 0;
+  for (int i = 0; i < 320; ++i){ // 13264
+    request_num ++;
+    double send_clk = ram.GetClockNs();
+    while (!ram.AccessCommand(long(0 + i * addr_interval), ramulator::Request::Type::READ, 
+      [i, &request_num, send_clk, &ram](Request& req){
+        request_num --;
+        cout << "request: " << i << ", send: " << send_clk 
+             << ", recieve: " << ram.GetClockNs() 
+             << ", latency: " << ram.GetClockNs() - send_clk
+             << endl;
+      }
+    )){}
+    // while (request_num){
+    //   ram.WaitFor(1);
+    // }
  }
-  ram.WaitUntil(80000);
-  cout << "total cycles: " << ram.GetClockRealtime() << endl;
-
-edges = readfile(task_param.edge_file());
-
-  return 0;
-}
-
-
-void (){
-  // read edges from file
-  ifstream fedge(task_param.edge_aggregation_param().edge_file(), ios::in|ios::binary);
-  CHECK(fedge.is_open()) << task_param.edge_aggregation_param().edge_file() << "Not found.";
-  vector<vector<int> > edges;
-  int* tmp = new int[edge_num*edge_unit_len];
-  fedge.read((char*)tmp, edge_num*edge_unit_len*sizeof(int));
-  fedge.close();
-  edges.resize(edge_num);
-  for (int i = 0; i < edge_num; ++i){
-    for (int j = 0; j < edge_unit_len; ++j){
-      edges[i].push_back(tmp[edge_unit_len*i+j]);
-    }
+  while (request_num){
+    ram.WaitFor(1);
  }
+  cout << "total time(ns): " << ram.GetClockNs() << endl;
+  cout << "clk: " << ram.GetClock() << " v.s. " << ram.GetMemoryClock() << endl;
+  return 0;
 }
--- a/read_edges.sh
+++ b/read_edges.sh
+#!/bin/bash
+#gdb --args \
+./bazel-bin/ramulator/read_bin_edge ./configs/DDR4-config.cfg /home/songxinkai/bin_edges/edges.reddit.fea16.bin
--- a/run.sh
+++ b/run.sh
 #!/bin/bash
 #./bazel-bin/ramulator/testram ./configs/DDR4-config.cfg $1
-./bazel-bin/ramulator/testram_break ./configs/HBM-config.cfg $1
 #./bazel-bin/ramulator/main ./configs/DDR4-config.cfg $1
+./bazel-bin/ramulator/testram_break ./configs/DDR4-config.cfg $1
+#./bazel-bin/ramulator/read_bin_edge ./configs/DDR4-config.cfg /home/songxinkai/bin_edges/edges.reddit.fea16.bin
+#./bazel-bin/ramulator/read_bin_edge ./configs/HBM-config.cfg /home/songxinkai/bin_edges/edges.reddit.fea16.bin
--- a/test_ram.sh
+++ b/test_ram.sh
+#!/bin/bash
+
+./bazel-bin/ramulator/testram_break ./configs/DDR4-config.cfg $1