Commit 3700081a by songxinkai

~

parent a33653ee
bazel-* bazel-*
*.swp *.swp
log log
*.cmdtrace
#!/bin/bash #!/bin/bash
bazel build --copt="-DRAMULATOR" //ramulator:main #bazel build --copt="-DRAMULATOR" //ramulator:main
bazel build --copt="-DRAMULATOR" //ramulator:testram #bazel build --copt="-DRAMULATOR" //ramulator:testram
bazel build --copt="-DRAMULATOR" //ramulator:testram_break #bazel build --copt="-DRAMULATOR" //ramulator:testram_break
#bazel build --copt="-DRAMULATOR" //ramulator:read_bin_edge
bazel build --copt="-g" --copt="-O0" --copt="-fpic" --strip=never --copt="-DRAMULATOR" //ramulator:testram_break && \
bazel build --copt="-g" --copt="-O0" --copt="-fpic" --strip=never --copt="-DRAMULATOR" //ramulator:read_bin_edge
...@@ -5,10 +5,11 @@ ...@@ -5,10 +5,11 @@
standard = DDR4 standard = DDR4
channels = 1 channels = 1
ranks = 1 ranks = 1
#speed = DDR4_3200
speed = DDR4_2400R speed = DDR4_2400R
org = DDR4_4Gb_x8 org = DDR4_4Gb_x8
# record_cmd_trace: (default is off): on, off # record_cmd_trace: (default is off): on, off
record_cmd_trace = off record_cmd_trace = on
# print_cmd_trace: (default is off): on, off # print_cmd_trace: (default is off): on, off
print_cmd_trace = off print_cmd_trace = off
......
cc_binary( cc_binary(
name="read_bin_edge",
srcs=[
"read_bin_edge.cpp",
],
deps=[
"ram",
],
copts=[
"-O3",
"-std=c++11",
"-g",
"-Wall",
],
)
cc_binary(
name="testram_break", name="testram_break",
srcs=[ srcs=[
"testram_break.cpp", "testram_break.cpp",
...@@ -167,6 +183,7 @@ cc_library( ...@@ -167,6 +183,7 @@ cc_library(
":SpeedyController", ":SpeedyController",
":Statistics", ":Statistics",
":GDDR5", ":GDDR5",
":DDR4",
":HBM", ":HBM",
":LPDDR3", ":LPDDR3",
":LPDDR4", ":LPDDR4",
......
...@@ -84,7 +84,7 @@ public: ...@@ -84,7 +84,7 @@ public:
Queue readq; // queue for read requests Queue readq; // queue for read requests
Queue writeq; // queue for write requests Queue writeq; // queue for write requests
Queue actq; // read and write requests for which activate was issued are moved to Queue actq; // read and write requests for which activate was issued are moved to
// actq, which has higher priority than readq and writeq. // actq, which ***has higher priority*** than readq and writeq.
// This is an optimization // This is an optimization
// for avoiding useless activations (i.e., PRECHARGE // for avoiding useless activations (i.e., PRECHARGE
// after ACTIVATE w/o READ or WRITE command) // after ACTIVATE w/o READ or WRITE command)
......
...@@ -248,6 +248,7 @@ template <typename T> ...@@ -248,6 +248,7 @@ template <typename T>
typename T::Command DRAM<T>::decode(typename T::Command cmd, const int* addr) typename T::Command DRAM<T>::decode(typename T::Command cmd, const int* addr)
{ {
int child_id = addr[int(level)+1]; int child_id = addr[int(level)+1];
//if (level == T::Level::Rank) cout << "decode bank id: " << child_id << endl;
if (prereq[int(cmd)]) { if (prereq[int(cmd)]) {
typename T::Command prereq_cmd = prereq[int(cmd)](this, cmd, child_id); typename T::Command prereq_cmd = prereq[int(cmd)](this, cmd, child_id);
if (prereq_cmd != T::Command::MAX) if (prereq_cmd != T::Command::MAX)
...@@ -405,6 +406,7 @@ void DRAM<T>::update_timing(typename T::Command cmd, const int* addr, long clk) ...@@ -405,6 +406,7 @@ void DRAM<T>::update_timing(typename T::Command cmd, const int* addr, long clk)
} }
// Update the number of requests it serves currently
template <typename T> template <typename T>
void DRAM<T>::update_serving_requests(const int* addr, int delta, long clk) { void DRAM<T>::update_serving_requests(const int* addr, int delta, long clk) {
assert(id == addr[int(level)]); assert(id == addr[int(level)]);
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include "SpeedyController.h" #include "SpeedyController.h"
#include "Statistics.h" #include "Statistics.h"
#include "GDDR5.h" #include "GDDR5.h"
#include "DDR4.h"
#include "HBM.h" #include "HBM.h"
#include "LPDDR3.h" #include "LPDDR3.h"
#include "LPDDR4.h" #include "LPDDR4.h"
...@@ -46,6 +47,8 @@ public: ...@@ -46,6 +47,8 @@ public:
template <class T, template<typename> class Controller = Controller > template <class T, template<typename> class Controller = Controller >
class Memory : public MemoryBase class Memory : public MemoryBase
{ {
public:
// Returns the current value of the num_dram_cycles statistic.
double get_num_dram_cycles(){return num_dram_cycles.value();}
protected: protected:
ScalarStat dram_capacity; ScalarStat dram_capacity;
ScalarStat num_dram_cycles; ScalarStat num_dram_cycles;
...@@ -79,6 +82,7 @@ public: ...@@ -79,6 +82,7 @@ public:
RoBaRaCoCh, RoBaRaCoCh,
MAX, MAX,
} type = Type::RoBaRaCoCh; } type = Type::RoBaRaCoCh;
//} type = Type::ChRaBaRoCo;
enum class Translation { enum class Translation {
None, None,
...@@ -137,7 +141,7 @@ public: ...@@ -137,7 +141,7 @@ public:
max_address = spec->channel_width / 8; max_address = spec->channel_width / 8;
for (unsigned int lev = 0; lev < addr_bits.size(); lev++) { for (unsigned int lev = 0; lev < addr_bits.size(); lev++) {
addr_bits[lev] = calc_log2(sz[lev]); addr_bits[lev] = calc_log2(sz[lev]); // org_entry.count
max_address *= sz[lev]; max_address *= sz[lev];
} }
...@@ -333,6 +337,8 @@ public: ...@@ -333,6 +337,8 @@ public:
} }
if(ctrls[req.addr_vec[0]]->enqueue(req)) { if(ctrls[req.addr_vec[0]]->enqueue(req)) {
// cout << "req bank id: " << req.addr_vec[int(T::Level::Bank)]
// << ", bank group: " << req.addr_vec[int(T::Level::Bank) - 1] << endl;
// tally stats here to avoid double counting for requests that aren't enqueued // tally stats here to avoid double counting for requests that aren't enqueued
++num_incoming_requests; ++num_incoming_requests;
if (req.type == Request::Type::READ) { if (req.type == Request::Type::READ) {
......
...@@ -216,7 +216,8 @@ public: ...@@ -216,7 +216,8 @@ public:
enum class Type { enum class Type {
Closed, ClosedAP, Opened, Timeout, MAX Closed, ClosedAP, Opened, Timeout, MAX
} type = Type::Opened; //} type = Type::Opened;
} type = Type::Closed;
int timeout = 50; int timeout = 50;
......
...@@ -4,7 +4,6 @@ ...@@ -4,7 +4,6 @@
template<typename T> template<typename T>
Ram<T>::Ram(const std::string& config_file){ Ram<T>::Ram(const std::string& config_file){
clk_ = 0; clk_ = 0;
frequency_ = 1;
Config configs(config_file); Config configs(config_file);
const std::string& standard = configs["standard"]; const std::string& standard = configs["standard"];
assert(standard != "" || "DRAM standard should be specified."); assert(standard != "" || "DRAM standard should be specified.");
...@@ -95,7 +94,7 @@ void Ram<T>::InitMemory(const Config& configs, T* spec){ ...@@ -95,7 +94,7 @@ void Ram<T>::InitMemory(const Config& configs, T* spec){
// Loop: memory_->tick() // Loop: memory_->tick()
template<typename T> template<typename T>
void Ram<T>::WaitUntil(const double time){ void Ram<T>::WaitUntil(const double time){
long ram_time = long(std::ceil(time * frequency_)); long ram_time = long(std::ceil(time / memory_->clk_ns()));
while (clk_ < ram_time){ while (clk_ < ram_time){
memory_->tick(); memory_->tick();
clk_ ++; clk_ ++;
...@@ -105,7 +104,7 @@ void Ram<T>::WaitUntil(const double time){ ...@@ -105,7 +104,7 @@ void Ram<T>::WaitUntil(const double time){
// Loop: memory_->tick() // Loop: memory_->tick()
template<typename T> template<typename T>
void Ram<T>::WaitFor(const double time){ void Ram<T>::WaitFor(const double time){
long ram_time = long(std::ceil(time * this->frequency_)); long ram_time = long(std::ceil(time / memory_->clk_ns()));
for (long i = 0; i < ram_time; ++i){ for (long i = 0; i < ram_time; ++i){
memory_->tick(); memory_->tick();
clk_ ++; clk_ ++;
...@@ -126,7 +125,7 @@ bool Ram<T>::AccessCommand(const long req_addr, const Request::Type req_type, in ...@@ -126,7 +125,7 @@ bool Ram<T>::AccessCommand(const long req_addr, const Request::Type req_type, in
}); });
Request req(req_addr_param, req_type_param, callback, 0); Request req(req_addr_param, req_type_param, callback, 0);
bool res = memory_->send(req); bool res = memory_->send(req);
cout << "request id " << message << ", send result: " << res << ", memory_->clk_ns(): " << memory_->clk_ns() << endl; // cout << "request_addr: " << req_addr << ", memory_->get_num_dram_cycles(): " << memory_->get_num_dram_cycles() << endl;
memory_->tick(); memory_->tick();
clk_++; clk_++;
return res; return res;
...@@ -140,6 +139,7 @@ bool Ram<T>::AccessCommand(const long req_addr, const Request::Type req_type, st ...@@ -140,6 +139,7 @@ bool Ram<T>::AccessCommand(const long req_addr, const Request::Type req_type, st
auto clk = this->clk_; auto clk = this->clk_;
Request req(req_addr_param, req_type_param, callback, 0); Request req(req_addr_param, req_type_param, callback, 0);
bool res = memory_->send(req); bool res = memory_->send(req);
// cout << "request_addr: " << req_addr << ", memory_->get_num_dram_cycles(): " << memory_->get_num_dram_cycles() << endl;
memory_->tick(); memory_->tick();
clk_++; clk_++;
return res; return res;
...@@ -164,29 +164,48 @@ double Ram<T>::AccessAndWaitUntilReturn(const long req_addr, const Request::Type ...@@ -164,29 +164,48 @@ double Ram<T>::AccessAndWaitUntilReturn(const long req_addr, const Request::Type
template<typename T> template<typename T>
double Ram<T>::ConvertRamCycle2Realtime(const long cycles){ double Ram<T>::ConvertRamCycle2Realtime(const long cycles){
return double(cycles) / frequency_; return double(cycles) * memory_->clk_ns();
} }
template<typename T> template<typename T>
long Ram<T>::ConvertRealtime2RamCycle(const double realtime){ long Ram<T>::ConvertRealtime2RamCycle(const double realtime){
return long(double(realtime) * frequency_); return long(double(realtime) / memory_->clk_ns());
} }
template<typename T> template<typename T>
void Ram<T>::SetFrequency(const double r){ double Ram<T>::GetClock(){
this->frequency_ = r; return this->clk_;
} }
template<typename T> template<typename T>
double Ram<T>::GetClockRealtime(){ double Ram<T>::GetMemoryClock(){
return double(this->clk_) * frequency_; return this->memory_->get_num_dram_cycles();
} }
template<typename T> template<typename T>
long Ram<T>::GetClockRam(){ double Ram<T>::GetClockNs(){
return this->clk_; return this->clk_ * memory_->clk_ns();
} }
// Elapsed clock as reported by GetClockNs(), divided by 1000
// (nanoseconds -> microseconds, going by the accessor names).
template<typename T>
double Ram<T>::GetClockUs(){
    const double elapsed_ns = this->GetClockNs();
    return elapsed_ns / 1000.;
}
// Elapsed clock as reported by GetClockUs(), divided by 1000
// (microseconds -> milliseconds, going by the accessor names).
template<typename T>
double Ram<T>::GetClockMs(){
    const double elapsed_us = this->GetClockUs();
    return elapsed_us / 1000.;
}
// Elapsed clock as reported by GetClockMs(), divided by 1000
// (milliseconds -> seconds, going by the accessor names).
template<typename T>
double Ram<T>::GetClockS(){
    const double elapsed_ms = this->GetClockMs();
    return elapsed_ms / 1000.;
}
// Rewinds this wrapper's own cycle counter (clk_) to zero.
// Only clk_ is touched here; the underlying memory_ object is not modified.
template<typename T>
void Ram<T>::ResetClock(){
    clk_ = 0;
}
template<typename T> template<typename T>
void Ram<T>::PrintCtrlsQueuesSize(){ void Ram<T>::PrintCtrlsQueuesSize(){
...@@ -204,12 +223,12 @@ template class Ram<ramulator::DDR4>; ...@@ -204,12 +223,12 @@ template class Ram<ramulator::DDR4>;
template class Ram<ramulator::GDDR5>; template class Ram<ramulator::GDDR5>;
template class Ram<ramulator::LPDDR3>; template class Ram<ramulator::LPDDR3>;
template class Ram<ramulator::LPDDR4>; template class Ram<ramulator::LPDDR4>;
//template class Ram<ramulator::DSARP>;
template class Ram<ramulator::WideIO>; template class Ram<ramulator::WideIO>;
//template class Ram<ramulator::WideIO2>;
template class Ram<ramulator::HBM>; template class Ram<ramulator::HBM>;
//template class Ram<ramulator::SALP>;
template class Ram<ramulator::ALDRAM>; template class Ram<ramulator::ALDRAM>;
//template class Ram<ramulator::TLDRAM>;
template class Ram<ramulator::STTMRAM>; template class Ram<ramulator::STTMRAM>;
template class Ram<ramulator::PCM>; template class Ram<ramulator::PCM>;
//template class Ram<ramulator::TLDRAM>;
//template class Ram<ramulator::SALP>;
//template class Ram<ramulator::WideIO2>;
//template class Ram<ramulator::DSARP>;
...@@ -43,14 +43,18 @@ public: ...@@ -43,14 +43,18 @@ public:
double AccessAndWaitUntilReturn(const long req_addr, const Request::Type req_type); double AccessAndWaitUntilReturn(const long req_addr, const Request::Type req_type);
double ConvertRamCycle2Realtime(const long ram_cycle); double ConvertRamCycle2Realtime(const long ram_cycle);
long ConvertRealtime2RamCycle(const double realtime); long ConvertRealtime2RamCycle(const double realtime);
void SetFrequency(const double r); double GetClock(); // unit: ram cycle
double GetClockRealtime(); // unit: ns double GetMemoryClock(); // unit: ram cycle
long GetClockRam(); // unit: ram cycle double GetClockNs();
double GetClockUs();
double GetClockMs();
double GetClockS();
void ResetClock();
const std::shared_ptr<Memory<T, Controller> >& GetMemory(){ return memory_; } const std::shared_ptr<Memory<T, Controller> >& GetMemory(){ return memory_; }
void PrintCtrlsQueuesSize(); void PrintCtrlsQueuesSize();
// Forwards to memory_->clk_ns(); presumably the DRAM clock period in
// nanoseconds -- TODO confirm against Memory.
double clk_ns(){return memory_->clk_ns();}
protected: protected:
void InitMemory(const Config& configs, T* spec); void InitMemory(const Config& configs, T* spec);
double frequency_; // unit: Ghz
long clk_; long clk_;
std::shared_ptr<Memory<T, Controller> > memory_; std::shared_ptr<Memory<T, Controller> > memory_;
}; };
#include "ram.h"
#include <assert.h>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <memory>
#include <queue>
#include <string>
#include <vector>
using namespace ramulator;
using namespace std;
#define CHANNEL_NUM 6
int main(int argc, const char *argv[]){
if (argc < 3) {
printf("Usage: %s <configs-file> <bin-edges-file>\n"
"Example: %s ramulator-configs.cfg edges.bin\n",
argv[0], argv[0]);
return 0;
}
vector<Ram<DDR4>* > ram;
for (int i = 0; i < CHANNEL_NUM; ++i){
ram.push_back(new Ram<DDR4>(argv[1]));
}
const int vertex_unit_len = 16;
int request_num = 0;
int* buf = new int[4];
ifstream fedge(argv[2], ios::in|ios::binary);
long i = 0;
long total_access = 0;
while(!fedge.eof()){
fedge.read((char*)buf, 4*sizeof(int));
if (buf[2] == 0) {
continue;
}else {
++total_access;
}
if (i % 1000000 == 0){
cout << "edge_id: " << i << ", request_num: " << request_num
<< ", clk_ns: " << ram[i%CHANNEL_NUM]->GetClockNs()
<< ", clk_us: " << ram[i%CHANNEL_NUM]->GetClockUs()
<< ", clk_ms: " << ram[i%CHANNEL_NUM]->GetClockMs()
<< ", clk_s: " << ram[i%CHANNEL_NUM]->GetClockS()
<< endl;
}
i ++;
request_num ++;
long addr = 0 + buf[1] * vertex_unit_len * 4;
while (!ram[(buf[1])%CHANNEL_NUM]->AccessCommand(addr, Request::Type::READ,
[&request_num](Request& req){
request_num --;
assert(request_num > 0);
}
)){}
}
fedge.close();
while(!request_num){
for (int i = 0; i < CHANNEL_NUM; ++i){
ram[i]->WaitFor(1);
}
}
cout << "total_data: " << total_access << endl;
for (int i = 0; i < CHANNEL_NUM; ++i){
cout << "Ram[" << i << "]"
<< ", total time clk_ms: " << ram[i]->GetClockMs()
<< endl;
}
return 0;
}
/*
const int vertex_unit_len = 16;
int request_num = 0;
int row_size = 1024*128;
int* buf = new int[4];
ifstream fedge(argv[2], ios::in|ios::binary);
long i = 0;
long j = 0;
vector<queue<int> > req_q;
const int bank_num = 1; // DDR4 has 16 banks per channel
req_q.resize(bank_num);
int current_bank_id = 0;
while(!fedge.eof()){
fedge.read((char*)buf, 4*sizeof(int));
if (buf[2] == 0) {
continue;
}else {
++j;
}
long addr = 0 + buf[1] * vertex_unit_len * 4;
int bank_id = int(addr / 8192) % bank_num;
req_q[bank_id].push(buf[1]);
request_num ++;
}
fedge.close();
for (int bank_id = 0; bank_id < bank_num; ++bank_id){
cout << "bank_id: " << bank_id << ", req_num: " << req_q[bank_id].size() << endl;
}
bool finish = false;
while(!finish){
if (i % 1000000 == 0){
cout << "edge_id: " << i << ", request_num: " << request_num
<< ", clk_ns: " << ram[i%CHANNEL_NUM]->GetClockNs()
<< ", clk_us: " << ram[i%CHANNEL_NUM]->GetClockUs()
<< ", clk_ms: " << ram[i%CHANNEL_NUM]->GetClockMs()
<< ", clk_s: " << ram[i%CHANNEL_NUM]->GetClockS()
<< endl;
}
i ++;
bool all_empty = true;
for (int bank_id = 0; bank_id < bank_num; ++bank_id){
//cout << req_q[bank_id].size() << ", ";
if (req_q[bank_id].size() != 0){
all_empty = false;
}else{
continue;
}
int vertex_id = req_q[bank_id].front();
req_q[bank_id].pop();
long addr = 0 + vertex_id * vertex_unit_len * 4;
while (!ram[(vertex_id-1)%CHANNEL_NUM]->AccessCommand(addr, Request::Type::READ,
[&request_num](Request& req){
request_num --;
assert(request_num > 0);
}
)){}
}
finish = all_empty;
}
*/
...@@ -13,36 +13,29 @@ int main(int argc, const char *argv[]){ ...@@ -13,36 +13,29 @@ int main(int argc, const char *argv[]){
"Example: %s ramulator-configs.cfg 16\n", argv[0], argv[0]); "Example: %s ramulator-configs.cfg 16\n", argv[0], argv[0]);
return 0; return 0;
} }
Ram<ramulator::HBM> ram(argv[1]); Ram<ramulator::DDR4> ram(argv[1]);
ram.SetFrequency(1.0);
ram.PrintCtrlsQueuesSize();
int addr_interval = stoi(argv[2]); int addr_interval = stoi(argv[2]);
for (int i = 0; i < 32; ++i){ long request_num = 0;
while (!ram.AccessCommand(long(0 + i * addr_interval), ramulator::Request::Type::READ, i)){} for (int i = 0; i < 320; ++i){ // 13264
//ram.WaitFor(0); request_num ++;
//ram.PrintCtrlsQueuesSize(); double send_clk = ram.GetClockNs();
while (!ram.AccessCommand(long(0 + i * addr_interval), ramulator::Request::Type::READ,
[i, &request_num, send_clk, &ram](Request& req){
request_num --;
cout << "request: " << i << ", send: " << send_clk
<< ", recieve: " << ram.GetClockNs()
<< ", latency: " << ram.GetClockNs() - send_clk
<< endl;
} }
ram.WaitUntil(80000); )){}
cout << "total cycles: " << ram.GetClockRealtime() << endl; // while (request_num){
// ram.WaitFor(1);
edges = readfile(task_param.edge_file()); // }
return 0;
}
void (){
// read edges from file
ifstream fedge(task_param.edge_aggregation_param().edge_file(), ios::in|ios::binary);
CHECK(fedge.is_open()) << task_param.edge_aggregation_param().edge_file() << "Not found.";
vector<vector<int> > edges;
int* tmp = new int[edge_num*edge_unit_len];
fedge.read((char*)tmp, edge_num*edge_unit_len*sizeof(int));
fedge.close();
edges.resize(edge_num);
for (int i = 0; i < edge_num; ++i){
for (int j = 0; j < edge_unit_len; ++j){
edges[i].push_back(tmp[edge_unit_len*i+j]);
} }
while (request_num){
ram.WaitFor(1);
} }
cout << "total time(ns): " << ram.GetClockNs() << endl;
cout << "clk: " << ram.GetClock() << " v.s. " << ram.GetMemoryClock() << endl;
return 0;
} }
#!/bin/bash
# Runs the read_bin_edge binary with the DDR4 config on a fixed edge file.
# To debug, prefix the final command with: gdb --args
BIN=./bazel-bin/ramulator/read_bin_edge
CONFIG=./configs/DDR4-config.cfg
EDGES=/home/songxinkai/bin_edges/edges.reddit.fea16.bin
"$BIN" "$CONFIG" "$EDGES"
#!/bin/bash #!/bin/bash
#./bazel-bin/ramulator/testram ./configs/DDR4-config.cfg $1 #./bazel-bin/ramulator/testram ./configs/DDR4-config.cfg $1
./bazel-bin/ramulator/testram_break ./configs/HBM-config.cfg $1
#./bazel-bin/ramulator/main ./configs/DDR4-config.cfg $1 #./bazel-bin/ramulator/main ./configs/DDR4-config.cfg $1
./bazel-bin/ramulator/testram_break ./configs/DDR4-config.cfg $1
#./bazel-bin/ramulator/read_bin_edge ./configs/DDR4-config.cfg /home/songxinkai/bin_edges/edges.reddit.fea16.bin
#./bazel-bin/ramulator/read_bin_edge ./configs/HBM-config.cfg /home/songxinkai/bin_edges/edges.reddit.fea16.bin
#!/bin/bash
# Runs testram_break with the DDR4 config; the script's first argument is
# forwarded as the program's second argument.
TESTRAM_BREAK=./bazel-bin/ramulator/testram_break
DDR4_CONFIG=./configs/DDR4-config.cfg
# $1 stays unquoted on purpose: an absent argument must expand to nothing.
"$TESTRAM_BREAK" "$DDR4_CONFIG" $1
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment