Commit 3700081a by songxinkai

~

parent a33653ee
bazel-*
*.swp
log
*.cmdtrace
#!/bin/bash
bazel build --copt="-DRAMULATOR" //ramulator:main
bazel build --copt="-DRAMULATOR" //ramulator:testram
bazel build --copt="-DRAMULATOR" //ramulator:testram_break
#bazel build --copt="-DRAMULATOR" //ramulator:main
#bazel build --copt="-DRAMULATOR" //ramulator:testram
#bazel build --copt="-DRAMULATOR" //ramulator:testram_break
#bazel build --copt="-DRAMULATOR" //ramulator:read_bin_edge
bazel build --copt="-g" --copt="-O0" --copt="-fpic" --strip=never --copt="-DRAMULATOR" //ramulator:testram_break && \
bazel build --copt="-g" --copt="-O0" --copt="-fpic" --strip=never --copt="-DRAMULATOR" //ramulator:read_bin_edge
......@@ -5,10 +5,11 @@
standard = DDR4
channels = 1
ranks = 1
#speed = DDR4_3200
speed = DDR4_2400R
org = DDR4_4Gb_x8
# record_cmd_trace: (default is off): on, off
record_cmd_trace = off
record_cmd_trace = on
# print_cmd_trace: (default is off): on, off
print_cmd_trace = off
......
# Executable built from read_bin_edge.cpp; depends on the "ram" target and is
# compiled with the flags listed in copts.
cc_binary(
    name = "read_bin_edge",
    srcs = ["read_bin_edge.cpp"],
    copts = [
        "-O3",
        "-std=c++11",
        "-g",
        "-Wall",
    ],
    deps = ["ram"],
)
cc_binary(
name="testram_break",
srcs=[
"testram_break.cpp",
......@@ -167,6 +183,7 @@ cc_library(
":SpeedyController",
":Statistics",
":GDDR5",
":DDR4",
":HBM",
":LPDDR3",
":LPDDR4",
......
......@@ -84,7 +84,7 @@ public:
Queue readq; // queue for read requests
Queue writeq; // queue for write requests
Queue actq; // read and write requests for which activate was issued are moved to
// actq, which has higher priority than readq and writeq.
// actq, which ***has higher priority*** than readq and writeq.
// This is an optimization
// for avoiding useless activations (i.e., PRECHARGE
// after ACTIVATE w/o READ or WRITE command)
......
......@@ -248,6 +248,7 @@ template <typename T>
typename T::Command DRAM<T>::decode(typename T::Command cmd, const int* addr)
{
int child_id = addr[int(level)+1];
//if (level == T::Level::Rank) cout << "decode bank id: " << child_id << endl;
if (prereq[int(cmd)]) {
typename T::Command prereq_cmd = prereq[int(cmd)](this, cmd, child_id);
if (prereq_cmd != T::Command::MAX)
......@@ -405,6 +406,7 @@ void DRAM<T>::update_timing(typename T::Command cmd, const int* addr, long clk)
}
// Update the number of requests it serves currently
template <typename T>
void DRAM<T>::update_serving_requests(const int* addr, int delta, long clk) {
assert(id == addr[int(level)]);
......
......@@ -8,6 +8,7 @@
#include "SpeedyController.h"
#include "Statistics.h"
#include "GDDR5.h"
#include "DDR4.h"
#include "HBM.h"
#include "LPDDR3.h"
#include "LPDDR4.h"
......@@ -46,6 +47,8 @@ public:
template <class T, template<typename> class Controller = Controller >
class Memory : public MemoryBase
{
public:
double get_num_dram_cycles(){return num_dram_cycles.value();}
protected:
ScalarStat dram_capacity;
ScalarStat num_dram_cycles;
......@@ -79,6 +82,7 @@ public:
RoBaRaCoCh,
MAX,
} type = Type::RoBaRaCoCh;
//} type = Type::ChRaBaRoCo;
enum class Translation {
None,
......@@ -137,7 +141,7 @@ public:
max_address = spec->channel_width / 8;
for (unsigned int lev = 0; lev < addr_bits.size(); lev++) {
addr_bits[lev] = calc_log2(sz[lev]);
addr_bits[lev] = calc_log2(sz[lev]); // org_entry.count
max_address *= sz[lev];
}
......@@ -333,6 +337,8 @@ public:
}
if(ctrls[req.addr_vec[0]]->enqueue(req)) {
// cout << "req bank id: " << req.addr_vec[int(T::Level::Bank)]
// << ", bank group: " << req.addr_vec[int(T::Level::Bank) - 1] << endl;
// tally stats here to avoid double counting for requests that aren't enqueued
++num_incoming_requests;
if (req.type == Request::Type::READ) {
......
......@@ -216,7 +216,8 @@ public:
enum class Type {
Closed, ClosedAP, Opened, Timeout, MAX
} type = Type::Opened;
//} type = Type::Opened;
} type = Type::Closed;
int timeout = 50;
......
......@@ -4,7 +4,6 @@
template<typename T>
Ram<T>::Ram(const std::string& config_file){
clk_ = 0;
frequency_ = 1;
Config configs(config_file);
const std::string& standard = configs["standard"];
assert(standard != "" || "DRAM standard should be specified.");
......@@ -95,7 +94,7 @@ void Ram<T>::InitMemory(const Config& configs, T* spec){
// Loop: memory_->tick()
template<typename T>
void Ram<T>::WaitUntil(const double time){
long ram_time = long(std::ceil(time * frequency_));
long ram_time = long(std::ceil(time / memory_->clk_ns()));
while (clk_ < ram_time){
memory_->tick();
clk_ ++;
......@@ -105,7 +104,7 @@ void Ram<T>::WaitUntil(const double time){
// Loop: memory_->tick()
template<typename T>
void Ram<T>::WaitFor(const double time){
long ram_time = long(std::ceil(time * this->frequency_));
long ram_time = long(std::ceil(time / memory_->clk_ns()));
for (long i = 0; i < ram_time; ++i){
memory_->tick();
clk_ ++;
......@@ -126,7 +125,7 @@ bool Ram<T>::AccessCommand(const long req_addr, const Request::Type req_type, in
});
Request req(req_addr_param, req_type_param, callback, 0);
bool res = memory_->send(req);
cout << "request id " << message << ", send result: " << res << ", memory_->clk_ns(): " << memory_->clk_ns() << endl;
// cout << "request_addr: " << req_addr << ", memory_->get_num_dram_cycles(): " << memory_->get_num_dram_cycles() << endl;
memory_->tick();
clk_++;
return res;
......@@ -140,6 +139,7 @@ bool Ram<T>::AccessCommand(const long req_addr, const Request::Type req_type, st
auto clk = this->clk_;
Request req(req_addr_param, req_type_param, callback, 0);
bool res = memory_->send(req);
// cout << "request_addr: " << req_addr << ", memory_->get_num_dram_cycles(): " << memory_->get_num_dram_cycles() << endl;
memory_->tick();
clk_++;
return res;
......@@ -164,29 +164,48 @@ double Ram<T>::AccessAndWaitUntilReturn(const long req_addr, const Request::Type
// Converts a RAM cycle count into a time value.
// NOTE(review): the body holds two consecutive return statements, so only the
// first one (cycles / frequency_) is reachable as written. The pair looks like
// the before/after lines of a change from a frequency_-based conversion to a
// memory_->clk_ns()-based one -- confirm which is intended and drop the other.
template<typename T>
double Ram<T>::ConvertRamCycle2Realtime(const long cycles){
return double(cycles) / frequency_;
return double(cycles) * memory_->clk_ns();
}
// Converts a time value into a RAM cycle count; the result is truncated by the
// cast to long.
// NOTE(review): the body holds two consecutive return statements, so only the
// first one (realtime * frequency_) is reachable as written. The pair looks
// like the before/after lines of a change from a frequency_-based conversion
// to a memory_->clk_ns()-based one -- confirm which is intended and drop the
// other.
template<typename T>
long Ram<T>::ConvertRealtime2RamCycle(const double realtime){
return long(double(realtime) * frequency_);
return long(double(realtime) / memory_->clk_ns());
}
template<typename T>
void Ram<T>::SetFrequency(const double r){
this->frequency_ = r;
double Ram<T>::GetClock(){
return this->clk_;
}
template<typename T>
double Ram<T>::GetClockRealtime(){
return double(this->clk_) * frequency_;
double Ram<T>::GetMemoryClock(){
return this->memory_->get_num_dram_cycles();
}
template<typename T>
long Ram<T>::GetClockRam(){
return this->clk_;
double Ram<T>::GetClockNs(){
return this->clk_ * memory_->clk_ns();
}
// Value of GetClockNs() divided by 1000 (nanoseconds -> microseconds, going by
// the method names).
template<typename T>
double Ram<T>::GetClockUs(){
  const double nanoseconds = GetClockNs();
  return nanoseconds / 1000.;
}
// Value of GetClockUs() divided by 1000 (microseconds -> milliseconds, going
// by the method names).
template<typename T>
double Ram<T>::GetClockMs(){
  const double microseconds = GetClockUs();
  return microseconds / 1000.;
}
// Value of GetClockMs() divided by 1000 (milliseconds -> seconds, going by the
// method names).
template<typename T>
double Ram<T>::GetClockS(){
  const double milliseconds = GetClockMs();
  return milliseconds / 1000.;
}
// Sets this wrapper's cycle counter (clk_) back to zero. Only clk_ is written
// here; the underlying memory_ object is not touched.
template<typename T>
void Ram<T>::ResetClock(){
  clk_ = 0;
}
template<typename T>
void Ram<T>::PrintCtrlsQueuesSize(){
......@@ -204,12 +223,12 @@ template class Ram<ramulator::DDR4>;
template class Ram<ramulator::GDDR5>;
template class Ram<ramulator::LPDDR3>;
template class Ram<ramulator::LPDDR4>;
//template class Ram<ramulator::DSARP>;
template class Ram<ramulator::WideIO>;
//template class Ram<ramulator::WideIO2>;
template class Ram<ramulator::HBM>;
//template class Ram<ramulator::SALP>;
template class Ram<ramulator::ALDRAM>;
//template class Ram<ramulator::TLDRAM>;
template class Ram<ramulator::STTMRAM>;
template class Ram<ramulator::PCM>;
//template class Ram<ramulator::TLDRAM>;
//template class Ram<ramulator::SALP>;
//template class Ram<ramulator::WideIO2>;
//template class Ram<ramulator::DSARP>;
......@@ -43,14 +43,18 @@ public:
double AccessAndWaitUntilReturn(const long req_addr, const Request::Type req_type);
double ConvertRamCycle2Realtime(const long ram_cycle);
long ConvertRealtime2RamCycle(const double realtime);
void SetFrequency(const double r);
double GetClockRealtime(); // unit: ns
long GetClockRam(); // unit: ram cycle
double GetClock(); // unit: ram cycle
double GetMemoryClock(); // unit: ram cycle
double GetClockNs();
double GetClockUs();
double GetClockMs();
double GetClockS();
void ResetClock();
const std::shared_ptr<Memory<T, Controller> >& GetMemory(){ return memory_; }
void PrintCtrlsQueuesSize();
double clk_ns(){return memory_->clk_ns();}
protected:
void InitMemory(const Config& configs, T* spec);
double frequency_; // unit: Ghz
long clk_;
std::shared_ptr<Memory<T, Controller> > memory_;
};
#include "ram.h"
#include <assert.h>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <memory>
#include <queue>
#include <string>
#include <vector>
using namespace ramulator;
using namespace std;
#define CHANNEL_NUM 6
int main(int argc, const char *argv[]){
if (argc < 3) {
printf("Usage: %s <configs-file> <bin-edges-file>\n"
"Example: %s ramulator-configs.cfg edges.bin\n",
argv[0], argv[0]);
return 0;
}
vector<Ram<DDR4>* > ram;
for (int i = 0; i < CHANNEL_NUM; ++i){
ram.push_back(new Ram<DDR4>(argv[1]));
}
const int vertex_unit_len = 16;
int request_num = 0;
int* buf = new int[4];
ifstream fedge(argv[2], ios::in|ios::binary);
long i = 0;
long total_access = 0;
while(!fedge.eof()){
fedge.read((char*)buf, 4*sizeof(int));
if (buf[2] == 0) {
continue;
}else {
++total_access;
}
if (i % 1000000 == 0){
cout << "edge_id: " << i << ", request_num: " << request_num
<< ", clk_ns: " << ram[i%CHANNEL_NUM]->GetClockNs()
<< ", clk_us: " << ram[i%CHANNEL_NUM]->GetClockUs()
<< ", clk_ms: " << ram[i%CHANNEL_NUM]->GetClockMs()
<< ", clk_s: " << ram[i%CHANNEL_NUM]->GetClockS()
<< endl;
}
i ++;
request_num ++;
long addr = 0 + buf[1] * vertex_unit_len * 4;
while (!ram[(buf[1])%CHANNEL_NUM]->AccessCommand(addr, Request::Type::READ,
[&request_num](Request& req){
request_num --;
assert(request_num > 0);
}
)){}
}
fedge.close();
while(!request_num){
for (int i = 0; i < CHANNEL_NUM; ++i){
ram[i]->WaitFor(1);
}
}
cout << "total_data: " << total_access << endl;
for (int i = 0; i < CHANNEL_NUM; ++i){
cout << "Ram[" << i << "]"
<< ", total time clk_ms: " << ram[i]->GetClockMs()
<< endl;
}
return 0;
}
/*
const int vertex_unit_len = 16;
int request_num = 0;
int row_size = 1024*128;
int* buf = new int[4];
ifstream fedge(argv[2], ios::in|ios::binary);
long i = 0;
long j = 0;
vector<queue<int> > req_q;
const int bank_num = 1; // DDR4 has 16 banks per channel
req_q.resize(bank_num);
int current_bank_id = 0;
while(!fedge.eof()){
fedge.read((char*)buf, 4*sizeof(int));
if (buf[2] == 0) {
continue;
}else {
++j;
}
long addr = 0 + buf[1] * vertex_unit_len * 4;
int bank_id = int(addr / 8192) % bank_num;
req_q[bank_id].push(buf[1]);
request_num ++;
}
fedge.close();
for (int bank_id = 0; bank_id < bank_num; ++bank_id){
cout << "bank_id: " << bank_id << ", req_num: " << req_q[bank_id].size() << endl;
}
bool finish = false;
while(!finish){
if (i % 1000000 == 0){
cout << "edge_id: " << i << ", request_num: " << request_num
<< ", clk_ns: " << ram[i%CHANNEL_NUM]->GetClockNs()
<< ", clk_us: " << ram[i%CHANNEL_NUM]->GetClockUs()
<< ", clk_ms: " << ram[i%CHANNEL_NUM]->GetClockMs()
<< ", clk_s: " << ram[i%CHANNEL_NUM]->GetClockS()
<< endl;
}
i ++;
bool all_empty = true;
for (int bank_id = 0; bank_id < bank_num; ++bank_id){
//cout << req_q[bank_id].size() << ", ";
if (req_q[bank_id].size() != 0){
all_empty = false;
}else{
continue;
}
int vertex_id = req_q[bank_id].front();
req_q[bank_id].pop();
long addr = 0 + vertex_id * vertex_unit_len * 4;
while (!ram[(vertex_id-1)%CHANNEL_NUM]->AccessCommand(addr, Request::Type::READ,
[&request_num](Request& req){
request_num --;
assert(request_num > 0);
}
)){}
}
finish = all_empty;
}
*/
......@@ -13,36 +13,29 @@ int main(int argc, const char *argv[]){
"Example: %s ramulator-configs.cfg 16\n", argv[0], argv[0]);
return 0;
}
Ram<ramulator::HBM> ram(argv[1]);
ram.SetFrequency(1.0);
ram.PrintCtrlsQueuesSize();
Ram<ramulator::DDR4> ram(argv[1]);
int addr_interval = stoi(argv[2]);
for (int i = 0; i < 32; ++i){
while (!ram.AccessCommand(long(0 + i * addr_interval), ramulator::Request::Type::READ, i)){}
//ram.WaitFor(0);
//ram.PrintCtrlsQueuesSize();
long request_num = 0;
for (int i = 0; i < 320; ++i){ // 13264
request_num ++;
double send_clk = ram.GetClockNs();
while (!ram.AccessCommand(long(0 + i * addr_interval), ramulator::Request::Type::READ,
[i, &request_num, send_clk, &ram](Request& req){
request_num --;
cout << "request: " << i << ", send: " << send_clk
<< ", recieve: " << ram.GetClockNs()
<< ", latency: " << ram.GetClockNs() - send_clk
<< endl;
}
)){}
// while (request_num){
// ram.WaitFor(1);
// }
}
ram.WaitUntil(80000);
cout << "total cycles: " << ram.GetClockRealtime() << endl;
edges = readfile(task_param.edge_file());
return 0;
}
void (){
// read edges from file
ifstream fedge(task_param.edge_aggregation_param().edge_file(), ios::in|ios::binary);
CHECK(fedge.is_open()) << task_param.edge_aggregation_param().edge_file() << "Not found.";
vector<vector<int> > edges;
int* tmp = new int[edge_num*edge_unit_len];
fedge.read((char*)tmp, edge_num*edge_unit_len*sizeof(int));
fedge.close();
edges.resize(edge_num);
for (int i = 0; i < edge_num; ++i){
for (int j = 0; j < edge_unit_len; ++j){
edges[i].push_back(tmp[edge_unit_len*i+j]);
}
while (request_num){
ram.WaitFor(1);
}
cout << "total time(ns): " << ram.GetClockNs() << endl;
cout << "clk: " << ram.GetClock() << " v.s. " << ram.GetMemoryClock() << endl;
return 0;
}
#!/bin/bash
# Runs the read_bin_edge driver on a ramulator config and a binary edge file.
# Usage: <this-script> [config-file] [bin-edges-file]
# Both arguments are optional; when omitted, the previously hard-coded paths
# are used, so calling the script with no arguments behaves as before.
config_file="${1:-./configs/DDR4-config.cfg}"
edge_file="${2:-/home/songxinkai/bin_edges/edges.reddit.fea16.bin}"
# To debug, prefix the command below with: gdb --args
./bazel-bin/ramulator/read_bin_edge "$config_file" "$edge_file"
#!/bin/bash
#./bazel-bin/ramulator/testram ./configs/DDR4-config.cfg $1
./bazel-bin/ramulator/testram_break ./configs/HBM-config.cfg $1
#./bazel-bin/ramulator/main ./configs/DDR4-config.cfg $1
./bazel-bin/ramulator/testram_break ./configs/DDR4-config.cfg $1
#./bazel-bin/ramulator/read_bin_edge ./configs/DDR4-config.cfg /home/songxinkai/bin_edges/edges.reddit.fea16.bin
#./bazel-bin/ramulator/read_bin_edge ./configs/HBM-config.cfg /home/songxinkai/bin_edges/edges.reddit.fea16.bin
#!/bin/bash
# Runs the testram_break binary with the DDR4 config file. The script's first
# argument ($1) is forwarded, unquoted, as the program's second argument; when
# it is omitted the program receives only the config path.
./bazel-bin/ramulator/testram_break ./configs/DDR4-config.cfg $1
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment