Commit fb993c3c by songxinkai

init ramulator bazel

parents
Copyright (c) 2015, SAFARI Research Group at Carnegie Mellon University
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
This diff is collapsed. Click to expand it.
#!/bin/bash
# Build the ramulator standalone binary with the RAMULATOR macro defined
# (the macro selects the standalone-simulator code paths at compile time).
set -e  # propagate a bazel failure as this script's exit status
bazel build --copt="-DRAMULATOR" //ramulator:main
########################
# Example config file
# Comments start with #
# There are restrictions for valid channel/rank numbers
standard = ALDRAM
channels = 1
ranks = 1
speed = ALDRAM_1600K
org = ALDRAM_4Gb_x8
# record_cmd_trace: (default is off): on, off
record_cmd_trace = off
# print_cmd_trace: (default is off): on, off
print_cmd_trace = off
### Below are parameters only for CPU trace
cpu_tick = 4
mem_tick = 1
### Below are parameters only for multicore mode
# When early_exit is on, all cores will be terminated when the earliest one finishes.
early_exit = on
# early_exit = on, off (default value is on)
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit.
expected_limit_insts = 200000000
warmup_insts = 100000000
cache = no
# cache = no, L1L2, L3, all (default value is no)
translation = None
# translation = None, Random (default value is None)
#
########################
########################
# Example config file
# Comments start with #
# There are restrictions for valid channel/rank numbers
standard = DDR3
channels = 1
ranks = 1
speed = DDR3_1600K
org = DDR3_2Gb_x8
# record_cmd_trace: (default is off): on, off
record_cmd_trace = off
# print_cmd_trace: (default is off): on, off
print_cmd_trace = off
### Below are parameters only for CPU trace
cpu_tick = 4
mem_tick = 1
### Below are parameters only for multicore mode
# When early_exit is on, all cores will be terminated when the earliest one finishes.
early_exit = on
# early_exit = on, off (default value is on)
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit.
expected_limit_insts = 0
warmup_insts = 0
cache = no
# cache = no, L1L2, L3, all (default value is no)
translation = None
# translation = None, Random (default value is None)
#
########################
########################
# Example config file
# Comments start with #
# There are restrictions for valid channel/rank numbers
standard = DDR4
channels = 1
ranks = 1
speed = DDR4_2400R
org = DDR4_4Gb_x8
# record_cmd_trace: (default is off): on, off
record_cmd_trace = off
# print_cmd_trace: (default is off): on, off
print_cmd_trace = off
### Below are parameters only for CPU trace
cpu_tick = 8
mem_tick = 3
### Below are parameters only for multicore mode
# When early_exit is on, all cores will be terminated when the earliest one finishes.
early_exit = on
# early_exit = on, off (default value is on)
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit.
expected_limit_insts = 200000000
warmup_insts = 100000000
cache = no
# cache = no, L1L2, L3, all (default value is no)
translation = None
# translation = None, Random (default value is None)
#
########################
########################
# Example config file
# Comments start with #
# There are restrictions for valid channel/rank numbers
standard = DSARP
subarrays = 8
channels = 1
ranks = 1
speed = DSARP_1333
org = DSARP_8Gb_x8
# record_cmd_trace: (default is off): on, off
record_cmd_trace = off
# print_cmd_trace: (default is off): on, off
print_cmd_trace = off
### Below are parameters only for CPU trace
cpu_tick = 4
mem_tick = 1
### Below are parameters only for multicore mode
# When early_exit is on, all cores will be terminated when the earliest one finishes.
early_exit = on
# early_exit = on, off (default value is on)
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit.
expected_limit_insts = 200000000
warmup_insts = 100000000
cache = no
# cache = no, L1L2, L3, all (default value is no)
translation = None
# translation = None, Random (default value is None)
#
########################
########################
# Example config file
# Comments start with #
# There are restrictions for valid channel/rank numbers
standard = GDDR5
channels = 1
ranks = 1
speed = GDDR5_6000
org = GDDR5_8Gb_x16
# record_cmd_trace: (default is off): on, off
record_cmd_trace = off
# print_cmd_trace: (default is off): on, off
print_cmd_trace = off
### Below are parameters only for CPU trace
cpu_tick = 2
mem_tick = 1
### Below are parameters only for multicore mode
# When early_exit is on, all cores will be terminated when the earliest one finishes.
early_exit = on
# early_exit = on, off (default value is on)
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit.
expected_limit_insts = 200000000
warmup_insts = 100000000
cache = no
# cache = no, L1L2, L3, all (default value is no)
translation = None
# translation = None, Random (default value is None)
#
########################
########################
# Example config file
# Comments start with #
# There are restrictions for valid channel/rank numbers
standard = HBM
channels = 8
ranks = 1
speed = HBM_1Gbps
org = HBM_4Gb
# record_cmd_trace: (default is off): on, off
record_cmd_trace = off
# print_cmd_trace: (default is off): on, off
print_cmd_trace = off
### Below are parameters only for CPU trace
cpu_tick = 32
mem_tick = 5
### Below are parameters only for multicore mode
# When early_exit is on, all cores will be terminated when the earliest one finishes.
early_exit = on
# early_exit = on, off (default value is on)
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit.
expected_limit_insts = 200000000
warmup_insts = 100000000
cache = no
# cache = no, L1L2, L3, all (default value is no)
translation = None
# translation = None, Random (default value is None)
#
########################
########################
# Example config file
# Comments start with #
# There are restrictions for valid channel/rank numbers
standard = LPDDR3
channels = 1
ranks = 1
speed = LPDDR3_1600
org = LPDDR3_8Gb_x16
# record_cmd_trace: (default is off): on, off
record_cmd_trace = off
# print_cmd_trace: (default is off): on, off
print_cmd_trace = off
### Below are parameters only for CPU trace
cpu_tick = 4
mem_tick = 1
### Below are parameters only for multicore mode
# When early_exit is on, all cores will be terminated when the earliest one finishes.
early_exit = on
# early_exit = on, off (default value is on)
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit.
expected_limit_insts = 200000000
warmup_insts = 100000000
cache = no
# cache = no, L1L2, L3, all (default value is no)
translation = None
# translation = None, Random (default value is None)
#
########################
########################
# Example config file
# Comments start with #
# There are restrictions for valid channel/rank numbers
standard = LPDDR4
channels = 2
ranks = 1
speed = LPDDR4_2400
org = LPDDR4_8Gb_x16
# record_cmd_trace: (default is off): on, off
record_cmd_trace = off
# print_cmd_trace: (default is off): on, off
print_cmd_trace = off
### Below are parameters only for CPU trace
cpu_tick = 8
mem_tick = 3
### Below are parameters only for multicore mode
# When early_exit is on, all cores will be terminated when the earliest one finishes.
early_exit = on
# early_exit = on, off (default value is on)
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit.
expected_limit_insts = 200000000
warmup_insts = 100000000
cache = no
# cache = no, L1L2, L3, all (default value is no)
translation = None
# translation = None, Random (default value is None)
#
########################
########################
# Example config file
# Comments start with #
# There are restrictions for valid channel/rank numbers
standard = PCM
channels = 1
ranks = 1
speed = PCM_800D
org = PCM_2Gb_x8
# record_cmd_trace: (default is off): on, off
record_cmd_trace = off
# print_cmd_trace: (default is off): on, off
print_cmd_trace = off
### Below are parameters only for CPU trace
cpu_tick = 4
mem_tick = 1
### Below are parameters only for multicore mode
# When early_exit is on, all cores will be terminated when the earliest one finishes.
early_exit = on
# early_exit = on, off (default value is on)
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit.
expected_limit_insts = 200000000
warmup_insts = 100000000
cache = no
# cache = no, L1L2, L3, all (default value is no)
translation = None
# translation = None, Random (default value is None)
#
########################
########################
# Example config file
# Comments start with #
# There are restrictions for valid channel/rank numbers
standard = SALP-MASA
subarrays = 8
channels = 1
ranks = 1
speed = SALP_1600K
org = SALP_4Gb_x8
# record_cmd_trace: (default is off): on, off
record_cmd_trace = off
# print_cmd_trace: (default is off): on, off
print_cmd_trace = off
### Below are parameters only for CPU trace
cpu_tick = 4
mem_tick = 1
### Below are parameters only for multicore mode
# When early_exit is on, all cores will be terminated when the earliest one finishes.
early_exit = on
# early_exit = on, off (default value is on)
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit.
expected_limit_insts = 200000000
warmup_insts = 100000000
cache = no
# cache = no, L1L2, L3, all (default value is no)
translation = None
# translation = None, Random (default value is None)
#
########################
########################
# Example config file
# Comments start with #
# There are restrictions for valid channel/rank numbers
standard = STTMRAM
channels = 4
ranks = 1
speed = STT_1600_1_2
org = STTMRAM_2Gb_x8
# record_cmd_trace: (default is off): on, off
record_cmd_trace = off
# print_cmd_trace: (default is off): on, off
print_cmd_trace = off
### Below are parameters only for CPU trace
cpu_tick = 4
mem_tick = 1
### Below are parameters only for multicore mode
# When early_exit is on, all cores will be terminated when the earliest one finishes.
early_exit = on
# early_exit = on, off (default value is on)
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit.
expected_limit_insts = 200000000
warmup_insts = 100000000
cache = no
# cache = no, L1L2, L3, all (default value is no)
translation = None
# translation = None, Random (default value is None)
#
########################
########################
# Example config file
# Comments start with #
# There are restrictions for valid channel/rank numbers
standard = TLDRAM
subarrays = 16
channels = 1
ranks = 1
speed = TLDRAM_1600K
org = TLDRAM_4Gb_x8
# record_cmd_trace: (default is off): on, off
record_cmd_trace = off
# print_cmd_trace: (default is off): on, off
print_cmd_trace = off
### Below are parameters only for CPU trace
cpu_tick = 4
mem_tick = 1
### Below are parameters only for multicore mode
# When early_exit is on, all cores will be terminated when the earliest one finishes.
early_exit = on
# early_exit = on, off (default value is on)
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit.
expected_limit_insts = 200000000
warmup_insts = 100000000
cache = no
# cache = no, L1L2, L3, all (default value is no)
translation = None
# translation = None, Random (default value is None)
#
########################
########################
# Example config file
# Comments start with #
# There are restrictions for valid channel/rank numbers
standard = WideIO
channels = 4
ranks = 1
speed = WideIO_266
org = WideIO_8Gb
# record_cmd_trace: (default is off): on, off
record_cmd_trace = off
# print_cmd_trace: (default is off): on, off
print_cmd_trace = off
### Below are parameters only for CPU trace
cpu_tick = 4
mem_tick = 1
### Below are parameters only for multicore mode
# When early_exit is on, all cores will be terminated when the earliest one finishes.
early_exit = on
# early_exit = on, off (default value is on)
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit.
expected_limit_insts = 200000000
warmup_insts = 100000000
cache = no
# cache = no, L1L2, L3, all (default value is no)
translation = None
# translation = None, Random (default value is None)
#
########################
########################
# Example config file
# Comments start with #
# There are restrictions for valid channel/rank numbers
standard = WideIO2
channels = 8
ranks = 1
speed = WideIO2_1066
org = WideIO2_8Gb
# record_cmd_trace: (default is off): on, off
record_cmd_trace = off
# print_cmd_trace: (default is off): on, off
print_cmd_trace = off
### Below are parameters only for CPU trace
cpu_tick = 6
mem_tick = 1
### Below are parameters only for multicore mode
# When early_exit is on, all cores will be terminated when the earliest one finishes.
early_exit = on
# early_exit = on, off (default value is on)
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit.
expected_limit_insts = 200000000
warmup_insts = 100000000
cache = no
# cache = no, L1L2, L3, all (default value is no)
translation = None
# translation = None, Random (default value is None)
#
########################
3 20734016
1 20846400
6 20734208
8 20841280 20841280
0 20734144
2 20918976 20734016
# Customizing Address Mapping
You can specify which physical address is mapped to which channel/rank/bank/row/column.
**Please note that this feature is verified for DDR3 only.** Thus, `Memory.h:124` checks the specs. You can relax the condition for others if you promise to be careful.
## Enabling custom mapping
```bash
./ramulator <config_file> --mode=cpu/dram --stats <stat_file> --mapping <mapping_file> trace0 trace1 ...
```
> --mapping option is completely optional. If not specified, Ramulator uses the default mapping, which is RoBaRaCoCh defined in `Memory.h:81`.
## Syntax of mapping file
### Commenting like `bash`
Just put `#` and the rest in that line is ignored.
### Bit Indices
Please note that index of the least significant bit is 0.
Please note that at this stage of the simulator, physical addresses do not include cacheline offset bits.
### Bit assignments
```
Ba 2 = 13 # BankAddress[2] := PhysicalAddress[13]
```
### Array assignments
```
Ba 2:0 = 5:3 # BankAddress[2:0] := PhysicalAddress[5:3]
```
### Randomization
Zhang et al. proposed an XOR based bank randomization to reduce the row buffer conflicts. [1] To enable it in Ramulator, you can specify which bits to xor and assign to where.
```
Ba 0 = 0 13 # BankAddress[0] := PhysicalAddress[0] xor PhysicalAddress[13]
Ba 1 = 1 7 15 # BankAddress[1] := PhysicalAddress[1] xor PhysicalAddress[7] xor PhysicalAddress[15]
```
### Keywords
Please use the following bigrams of each level:
- Channel: Ch
- Rank: Ra
- Bank Group: Bg
- Bank: Ba
- Subarray: Sa
- Row: Ro
- Column: Co
## Examples
Please refer to the individual files.
[1] Zhao Zhang, Zhichun Zhu, Xiaodong Zhang: [A permutation-based page interleaving scheme to reduce row-buffer conflicts and exploit data locality.](https://ieeexplore.ieee.org/document/898056/) MICRO 2000: 32-41
# Standard DDR3
# Number of bits 41
# Channel 0
# Rank 0
# Bank 3
# Row 32
# Column 6
Ba 2:0 = 2:0
Co 5:0 = 8:3
Ro 31:0 = 40:9
# Standard DDR3
# Number of bits 41
# Channel 0
# Rank 0
# Bank 3
# Row 32
# Column 6
Co 5:0 = 8:3
Ro 31:0 = 40:9
Ba 0 = 0 13
Ba 1 = 1 14
Ba 2 = 2 15
# Standard DDR3
# Number of bits 41
# Channel 0
# Rank 0
# Bank 3
# Row 32
# Column 6
Co 5:0 = 5:0
Ba 2:0 = 8:6
Ro 31:0 = 40:9
# Standard DDR3
# Number of bits 41
# Channel 0
# Rank 0
# Bank 3
# Row 32
# Column 6
Co 5:0 = 5:0
Ro 31:0 = 40:9
Ba 0 = 6 13
Ba 1 = 7 14
Ba 2 = 8 15
#!/usr/bin/python
import os
import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot as plt
import numpy as np
import operator
def main():
    """Entry point: render the per-standard IPC comparison chart."""
    draw_standards()
def draw_standards():
    """Plot geometric-mean IPC per DRAM standard, normalized to DDR3.

    Reads every non-hidden file in ./results (first line is a header;
    each remaining line is one float per standard), computes the
    geometric mean across result files for each standard, and writes a
    bar chart with min/max error bars to std.pdf.

    Raises:
        RuntimeError: if ./results contains no result files.
    """
    from functools import reduce  # bare `reduce` builtin is Python-2-only

    std_names = ['DDR3', 'DDR4', 'SALP', 'LPDDR3', 'LPDDR4','GDDR5', 'HBM', 'WIO', 'WIO2']
    colors = ['0.9', '0.6', '0.8', '0.55', '0.7', '0.5', '0.6', '0.4', '0.5']

    orig_data = []
    res_cnt = 0
    for fn in os.listdir('results'):
        if fn[0] == '.':
            continue  # skip hidden files (e.g. .DS_Store)
        with open('results/' + fn, 'r') as f:
            # Skip the header line; parse one float per remaining line.
            orig_data.append([float(line) for line in f.readlines()[1:]])
        res_cnt += 1
    if res_cnt == 0:
        raise RuntimeError('no result files found in ./results')

    # Transpose so each row holds one standard's values across all files.
    orig_data = np.swapaxes(orig_data, 0, 1)
    # Geometric mean across the res_cnt result files, per standard.
    means = [reduce(operator.mul, a) ** (1. / res_cnt) for a in orig_data]
    # Asymmetric error-bar extents: [distance-to-min, distance-to-max].
    minmax = np.swapaxes([[m - min(a), max(a) - m] for m, a in zip(means, orig_data)], 0, 1)

    pos = np.arange(9)
    #fig, ax = plt.subplots(figsize=(1.8, 1.2))
    fig, ax = plt.subplots(figsize=(3.5, 1.2))
    plt.grid(axis='y', zorder=-3, lw=0.5, ls=':')
    #ax.axhline(1.0, ls='-', lw=0.75, color='black')
    yticks = [0.0, 0.5, 1.0, 1.5, 2.0]
    plt.yticks(yticks, [str(t) for t in yticks], size=6)
    plt.ylim(0, 2.0)
    plt.xlim(-0.15, 9.15)
    # ax.set_yticklabels([])
    ax.set_ylabel('Gmean IPC \n (Normalized to DDR3)', size=6,
                  multialignment='center')
    ax.bar(pos + 0.15, means, yerr=minmax, ecolor='red',
           error_kw={'zorder': -1, 'barsabove': True},
           width=0.7, color=colors, linewidth=0.5, zorder=-2)
    for i, m in enumerate(means):
        if m + minmax[1][i] > 2.0:  # error bar extends past the plot top
            ax.text(pos[i] + 0.525, 1.85, "max=%.2f" % (m + minmax[1][i]), size=5)
    for i in range(9):
        ax.text(pos[i] + 0.5, means[i] + 0.1, '%.2f' % means[i],
                ha='center', size=6, zorder=10)
    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().tick_left()
    ax.set_xticklabels([])
    ax.tick_params('both', length=0, which='major')
    ax.spines['bottom'].set_linewidth(0.5)
    ax.spines['top'].set_linewidth(0.5)
    ax.spines['left'].set_linewidth(0.5)
    ax.spines['right'].set_linewidth(0.5)
    for i in range(9):
        ax.text(pos[i] + 0.525, 0.1, std_names[i], ha='center', va='bottom',
                rotation='vertical', size=6, zorder=10)
    fig.savefig('std.pdf', bbox_inches='tight', pad_inches=0.05)
# Allow the script to be imported without triggering the plot.
if __name__ == '__main__':
    main()
#ifndef __ALDRAM_H
#define __ALDRAM_H
#include "DRAM.h"
#include "Request.h"
#include <vector>
#include <map>
#include <string>
#include <functional>
using namespace std;
namespace ramulator
{
// ALDRAM standard descriptor consumed by DRAM<ALDRAM>: defines the level
// hierarchy, command set, per-command state transitions, and two full
// timing tables selected by operating temperature (see speed_table below,
// where the first (COLD) table subtracts cycles from several latencies
// relative to the second (HOT) table).
class ALDRAM
{
public:
static string standard_name;
enum class Org;
enum class Speed;
enum class Temp;
ALDRAM(Org org, Speed speed);
ALDRAM(const string& org_str, const string& speed_str);
// Lookup tables mapping config-file strings (e.g. "ALDRAM_4Gb_x8") to enums.
static map<string, enum Org> org_map;
static map<string, enum Speed> speed_map;
/*** Level ***/
// Topology levels, ordered outermost to innermost.
enum class Level : int
{
Channel, Rank, Bank, Row, Column, MAX
};
static std::string level_str [int(Level::MAX)];
/*** Command ***/
// Command set: activate, precharge (one bank / all banks), read/write
// with and without auto-precharge, refresh, power-down entry/exit,
// self-refresh entry/exit.
enum class Command : int
{
ACT, PRE, PREA,
RD, WR, RDA, WRA,
REF, PDE, PDX, SRE, SRX,
MAX
};
// Printable name for each command; index-aligned with the Command enum.
string command_name[int(Command::MAX)] = {
"ACT", "PRE", "PREA",
"RD", "WR", "RDA", "WRA",
"REF", "PDE", "PDX", "SRE", "SRX"
};
// Hierarchy level each command targets; index-aligned with Command.
Level scope[int(Command::MAX)] = {
Level::Row, Level::Bank, Level::Rank,
Level::Column, Level::Column, Level::Column, Level::Column,
Level::Rank, Level::Rank, Level::Rank, Level::Rank, Level::Rank
};
// True only for ACT (the command that opens a row).
bool is_opening(Command cmd)
{
switch(int(cmd)) {
case int(Command::ACT):
return true;
default:
return false;
}
}
// True for column accesses: RD/WR and their auto-precharge variants.
bool is_accessing(Command cmd)
{
switch(int(cmd)) {
case int(Command::RD):
case int(Command::WR):
case int(Command::RDA):
case int(Command::WRA):
return true;
default:
return false;
}
}
// True for commands that close a row: explicit precharges and
// auto-precharging accesses.
bool is_closing(Command cmd)
{
switch(int(cmd)) {
case int(Command::RDA):
case int(Command::WRA):
case int(Command::PRE):
case int(Command::PREA):
return true;
default:
return false;
}
}
// True only for REF.
bool is_refreshing(Command cmd)
{
switch(int(cmd)) {
case int(Command::REF):
return true;
default:
return false;
}
}
/* State */
// Node state, plus the initial state per level in `start`
// (State::MAX marks levels that carry no state of their own).
enum class State : int
{
Opened, Closed, PowerUp, ActPowerDown, PrePowerDown, SelfRefresh, MAX
} start[int(Level::MAX)] = {
State::MAX, State::PowerUp, State::Closed, State::Closed, State::MAX
};
/* Translate */
// Maps each Request::Type to the command that serves it.
Command translate[int(Request::Type::MAX)] = {
Command::RD, Command::WR,
Command::REF, Command::PDE, Command::SRE
};
/* Prerequisite */
// prereq[level][cmd]: returns the command that must be issued before
// `cmd` can proceed at this node (populated by init_prereq()).
function<Command(DRAM<ALDRAM>*, Command cmd, int)> prereq[int(Level::MAX)][int(Command::MAX)];
// SAUGATA: added function object container for row hit status
/* Row hit */
// rowhit/rowopen[level][cmd]: predicates reporting whether the target
// row is already open (populated by init_rowhit()/init_rowopen()).
function<bool(DRAM<ALDRAM>*, Command cmd, int)> rowhit[int(Level::MAX)][int(Command::MAX)];
function<bool(DRAM<ALDRAM>*, Command cmd, int)> rowopen[int(Level::MAX)][int(Command::MAX)];
/* Timing */
// One timing constraint: after `cmd`, wait `val` cycles before issuing
// at relative distance `dist`; `sibling` extends the rule to siblings.
struct TimingEntry
{
Command cmd;
int dist;
int val;
bool sibling;
};
// Timing constraints per level and command (populated by init_timing()).
vector<TimingEntry> timing[int(Level::MAX)][int(Command::MAX)];
/* Lambda */
// State-update callbacks run when a command is issued at a level
// (populated by init_lambda()).
function<void(DRAM<ALDRAM>*, int)> lambda[int(Level::MAX)][int(Command::MAX)];
/* Organization */
// Chip organizations: <capacity in Mb>_x<DQ width>.
enum class Org : int
{
ALDRAM_512Mb_x4, ALDRAM_512Mb_x8, ALDRAM_512Mb_x16,
ALDRAM_1Gb_x4, ALDRAM_1Gb_x8, ALDRAM_1Gb_x16,
ALDRAM_2Gb_x4, ALDRAM_2Gb_x8, ALDRAM_2Gb_x16,
ALDRAM_4Gb_x4, ALDRAM_4Gb_x8, ALDRAM_4Gb_x16,
ALDRAM_8Gb_x4, ALDRAM_8Gb_x8, ALDRAM_8Gb_x16,
MAX
};
// Geometry per organization: size (Mb), DQ width, and node count per
// level. The 0 entries (channel/rank) are filled in later via
// set_channel_number()/set_rank_number().
struct OrgEntry {
int size;
int dq;
int count[int(Level::MAX)];
} org_table[int(Org::MAX)] = {
{ 512, 4, {0, 0, 8, 1<<13, 1<<11}}, { 512, 8, {0, 0, 8, 1<<13, 1<<10}}, { 512, 16, {0, 0, 8, 1<<12, 1<<10}},
{1<<10, 4, {0, 0, 8, 1<<14, 1<<11}}, {1<<10, 8, {0, 0, 8, 1<<14, 1<<10}}, {1<<10, 16, {0, 0, 8, 1<<13, 1<<10}},
{2<<10, 4, {0, 0, 8, 1<<15, 1<<11}}, {2<<10, 8, {0, 0, 8, 1<<15, 1<<10}}, {2<<10, 16, {0, 0, 8, 1<<14, 1<<10}},
{4<<10, 4, {0, 0, 8, 1<<16, 1<<11}}, {4<<10, 8, {0, 0, 8, 1<<16, 1<<10}}, {4<<10, 16, {0, 0, 8, 1<<15, 1<<10}},
{8<<10, 4, {0, 0, 8, 1<<16, 1<<12}}, {8<<10, 8, {0, 0, 8, 1<<16, 1<<11}}, {8<<10, 16, {0, 0, 8, 1<<16, 1<<10}}
}, org_entry;
void set_channel_number(int channel);
void set_rank_number(int rank);
/* Speed */
// Speed bins (data rate / timing grade), mirroring DDR3 bins.
enum class Speed : int
{
ALDRAM_800D, ALDRAM_800E,
ALDRAM_1066E, ALDRAM_1066F, ALDRAM_1066G,
ALDRAM_1333G, ALDRAM_1333H,
ALDRAM_1600H, ALDRAM_1600J, ALDRAM_1600K,
ALDRAM_1866K, ALDRAM_1866L,
ALDRAM_2133L, ALDRAM_2133M,
MAX
};
Speed current_speed;
// Operating temperature; indexes the outer dimension of speed_table.
enum class Temp : int
{
COLD, HOT, MAX
};
Temp temperature = Temp::HOT; // DHL
int prefetch_size = 8; // 8n prefetch DDR
int channel_width = 64;
// Timing parameters for one speed bin: rate in MT/s, freq in MHz,
// tCK in ns, and the remaining fields in cycles (nXX naming).
struct SpeedEntry {
int rate;
double freq, tCK;
int nBL, nCCD, nRTRS;
int nCL, nRCD, nRP, nCWL;
int nRAS, nRC;
int nRTP, nWTR, nWR;
int nRRD, nFAW;
int nRFC, nREFI;
int nPD, nXP, nXPDLL;
int nCKESR, nXS, nXSDLL;
} speed_table[int(Temp::MAX)][int(Speed::MAX)] = {
// [Temp::COLD]: tightened timings — note the explicit "-1"/"-2" etc.
// subtracted from nRCD/nRP/nRAS/nRC relative to the HOT table below.
{
{800, (400.0/3)*3, (3/0.4)/3, 4, 4, 2, 5, 5-1, 5-1, 5, 15-5, 20-6, 4, 4, 6, 0, 0, 0, 3120, 3, 3, 10, 4, 0, 512},
{800, (400.0/3)*3, (3/0.4)/3, 4, 4, 2, 6, 6-1, 6-1, 5, 15-5, 21-6, 4, 4, 6, 0, 0, 0, 3120, 3, 3, 10, 4, 0, 512},
{1066, (400.0/3)*4, (3/0.4)/4, 4, 4, 2, 6, 6-1, 6-1, 6, 20-5, 26-6, 4, 4, 8, 0, 0, 0, 4160, 3, 4, 13, 4, 0, 512},
{1066, (400.0/3)*4, (3/0.4)/4, 4, 4, 2, 7, 7-1, 7-1, 6, 20-5, 27-6, 4, 4, 8, 0, 0, 0, 4160, 3, 4, 13, 4, 0, 512},
{1066, (400.0/3)*4, (3/0.4)/4, 4, 4, 2, 8, 8-1, 8-1, 6, 20-5, 28-6, 4, 4, 8, 0, 0, 0, 4160, 3, 4, 13, 4, 0, 512},
{1333, (400.0/3)*5, (3/0.4)/5, 4, 4, 2, 8, 8-1, 8-1, 7, 24-5, 32-6, 5, 5, 10, 0, 0, 0, 5200, 4, 4, 16, 5, 0, 512},
{1333, (400.0/3)*5, (3/0.4)/5, 4, 4, 2, 9, 9-1, 9-1, 7, 24-5, 33-6, 5, 5, 10, 0, 0, 0, 5200, 4, 4, 16, 5, 0, 512},
{1600, (400.0/3)*6, (3/0.4)/6, 4, 4, 2, 9, 9-2, 9-2, 8, 28-10, 37-12, 6, 6, 12, 0, 0, 0, 6240, 4, 5, 20, 5, 0, 512},
{1600, (400.0/3)*6, (3/0.4)/6, 4, 4, 2, 10, 10-2, 10-2, 8, 28-10, 38-12, 6, 6, 12, 0, 0, 0, 6240, 4, 5, 20, 5, 0, 512},
{1600, (400.0/3)*6, (3/0.4)/6, 4, 4, 2, 11, 11-2, 11-2, 8, 28-10, 39-12, 6, 6, 12, 0, 0, 0, 6240, 4, 5, 20, 5, 0, 512},
{1866, (400.0/3)*7, (3/0.4)/7, 4, 4, 2, 11, 11-2, 11-2, 9, 32-10, 43-12, 7, 7, 14, 0, 0, 0, 7280, 5, 6, 23, 6, 0, 512},
{1866, (400.0/3)*7, (3/0.4)/7, 4, 4, 2, 12, 12-2, 12-3, 9, 32-10, 44-13, 7, 7, 14, 0, 0, 0, 7280, 5, 6, 23, 6, 0, 512},
{2133, (400.0/3)*8, (3/0.4)/8, 4, 4, 2, 12, 12-2, 12-3, 10, 36-10, 48-13, 8, 8, 16, 0, 0, 0, 8320, 6, 7, 26, 7, 0, 512},
{2133, (400.0/3)*8, (3/0.4)/8, 4, 4, 2, 13, 13-2, 13-3, 10, 36-10, 49-13, 8, 8, 16, 0, 0, 0, 8320, 6, 7, 26, 7, 0, 512}
},
// [Temp::HOT]: baseline (unreduced) timings.
{
{800, (400.0/3)*3, (3/0.4)/3, 4, 4, 2, 5, 5, 5, 5, 15, 20, 4, 4, 6, 0, 0, 0, 3120, 3, 3, 10, 4, 0, 512},
{800, (400.0/3)*3, (3/0.4)/3, 4, 4, 2, 6, 6, 6, 5, 15, 21, 4, 4, 6, 0, 0, 0, 3120, 3, 3, 10, 4, 0, 512},
{1066, (400.0/3)*4, (3/0.4)/4, 4, 4, 2, 6, 6, 6, 6, 20, 26, 4, 4, 8, 0, 0, 0, 4160, 3, 4, 13, 4, 0, 512},
{1066, (400.0/3)*4, (3/0.4)/4, 4, 4, 2, 7, 7, 7, 6, 20, 27, 4, 4, 8, 0, 0, 0, 4160, 3, 4, 13, 4, 0, 512},
{1066, (400.0/3)*4, (3/0.4)/4, 4, 4, 2, 8, 8, 8, 6, 20, 28, 4, 4, 8, 0, 0, 0, 4160, 3, 4, 13, 4, 0, 512},
{1333, (400.0/3)*5, (3/0.4)/5, 4, 4, 2, 8, 8, 8, 7, 24, 32, 5, 5, 10, 0, 0, 0, 5200, 4, 4, 16, 5, 0, 512},
{1333, (400.0/3)*5, (3/0.4)/5, 4, 4, 2, 9, 9, 9, 7, 24, 33, 5, 5, 10, 0, 0, 0, 5200, 4, 4, 16, 5, 0, 512},
{1600, (400.0/3)*6, (3/0.4)/6, 4, 4, 2, 9, 9, 9, 8, 28, 37, 6, 6, 12, 0, 0, 0, 6240, 4, 5, 20, 5, 0, 512},
{1600, (400.0/3)*6, (3/0.4)/6, 4, 4, 2, 10, 10, 10, 8, 28, 38, 6, 6, 12, 0, 0, 0, 6240, 4, 5, 20, 5, 0, 512},
{1600, (400.0/3)*6, (3/0.4)/6, 4, 4, 2, 11, 11, 11, 8, 28, 39, 6, 6, 12, 0, 0, 0, 6240, 4, 5, 20, 5, 0, 512},
{1866, (400.0/3)*7, (3/0.4)/7, 4, 4, 2, 11, 11, 11, 9, 32, 43, 7, 7, 14, 0, 0, 0, 7280, 5, 6, 23, 6, 0, 512},
{1866, (400.0/3)*7, (3/0.4)/7, 4, 4, 2, 12, 12, 12, 9, 32, 44, 7, 7, 14, 0, 0, 0, 7280, 5, 6, 23, 6, 0, 512},
{2133, (400.0/3)*8, (3/0.4)/8, 4, 4, 2, 12, 12, 12, 10, 36, 48, 8, 8, 16, 0, 0, 0, 8320, 6, 7, 26, 7, 0, 512},
{2133, (400.0/3)*8, (3/0.4)/8, 4, 4, 2, 13, 13, 13, 10, 36, 49, 8, 8, 16, 0, 0, 0, 8320, 6, 7, 26, 7, 0, 512}
}
}, speed_entry;
int read_latency;
// Re-selects timing parameters for the given operating temperature.
void aldram_timing(Temp current_temperature);
private:
void init_speed();
void init_lambda();
void init_prereq();
void init_rowhit(); // SAUGATA: added function to check for row hits
void init_rowopen();
void init_timing(SpeedEntry speed_entry);
};
} /*namespace ramulator*/
#endif /*__ALDRAM_H*/
# Compiler options shared by every target in this package. Hoisted into a
# single constant so the flags cannot drift between rules (the original file
# repeated this list in each rule, with the order varying from rule to rule).
COPTS = [
    "-g",
    "-O3",
    "-std=c++11",
    "-Wall",
]

# DRAM standards that follow the uniform pattern:
# <Std>.cpp + <Std>.h, depending only on :Request and :DRAM.
DRAM_STANDARDS = [
    "DDR3",
    "DDR4",
    "DSARP",
    "GDDR5",
    "LPDDR3",
    "LPDDR4",
    "WideIO",
    "WideIO2",
    "HBM",
    "SALP",
    "ALDRAM",
    "TLDRAM",
    "STTMRAM",
    "PCM",
]

cc_binary(
    name = "main",
    srcs = ["Main.cpp"],
    copts = COPTS,
    deps = [
        ":Processor",
        ":Config",
        ":Controller",
        ":SpeedyController",
        ":Memory",
        ":DRAM",
        ":Statistics",
        ":Gem5Wrapper",
    ] + [":" + std for std in DRAM_STANDARDS],
)

cc_library(
    name = "Processor",
    srcs = ["Processor.cpp"],
    hdrs = ["Processor.h"],
    copts = COPTS,
    deps = [
        ":Cache",
        ":Config",
        ":Memory",
        ":Request",
        ":Statistics",
    ],
)

cc_library(
    name = "MemoryFactory",
    srcs = ["MemoryFactory.cpp"],
    hdrs = ["MemoryFactory.h"],
    copts = COPTS,
    deps = [
        ":Config",
        ":Memory",
        ":WideIO2",
        ":WideIO",
        ":SALP",
        ":LPDDR4",
        ":HBM",
    ],
)

# Header-only: Memory.h is templated on the standard.
cc_library(
    name = "Memory",
    hdrs = ["Memory.h"],
    copts = COPTS,
    deps = [
        ":Config",
        ":DRAM",
        ":Request",
        ":Controller",
        ":SpeedyController",
        ":Statistics",
        ":GDDR5",
        ":HBM",
        ":LPDDR3",
        ":LPDDR4",
        ":WideIO2",
        ":DSARP",
    ],
)

cc_library(
    name = "Controller",
    srcs = [
        "Controller.cpp",
        "Refresh.cpp",
    ],
    hdrs = [
        "Refresh.h",
        "Controller.h",
        "Scheduler.h",
    ],
    copts = COPTS,
    deps = [
        ":Config",
        ":DRAM",
        ":DSARP",
        ":Request",
        ":Statistics",
        ":ALDRAM",
        ":SALP",
        ":TLDRAM",
    ],
)

cc_library(
    name = "SpeedyController",
    hdrs = ["SpeedyController.h"],
    copts = COPTS,
    deps = [
        ":Config",
        ":DRAM",
        ":Request",
        ":Statistics",
    ],
)

# One cc_library per DRAM standard; the original file spelled these out as
# fourteen identical rules differing only in the standard's name.
[
    cc_library(
        name = std,
        srcs = [std + ".cpp"],
        hdrs = [std + ".h"],
        copts = COPTS,
        deps = [
            ":Request",
            ":DRAM",
        ],
    )
    for std in DRAM_STANDARDS
]

cc_library(
    name = "Cache",
    srcs = ["Cache.cpp"],
    hdrs = ["Cache.h"],
    copts = COPTS,
    deps = [
        ":Config",
        ":Request",
        ":Statistics",
    ],
)

cc_library(
    name = "Gem5Wrapper",
    srcs = ["Gem5Wrapper.cpp"],
    hdrs = ["Gem5Wrapper.h"],
    copts = COPTS,
    deps = [
        ":Config",
        ":Controller",
        ":MemoryFactory",
        ":Memory",
        ":DDR3",
        ":DDR4",
        ":GDDR5",
        ":LPDDR3",
        ":LPDDR4",
        ":WideIO",
        ":WideIO2",
        ":HBM",
        ":SALP",
    ],
)

cc_library(
    name = "DRAM",
    hdrs = ["DRAM.h"],
    copts = COPTS,
    deps = [
        ":Statistics",
    ],
)

cc_library(
    name = "Statistics",
    hdrs = ["Statistics.h"],
    copts = COPTS,
    deps = [
        ":StatType",
    ],
)

cc_library(
    name = "Request",
    hdrs = ["Request.h"],
    copts = COPTS,
)

cc_library(
    name = "StatType",
    srcs = ["StatType.cpp"],
    hdrs = ["StatType.h"],
    copts = COPTS,
)

cc_library(
    name = "Config",
    srcs = ["Config.cpp"],
    hdrs = ["Config.h"],
    copts = COPTS,
)
#ifndef __CACHE_H
#define __CACHE_H
#include "Config.h"
#include "Request.h"
#include "Statistics.h"
#include <algorithm>
#include <cstdio>
#include <cassert>
#include <functional>
#include <list>
#include <map>
#include <memory>
#include <queue>
#include <list>
namespace ramulator
{
class CacheSystem;
// One level (L1/L2/L3) of a write-back, LRU cache hierarchy with MSHRs.
// Levels are chained through higher_cache/lower_cache (see concatlower);
// traffic that misses in the last level leaves through the owning
// CacheSystem.
class Cache {
protected:
    // Per-level counters (registered with the stats framework in Cache.cpp).
    ScalarStat cache_read_miss;
    ScalarStat cache_write_miss;
    ScalarStat cache_total_miss;
    ScalarStat cache_eviction;
    ScalarStat cache_read_access;
    ScalarStat cache_write_access;
    ScalarStat cache_total_access;
    ScalarStat cache_mshr_hit;          // request merged into an in-flight miss
    ScalarStat cache_mshr_unavailable;  // all MSHR entries busy
    ScalarStat cache_set_unavailable;   // every way in the target set locked

public:
    // Which level of the hierarchy this instance models.
    enum class Level {
        L1,
        L2,
        L3,
        MAX
    } level;
    std::string level_string;

    // One cache line. Lines are kept per-set in LRU order (front = LRU).
    struct Line {
        long addr;
        long tag;
        bool lock; // When the lock is on, the value is not valid yet.
        bool dirty;
        Line(long addr, long tag):
            addr(addr), tag(tag), lock(true), dirty(false) {}
        Line(long addr, long tag, bool lock, bool dirty):
            addr(addr), tag(tag), lock(lock), dirty(dirty) {}
    };

    Cache(int size, int assoc, int block_size, int mshr_entry_num,
        Level level, std::shared_ptr<CacheSystem> cachesys);

    void tick();

    // L1, L2, L3 accumulated latencies
    int latency[int(Level::MAX)] = {4, 4 + 12, 4 + 12 + 31};
    int latency_each[int(Level::MAX)] = {4, 12, 31};

    std::shared_ptr<CacheSystem> cachesys;
    // LLC has multiple higher caches
    std::vector<Cache*> higher_cache;
    Cache* lower_cache;

    // Accept a request at this level; returns false when it must be retried.
    bool send(Request req);

    // Link `lower` as the next level below this cache (and register this
    // cache among lower's higher_cache).
    void concatlower(Cache* lower);

    // Completion callback invoked when a miss has been filled.
    void callback(Request& req);

protected:
    bool is_first_level;
    bool is_last_level;
    size_t size;
    unsigned int assoc;
    unsigned int block_num;
    unsigned int index_mask;
    unsigned int block_size;
    unsigned int index_offset;  // bit offset of the set-index field in an address
    unsigned int tag_offset;    // bit offset of the tag field in an address
    unsigned int mshr_entry_num;
    // In-flight misses: (request address, iterator to the locked line).
    std::vector<std::pair<long, std::list<Line>::iterator>> mshr_entries;
    // Requests rejected earlier (e.g. no MSHR free) awaiting re-issue.
    std::list<Request> retry_list;
    // set index -> LRU-ordered list of resident lines.
    std::map<int, std::list<Line> > cache_lines;

    // floor(log2(val)); used to derive the offset/index bit widths.
    int calc_log2(int val) {
        int n = 0;
        while ((val >>= 1))
            n ++;
        return n;
    }

    int get_index(long addr) {
        return (addr >> index_offset) & index_mask;
    };

    long get_tag(long addr) {
        return (addr >> tag_offset);
    }

    // Align the address to cache line size
    long align(long addr) {
        return (addr & ~(block_size-1l));
    }

    // Evict the cache line from higher level to this level.
    // Pass the dirty bit and update LRU queue.
    void evictline(long addr, bool dirty);

    // Invalidate the line from this level to higher levels
    // The return value is a pair. The first element is invalidation
    // latency, and the second is whether the value has new version
    // in higher level and this level.
    std::pair<long, bool> invalidate(long addr);

    // Evict the victim from current set of lines.
    // First do invalidation, then call evictline(L1 or L2) or send
    // a write request to memory(L3) when dirty bit is on.
    void evict(std::list<Line>* lines,
        std::list<Line>::iterator victim);

    // First test whether need eviction, if so, do eviction by
    // calling evict function. Then allocate a new line and return
    // the iterator points to it.
    std::list<Line>::iterator allocate_line(
        std::list<Line>& lines, long addr);

    // Check whether the set to hold addr has space or eviction is
    // needed.
    bool need_eviction(const std::list<Line>& lines, long addr);

    // Check whether this addr is hit and fill in the pos_ptr with
    // the iterator to the hit line or lines.end()
    bool is_hit(std::list<Line>& lines, long addr,
        std::list<Line>::iterator* pos_ptr);

    // True when the set is full and every resident way is still locked
    // (i.e. no victim can be chosen right now).
    bool all_sets_locked(const std::list<Line>& lines) {
        if (lines.size() < assoc) {
            return false;
        }
        for (const auto& line : lines) {
            if (!line.lock) {
                return false;
            }
        }
        return true;
    }

    // True when addr is absent here, or present and unlocked at this level
    // AND (recursively) at every higher level holding it.
    bool check_unlock(long addr) {
        auto it = cache_lines.find(get_index(addr));
        if (it == cache_lines.end()) {
            return true;
        } else {
            auto& lines = it->second;
            auto line = find_if(lines.begin(), lines.end(),
                [addr, this](Line l){return (l.tag == get_tag(addr));});
            if (line == lines.end()) {
                return true;
            } else {
                bool check = !line->lock;
                if (!is_first_level) {
                    for (auto hc : higher_cache) {
                        if (!check) {
                            return check;
                        }
                        check = check && hc->check_unlock(line->addr);
                    }
                }
                return check;
            }
        }
    }

    // Find an in-flight miss covering the same cache line as addr.
    std::vector<std::pair<long, std::list<Line>::iterator>>::iterator
    hit_mshr(long addr) {
        auto mshr_it =
            find_if(mshr_entries.begin(), mshr_entries.end(),
                [addr, this](std::pair<long, std::list<Line>::iterator>
                    mshr_entry) {
                    return (align(mshr_entry.first) == align(addr));
                });
        return mshr_it;
    }

    // Return the line list for addr's set, creating an empty set on demand.
    std::list<Line>& get_lines(long addr) {
        if (cache_lines.find(get_index(addr))
            == cache_lines.end()) {
            cache_lines.insert(make_pair(get_index(addr),
                std::list<Line>()));
        }
        return cache_lines[get_index(addr)];
    }
};
// Shared plumbing for the whole cache hierarchy: tracks requests waiting out
// their cache latency (hits and misses) and forwards last-level misses to the
// memory system through send_memory.
class CacheSystem {
public:
    // configs determines which cache levels exist; send_memory is invoked
    // when a request must leave the hierarchy for the memory system.
    CacheSystem(const Config& configs, std::function<bool(Request)> send_memory):
        send_memory(send_memory) {
        if (configs.has_core_caches()) {
            first_level = Cache::Level::L1;
        } else if (configs.has_l3_cache()) {
            first_level = Cache::Level::L3;
        } else {
            // BUGFIX: the original assigned last_level here, which left
            // first_level uninitialized in the no-cache configuration.
            first_level = Cache::Level::MAX; // no cache
        }

        if (configs.has_l3_cache()) {
            last_level = Cache::Level::L3;
        } else if (configs.has_core_caches()) {
            last_level = Cache::Level::L2;
        } else {
            last_level = Cache::Level::MAX; // no cache
        }
    }

    // wait_list contains miss requests with their latencies in
    // cache. When this latency is met, the send_memory function
    // will be called to send the request to the memory system.
    std::list<std::pair<long, Request> > wait_list;

    // hit_list contains hit requests with their latencies in cache.
    // callback function will be called when this latency is met and
    // set the instruction status to ready in processor's window.
    std::list<std::pair<long, Request> > hit_list;

    std::function<bool(Request)> send_memory;

    long clk = 0;
    void tick();

    Cache::Level first_level;
    Cache::Level last_level;
};
} // namespace ramulator
#endif /* __CACHE_H */
#include "Config.h"
using namespace std;
using namespace ramulator;
// Construct a Config and immediately populate it from the given file.
// Delegates all the work to parse(); asserts inside parse() if the file
// cannot be opened.
Config::Config(const std::string& fname) {
    parse(fname);
}
// Read a "key = value" configuration file into `options` and mirror the
// well-known numeric keys into their typed fields.
//
// Format (see the example config): tokens are separated by spaces, tabs or
// '=', blank lines are skipped, and lines starting with '#' are comments.
// Exactly two tokens (key, value) are allowed per non-comment line.
// Asserts if the file cannot be opened.
void Config::parse(const string& fname)
{
    ifstream file(fname);
    assert(file.good() && "Bad config file");
    string line;
    while (getline(file, line)) {
        char delim[] = " \t=";
        vector<string> tokens;

        // Split the line on any run of delimiter characters.
        while (true) {
            size_t start = line.find_first_not_of(delim);
            if (start == string::npos)
                break;

            size_t end = line.find_first_of(delim, start);
            if (end == string::npos) {
                tokens.push_back(line.substr(start));
                break;
            }

            tokens.push_back(line.substr(start, end - start));
            line = line.substr(end);
        }

        // empty line
        if (!tokens.size())
            continue;

        // comment line
        if (tokens[0][0] == '#')
            continue;

        // parameter line
        assert(tokens.size() == 2 && "Only allow two tokens in one line");

        options[tokens[0]] = tokens[1];

        if (tokens[0] == "channels") {
          channels = atoi(tokens[1].c_str());
        } else if (tokens[0] == "ranks") {
          ranks = atoi(tokens[1].c_str());
        } else if (tokens[0] == "subarrays") {
          subarrays = atoi(tokens[1].c_str());
        } else if (tokens[0] == "cpu_tick") {
          cpu_tick = atoi(tokens[1].c_str());
        } else if (tokens[0] == "mem_tick") {
          mem_tick = atoi(tokens[1].c_str());
        } else if (tokens[0] == "expected_limit_insts") {
          // BUGFIX: these two fields are `long`; atoi would silently
          // truncate instruction counts beyond INT_MAX.
          expected_limit_insts = atol(tokens[1].c_str());
        } else if (tokens[0] == "warmup_insts") {
          warmup_insts = atol(tokens[1].c_str());
        }
    }
    file.close();
}
#ifndef __CONFIG_H
#define __CONFIG_H
#include <string>
#include <fstream>
#include <vector>
#include <map>
#include <iostream>
#include <cassert>
namespace ramulator
{
// Key/value configuration parsed from a ramulator config file (see the
// example in the repository). Raw strings live in `options`; the well-known
// numeric keys are additionally mirrored into typed fields by parse().
class Config {
private:
    std::map<std::string, std::string> options;
    // BUGFIX: these were previously uninitialized, so a default-constructed
    // Config (or a config file missing one of these keys) made the getters
    // return indeterminate values. Zero-initialize them in-class.
    int channels = 0;
    int ranks = 0;
    int subarrays = 0;
    int cpu_tick = 0;
    int mem_tick = 0;
    int core_num = 0;
    long expected_limit_insts = 0;
    long warmup_insts = 0;

public:
    Config() {}
    Config(const std::string& fname);
    void parse(const std::string& fname);

    // Returns the raw string value for `name`, or "" when absent.
    std::string operator [] (const std::string& name) const {
        auto it = options.find(name);
        return (it != options.end()) ? it->second : "";
    }

    bool contains(const std::string& name) const {
        return options.find(name) != options.end();
    }

    // Insert name -> value; complains (but does not overwrite) when the
    // option was already set.
    void add (const std::string& name, const std::string& value) {
        if (!contains(name)) {
            options.insert(make_pair(name, value));
        } else {
            printf("ramulator::Config::add options[%s] already set.\n", name.c_str());
        }
    }

    void set_core_num(int _core_num) {core_num = _core_num;}

    int get_channels() const {return channels;}
    int get_subarrays() const {return subarrays;}
    int get_ranks() const {return ranks;}
    int get_cpu_tick() const {return cpu_tick;}
    int get_mem_tick() const {return mem_tick;}
    int get_core_num() const {return core_num;}
    long get_expected_limit_insts() const {return expected_limit_insts;}
    long get_warmup_insts() const {return warmup_insts;}

    // "cache = all" or "cache = L3" enables the shared L3.
    bool has_l3_cache() const {
        auto it = options.find("cache");
        return it != options.end() &&
               (it->second == "all" || it->second == "L3");
    }

    // "cache = all" or "cache = L1L2" enables per-core L1/L2 caches.
    bool has_core_caches() const {
        auto it = options.find("cache");
        return it != options.end() &&
               (it->second == "all" || it->second == "L1L2");
    }

    // Default is true; only an explicit "early_exit = off" disables it.
    bool is_early_exit() const {
        auto it = options.find("early_exit");
        return !(it != options.end() && it->second == "off");
    }

    bool calc_weighted_speedup() const {
        return (expected_limit_insts != 0);
    }

    // Default is false; only an explicit "record_cmd_trace = on" enables it.
    bool record_cmd_trace() const {
        auto it = options.find("record_cmd_trace");
        return it != options.end() && it->second == "on";
    }

    // Default is false; only an explicit "print_cmd_trace = on" enables it.
    bool print_cmd_trace() const {
        auto it = options.find("print_cmd_trace");
        return it != options.end() && it->second == "on";
    }
};
} /* namespace ramulator */
#endif /* __CONFIG_H */
#include "Controller.h"
#include "SALP.h"
#include "ALDRAM.h"
#include "TLDRAM.h"
using namespace ramulator;
namespace ramulator
{
// Build the address vector for a PRE_OTHER command: same channel/rank/bank as
// the request, but aimed at the *other* subarray in that bank that is
// currently open (subarray 0 if none is), with the row field wildcarded.
static vector<int> get_offending_subarray(DRAM<SALP>* channel, vector<int> & addr_vec){
    // Walk down the tree to the bank addressed by this request.
    auto rank = channel->children[addr_vec[int(SALP::Level::Rank)]];
    auto bank = rank->children[addr_vec[int(SALP::Level::Bank)]];
    auto target_sa = bank->children[addr_vec[int(SALP::Level::SubArray)]];

    // Find the first sibling subarray that is open; default to id 0.
    int offender_id = 0;
    for (auto sibling : bank->children) {
        if (sibling == target_sa)
            continue;
        if (sibling->state == SALP::State::Opened) {
            offender_id = sibling->id;
            break;
        }
    }

    vector<int> result(addr_vec);
    result[int(SALP::Level::SubArray)] = offender_id;
    result[int(SALP::Level::Row)] = -1; // row is irrelevant for PRE_OTHER
    return result;
}
// SALP specialization: a PRE_OTHER command targets the offending (other open)
// subarray rather than the one named by the request itself.
template <>
vector<int> Controller<SALP>::get_addr_vec(SALP::Command cmd, list<Request>::iterator req){
    return (cmd == SALP::Command::PRE_OTHER)
        ? get_offending_subarray(channel, req->addr_vec)
        : req->addr_vec;
}
// SALP specialization: readiness of a PRE_OTHER command is checked against
// the offending subarray's address vector, not the request's own.
template <>
bool Controller<SALP>::is_ready(list<Request>::iterator req){
    SALP::Command cmd = get_first_cmd(req);
    if (cmd != SALP::Command::PRE_OTHER)
        return channel->check(cmd, req->addr_vec.data(), clk);

    vector<int> target = get_offending_subarray(channel, req->addr_vec);
    return channel->check(cmd, target.data(), clk);
}
// ALDRAM adjusts DRAM timing parameters at runtime based on temperature;
// delegate to the spec so the whole channel picks up the new timing table.
template <>
void Controller<ALDRAM>::update_temp(ALDRAM::Temp current_temperature){
    channel->spec->aldram_timing(current_temperature);
}
// TLDRAM specialization of the per-cycle controller loop. It mirrors the
// generic Controller::tick() except for step 5: read requests are converted
// into EXTENSION (migration) requests before being issued, which is the
// TLDRAM-specific behavior.
template <>
void Controller<TLDRAM>::tick(){
    clk++;
    req_queue_length_sum += readq.size() + writeq.size();
    read_req_queue_length_sum += readq.size();
    write_req_queue_length_sum += writeq.size();

    /*** 1. Serve completed reads ***/
    if (pending.size()) {
        Request& req = pending[0];
        if (req.depart <= clk) {
            // A latency of <= 1 cycle marks a request that was never really
            // serviced by the channel; only count real reads in the stats.
            if (req.depart - req.arrive > 1) {
                read_latency_sum += req.depart - req.arrive;
                channel->update_serving_requests(
                    req.addr_vec.data(), -1, clk);
            }
            req.callback(req);
            pending.pop_front();
        }
    }

    /*** 2. Should we schedule refreshes? ***/
    refresh->tick_ref();

    /*** 3. Should we schedule writes? ***/
    // Hysteresis: enter write mode at 80% write-queue occupancy, leave it
    // once drained to 20% (and there are reads waiting).
    if (!write_mode) {
        // yes -- write queue is almost full or read queue is empty
        if (writeq.size() >= int(0.8 * writeq.max) /*|| readq.size() == 0*/)
            write_mode = true;
    }
    else {
        // no -- write queue is almost empty and read queue is not empty
        if (writeq.size() <= int(0.2 * writeq.max) && readq.size() != 0)
            write_mode = false;
    }

    /*** 4. Find the best command to schedule, if any ***/
    Queue* queue = !write_mode ? &readq : &writeq;
    if (otherq.size())
        queue = &otherq;  // "other" requests are rare, so we give them precedence over reads/writes

    auto req = scheduler->get_head(queue->q);
    if (req == queue->q.end() || !is_ready(req)) {
        // we couldn't find a command to schedule -- let's try to be speculative
        auto cmd = TLDRAM::Command::PRE;
        vector<int> victim = rowpolicy->get_victim(cmd);
        if (!victim.empty()){
            issue_cmd(cmd, victim);
        }
        return;  // nothing more to be done this cycle
    }

    // First command for this request: record row hit/conflict/miss stats
    // and account the data transfer.
    if (req->is_first_command) {
        int coreid = req->coreid;
        req->is_first_command = false;

        if (req->type == Request::Type::READ || req->type == Request::Type::WRITE) {
            channel->update_serving_requests(req->addr_vec.data(), 1, clk);
        }

        // Bytes moved per transaction: burst length * bus width.
        int tx = (channel->spec->prefetch_size * channel->spec->channel_width / 8);
        if (req->type == Request::Type::READ) {
            if (is_row_hit(req)) {
                ++read_row_hits[coreid];
                ++row_hits;
            } else if (is_row_open(req)) {
                ++read_row_conflicts[coreid];
                ++row_conflicts;
            } else {
                ++read_row_misses[coreid];
                ++row_misses;
            }
            read_transaction_bytes += tx;
        } else if (req->type == Request::Type::WRITE) {
            if (is_row_hit(req)) {
                ++write_row_hits[coreid];
                ++row_hits;
            } else if (is_row_open(req)) {
                ++write_row_conflicts[coreid];
                ++row_conflicts;
            } else {
                ++write_row_misses[coreid];
                ++row_misses;
            }
            write_transaction_bytes += tx;
        }
    }

    /*** 5. Change a read request to a migration request ***/
    if (req->type == Request::Type::READ) {
        req->type = Request::Type::EXTENSION;
    }

    // issue command on behalf of request
    auto cmd = get_first_cmd(req);
    issue_cmd(cmd, get_addr_vec(cmd, req));

    // check whether this is the last command (which finishes the request)
    if (cmd != channel->spec->translate[int(req->type)])
        return;

    // set a future completion time for read requests
    if (req->type == Request::Type::READ || req->type == Request::Type::EXTENSION) {
        req->depart = clk + channel->spec->read_latency;
        pending.push_back(*req);
    }

    if (req->type == Request::Type::WRITE) {
        channel->update_serving_requests(req->addr_vec.data(), -1, clk);
    }

    // remove request from queue
    queue->q.erase(req);
}
// TLDRAM specialization: intentionally a no-op. The generic controller may
// upgrade RD/WR to RDA/WRA here, but TLDRAM defines no autoprecharge
// commands, so there is nothing to substitute.
template<>
void Controller<TLDRAM>::cmd_issue_autoprecharge(typename TLDRAM::Command& cmd,
                                                 const vector<int>& addr_vec) {
    //TLDRAM currently does not have autoprecharge commands
    return;
}
} /* namespace ramulator */
#ifndef __DDR3_H
#define __DDR3_H
#include "DRAM.h"
#include "Request.h"
#include <vector>
#include <map>
#include <string>
#include <functional>
using namespace std;
namespace ramulator
{
// DDR3 device specification: the address hierarchy, the command set, the
// per-command state machine hooks (prereq/rowhit/rowopen/timing/lambda,
// populated by the init_* functions in DDR3.cpp), and the organization and
// speed-bin tables. Consumed generically by DRAM<DDR3> and Controller<DDR3>.
class DDR3
{
public:
    static string standard_name;
    enum class Org;
    enum class Speed;
    DDR3(Org org, Speed speed);
    DDR3(const string& org_str, const string& speed_str);

    static map<string, enum Org> org_map;
    static map<string, enum Speed> speed_map;

    /*** Level ***/
    enum class Level : int
    {
        Channel, Rank, Bank, Row, Column, MAX
    };
    static std::string level_str [int(Level::MAX)];

    /*** Command ***/
    enum class Command : int
    {
        ACT, PRE, PREA,
        RD, WR, RDA, WRA,
        REF, PDE, PDX, SRE, SRX,
        MAX
    };
    string command_name[int(Command::MAX)] = {
        "ACT", "PRE", "PREA",
        "RD", "WR", "RDA", "WRA",
        "REF", "PDE", "PDX", "SRE", "SRX"
    };

    // The tree level each command operates on (indexed by Command).
    Level scope[int(Command::MAX)] = {
        Level::Row, Level::Bank, Level::Rank,
        Level::Column, Level::Column, Level::Column, Level::Column,
        Level::Rank, Level::Rank, Level::Rank, Level::Rank, Level::Rank
    };

    bool is_opening(Command cmd)
    {
        switch(int(cmd)) {
            case int(Command::ACT):
                return true;
            default:
                return false;
        }
    }

    bool is_accessing(Command cmd)
    {
        switch(int(cmd)) {
            case int(Command::RD):
            case int(Command::WR):
            case int(Command::RDA):
            case int(Command::WRA):
                return true;
            default:
                return false;
        }
    }

    bool is_closing(Command cmd)
    {
        switch(int(cmd)) {
            case int(Command::RDA):
            case int(Command::WRA):
            case int(Command::PRE):
            case int(Command::PREA):
                return true;
            default:
                return false;
        }
    }

    bool is_refreshing(Command cmd)
    {
        switch(int(cmd)) {
            case int(Command::REF):
                return true;
            default:
                return false;
        }
    }

    /* State */
    enum class State : int
    {
        Opened, Closed, PowerUp, ActPowerDown, PrePowerDown, SelfRefresh, MAX
    } start[int(Level::MAX)] = {
        // Initial state per level (State::MAX = level carries no state).
        State::MAX, State::PowerUp, State::Closed, State::Closed, State::MAX
    };

    /* Translate */
    // Maps a Request::Type to the command that ultimately completes it.
    Command translate[int(Request::Type::MAX)] = {
        Command::RD, Command::WR,
        Command::REF, Command::PDE, Command::SRE
    };

    /* Prerequisite */
    function<Command(DRAM<DDR3>*, Command cmd, int)> prereq[int(Level::MAX)][int(Command::MAX)];

    // SAUGATA: added function object container for row hit status
    /* Row hit */
    function<bool(DRAM<DDR3>*, Command cmd, int)> rowhit[int(Level::MAX)][int(Command::MAX)];
    function<bool(DRAM<DDR3>*, Command cmd, int)> rowopen[int(Level::MAX)][int(Command::MAX)];

    /* Timing */
    struct TimingEntry
    {
        Command cmd;
        int dist;
        int val;
        bool sibling;
    };
    // Per (level, command) list of timing constraints, filled by init_timing().
    vector<TimingEntry> timing[int(Level::MAX)][int(Command::MAX)];

    /* Lambda */
    // Per (level, command) state-update action, filled by init_lambda().
    function<void(DRAM<DDR3>*, int)> lambda[int(Level::MAX)][int(Command::MAX)];

    /* Organization */
    enum class Org : int
    {
        DDR3_512Mb_x4, DDR3_512Mb_x8, DDR3_512Mb_x16,
        DDR3_1Gb_x4, DDR3_1Gb_x8, DDR3_1Gb_x16,
        DDR3_2Gb_x4, DDR3_2Gb_x8, DDR3_2Gb_x16,
        DDR3_4Gb_x4, DDR3_4Gb_x8, DDR3_4Gb_x16,
        DDR3_8Gb_x4, DDR3_8Gb_x8, DDR3_8Gb_x16,
        MAX
    };
    struct OrgEntry {
        int size;                   // device density in Mb
        int dq;                     // data-pin width (x4/x8/x16)
        int count[int(Level::MAX)]; // node count per level (0 = set externally)
    } org_table[int(Org::MAX)] = {
        { 512, 4, {0, 0, 8, 1<<13, 1<<11}}, { 512, 8, {0, 0, 8, 1<<13, 1<<10}}, { 512, 16, {0, 0, 8, 1<<12, 1<<10}},
        {1<<10, 4, {0, 0, 8, 1<<14, 1<<11}}, {1<<10, 8, {0, 0, 8, 1<<14, 1<<10}}, {1<<10, 16, {0, 0, 8, 1<<13, 1<<10}},
        {2<<10, 4, {0, 0, 8, 1<<15, 1<<11}}, {2<<10, 8, {0, 0, 8, 1<<15, 1<<10}}, {2<<10, 16, {0, 0, 8, 1<<14, 1<<10}},
        {4<<10, 4, {0, 0, 8, 1<<16, 1<<11}}, {4<<10, 8, {0, 0, 8, 1<<16, 1<<10}}, {4<<10, 16, {0, 0, 8, 1<<15, 1<<10}},
        {8<<10, 4, {0, 0, 8, 1<<16, 1<<12}}, {8<<10, 8, {0, 0, 8, 1<<16, 1<<11}}, {8<<10, 16, {0, 0, 8, 1<<16, 1<<10}}
    }, org_entry;

    void set_channel_number(int channel);
    void set_rank_number(int rank);

    /* Speed */
    enum class Speed : int
    {
        DDR3_800D, DDR3_800E,
        DDR3_1066E, DDR3_1066F, DDR3_1066G,
        DDR3_1333G, DDR3_1333H,
        DDR3_1600H, DDR3_1600J, DDR3_1600K,
        DDR3_1866K, DDR3_1866L,
        DDR3_2133L, DDR3_2133M,
        MAX
    };

    int prefetch_size = 8; // 8n prefetch DDR
    int channel_width = 64;

    // Timing parameters per speed bin, all in cycles (nXXX) except
    // rate (MT/s), freq (MHz) and tCK (ns).
    struct SpeedEntry {
        int rate;
        double freq, tCK;
        int nBL, nCCD, nRTRS;
        int nCL, nRCD, nRP, nCWL;
        int nRAS, nRC;
        int nRTP, nWTR, nWR;
        int nRRD, nFAW;
        int nRFC, nREFI;
        int nPD, nXP, nXPDLL;
        int nCKESR, nXS, nXSDLL;
    } speed_table[int(Speed::MAX)] = {
        // Zero entries are density-dependent and are filled in by init_speed().
        {800, (400.0/3)*3, (3/0.4)/3, 4, 4, 2, 5, 5, 5, 5, 15, 20, 4, 4, 6, 0, 0, 0, 3120, 3, 3, 10, 4, 0, 512},
        {800, (400.0/3)*3, (3/0.4)/3, 4, 4, 2, 6, 6, 6, 5, 15, 21, 4, 4, 6, 0, 0, 0, 3120, 3, 3, 10, 4, 0, 512},
        {1066, (400.0/3)*4, (3/0.4)/4, 4, 4, 2, 6, 6, 6, 6, 20, 26, 4, 4, 8, 0, 0, 0, 4160, 3, 4, 13, 4, 0, 512},
        {1066, (400.0/3)*4, (3/0.4)/4, 4, 4, 2, 7, 7, 7, 6, 20, 27, 4, 4, 8, 0, 0, 0, 4160, 3, 4, 13, 4, 0, 512},
        {1066, (400.0/3)*4, (3/0.4)/4, 4, 4, 2, 8, 8, 8, 6, 20, 28, 4, 4, 8, 0, 0, 0, 4160, 3, 4, 13, 4, 0, 512},
        {1333, (400.0/3)*5, (3/0.4)/5, 4, 4, 2, 8, 8, 8, 7, 24, 32, 5, 5, 10, 0, 0, 0, 5200, 4, 4, 16, 5, 0, 512},
        {1333, (400.0/3)*5, (3/0.4)/5, 4, 4, 2, 9, 9, 9, 7, 24, 33, 5, 5, 10, 0, 0, 0, 5200, 4, 4, 16, 5, 0, 512},
        {1600, (400.0/3)*6, (3/0.4)/6, 4, 4, 2, 9, 9, 9, 8, 28, 37, 6, 6, 12, 0, 0, 0, 6240, 4, 5, 20, 5, 0, 512},
        {1600, (400.0/3)*6, (3/0.4)/6, 4, 4, 2, 10, 10, 10, 8, 28, 38, 6, 6, 12, 0, 0, 0, 6240, 4, 5, 20, 5, 0, 512},
        {1600, (400.0/3)*6, (3/0.4)/6, 4, 4, 2, 11, 11, 11, 8, 28, 39, 6, 6, 12, 0, 0, 0, 6240, 4, 5, 20, 5, 0, 512},
        {1866, (400.0/3)*7, (3/0.4)/7, 4, 4, 2, 11, 11, 11, 9, 32, 43, 7, 7, 14, 0, 0, 0, 7280, 5, 6, 23, 6, 0, 512},
        {1866, (400.0/3)*7, (3/0.4)/7, 4, 4, 2, 12, 12, 12, 9, 32, 44, 7, 7, 14, 0, 0, 0, 7280, 5, 6, 23, 6, 0, 512},
        {2133, (400.0/3)*8, (3/0.4)/8, 4, 4, 2, 12, 12, 12, 10, 36, 48, 8, 8, 16, 0, 0, 0, 8320, 6, 7, 26, 7, 0, 512},
        {2133, (400.0/3)*8, (3/0.4)/8, 4, 4, 2, 13, 13, 13, 10, 36, 49, 8, 8, 16, 0, 0, 0, 8320, 6, 7, 26, 7, 0, 512}
    }, speed_entry;

    int read_latency;

private:
    void init_speed();
    void init_lambda();
    void init_prereq();
    void init_rowhit(); // SAUGATA: added function to check for row hits
    void init_rowopen();
    void init_timing();
};
} /*namespace ramulator*/
#endif /*__DDR3_H*/
#ifndef __DDR4_H
#define __DDR4_H
#include "DRAM.h"
#include "Request.h"
#include <vector>
#include <functional>
using namespace std;
namespace ramulator
{
// DDR4 device specification. Same structure as DDR3 but adds the BankGroup
// level (with the short/long variants of CCD, WTR and RRD timings) and a
// refresh-rate mode. Consumed generically by DRAM<DDR4> and Controller<DDR4>.
class DDR4
{
public:
    static string standard_name;
    enum class Org;
    enum class Speed;
    DDR4(Org org, Speed speed);
    DDR4(const string& org_str, const string& speed_str);

    static map<string, enum Org> org_map;
    static map<string, enum Speed> speed_map;

    /* Level */
    enum class Level : int
    {
        Channel, Rank, BankGroup, Bank, Row, Column, MAX
    };
    static std::string level_str [int(Level::MAX)];

    /* Command */
    enum class Command : int
    {
        ACT, PRE, PREA,
        RD, WR, RDA, WRA,
        REF, PDE, PDX, SRE, SRX,
        MAX
    };
    string command_name[int(Command::MAX)] = {
        "ACT", "PRE", "PREA",
        "RD", "WR", "RDA", "WRA",
        "REF", "PDE", "PDX", "SRE", "SRX"
    };

    // The tree level each command operates on (indexed by Command).
    Level scope[int(Command::MAX)] = {
        Level::Row, Level::Bank, Level::Rank,
        Level::Column, Level::Column, Level::Column, Level::Column,
        Level::Rank, Level::Rank, Level::Rank, Level::Rank, Level::Rank
    };

    bool is_opening(Command cmd)
    {
        switch(int(cmd)) {
            case int(Command::ACT):
                return true;
            default:
                return false;
        }
    }

    bool is_accessing(Command cmd)
    {
        switch(int(cmd)) {
            case int(Command::RD):
            case int(Command::WR):
            case int(Command::RDA):
            case int(Command::WRA):
                return true;
            default:
                return false;
        }
    }

    bool is_closing(Command cmd)
    {
        switch(int(cmd)) {
            case int(Command::RDA):
            case int(Command::WRA):
            case int(Command::PRE):
            case int(Command::PREA):
                return true;
            default:
                return false;
        }
    }

    bool is_refreshing(Command cmd)
    {
        switch(int(cmd)) {
            case int(Command::REF):
                return true;
            default:
                return false;
        }
    }

    /* State */
    enum class State : int
    {
        Opened, Closed, PowerUp, ActPowerDown, PrePowerDown, SelfRefresh, MAX
    } start[int(Level::MAX)] = {
        // Initial state per level (State::MAX = level carries no state).
        State::MAX, State::PowerUp, State::MAX, State::Closed, State::Closed, State::MAX
    };

    /* Translate */
    // Maps a Request::Type to the command that ultimately completes it.
    Command translate[int(Request::Type::MAX)] = {
        Command::RD, Command::WR,
        Command::REF, Command::PDE, Command::SRE
    };

    /* Prereq */
    function<Command(DRAM<DDR4>*, Command cmd, int)> prereq[int(Level::MAX)][int(Command::MAX)];

    // SAUGATA: added function object container for row hit status
    /* Row hit */
    function<bool(DRAM<DDR4>*, Command cmd, int)> rowhit[int(Level::MAX)][int(Command::MAX)];
    function<bool(DRAM<DDR4>*, Command cmd, int)> rowopen[int(Level::MAX)][int(Command::MAX)];

    /* Timing */
    struct TimingEntry
    {
        Command cmd;
        int dist;
        int val;
        bool sibling;
    };
    // Per (level, command) list of timing constraints, filled by init_timing().
    vector<TimingEntry> timing[int(Level::MAX)][int(Command::MAX)];

    /* Lambda */
    // Per (level, command) state-update action, filled by init_lambda().
    function<void(DRAM<DDR4>*, int)> lambda[int(Level::MAX)][int(Command::MAX)];

    /* Organization */
    enum class Org : int
    {
        DDR4_2Gb_x4, DDR4_2Gb_x8, DDR4_2Gb_x16,
        DDR4_4Gb_x4, DDR4_4Gb_x8, DDR4_4Gb_x16,
        DDR4_8Gb_x4, DDR4_8Gb_x8, DDR4_8Gb_x16,
        MAX
    };
    struct OrgEntry {
        int size;                   // device density in Mb
        int dq;                     // data-pin width (x4/x8/x16)
        int count[int(Level::MAX)]; // node count per level (0 = set externally)
    } org_table[int(Org::MAX)] = {
        {2<<10, 4, {0, 0, 4, 4, 1<<15, 1<<10}}, {2<<10, 8, {0, 0, 4, 4, 1<<14, 1<<10}}, {2<<10, 16, {0, 0, 2, 4, 1<<14, 1<<10}},
        {4<<10, 4, {0, 0, 4, 4, 1<<16, 1<<10}}, {4<<10, 8, {0, 0, 4, 4, 1<<15, 1<<10}}, {4<<10, 16, {0, 0, 2, 4, 1<<15, 1<<10}},
        {8<<10, 4, {0, 0, 4, 4, 1<<17, 1<<10}}, {8<<10, 8, {0, 0, 4, 4, 1<<16, 1<<10}}, {8<<10, 16, {0, 0, 2, 4, 1<<16, 1<<10}}
    }, org_entry;

    void set_channel_number(int channel);
    void set_rank_number(int rank);

    /* Speed */
    enum class Speed : int
    {
        DDR4_1600K, DDR4_1600L,
        DDR4_1866M, DDR4_1866N,
        DDR4_2133P, DDR4_2133R,
        DDR4_2400R, DDR4_2400U,
        DDR4_3200,
        MAX
    };

    enum class RefreshMode : int
    {
        Refresh_1X,
        Refresh_2X,
        Refresh_4X,
        MAX
    } refresh_mode = RefreshMode::Refresh_1X;

    int prefetch_size = 8; // 8n prefetch DDR
    int channel_width = 64;

    // Timing parameters per speed bin, all in cycles (nXXX) except
    // rate (MT/s), freq (MHz) and tCK (ns).
    struct SpeedEntry {
        int rate;
        double freq, tCK;
        int nBL, nCCDS, nCCDL, nRTRS;
        int nCL, nRCD, nRP, nCWL;
        int nRAS, nRC;
        int nRTP, nWTRS, nWTRL, nWR;
        int nRRDS, nRRDL, nFAW;
        int nRFC, nREFI;
        int nPD, nXP, nXPDLL; // XPDLL not found in DDR4??
        int nCKESR, nXS, nXSDLL; // nXSDLL TBD (nDLLK), nXS = (tRFC+10ns)/tCK
    } speed_table[int(Speed::MAX)] = {
        // Zero entries are density-dependent and are filled in by init_speed().
        {1600, (400.0/3)*6, (3/0.4)/6, 4, 4, 5, 2, 11, 11, 11, 9, 28, 39, 6, 2, 6, 12, 0, 0, 0, 0, 0, 4, 5, 0, 5, 0, 0},
        {1600, (400.0/3)*6, (3/0.4)/6, 4, 4, 5, 2, 12, 12, 12, 9, 28, 40, 6, 2, 6, 12, 0, 0, 0, 0, 0, 4, 5, 0, 5, 0, 0},
        {1866, (400.0/3)*7, (3/0.4)/7, 4, 4, 5, 2, 13, 13, 13, 10, 32, 45, 7, 3, 7, 14, 0, 0, 0, 0, 0, 5, 6, 0, 6, 0, 0},
        {1866, (400.0/3)*7, (3/0.4)/7, 4, 4, 5, 2, 14, 14, 14, 10, 32, 46, 7, 3, 7, 14, 0, 0, 0, 0, 0, 5, 6, 0, 6, 0, 0},
        {2133, (400.0/3)*8, (3/0.4)/8, 4, 4, 6, 2, 15, 15, 15, 11, 36, 51, 8, 3, 8, 16, 0, 0, 0, 0, 0, 6, 7, 0, 7, 0, 0},
        {2133, (400.0/3)*8, (3/0.4)/8, 4, 4, 6, 2, 16, 16, 16, 11, 36, 52, 8, 3, 8, 16, 0, 0, 0, 0, 0, 6, 7, 0, 7, 0, 0},
        {2400, (400.0/3)*9, (3/0.4)/9, 4, 4, 6, 2, 16, 16, 16, 12, 39, 55, 9, 3, 9, 18, 0, 0, 0, 0, 0, 6, 8, 0, 7, 0, 0},
        {2400, (400.0/3)*9, (3/0.4)/9, 4, 4, 6, 2, 18, 18, 18, 12, 39, 57, 9, 3, 9, 18, 0, 0, 0, 0, 0, 6, 8, 0, 7, 0, 0},
        {3200, 1600, 0.625, prefetch_size/2/*DDR*/, 4, 10, 2, 22, 22, 22, 16, 56, 78, 12, 4, 12, 24, 8, 10, 40, 0, 0, 8, 10, 0, 8, 0, 0}
        //rate, freq, tCK, nBL, nCCDS nCCDL nRTRS nCL nRCD nRP nCWL nRAS nRC nRTP nWTRS nWTRL nWR nRRDS nRRDL nFAW nRFC nREFI nPD nXP nXPDLL nCKESR nXS nXSDLL
    }, speed_entry;

    int read_latency;

private:
    void init_speed();
    void init_lambda();
    void init_prereq();
    void init_rowhit(); // SAUGATA: added function to check for row hits
    void init_rowopen();
    void init_timing();
};
} /*namespace ramulator*/
#endif /*__DDR4_H*/
/*
* DSARP.h
*
* This is a re-implementation of the refresh mechanisms proposed in Chang et al.,
* "Improving DRAM Performance by Parallelizing Refreshes with Accesses", HPCA
* 2014.
*
* Note: the re-implementation of DSARP has not been widely tested across
* different benchmarks and parameters. However, timing violations of
* SARP/DSARP have been checked.
*
* Usage: The "type" determines the refresh mechanisms.
* Examples:
* DSARP::Org test_org = DSARP::Org::DSARP_8Gb_x8;
*
* DSARP* dsddr3_ab = new DSARP(test_org,
* DSARP::Speed::DSARP_1333, DSARP::Type::REFAB, 8);
*
* DSARP* dsddr3_pb = new DSARP(test_org,
* DSARP::Speed::DSARP_1333, DSARP::Type::REFPB, 8);
*
* DSARP* dsddr3_darp = new DSARP(test_org,
* DSARP::Speed::DSARP_1333, DSARP::Type::DARP, 8);
*
* DSARP* dsddr3_sarp = new DSARP(test_org,
* DSARP::Speed::DSARP_1333, DSARP::Type::SARP, 8);
*
* DSARP* dsddr3_dsarp = new DSARP(test_org,
* DSARP::Speed::DSARP_1333, DSARP::Type::DSARP, 8);
*
* Created on: Mar 16, 2015
* Author: kevincha
*/
#ifndef DSARP_H_
#define DSARP_H_
#include <vector>
#include <functional>
#include "DRAM.h"
#include "Request.h"
using namespace std;
namespace ramulator
{
// DSARP device specification (refresh-parallelization study, see the file
// header). DDR3-like, but adds a SubArray level and a per-bank refresh
// command (REFPB); the Type member selects which refresh mechanism
// (REFAB/REFPB/DARP/SARP/DSARP) this instance models.
class DSARP
{
public:
    static string standard_name;
    enum class Org;
    enum class Speed;
    enum class Type;
    DSARP(Org org, Speed speed, Type type, int n_sa);
    DSARP(const string& org_str, const string& speed_str, Type type, int n_sa);

    static map<string, enum Org> org_map;
    static map<string, enum Speed> speed_map;

    // Which refresh mechanism from the paper is being simulated.
    enum class Type : int
    {
        REFAB, REFPB, DARP, SARP, DSARP, MAX
    } type;

    /* Level */
    // NOTE: Although there's subarray, there's no SALP at all. This is used
    // for parallelizing REF and demand accesses.
    enum class Level : int
    {
        Channel, Rank, Bank, SubArray, Row, Column, MAX
    };
    static std::string level_str [int(Level::MAX)];

    /* Command */
    enum class Command : int
    {
        ACT, PRE, PREA,
        RD, WR, RDA, WRA,
        REF, REFPB, PDE, PDX, SRE, SRX,
        MAX
    };
    string command_name[int(Command::MAX)] = {
        "ACT", "PRE", "PREA",
        "RD", "WR", "RDA", "WRA",
        "REF", "REFPB",
        "PDE", "PDX", "SRE", "SRX"
    };

    // SubArray scope for REFPB to propagate the timings
    Level scope[int(Command::MAX)] = {
        Level::Row, Level::Bank, Level::Rank,
        Level::Column, Level::Column, Level::Column, Level::Column,
        Level::Rank, Level::Bank,
        Level::Rank, Level::Rank, Level::Rank, Level::Rank
    };

    bool is_opening(Command cmd)
    {
        switch(int(cmd)) {
            case int(Command::ACT):
                return true;
            default:
                return false;
        }
    }

    bool is_accessing(Command cmd)
    {
        switch(int(cmd)) {
            case int(Command::RD):
            case int(Command::WR):
            case int(Command::RDA):
            case int(Command::WRA):
                return true;
            default:
                return false;
        }
    }

    bool is_closing(Command cmd)
    {
        switch(int(cmd)) {
            case int(Command::RDA):
            case int(Command::WRA):
            case int(Command::PRE):
            case int(Command::PREA):
                return true;
            default:
                return false;
        }
    }

    // Unlike DDR3, the per-bank refresh command also counts as refreshing.
    bool is_refreshing(Command cmd)
    {
        switch(int(cmd)) {
            case int(Command::REF):
            case int(Command::REFPB):
                return true;
            default:
                return false;
        }
    }

    /* State */
    enum class State : int
    {
        Opened, Closed, PowerUp, ActPowerDown, PrePowerDown, SelfRefresh, MAX
    } start[int(Level::MAX)] = {
        // Initial state per level (State::MAX = level carries no state).
        State::MAX, State::PowerUp, State::Closed, State::Closed, State::MAX
    };

    /* Translate */
    // Maps a Request::Type to the command that ultimately completes it.
    Command translate[int(Request::Type::MAX)] = {
        Command::RD, Command::WR,
        Command::REF, Command::PDE, Command::SRE
    };

    /* Prerequisite */
    function<Command(DRAM<DSARP>*, Command cmd, int)> prereq[int(Level::MAX)][int(Command::MAX)];

    // SAUGATA: added function object container for row hit status
    /* Row hit */
    function<bool(DRAM<DSARP>*, Command cmd, int)> rowhit[int(Level::MAX)][int(Command::MAX)];
    function<bool(DRAM<DSARP>*, Command cmd, int)> rowopen[int(Level::MAX)][int(Command::MAX)];

    /* Timing */
    struct TimingEntry
    {
        Command cmd;
        int dist;
        int val;
        bool sibling;
    };
    // Per (level, command) list of timing constraints, filled by init_timing().
    vector<TimingEntry> timing[int(Level::MAX)][int(Command::MAX)];

    /* Lambda */
    // Per (level, command) state-update action, filled by init_lambda().
    function<void(DRAM<DSARP>*, int)> lambda[int(Level::MAX)][int(Command::MAX)];

    /* Organization */
    enum class Org : int
    {
        // These are the configurations used in the original paper, essentially DDR3
        DSARP_8Gb_x8,
        DSARP_16Gb_x8,
        DSARP_32Gb_x8,
        MAX
    };
    struct OrgEntry {
        int size;
        int dq;
        int count[int(Level::MAX)];
    } org_table[int(Org::MAX)] = {
        // IMPORTANT: Do not change the count for channel/rank, which is set
        // to 0 here. 0 means that this is a flexible configuration that is
        // not part of the spec, but rather something to change at a higher
        // level (main.cpp).
        {8<<10, 8, {0, 0, 8, 0, 1<<16, 1<<11}},
        {16<<10, 8, {0, 0, 8, 0, 1<<17, 1<<11}},
        {32<<10, 8, {0, 0, 8, 0, 1<<18, 1<<11}},
    }, org_entry;

    void set_channel_number(int channel);
    void set_rank_number(int rank);

    /* Speed */
    enum class Speed : int
    {
        DSARP_1333,
        MAX
    };

    enum class RefreshMode : int
    {
        Refresh_1X,
        MAX
    } refresh_mode = RefreshMode::Refresh_1X;

    int prefetch_size = 16; // 16n prefetch DDR
    int channel_width = 32;

    // Timing parameters, all in cycles (nXXX) except rate (MT/s),
    // freq (MHz) and tCK (ns).
    struct SpeedEntry {
        int rate;
        double freq, tCK;
        int nBL, nCCD, nRTRS;
        int nCL, nRCD, nRPpb, nRPab, nCWL;
        int nRAS, nRC;
        int nRTP, nWTR, nWR;
        int nRRD, nFAW;
        int nRFCab, nRFCpb, nREFI, nREFIpb;
        int nPD, nXP, nXPDLL;
        int nCKESR, nXS, nXSDLL;
        //int nCKE, nXP; // CKE value n/a
        //int nSR, nXSR; // tXSR = tRFCab + 7.5ns
    } speed_table[int(Speed::MAX)] = {
        {1333,
        (400.0/3)*5, (3/0.4)/5,
        4, 4, 2,
        9, 9, 8, 9, 7,
        24, 33,
        5, 5, 10,
        5, 30,
        0, 0, 0, 0, // set in DSARP.cpp
        4, 4, 16,
        5, 114, 512},
    }, speed_entry;

    int read_latency;

    // Number of subarrays -- mainly for SARP. Doesn't affect others.
    int n_sa;

    // Refresh rank?
    bool b_ref_rank;

    // Increase RRD b/w REF and ACT when they go to the same bank (SARP)
    double nRRD_factor = 1.138;

private:
    void init_speed();
    void init_lambda();
    void init_prereq();
    void init_rowhit(); // SAUGATA: added function to check for row hits
    void init_rowopen();
    void init_timing();
};
} /*namespace ramulator*/
#endif /* DSARP_H_ */
#ifndef __GDDR5_H
#define __GDDR5_H
#include "DRAM.h"
#include "Request.h"
#include <vector>
#include <functional>
using namespace std;
namespace ramulator
{
// GDDR5 device specification: level hierarchy, command set, organization
// and timing tables consumed by the generic DRAM<GDDR5> state machine.
class GDDR5
{
public:
    static string standard_name;
    enum class Org;
    enum class Speed;
    GDDR5(Org org, Speed speed);
    GDDR5(const string& org_str, const string& speed_str);
    // Lookup tables mapping config-file strings to the enums below.
    static map<string, enum Org> org_map;
    static map<string, enum Speed> speed_map;
    /*** Level ***/
    // Address hierarchy, outermost level first.
    enum class Level : int
    {
        Channel, Rank, BankGroup, Bank, Row, Column, MAX
    };
    static std::string level_str [int(Level::MAX)];
    /*** Command ***/
    enum class Command : int
    {
        ACT, PRE, PREA,
        RD, WR, RDA, WRA,
        REF, PDE, PDX, SRE, SRX,
        MAX
    };
    string command_name[int(Command::MAX)] = {
        "ACT", "PRE", "PREA",
        "RD", "WR", "RDA", "WRA",
        "REF", "PDE", "PDX", "SRE", "SRX"
    };
    // Hierarchy level each command is addressed to (indexed by Command).
    Level scope[int(Command::MAX)] = {
        Level::Row, Level::Bank, Level::Rank,
        Level::Column, Level::Column, Level::Column, Level::Column,
        Level::Rank, Level::Rank, Level::Rank, Level::Rank, Level::Rank
    };
    // Command classification predicates used by the generic DRAM model.
    // (Rewritten from switch-on-int-cast to direct comparisons.)
    bool is_opening(Command cmd)
    {
        // Only ACT opens a row.
        return cmd == Command::ACT;
    }
    bool is_accessing(Command cmd)
    {
        // Column accesses, with or without auto-precharge.
        return cmd == Command::RD  || cmd == Command::WR ||
               cmd == Command::RDA || cmd == Command::WRA;
    }
    bool is_closing(Command cmd)
    {
        // Explicit precharges plus auto-precharging accesses.
        return cmd == Command::RDA || cmd == Command::WRA ||
               cmd == Command::PRE || cmd == Command::PREA;
    }
    bool is_refreshing(Command cmd)
    {
        return cmd == Command::REF;
    }
    /* State */
    enum class State : int
    {
        Opened, Closed, PowerUp, ActPowerDown, PrePowerDown, SelfRefresh, MAX
    } start[int(Level::MAX)] = {
        // Initial state per level; State::MAX marks levels with no state machine.
        State::MAX, State::PowerUp, State::MAX, State::Closed, State::Closed, State::MAX
    };
    /* Translate */
    // First command issued for each incoming Request::Type.
    Command translate[int(Request::Type::MAX)] = {
        Command::RD, Command::WR,
        Command::REF, Command::PDE, Command::SRE
    };
    /* Prerequisite */
    function<Command(DRAM<GDDR5>*, Command cmd, int)> prereq[int(Level::MAX)][int(Command::MAX)];
    // SAUGATA: added function object container for row hit status
    /* Row hit */
    function<bool(DRAM<GDDR5>*, Command cmd, int)> rowhit[int(Level::MAX)][int(Command::MAX)];
    function<bool(DRAM<GDDR5>*, Command cmd, int)> rowopen[int(Level::MAX)][int(Command::MAX)];
    /* Timing */
    struct TimingEntry
    {
        Command cmd;   // command constrained by this entry
        int dist;      // command-history distance the constraint spans
        int val;       // minimum separation in cycles
        bool sibling;  // whether the constraint applies across sibling nodes
    };
    vector<TimingEntry> timing[int(Level::MAX)][int(Command::MAX)];
    /* Lambda */
    // State-update hooks run when a command is issued at a given level.
    function<void(DRAM<GDDR5>*, int)> lambda[int(Level::MAX)][int(Command::MAX)];
    /* Organization */
    enum class Org : int
    {
        GDDR5_512Mb_x16, GDDR5_512Mb_x32,
        GDDR5_1Gb_x16, GDDR5_1Gb_x32,
        GDDR5_2Gb_x16, GDDR5_2Gb_x32,
        GDDR5_4Gb_x16, GDDR5_4Gb_x32,
        GDDR5_8Gb_x16, GDDR5_8Gb_x32,
        MAX
    };
    struct OrgEntry {
        int size;                    // device density in Mb
        int dq;                      // interface width in bits
        int count[int(Level::MAX)];  // entity count per level (0 = filled in later)
    } org_table[int(Org::MAX)] = {
        // fixed to have 1 rank
        // in GDDR5 the column address is unique for a burst. e.g. 64 column addresses correspond with
        // 256 column addresses actually. So we multiply 8 to the original address bit number in JEDEC standard
        { 512, 16, {0, 1, 4, 2, 1<<12, 1<<(7+3)}}, { 512, 32, {0, 1, 4, 2, 1<<12, 1<<(6+3)}},
        {1<<10, 16, {0, 1, 4, 4, 1<<12, 1<<(7+3)}}, {1<<10, 32, {0, 1, 4, 4, 1<<12, 1<<(6+3)}},
        {2<<10, 16, {0, 1, 4, 4, 1<<13, 1<<(7+3)}}, {2<<10, 32, {0, 1, 4, 4, 1<<13, 1<<(6+3)}},
        // BUGFIX: the GDDR5_4Gb_x32 entry previously declared a 2Gb (2<<10)
        // density, contradicting both its name and the x16 sibling entry.
        {4<<10, 16, {0, 1, 4, 4, 1<<14, 1<<(7+3)}}, {4<<10, 32, {0, 1, 4, 4, 1<<14, 1<<(6+3)}},
        {8<<10, 16, {0, 1, 4, 4, 1<<14, 1<<(8+3)}}, {8<<10, 32, {0, 1, 4, 4, 1<<14, 1<<(7+3)}}
    }, org_entry;
    void set_channel_number(int channel);
    void set_rank_number(int rank);
    /* Speed */
    enum class Speed : int
    {
        GDDR5_4000, GDDR5_4500,
        GDDR5_5000, GDDR5_5500,
        GDDR5_6000, GDDR5_6500,
        GDDR5_7000,
        MAX
    };
    int prefetch_size = 8; // 8n prefetch QDR
    int channel_width = 64;
    struct SpeedEntry {
        int rate;
        double freq, tCK;
        int nBL, nCCDS, nCCDL;
        int nCL, nRCDR, nRCDW, nRP, nCWL;
        int nRAS, nRC;
        int nPPD, nRTP, nWTR, nWR;
        int nRRD, nFAW, n32AW;
        int nRFC, nREFI;
        int nPD, nXPN, nLK;
        int nCKESR, nXS, nXSDLL;
    } speed_table[int(Speed::MAX)] = {
        // Values in memory-clock cycles; zero entries are not modeled here.
        {4000, 8*500/4, 8.0/8, 2, 2, 3, 12, 12, 10, 12, 3, 28, 40, 1, 2, 5, 12, 6, 23, 184, 0, 0, 10, 10, 0, 0, 0, 0},
        {4500, 9*500/4, 8.0/9, 2, 2, 3, 14, 14, 12, 14, 4, 32, 46, 2, 2, 6, 14, 7, 26, 207, 0, 0, 10, 10, 0, 0, 0, 0},
        {5000, 10*500/4, 8.0/10, 2, 2, 3, 15, 15, 13, 15, 4, 35, 50, 2, 2, 7, 15, 7, 29, 230, 0, 0, 10, 10, 0, 0, 0, 0},
        {5500, 11*500/4, 8.0/11, 2, 2, 3, 17, 17, 14, 17, 5, 39, 56, 2, 2, 7, 17, 8, 32, 253, 0, 0, 10, 10, 0, 0, 0, 0},
        {6000, 12*500/4, 8.0/12, 2, 2, 3, 18, 18, 15, 18, 5, 42, 60, 2, 2, 8, 18, 9, 35, 276, 0, 0, 10, 10, 0, 0, 0, 0},
        {6500, 13*500/4, 8.0/13, 2, 2, 3, 20, 20, 17, 20, 5, 46, 66, 2, 2, 9, 20, 9, 38, 299, 0, 0, 10, 10, 0, 0, 0, 0},
        {7000, 14*500/4, 8.0/14, 2, 2, 3, 21, 21, 18, 21, 6, 49, 70, 2, 2, 9, 21, 10, 41, 322, 0, 0, 10, 10, 0, 0, 0, 0}
    }, speed_entry;
    int read_latency;
private:
    void init_speed();
    void init_lambda();
    void init_prereq();
    void init_rowhit(); // SAUGATA: added function to check for row hits
    void init_rowopen();
    void init_timing();
};
} /*namespace ramulator*/
#endif /*__GDDR5_H*/
#include <map>
#include "Gem5Wrapper.h"
#include "Config.h"
#include "Request.h"
#include "MemoryFactory.h"
#include "Memory.h"
#include "DDR3.h"
#include "DDR4.h"
#include "LPDDR3.h"
#include "LPDDR4.h"
#include "GDDR5.h"
#include "WideIO.h"
#include "WideIO2.h"
#include "HBM.h"
#include "SALP.h"
using namespace ramulator;
// Registry mapping a DRAM standard name (the "standard" config entry) to
// the factory function that instantiates the corresponding memory model.
// All three SALP variants share one factory; the variant is re-read from
// the config inside MemoryFactory<SALP>::create.
static map<string, function<MemoryBase *(const Config&, int)> > name_to_func = {
    {"DDR3", &MemoryFactory<DDR3>::create}, {"DDR4", &MemoryFactory<DDR4>::create},
    {"LPDDR3", &MemoryFactory<LPDDR3>::create}, {"LPDDR4", &MemoryFactory<LPDDR4>::create},
    {"GDDR5", &MemoryFactory<GDDR5>::create},
    {"WideIO", &MemoryFactory<WideIO>::create}, {"WideIO2", &MemoryFactory<WideIO2>::create},
    {"HBM", &MemoryFactory<HBM>::create},
    {"SALP-1", &MemoryFactory<SALP>::create}, {"SALP-2", &MemoryFactory<SALP>::create}, {"SALP-MASA", &MemoryFactory<SALP>::create},
};
// Constructs the wrapped Ramulator memory system for the DRAM standard
// named by the "standard" config entry; cacheline is the line size in bytes.
Gem5Wrapper::Gem5Wrapper(const Config& configs, int cacheline)
{
    const string& std_name = configs["standard"];
    // Look the standard up once. The previous code asserted membership and
    // then indexed with operator[]: a second traversal, and -- when asserts
    // are compiled out (NDEBUG) -- an unknown name would default-insert an
    // empty std::function and invoke it (std::bad_function_call).
    auto it = name_to_func.find(std_name);
    assert(it != name_to_func.end() && "unrecognized standard name");
    mem = it->second(configs, cacheline);
    tCK = mem->clk_ns();
}
// Releases the memory model allocated by the constructor.
Gem5Wrapper::~Gem5Wrapper() {
    delete mem;
}
// Advances the underlying memory system by one memory-clock cycle.
void Gem5Wrapper::tick()
{
    mem->tick();
}
// Forwards a request to the memory system; returns MemoryBase::send's
// result (whether the request was accepted this cycle).
bool Gem5Wrapper::send(Request req)
{
    return mem->send(req);
}
// Finalizes the memory system at end of simulation (statistics, cleanup).
void Gem5Wrapper::finish(void) {
    mem->finish();
}
#ifndef __GEM5_WRAPPER_H
#define __GEM5_WRAPPER_H
#include <string>
#include "Config.h"
using namespace std;
namespace ramulator
{
class Request;
class MemoryBase;
// Thin facade exposing a Ramulator memory system to gem5: forwards ticks
// and requests to the MemoryBase instance selected from the config file.
class Gem5Wrapper
{
private:
    MemoryBase *mem;  // owned: created in the ctor, deleted in the dtor
public:
    double tCK;       // memory clock period in ns, taken from the model
    Gem5Wrapper(const Config& configs, int cacheline);
    ~Gem5Wrapper();
    void tick();             // advance the memory system one cycle
    bool send(Request req);  // forward a request; returns mem->send's result
    void finish(void);       // finalize at end of simulation
};
} /*namespace ramulator*/
#endif /*__GEM5_WRAPPER_H*/
#ifndef __HBM_H
#define __HBM_H
#include "DRAM.h"
#include "Request.h"
#include <vector>
#include <functional>
using namespace std;
namespace ramulator
{
// HBM device specification: level hierarchy, command set, organization and
// timing tables consumed by the generic DRAM<HBM> state machine. One
// instance describes a single HBM channel (each stack exposes 8 channels).
class HBM
{
public:
    static string standard_name;
    enum class Org;
    enum class Speed;
    HBM(Org org, Speed speed);
    HBM(const string& org_str, const string& speed_str);
    // Lookup tables mapping config-file strings to the enums below.
    static map<string, enum Org> org_map;
    static map<string, enum Speed> speed_map;
    /* Level */
    // Address hierarchy, outermost level first.
    enum class Level : int
    {
        Channel, Rank, BankGroup, Bank, Row, Column, MAX
    };
    static std::string level_str [int(Level::MAX)];
    /* Command */
    enum class Command : int
    {
        ACT, PRE, PREA,
        RD, WR, RDA, WRA,
        REF, REFSB, PDE, PDX, SRE, SRX,
        MAX
    };
    // REFSB and REF is not compatible, choose one or the other.
    // REFSB can be issued to banks in any order, as long as REFI1B
    // is satisfied for all banks
    string command_name[int(Command::MAX)] = {
        "ACT", "PRE", "PREA",
        "RD", "WR", "RDA", "WRA",
        "REF", "REFSB", "PDE", "PDX", "SRE", "SRX"
    };
    // Hierarchy level each command is addressed to (indexed by Command).
    Level scope[int(Command::MAX)] = {
        Level::Row, Level::Bank, Level::Rank,
        Level::Column, Level::Column, Level::Column, Level::Column,
        Level::Rank, Level::Bank, Level::Rank, Level::Rank, Level::Rank, Level::Rank
    };
    // True if cmd opens a row (only ACT).
    bool is_opening(Command cmd)
    {
        switch(int(cmd)) {
            case int(Command::ACT):
                return true;
            default:
                return false;
        }
    }
    // True if cmd is a column access (read/write, w/ or w/o auto-precharge).
    bool is_accessing(Command cmd)
    {
        switch(int(cmd)) {
            case int(Command::RD):
            case int(Command::WR):
            case int(Command::RDA):
            case int(Command::WRA):
                return true;
            default:
                return false;
        }
    }
    // True if cmd closes a row (explicit or auto-precharge).
    bool is_closing(Command cmd)
    {
        switch(int(cmd)) {
            case int(Command::RDA):
            case int(Command::WRA):
            case int(Command::PRE):
            case int(Command::PREA):
                return true;
            default:
                return false;
        }
    }
    // True if cmd refreshes (all-bank REF or single-bank REFSB).
    bool is_refreshing(Command cmd)
    {
        switch(int(cmd)) {
            case int(Command::REF):
            case int(Command::REFSB):
                return true;
            default:
                return false;
        }
    }
    /* State */
    enum class State : int
    {
        Opened, Closed, PowerUp, ActPowerDown, PrePowerDown, SelfRefresh, MAX
    } start[int(Level::MAX)] = {
        // Initial state per level; State::MAX marks levels with no state machine.
        State::MAX, State::PowerUp, State::MAX, State::Closed, State::Closed, State::MAX
    };
    /* Translate */
    // First command issued for each incoming Request::Type.
    Command translate[int(Request::Type::MAX)] = {
        Command::RD, Command::WR,
        Command::REF, Command::PDE, Command::SRE
    };
    /* Prereq */
    function<Command(DRAM<HBM>*, Command cmd, int)> prereq[int(Level::MAX)][int(Command::MAX)];
    // SAUGATA: added function object container for row hit status
    /* Row hit */
    function<bool(DRAM<HBM>*, Command cmd, int)> rowhit[int(Level::MAX)][int(Command::MAX)];
    function<bool(DRAM<HBM>*, Command cmd, int)> rowopen[int(Level::MAX)][int(Command::MAX)];
    /* Timing */
    struct TimingEntry
    {
        Command cmd;   // command constrained by this entry
        int dist;      // command-history distance the constraint spans
        int val;       // minimum separation in cycles
        bool sibling;  // whether the constraint applies across sibling nodes
    };
    vector<TimingEntry> timing[int(Level::MAX)][int(Command::MAX)];
    /* Lambda */
    // State-update hooks run when a command is issued at a given level.
    function<void(DRAM<HBM>*, int)> lambda[int(Level::MAX)][int(Command::MAX)];
    /* Organization */
    enum class Org : int
    { // per channel density here. Each stack comes with 8 channels
        HBM_1Gb,
        HBM_2Gb,
        HBM_4Gb,
        MAX
    };
    struct OrgEntry {
        int size;                    // per-channel density in Mb
        int dq;                      // interface width in bits
        int count[int(Level::MAX)];  // entity count per level (0 = set later)
    } org_table[int(Org::MAX)] = {
        {1<<10, 128, {0, 0, 4, 2, 1<<13, 1<<(6+1)}},
        {2<<10, 128, {0, 0, 4, 2, 1<<14, 1<<(6+1)}},
        {4<<10, 128, {0, 0, 4, 4, 1<<14, 1<<(6+1)}},
    }, org_entry;
    void set_channel_number(int channel);
    void set_rank_number(int rank);
    /* Speed */
    enum class Speed : int
    {
        HBM_1Gbps,
        MAX
    };
    int prefetch_size = 4; // burst length could be 2 and 4 (choose 4 here), 2n prefetch
    int channel_width = 128;
    struct SpeedEntry {
        int rate;
        double freq, tCK;
        int nBL, nCCDS, nCCDL;
        int nCL, nRCDR, nRCDW, nRP, nCWL;
        int nRAS, nRC;
        int nRTP, nWTRS, nWTRL, nWR;
        int nRRDS, nRRDL, nFAW;
        int nRFC, nREFI, nREFI1B;
        int nPD, nXP;
        int nCKESR, nXS;
    } speed_table[int(Speed::MAX)] = {
        // Values in memory-clock cycles; zero entries are not modeled here.
        {1000, 500, 2.0, 2, 2, 3, 7, 7, 6, 7, 4, 17, 24, 7, 2, 4, 8, 4, 5, 20, 0, 1950, 0, 5, 5, 5, 0}
    }, speed_entry;
    int read_latency;
private:
    void init_speed();
    void init_lambda();
    void init_prereq();
    void init_rowhit(); // SAUGATA: added function to check for row hits
    void init_rowopen();
    void init_timing();
};
} /*namespace ramulator*/
#endif /*__HBM_H*/
#ifndef __LPDDR3_H
#define __LPDDR3_H
#include "DRAM.h"
#include "Request.h"
#include <vector>
#include <functional>
using namespace std;
namespace ramulator
{
// LPDDR3 device specification: level hierarchy, command set, organization
// and timing tables consumed by the generic DRAM<LPDDR3> state machine.
class LPDDR3
{
public:
    static string standard_name;
    enum class Org;
    enum class Speed;
    LPDDR3(Org org, Speed speed);
    LPDDR3(const string& org_str, const string& speed_str);
    // Lookup tables mapping config-file strings to the enums below.
    static map<string, enum Org> org_map;
    static map<string, enum Speed> speed_map;
    /* Level */
    // Address hierarchy, outermost level first (no bank groups in LPDDR3).
    enum class Level : int
    {
        Channel, Rank, Bank, Row, Column, MAX
    };
    static std::string level_str [int(Level::MAX)];
    /* Command */
    enum class Command : int
    {
        ACT, PRE, PRA,
        RD, WR, RDA, WRA,
        REF, REFPB, PD, PDX, SREF, SREFX,
        MAX
    };
    string command_name[int(Command::MAX)] = {
        "ACT", "PRE", "PRA",
        "RD", "WR", "RDA", "WRA",
        "REF", "REFPB", "PD", "PDX", "SREF", "SREFX"
    };
    // Hierarchy level each command is addressed to (indexed by Command).
    Level scope[int(Command::MAX)] = {
        Level::Row, Level::Bank, Level::Rank,
        Level::Column, Level::Column, Level::Column, Level::Column,
        Level::Rank, Level::Bank, Level::Rank, Level::Rank, Level::Rank, Level::Rank
    };
    // True if cmd opens a row (only ACT).
    bool is_opening(Command cmd)
    {
        switch(int(cmd)) {
            case int(Command::ACT):
                return true;
            default:
                return false;
        }
    }
    // True if cmd is a column access (read/write, w/ or w/o auto-precharge).
    bool is_accessing(Command cmd)
    {
        switch(int(cmd)) {
            case int(Command::RD):
            case int(Command::WR):
            case int(Command::RDA):
            case int(Command::WRA):
                return true;
            default:
                return false;
        }
    }
    // True if cmd closes a row (explicit or auto-precharge).
    bool is_closing(Command cmd)
    {
        switch(int(cmd)) {
            case int(Command::RDA):
            case int(Command::WRA):
            case int(Command::PRE):
            case int(Command::PRA):
                return true;
            default:
                return false;
        }
    }
    // True if cmd refreshes (all-bank REF or per-bank REFPB).
    bool is_refreshing(Command cmd)
    {
        switch(int(cmd)) {
            case int(Command::REF):
            case int(Command::REFPB):
                return true;
            default:
                return false;
        }
    }
    /* State */
    enum class State : int
    {
        Opened, Closed, PowerUp, ActPowerDown, PrePowerDown, SelfRefresh, MAX
    } start[int(Level::MAX)] = {
        // Initial state per level; State::MAX marks levels with no state machine.
        State::MAX, State::PowerUp, State::Closed, State::Closed, State::MAX
    };
    /* Translate */
    // First command issued for each incoming Request::Type.
    Command translate[int(Request::Type::MAX)] = {
        Command::RD, Command::WR,
        Command::REF, Command::PD, Command::SREF
    };
    /* Prerequisite */
    function<Command(DRAM<LPDDR3>*, Command cmd, int)> prereq[int(Level::MAX)][int(Command::MAX)];
    // SAUGATA: added function object container for row hit status
    /* Row hit */
    function<bool(DRAM<LPDDR3>*, Command cmd, int)> rowhit[int(Level::MAX)][int(Command::MAX)];
    function<bool(DRAM<LPDDR3>*, Command cmd, int)> rowopen[int(Level::MAX)][int(Command::MAX)];
    /* Timing */
    struct TimingEntry
    {
        Command cmd;   // command constrained by this entry
        int dist;      // command-history distance the constraint spans
        int val;       // minimum separation in cycles
        bool sibling;  // whether the constraint applies across sibling nodes
    };
    vector<TimingEntry> timing[int(Level::MAX)][int(Command::MAX)];
    /* Lambda */
    // State-update hooks run when a command is issued at a given level.
    function<void(DRAM<LPDDR3>*, int)> lambda[int(Level::MAX)][int(Command::MAX)];
    /* Organization */
    enum class Org : int
    {
        LPDDR3_4Gb_x16, LPDDR3_4Gb_x32,
        LPDDR3_6Gb_x16, LPDDR3_6Gb_x32,
        LPDDR3_8Gb_x16, LPDDR3_8Gb_x32,
        LPDDR3_12Gb_x16, LPDDR3_12Gb_x32,
        LPDDR3_16Gb_x16, LPDDR3_16Gb_x32,
        MAX
    };
    struct OrgEntry {
        int size;                    // device density in Mb
        int dq;                      // interface width in bits
        int count[int(Level::MAX)];  // entity count per level (0 = set later)
    } org_table[int(Org::MAX)] = {
        {4<<10, 16, {0, 0, 8, 1<<14, 1<<11}}, {4<<10, 32, {0, 0, 8, 1<<14, 1<<10}},
        {6<<10, 16, {0, 0, 8, 3<<13, 1<<11}}, {6<<10, 32, {0, 0, 8, 3<<13, 1<<10}},
        {8<<10, 16, {0, 0, 8, 1<<15, 1<<11}}, {8<<10, 32, {0, 0, 8, 1<<15, 1<<10}},
        {12<<10, 16, {0, 0, 8, 3<<13, 1<<12}}, {12<<10, 32, {0, 0, 8, 3<<13, 1<<11}},
        {16<<10, 16, {0, 0, 8, 1<<15, 1<<12}}, {16<<10, 32, {0, 0, 8, 1<<15, 1<<11}},
    }, org_entry;
    void set_channel_number(int channel);
    void set_rank_number(int rank);
    /* Speed */
    enum class Speed : int
    {
        LPDDR3_1333,
        LPDDR3_1600,
        LPDDR3_1866,
        LPDDR3_2133,
        MAX
    };
    int prefetch_size = 8; // 8n prefetch DDR (comment fixed: previously mislabeled as 16n)
    int channel_width = 64;
    struct SpeedEntry {
        int rate;
        double freq, tCK;
        int nBL, nCCD, nRTRS, nDQSCK;
        int nCL, nRCD, nRPpb, nRPab, nCWL;
        int nRAS, nRC;
        int nRTP, nWTR, nWR;
        int nRRD, nFAW;
        int nRFCab, nRFCpb, nREFI;
        int nCKE, nXP; // CKE is PD, LPDDR3 has no DLL
        int nCKESR, nXSR; // tXSR = tRFCab + 10ns
    } speed_table[int(Speed::MAX)] = {
        // Values in memory-clock cycles; zero entries are not modeled here.
        {1333, (400.0/3)*5, (3/0.4)/5, 4, 4, 2, 2, 10, 12, 12, 14, 6, 28, 40, 5, 5, 10, 7, 34, 0, 0, 2600, 5, 5, 10, 0},
        {1600, (400.0/3)*6, (3/0.4)/6, 4, 4, 2, 2, 12, 15, 15, 17, 6, 34, 48, 6, 6, 12, 8, 40, 0, 0, 3120, 6, 6, 12, 0},
        {1866, (400.0/3)*7, (3/0.4)/7, 4, 4, 2, 3, 14, 17, 17, 20, 8, 40, 56, 7, 7, 14, 10, 47, 0, 0, 3640, 7, 7, 14, 0},
        {2133, (400.0/3)*8, (3/0.4)/8, 4, 4, 2, 3, 16, 20, 20, 23, 8, 45, 64, 8, 8, 16, 11, 54, 0, 0, 4160, 8, 8, 16, 0}
    }, speed_entry;
    // LPDDR3 defines {fast, typical, slow} timing for tRCD and tRP. (typ)
    // WL as diff. values for set A/B (A)
    int read_latency;
private:
    void init_speed();
    void init_lambda();
    void init_prereq();
    void init_rowhit(); // SAUGATA: added function to check for row hits
    void init_rowopen();
    void init_timing();
};
} /*namespace ramulator*/
#endif /*__LPDDR3_H*/
#ifndef __LPDDR4_H
#define __LPDDR4_H
#include "DRAM.h"
#include "Request.h"
#include <vector>
#include <functional>
using namespace std;
namespace ramulator
{
// LPDDR4 device specification: level hierarchy, command set, organization
// and timing tables consumed by the generic DRAM<LPDDR4> state machine.
class LPDDR4
{
public:
    static string standard_name;
    enum class Org;
    enum class Speed;
    LPDDR4(Org org, Speed speed);
    LPDDR4(const string& org_str, const string& speed_str);
    // Lookup tables mapping config-file strings to the enums below.
    static map<string, enum Org> org_map;
    static map<string, enum Speed> speed_map;
    /* Level */
    // Address hierarchy, outermost level first (no bank groups in LPDDR4).
    enum class Level : int
    {
        Channel, Rank, Bank, Row, Column, MAX
    };
    static std::string level_str [int(Level::MAX)];
    /* Command */
    enum class Command : int
    {
        ACT, PRE, PREA,
        RD, WR, RDA, WRA,
        REF, REFPB, PDE, PDX, SREF, SREFX,
        MAX
    };
    // Due to multiplexing on the cmd/addr bus:
    // ACT, RD, WR, RDA, WRA take 4 cycles
    // PRE, PREA, REF, REFPB, PDE, PDX, SREF, SREFX take 2 cycles
    string command_name[int(Command::MAX)] = {
        "ACT", "PRE", "PREA",
        "RD", "WR", "RDA", "WRA",
        "REF", "REFPB", "PDE", "PDX", "SREF", "SREFX"
    };
    // Hierarchy level each command is addressed to (indexed by Command).
    Level scope[int(Command::MAX)] = {
        Level::Row, Level::Bank, Level::Rank,
        Level::Column, Level::Column, Level::Column, Level::Column,
        Level::Rank, Level::Bank, Level::Rank, Level::Rank, Level::Rank, Level::Rank
    };
    // True if cmd opens a row (only ACT).
    bool is_opening(Command cmd)
    {
        switch(int(cmd)) {
            case int(Command::ACT):
                return true;
            default:
                return false;
        }
    }
    // True if cmd is a column access (read/write, w/ or w/o auto-precharge).
    bool is_accessing(Command cmd)
    {
        switch(int(cmd)) {
            case int(Command::RD):
            case int(Command::WR):
            case int(Command::RDA):
            case int(Command::WRA):
                return true;
            default:
                return false;
        }
    }
    // True if cmd closes a row (explicit or auto-precharge).
    bool is_closing(Command cmd)
    {
        switch(int(cmd)) {
            case int(Command::RDA):
            case int(Command::WRA):
            case int(Command::PRE):
            case int(Command::PREA):
                return true;
            default:
                return false;
        }
    }
    // True if cmd refreshes (all-bank REF or per-bank REFPB).
    bool is_refreshing(Command cmd)
    {
        switch(int(cmd)) {
            case int(Command::REF):
            case int(Command::REFPB):
                return true;
            default:
                return false;
        }
    }
    /* State */
    enum class State : int
    {
        Opened, Closed, PowerUp, ActPowerDown, PrePowerDown, SelfRefresh, MAX
    } start[int(Level::MAX)] = {
        // Initial state per level; State::MAX marks levels with no state machine.
        State::MAX, State::PowerUp, State::Closed, State::Closed, State::MAX
    };
    /* Translate */
    // First command issued for each incoming Request::Type.
    Command translate[int(Request::Type::MAX)] = {
        Command::RD, Command::WR,
        Command::REF, Command::PDE, Command::SREF
    };
    /* Prerequisite */
    function<Command(DRAM<LPDDR4>*, Command cmd, int)> prereq[int(Level::MAX)][int(Command::MAX)];
    // SAUGATA: added function object container for row hit status
    /* Row hit */
    function<bool(DRAM<LPDDR4>*, Command cmd, int)> rowhit[int(Level::MAX)][int(Command::MAX)];
    function<bool(DRAM<LPDDR4>*, Command cmd, int)> rowopen[int(Level::MAX)][int(Command::MAX)];
    /* Timing */
    struct TimingEntry
    {
        Command cmd;   // command constrained by this entry
        int dist;      // command-history distance the constraint spans
        int val;       // minimum separation in cycles
        bool sibling;  // whether the constraint applies across sibling nodes
    };
    vector<TimingEntry> timing[int(Level::MAX)][int(Command::MAX)];
    /* Lambda */
    // State-update hooks run when a command is issued at a given level.
    function<void(DRAM<LPDDR4>*, int)> lambda[int(Level::MAX)][int(Command::MAX)];
    /* Organization */
    enum class Org : int
    {
        // this is per-die density, actual per-chan density is half
        LPDDR4_4Gb_x16,
        LPDDR4_6Gb_x16,
        LPDDR4_8Gb_x16,
        // LPDDR4_12Gb_x16, // tRFC TBD
        // LPDDR4_16Gb_x16, // tRFC TBD
        MAX
    };
    struct OrgEntry {
        int size;                    // per-channel density in Mb (half the die)
        int dq;                      // interface width in bits
        int count[int(Level::MAX)];  // entity count per level (0 = set later)
    } org_table[int(Org::MAX)] = {
        {2<<10, 16, {0, 0, 8, 1<<14, 1<<10}},
        {3<<10, 16, {0, 0, 8, 3<<13, 1<<10}},
        {4<<10, 16, {0, 0, 8, 1<<15, 1<<10}},
    }, org_entry;
    void set_channel_number(int channel);
    void set_rank_number(int rank);
    /* Speed */
    enum class Speed : int
    {
        LPDDR4_1600,
        LPDDR4_2400,
        LPDDR4_3200,
        MAX
    };
    enum class RefreshMode : int
    {
        Refresh_1X,
        Refresh_2X,
        Refresh_4X,
        MAX
    } refresh_mode = RefreshMode::Refresh_1X;
    int prefetch_size = 16; // 16n prefetch DDR
    int channel_width = 32;
    struct SpeedEntry {
        int rate;
        double freq, tCK;
        int nBL, nCCD, nRTRS, nDQSCK;
        int nCL, nRCD, nRPpb, nRPab, nCWL;
        int nRAS, nRC;
        int nRTP, nWTR, nWR;
        int nPPD, nRRD, nFAW;
        int nRFCab, nRFCpb, nREFI;
        int nCKE, nXP; // CKE value n/a
        int nSR, nXSR; // tXSR = tRFCab + 7.5ns
    } speed_table[int(Speed::MAX)] = {
        // LPDDR4 is 16n prefetch. Latencies in JESD209-4 counts from and to
        // the end of each command, I've converted them as if all commands take
        // only 1 cycle like other standards
        // CL-RCD-RPpb are set to the same value althrough CL is not explicitly specified.
        // CWL is made up, half of CL.
        // calculated from 10.2 core timing table 89
        {1600, 400.0*2, 2.5/2, 8, 8, 2, 1, 15+3, 15, 15-2, 17-2, 8+3, 34, 47, 8+2, 8, 15-1, 4, 8, 32, 0, 0, 0, 0, 6, 12, 0},
        {2400, 400.0*3, 2.5/3, 8, 8, 2, 2, 22+3, 22, 22-2, 26-2, 11+3, 51, 71, 9+2, 12, 22-1, 4, 12, 48, 0, 0, 0, 0, 9, 18, 0},
        {3200, 400.0*4, 2.5/4, 8, 8, 2, 3, 29+3, 29, 29-2, 34-2, 15+3, 68, 95, 12+2, 16, 29-1, 4, 16, 64, 0, 0, 0, 0, 12, 24, 0}
    }, speed_entry;
    // LPDDR4 defines {fast, typical, slow} timing for tRCD and tRP. (typ)
    // WL as diff. values for set A/B (A)
    int read_latency;
private:
    void init_speed();
    void init_lambda();
    void init_prereq();
    void init_rowhit(); // SAUGATA: added function to check for row hits
    void init_rowopen();
    void init_timing();
};
} /*namespace ramulator*/
#endif /*__LPDDR4_H*/
#include "Processor.h"
#include "Config.h"
#include "Controller.h"
#include "SpeedyController.h"
#include "Memory.h"
#include "DRAM.h"
#include "Statistics.h"
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <stdlib.h>
#include <functional>
#include <map>
/* Standards */
#include "Gem5Wrapper.h"
#include "DDR3.h"
#include "DDR4.h"
#include "DSARP.h"
#include "GDDR5.h"
#include "LPDDR3.h"
#include "LPDDR4.h"
#include "WideIO.h"
#include "WideIO2.h"
#include "HBM.h"
#include "SALP.h"
#include "ALDRAM.h"
#include "TLDRAM.h"
#include "STTMRAM.h"
#include "PCM.h"
using namespace std;
using namespace ramulator;
// Drives the co-simulated CPU model and memory system in lock-step.
// The CPU is ticked every mem_tick-th sub-cycle and the memory system every
// cpu_tick-th sub-cycle (ratios come from the config file), so the CPU runs
// cpu_tick times for every mem_tick memory ticks. The loop ends once the
// processor has received 1000 responses, or after a hard iteration cap.
template <typename T>
void run_cpu(const Config& configs, Memory<T, Controller>& memory)
{
    int cpu_tick = configs.get_cpu_tick();
    int mem_tick = configs.get_mem_tick();
    auto send = bind(&Memory<T, Controller>::send, &memory, placeholders::_1);
    Processor proc(configs, send, memory);
    printf("Starting the simulation...\n");
    int tick_mult = cpu_tick * mem_tick;
    // BUGFIX: the counter was `long` with a 10'000'000'000 bound. On targets
    // where long is 32-bit (Windows/LLP64, 32-bit Linux) incrementing it past
    // LONG_MAX is undefined behavior long before the bound is reached.
    for (long long i = 0; i < 10000000000LL; i++) {
        if (((i % tick_mult) % mem_tick) == 0) { // When the CPU is ticked cpu_tick times,
            // the memory controller should be ticked mem_tick times
            proc.tick();
        }
        if (((i % tick_mult) % cpu_tick) == 0) { // TODO_hasan: Better if the processor ticks the memory controller
            memory.tick();
        }
        if (proc.recieve_num >= 1000) { // stop after 1000 completed requests
            cout << "total cycles: " << proc.clk << endl;
            break;
        }
    }
    memory.finish();
}
template<typename T>
void start_run(const Config& configs, T* spec) {
// initiate controller and memory
int C = configs.get_channels(), R = configs.get_ranks();
// Check and Set channel, rank number
spec->set_channel_number(C);
spec->set_rank_number(R);
std::vector<Controller<T>*> ctrls;
for (int c = 0 ; c < C ; c++) {
DRAM<T>* channel = new DRAM<T>(spec, T::Level::Channel);
channel->id = c;
channel->regStats("");//not usefull
Controller<T>* ctrl = new Controller<T>(configs, channel);
ctrls.push_back(ctrl);
}
Memory<T, Controller> memory(configs, ctrls);
//assert(files.size() != 0);
//if (configs["trace_type"] == "CPU") {
run_cpu(configs, memory);
//} else if (configs["trace_type"] == "DRAM") {
// run_dramtrace(configs, memory, files[0]);
//}
}
// Entry point: reads a config file (argv[1]) and a request address interval
// (argv[2]), then instantiates and simulates the configured DRAM standard.
int main(int argc, const char *argv[])
{
    if (argc < 3) {
        // BUGFIX: the usage text omitted the required second argument even
        // though argv[2] is read below.
        printf("Usage: %s <configs-file> <req-addr-interval>\n"
            "Example: %s ramulator-configs.cfg 64\n", argv[0], argv[0]);
        return 0;
    }
    Config configs(argv[1]);
    configs.add("req_addr_interval", argv[2]);
    //DRAM standard
    const std::string& standard = configs["standard"];
    // BUGFIX: was `standard != "" || "..."` -- the string literal made the
    // condition always true, so the assert could never fire.
    assert(standard != "" && "DRAM standard should be specified.");
    //not usefull
    configs.add("trace_type", "CPU");
    //mapping strategy
    configs.add("mapping", "defaultmapping");
    //not usefull
    configs.set_core_num(1);
    //run simulation according to the memory standard.
    if (standard == "DDR3") {
        DDR3* ddr3 = new DDR3(configs["org"], configs["speed"]);
        start_run(configs, ddr3);
    } else if (standard == "DDR4") {
        DDR4* ddr4 = new DDR4(configs["org"], configs["speed"]);
        start_run(configs, ddr4);
    } else if (standard == "SALP-MASA") {
        SALP* salp8 = new SALP(configs["org"], configs["speed"], "SALP-MASA", configs.get_subarrays());
        start_run(configs, salp8);
    } else if (standard == "LPDDR3") {
        LPDDR3* lpddr3 = new LPDDR3(configs["org"], configs["speed"]);
        start_run(configs, lpddr3);
    } else if (standard == "LPDDR4") {
        // total cap: 2GB, 1/2 of others
        LPDDR4* lpddr4 = new LPDDR4(configs["org"], configs["speed"]);
        start_run(configs, lpddr4);
    } else if (standard == "GDDR5") {
        GDDR5* gddr5 = new GDDR5(configs["org"], configs["speed"]);
        start_run(configs, gddr5);
    } else if (standard == "HBM") {
        HBM* hbm = new HBM(configs["org"], configs["speed"]);
        start_run(configs, hbm);
    } else if (standard == "WideIO") {
        // total cap: 1GB, 1/4 of others
        WideIO* wio = new WideIO(configs["org"], configs["speed"]);
        start_run(configs, wio);
    } else if (standard == "WideIO2") {
        // total cap: 2GB, 1/2 of others
        WideIO2* wio2 = new WideIO2(configs["org"], configs["speed"], configs.get_channels());
        wio2->channel_width *= 2;
        start_run(configs, wio2);
    } else if (standard == "STTMRAM") {
        STTMRAM* sttmram = new STTMRAM(configs["org"], configs["speed"]);
        start_run(configs, sttmram);
    } else if (standard == "PCM") {
        PCM* pcm = new PCM(configs["org"], configs["speed"]);
        start_run(configs, pcm);
    }
    // Various refresh mechanisms
    else if (standard == "DSARP") {
        DSARP* dsddr3_dsarp = new DSARP(configs["org"], configs["speed"], DSARP::Type::DSARP, configs.get_subarrays());
        start_run(configs, dsddr3_dsarp);
    } else if (standard == "ALDRAM") {
        ALDRAM* aldram = new ALDRAM(configs["org"], configs["speed"]);
        start_run(configs, aldram);
    } else if (standard == "TLDRAM") {
        TLDRAM* tldram = new TLDRAM(configs["org"], configs["speed"], configs.get_subarrays());
        start_run(configs, tldram);
    } else {
        // Previously an unrecognized standard fell through silently and the
        // program still printed "Simulation done."; report it instead.
        printf("Error: unsupported standard '%s'\n", standard.c_str());
        return 1;
    }
    printf("Simulation done.\n");
    return 0;
}
#include "MemoryFactory.h"
#include "LPDDR4.h"
#include "WideIO.h"
#include "WideIO2.h"
#include "HBM.h"
#include "SALP.h"
using namespace ramulator;
namespace ramulator
{
// LPDDR4 dies are dual-channel, so at least two channels are required.
template <>
void MemoryFactory<LPDDR4>::validate(int channels, int ranks, const Config& configs) {
    assert(channels >= 2 && "LPDDR4 requires 2, 4, 8 ... channels");
}
// A WideIO stack exposes exactly four channels.
template <>
void MemoryFactory<WideIO>::validate(int channels, int ranks, const Config& configs) {
    assert(channels == 4 && "WideIO comes with 4 channels");
}
// WideIO2 allows 4 or 8 channels, with 1 or 2 ranks.
template <>
void MemoryFactory<WideIO2>::validate(int channels, int ranks, const Config& configs) {
    assert((channels == 4 || channels == 8) && "WideIO2 comes with 4 or 8 channels");
    assert((ranks == 1 || ranks == 2) && "WideIO2 comes with 1 or 2 ranks");
}
// An HBM stack exposes exactly eight channels.
template <>
void MemoryFactory<HBM>::validate(int channels, int ranks, const Config& configs) {
    assert(channels == 8 && "HBM comes with 8 channels");
}
// Specialized create(): WideIO2's constructor also needs the channel count.
template <>
MemoryBase *MemoryFactory<WideIO2>::create(const Config& configs, int cacheline) {
    int channels = stoi(configs["channels"], NULL, 0);
    int ranks = stoi(configs["ranks"], NULL, 0);
    validate(channels, ranks, configs);
    const string& org_name = configs["org"];
    const string& speed_name = configs["speed"];
    WideIO2 *spec = new WideIO2(org_name, speed_name, channels);
    extend_channel_width(spec, cacheline);
    return (MemoryBase *)populate_memory(configs, spec, channels, ranks);
}
// Specialized create(): SALP additionally needs the subarray count and the
// exact standard name ("SALP-1", "SALP-2", "SALP-MASA") to pick the variant.
template <>
MemoryBase *MemoryFactory<SALP>::create(const Config& configs, int cacheline) {
    int channels = stoi(configs["channels"], NULL, 0);
    int ranks = stoi(configs["ranks"], NULL, 0);
    int subarrays = stoi(configs["subarrays"], NULL, 0);
    validate(channels, ranks, configs);
    const string& std_name = configs["standard"];
    const string& org_name = configs["org"];
    const string& speed_name = configs["speed"];
    SALP *spec = new SALP(org_name, speed_name, std_name, subarrays);
    extend_channel_width(spec, cacheline);
    return (MemoryBase *)populate_memory(configs, spec, channels, ranks);
}
}
// This function can be used by autoconf AC_CHECK_LIB since
// apparently it can't detect C++ functions.
// Basically just an entry in the symbol table
extern "C"
{
    // Dummy C-linkage symbol so autoconf's AC_CHECK_LIB (which only probes
    // C functions) can detect the presence of libramulator.
    void libramulator_is_present(void)
    {
        // intentionally empty
    }
}
#ifndef __MEMORY_FACTORY_H
#define __MEMORY_FACTORY_H
#include <map>
#include <string>
#include <cassert>
#include "Config.h"
#include "Memory.h"
#include "WideIO2.h"
#include "SALP.h"
using namespace std;
namespace ramulator
{
// Generic factory that builds a complete Memory<T> system (per-channel DRAM
// trees + controllers) for DRAM standard T from the parsed configuration.
// WideIO2 and SALP provide specialized create() implementations (declared
// below, defined in MemoryFactory.cpp).
template <typename T>
class MemoryFactory {
public:
    // Gangs enough channels together that one cacheline is transferred in a
    // single burst: channel_width is multiplied by cacheline / (bytes per
    // minimum-width burst). cacheline must be a multiple of that unit.
    static void extend_channel_width(T* spec, int cacheline)
    {
        int channel_unit = spec->prefetch_size * spec->channel_width / 8;
        int gang_number = cacheline / channel_unit;
        assert(gang_number >= 1 &&
            "cacheline size must be greater or equal to minimum channel width");
        assert(cacheline == gang_number * channel_unit &&
            "cacheline size must be a multiple of minimum channel width");
        spec->channel_width *= gang_number;
    }
    // Builds one DRAM tree + controller per channel and wraps them in a
    // Memory<T>. Channel/rank counts left at zero in the spec's org table
    // are filled in from the config-derived arguments.
    static Memory<T> *populate_memory(const Config& configs, T *spec, int channels, int ranks) {
        int& default_ranks = spec->org_entry.count[int(T::Level::Rank)];
        int& default_channels = spec->org_entry.count[int(T::Level::Channel)];
        if (default_channels == 0) default_channels = channels;
        if (default_ranks == 0) default_ranks = ranks;
        vector<Controller<T> *> ctrls;
        for (int c = 0; c < channels; c++){
            DRAM<T>* channel = new DRAM<T>(spec, T::Level::Channel);
            channel->id = c;
            channel->regStats("");
            ctrls.push_back(new Controller<T>(configs, channel));
        }
        return new Memory<T>(configs, ctrls);
    }
    // Basic sanity check; standards with stricter topology requirements
    // specialize this (see MemoryFactory.cpp).
    static void validate(int channels, int ranks, const Config& configs) {
        assert(channels > 0 && ranks > 0);
    }
    // Parses channel/rank/org/speed from the config and assembles the memory.
    static MemoryBase *create(const Config& configs, int cacheline)
    {
        int channels = stoi(configs["channels"], NULL, 0);
        int ranks = stoi(configs["ranks"], NULL, 0);
        validate(channels, ranks, configs);
        const string& org_name = configs["org"];
        const string& speed_name = configs["speed"];
        T *spec = new T(org_name, speed_name);
        extend_channel_width(spec, cacheline);
        return (MemoryBase *)populate_memory(configs, spec, channels, ranks);
    }
};
template <>
MemoryBase *MemoryFactory<WideIO2>::create(const Config& configs, int cacheline);
template <>
MemoryBase *MemoryFactory<SALP>::create(const Config& configs, int cacheline);
} /*namespace ramulator*/
#endif /*__MEMORY_FACTORY_H*/
/*
*
* The timing parameters used in this file are provided by the following study:
* Benjamin C. Lee, Engin Ipek, Onur Mutlu, and Doug Burger. 2009.
* Architecting phase change memory as a scalable dram alternative.
* In Proceedings of the 36th annual international symposium on Computer architecture (ISCA '09).
* ACM, New York, NY, USA, 2-13.
* DOI: https://doi.org/10.1145/1555754.1555758
*
*/
#ifndef __PCM_H
#define __PCM_H
#include "DRAM.h"
#include "Request.h"
#include <vector>
#include <map>
#include <string>
#include <functional>
using namespace std;
namespace ramulator
{
class PCM
{
public:
static string standard_name;
enum class Org;
enum class Speed;
PCM(Org org, Speed speed);
PCM(const string& org_str, const string& speed_str);
static map<string, enum Org> org_map;
static map<string, enum Speed> speed_map;
/*** Level ***/
enum class Level : int
{
Channel, Rank, Bank, Row, Column, MAX
};
static std::string level_str [int(Level::MAX)];
/*** Command ***/
enum class Command : int
{
ACT, PRE, PREA,
RD, WR, RDA, WRA,
REF, PDE, PDX, SRE, SRX,
MAX
};
string command_name[int(Command::MAX)] = {
"ACT", "PRE", "PREA",
"RD", "WR", "RDA", "WRA",
"REF", "PDE", "PDX", "SRE", "SRX"
};
Level scope[int(Command::MAX)] = {
Level::Row, Level::Bank, Level::Rank,
Level::Column, Level::Column, Level::Column, Level::Column,
Level::Rank, Level::Rank, Level::Rank, Level::Rank, Level::Rank
};
bool is_opening(Command cmd)
{
switch(int(cmd)) {
case int(Command::ACT):
return true;
default:
return false;
}
}
bool is_accessing(Command cmd)
{
switch(int(cmd)) {
case int(Command::RD):
case int(Command::WR):
case int(Command::RDA):
case int(Command::WRA):
return true;
default:
return false;
}
}
bool is_closing(Command cmd)
{
switch(int(cmd)) {
case int(Command::RDA):
case int(Command::WRA):
case int(Command::PRE):
case int(Command::PREA):
return true;
default:
return false;
}
}
bool is_refreshing(Command cmd)
{
switch(int(cmd)) {
case int(Command::REF):
return true;
default:
return false;
}
}
/* State */
enum class State : int
{
Opened, Closed, PowerUp, ActPowerDown, PrePowerDown, SelfRefresh, MAX
} start[int(Level::MAX)] = {
State::MAX, State::PowerUp, State::Closed, State::Closed, State::MAX
};
/* Translate */
Command translate[int(Request::Type::MAX)] = {
Command::RD, Command::WR,
Command::REF, Command::PDE, Command::SRE
};
/* Prerequisite */
function<Command(DRAM<PCM>*, Command cmd, int)> prereq[int(Level::MAX)][int(Command::MAX)];
// SAUGATA: added function object container for row hit status
/* Row hit */
function<bool(DRAM<PCM>*, Command cmd, int)> rowhit[int(Level::MAX)][int(Command::MAX)];
function<bool(DRAM<PCM>*, Command cmd, int)> rowopen[int(Level::MAX)][int(Command::MAX)];
/* Timing */
struct TimingEntry
{
Command cmd;
int dist;
int val;
bool sibling;
};
vector<TimingEntry> timing[int(Level::MAX)][int(Command::MAX)];
/* Lambda */
function<void(DRAM<PCM>*, int)> lambda[int(Level::MAX)][int(Command::MAX)];
/* Organization */
// Supported PCM die organizations: capacity (512Mb..8Gb) x device width
// (x4/x8/x16).
enum class Org : int
{
PCM_512Mb_x4, PCM_512Mb_x8, PCM_512Mb_x16,
PCM_1Gb_x4, PCM_1Gb_x8, PCM_1Gb_x16,
PCM_2Gb_x4, PCM_2Gb_x8, PCM_2Gb_x16,
PCM_4Gb_x4, PCM_4Gb_x8, PCM_4Gb_x16,
PCM_8Gb_x4, PCM_8Gb_x8, PCM_8Gb_x16,
MAX
};
struct OrgEntry {
int size; // total device capacity in Mb
int dq; // device data-bus width (x4/x8/x16)
// Node count per level; the leading zeros (channel, rank) are filled in
// later by set_channel_number()/set_rank_number().
int count[int(Level::MAX)];
} org_table[int(Org::MAX)] = {
// {size, dq, {channels, ranks, banks, rows, columns}} — one row per Org,
// in Org declaration order.
{ 512, 4, {0, 0, 8, 1<<13, 1<<11}}, { 512, 8, {0, 0, 8, 1<<13, 1<<10}}, { 512, 16, {0, 0, 8, 1<<12, 1<<10}},
{1<<10, 4, {0, 0, 8, 1<<14, 1<<11}}, {1<<10, 8, {0, 0, 8, 1<<14, 1<<10}}, {1<<10, 16, {0, 0, 8, 1<<13, 1<<10}},
{2<<10, 4, {0, 0, 8, 1<<15, 1<<11}}, {2<<10, 8, {0, 0, 8, 1<<15, 1<<10}}, {2<<10, 16, {0, 0, 8, 1<<14, 1<<10}},
{4<<10, 4, {0, 0, 8, 1<<16, 1<<11}}, {4<<10, 8, {0, 0, 8, 1<<16, 1<<10}}, {4<<10, 16, {0, 0, 8, 1<<15, 1<<10}},
{8<<10, 4, {0, 0, 8, 1<<16, 1<<12}}, {8<<10, 8, {0, 0, 8, 1<<16, 1<<11}}, {8<<10, 16, {0, 0, 8, 1<<16, 1<<10}}
}, org_entry; // org_entry: the organization actually selected at runtime
// Fill in the channel/rank slots of org_entry.count (left 0 in the table).
void set_channel_number(int channel);
void set_rank_number(int rank);
/* Speed */
// Supported speed bins; only one PCM grade is defined here.
enum class Speed : int
{
PCM_800D,
MAX
};
int prefetch_size = 8; // 8n prefetch DDR
int channel_width = 64;
// All timing parameters in clock cycles (nCK) except freq (MHz) and tCK (ns).
struct SpeedEntry {
int rate; // data rate in MT/s
double freq, tCK;
int nBL, nCCD, nRTRS;
int nCL, nRCD, nRP, nCWL;
int nRAS, nRC;
int nRTP, nWTR, nWR;
int nRRDact, nRRDpre, nFAW;
int nRFC, nREFI;
int nPD, nXP, nXPDLL;
int nCKESR, nXS, nXSDLL;
} speed_table[int(Speed::MAX)] = {
// PCM_800D: 800 MT/s => 400 MHz clock, tCK = 2.5 ns. Note the long
// nRCD/nRP (22/60 cycles) relative to DRAM — presumably modeling PCM's
// slow array read/write; zero entries (nFAW, nRFC, nPD, nXS) appear to
// mean "constraint not modeled" — TODO confirm against init_timing().
{800, (400.0/3)*3, 2.5, 4, 4, 2, 5, 22, 60, 5, 22, 60, 3, 3, 6, 2, 11, 0, 0, 3900, 0, 3, 10, 4, 0, 512},
}, speed_entry; // speed_entry: the speed grade actually selected at runtime
int read_latency; // total read latency in cycles, computed in init_speed()
private:
// One-time initializers invoked from the constructor (defined out of
// view) to populate read_latency and the prereq/rowhit/rowopen/timing/
// lambda tables declared above.
void init_speed();
void init_lambda();
void init_prereq();
void init_rowhit(); // SAUGATA: added function to check for row hits
void init_rowopen();
void init_timing();
};
} /*namespace ramulator*/
#endif /*__PCM_H*/
#include "Processor.h"
#include <cassert>
#include <string>
#include <iostream>
using namespace std;
using namespace ramulator;
// Construct a simple request-generating core on top of `memory`.
// `send_memory` is the hook used to push requests into the memory system;
// the only config knob consumed here is the address stride.
Processor::Processor(const Config& configs,
                     function<bool(Request)> send_memory,
                     MemoryBase& memory) : memory(memory)
{
    send = send_memory;
    // Invoked by the memory system when a request it accepted is issued.
    callback = [this](Request& req) { this->receive(req); };
    // Stride between consecutive generated request addresses.
    req_addr_interval = stoi(configs["req_addr_interval"]);
}
// Completion handler: called by the memory system once a request is issued.
// Clears the wait flag and reports runs of identical access latencies
// (a new line is printed whenever the observed latency changes).
void Processor::receive(Request& req) {
    waiting = false;
    recieve_num++;
    processing += 0; // placeholder: model post-receive processing cycles here
    const long latency = clk - send_clk;
    if (last_access_time > 0 && last_access_time != latency) {
        // Latency changed: flush the previous run and its repeat count.
        cout << "access id: " << recieve_num << ", cycles: " << last_access_time
             << " (" << same_access_time_count << ")" << endl;
        same_access_time_count = 1;
    } else {
        same_access_time_count++;
    }
    last_access_time = latency;
}
// Advance the core by one cycle. Priority: burn remaining processing
// cycles; otherwise, if no request is outstanding, generate and send the
// next read.
void Processor::tick(){
    clk++;
    if (processing > 0) {
        processing--;
        return;
    }
    if (waiting)
        return; // still waiting for the outstanding request
    // Next address: fixed starting point, then stride by req_addr_interval.
    req_addr = (req_addr == -1) ? 20734016 : req_addr + req_addr_interval;
    req_type = Request::Type::READ; // read-only traffic pattern
    req_addr = memory.page_allocator(req_addr, 0);
    Request req(req_addr, req_type, callback, 0);
    waiting = true;
    send_clk = clk;
    // Hand the request to memory; `callback` fires when it is issued.
    send(req);
}
#ifndef __PROCESSOR_H
#define __PROCESSOR_H
#include "Cache.h"
#include "Config.h"
#include "Memory.h"
#include "Request.h"
#include "Statistics.h"
#include <iostream>
#include <vector>
#include <fstream>
#include <string>
#include <ctype.h>
#include <functional>
namespace ramulator
{
// Minimal CPU model for driving a ramulator memory system: every cycle it
// either counts down processing time or sends the next read request, and
// it blocks until each request's callback fires.
class Processor{
public:
//processor clock, for example, usually faster than memory clock
long clk = 0;
//this function send request to memory
function<bool(Request)> send;
//this function is called when a request is issued in memory
function<void(Request&)> callback;
Processor(const Config& configs, function<bool(Request)> send, MemoryBase& memory);
//simulate 1 clock cycle of processor
void tick();
//processor behaviors when a request is issued in memory
void receive(Request& req);
// Total number of completed requests. NOTE(review): name is misspelled
// ("recieve"); kept for compatibility with existing users.
int recieve_num = 0;
private:
//processor status
bool waiting = false; // true while a request is outstanding
int processing = 0; // remaining busy cycles before the next request
int send_clk = 0; // clk value when the outstanding request was sent
int last_access_time = -1; // latency of the previous completed access
int same_access_time_count = 0; // consecutive accesses with that latency
//info of the request sending to memory
long req_addr = -1; // -1 means "no request generated yet"
long req_addr_interval = 0; // address stride, from config
Request::Type req_type;
//simulated memory
MemoryBase& memory;
};
}
#endif /* __PROCESSOR_H */
/*
* Refresh.cpp
*
* Mainly DSARP specialization at the moment.
*
* Created on: Mar 17, 2015
* Author: kevincha
*/
#include <stdlib.h>
#include "Refresh.h"
#include "Controller.h"
#include "DRAM.h"
#include "DSARP.h"
using namespace std;
using namespace ramulator;
namespace ramulator {
/**** DSARP specialization ****/
// DSARP-specialized constructor: besides the per-rank/per-bank counters of
// the generic Refresh, it also sizes the per-subarray refresh counters
// (DSARP refreshes at subarray granularity).
template<>
Refresh<DSARP>::Refresh(Controller<DSARP>* ctrl) : ctrl(ctrl) {
clk = refreshed = 0;
max_rank_count = ctrl->channel->children.size();
max_bank_count = ctrl->channel->spec->org_entry.count[(int)DSARP::Level::Bank];
max_sa_count = ctrl->channel->spec->org_entry.count[(int)DSARP::Level::SubArray];
// Init refresh counters
for (int r = 0; r < max_rank_count; r++) {
bank_ref_counters.push_back(0);
// Credit ledger per bank: + = ahead of refresh schedule, - = behind.
bank_refresh_backlog.push_back(new vector<int>(max_bank_count, 0));
// One subarray pointer per bank (same value as max_sa_count above).
vector<int> sa_counters(ctrl->channel->spec->org_entry.count[(int)DSARP::Level::SubArray], 0);
subarray_ref_counters.push_back(sa_counters);
}
// Cache the address-vector indices for each organization level.
level_chan = (int)DSARP::Level::Channel;
level_rank = (int)DSARP::Level::Rank;
level_bank = (int)DSARP::Level::Bank;
level_sa = (int)DSARP::Level::SubArray;
}
// Opportunistically pull in a per-bank refresh during read mode: pick a
// bank with no queued reads and refresh it early, earning a credit that
// lets a future scheduled refresh be skipped.
template<>
void Refresh<DSARP>::early_inject_refresh() {
// Only enabled during reads
if (ctrl->write_mode)
return;
// OoO bank-level refresh
vector<bool> is_bank_occupied(max_rank_count * max_bank_count, false);
Controller<DSARP>::Queue& rdq = ctrl->readq;
// Figure out which banks are idle in order to refresh one of them
// NOTE(review): `auto req` copies each Request; `const auto&` would avoid it.
for (auto req: rdq.q)
{
assert(req.addr_vec[level_chan] == ctrl->channel->id);
int ridx = req.addr_vec[level_rank] * max_bank_count;
int bidx = req.addr_vec[level_bank];
is_bank_occupied[ridx+bidx] = true;
}
// Try to pick an idle bank to refresh per rank
for (int r = 0; r < max_rank_count; r++) {
// Randomly pick a bank to examine (random start avoids always favoring
// low-numbered banks), then scan all banks from there.
int bidx_start = rand() % max_bank_count;
for (int b = 0; b < max_bank_count; b++)
{
int bidx = (bidx_start + b) % max_bank_count;
// Idle cycle only
if (is_bank_occupied[(r * max_bank_count) + bidx])
continue;
// Skip banks that already have a refresh queued.
bool pending_ref = false;
for (Request req : ctrl->otherq.q)
if (req.type == Request::Type::REFRESH
&& req.addr_vec[level_chan] == ctrl->channel->id
&& req.addr_vec[level_rank] == r && req.addr_vec[level_bank] == bidx)
pending_ref = true;
if (pending_ref)
continue;
// Only pull in refreshes when we are almost running out of credits
// (backlog near backlog_min) and there is queue space for the command.
if ((*(bank_refresh_backlog[r]))[bidx] >= backlog_early_pull_threshold ||
ctrl->otherq.q.size() >= ctrl->otherq.max)
continue;
// Refresh now
refresh_target(ctrl, r, bidx, subarray_ref_counters[r][bidx]);
// One credit for delaying a future ref
(*(bank_refresh_backlog[r]))[bidx]++;
// Advance the round-robin subarray pointer for this bank.
subarray_ref_counters[r][bidx] = (subarray_ref_counters[r][bidx]+1) % max_sa_count;
// At most one early refresh per rank per call.
break;
}
}
}
// Scheduled refresh injection. Rank-level mode refreshes every rank at
// once; bank-level mode refreshes one bank per rank (round-robin), and for
// DARP/DSARP may skip the refresh entirely by spending an earned credit.
template<>
void Refresh<DSARP>::inject_refresh(bool b_ref_rank) {
// Rank-level refresh
if (b_ref_rank)
for (auto rank : ctrl->channel->children)
refresh_target(ctrl, rank->id, -1, -1);
// Bank-level refresh. Simultaneously issue to all ranks (better performance than staggered refreshes).
else {
for (auto rank : ctrl->channel->children) {
int rid = rank->id;
int bid = bank_ref_counters[rid];
// Behind refresh schedule by 1 ref. Note this debit is taken even if
// the refresh is skipped below — that is the credit being spent.
(*(bank_refresh_backlog[rid]))[bid]--;
// Next time, refresh the next bank in the same rank (round-robin).
bank_ref_counters[rid] = (bank_ref_counters[rid] + 1) % max_bank_count;
// Check to see if we can skip a refresh
if (ctrl->channel->spec->type == DSARP::Type::DARP ||
ctrl->channel->spec->type == DSARP::Type::DSARP) {
bool ref_now = false;
// 1. Any pending refreshes?
bool pending_ref = false;
for (Request req : ctrl->otherq.q) {
if (req.type == Request::Type::REFRESH) {
pending_ref = true;
break;
}
}
// 2. Refresh immediately when the read queue is drained and nothing
// is already pending (idle time is free refresh time).
if (!pending_ref && ctrl->readq.size() == 0)
ref_now = true;
// 3. Track backlog status. If we are too far behind the schedule,
// we must refresh now regardless.
if ((*(bank_refresh_backlog[rid]))[bid] <= backlog_min)
ref_now = true;
// Otherwise skip refresh
if (!ref_now)
continue;
}
refresh_target(ctrl, rid, bid, subarray_ref_counters[rid][bid]);
// Get 1 ref credit
(*(bank_refresh_backlog[rid]))[bid]++;
// Next time, refresh the next sa in the same bank
subarray_ref_counters[rid][bid] = (subarray_ref_counters[rid][bid]+1) % max_sa_count;
}
}
refreshed = clk;
}
// first = wrq.count; second = bank idx
typedef pair<int, int> wrq_idx;
bool wrq_comp (wrq_idx l, wrq_idx r)
{
return l.first < r.first;
}
// WRP
// Write-Refresh Parallelization (WRP): when a write drain is imminent for a
// rank, refresh one of its least-busy banks so the refresh latency overlaps
// the writeback, earning a credit against future scheduled refreshes.
template<>
void Refresh<DSARP>::wrp() {
  for (int ref_rid = 0; ref_rid < max_rank_count; ref_rid++)
  {
    // Skip this rank if a refresh to it is already queued.
    bool pending_ref = false;
    for (const Request& req : ctrl->otherq.q) {
      if (req.type == Request::Type::REFRESH && req.addr_vec[level_rank] == ref_rid) {
        pending_ref = true;
        break;
      }
    }
    if (pending_ref)
      continue;
    // Per-bank demand counters: (outstanding reads+writes, bank index).
    vector<wrq_idx> sorted_bank_demand;
    for (int b = 0; b < max_bank_count; b++)
      sorted_bank_demand.push_back(wrq_idx(0, b));
    // Tally the writes headed to this rank.
    int total_wr = 0;
    for (const auto& req : ctrl->writeq.q) {
      if (req.addr_vec[level_rank] == ref_rid) {
        sorted_bank_demand[req.addr_vec[level_bank]].first++;
        total_wr++;
      }
    }
    // WRP only applies when writes to this rank are about to drain.
    if (total_wr == 0)
      continue;
    // Pending reads count as demand too.
    for (const auto& req : ctrl->readq.q)
      if (req.addr_vec[level_rank] == ref_rid)
        sorted_bank_demand[req.addr_vec[level_bank]].first++;
    // Ascending by demand count: idle banks (count == 0) sort to the front.
    std::sort(sorted_bank_demand.begin(), sorted_bank_demand.end(), wrq_comp);
    // Find the boundary of the idle prefix.
    // BUGFIX: the original tested `.second != 0` (the bank index) instead of
    // `.first != 0` (the demand count), so the "idle bank" boundary depended
    // on which bank id happened to sort first rather than on actual demand.
    int top_idle_idx = 0;
    for (int i = 0; i < max_bank_count; i++) {
      if (sorted_bank_demand[i].first != 0) {
        top_idle_idx = i;
        break;
      }
    }
    // Randomly pick one of the idle banks (or the least-busy bank if the
    // idle prefix is empty).
    int ref_bid_idx = (top_idle_idx == 0) ? 0 : rand() % top_idle_idx;
    int ref_bid = sorted_bank_demand[ref_bid_idx].second;
    // Respect the refresh-credit cap and the command-queue capacity.
    if ((*(bank_refresh_backlog[ref_rid]))[ref_bid] < backlog_max
        && ctrl->otherq.q.size() < ctrl->otherq.max) {
      refresh_target(ctrl, ref_rid, ref_bid, subarray_ref_counters[ref_rid][ref_bid]);
      // Earn 1 ref credit for refreshing ahead of schedule.
      (*(bank_refresh_backlog[ref_rid]))[ref_bid]++;
      // Advance the round-robin subarray pointer for this bank.
      subarray_ref_counters[ref_rid][ref_bid] = (subarray_ref_counters[ref_rid][ref_bid] + 1) % max_sa_count;
    }
  }
}
// OoO refresh of DSARP: per-cycle refresh scheduler. For DARP/DSARP it
// additionally triggers WRP on the read->write mode transition and tries
// to pull refreshes in early during read mode; the base scheduled refresh
// fires every nREFI (rank-level) or nREFIpb (per-bank) cycles.
template<>
void Refresh<DSARP>::tick_ref() {
clk++;
bool b_ref_rank = ctrl->channel->spec->b_ref_rank;
// Refresh period depends on granularity: all-rank vs per-bank.
int refresh_interval =
(b_ref_rank) ?
ctrl->channel->spec->speed_entry.nREFI :
ctrl->channel->spec->speed_entry.nREFIpb;
// DARP
if (ctrl->channel->spec->type == DSARP::Type::DARP ||
ctrl->channel->spec->type == DSARP::Type::DSARP) {
// Write-Refresh Parallelization. Issue refreshes on the rising edge of
// writeback mode (detected via the stored previous mode).
if (!ctrl_write_mode && ctrl->write_mode)
wrp();
// Record write mode
ctrl_write_mode = ctrl->write_mode;
// Inject early to pull in some refreshes during read mode
early_inject_refresh();
}
// Time to schedule a refresh and also try to skip some refreshes
if ((clk - refreshed) >= refresh_interval)
inject_refresh(b_ref_rank);
}
/**** End DSARP specialization ****/
} /* namespace ramulator */
/*
* Refresh.h
*
* This is a refresh scheduler. A list of refresh policies implemented:
*
* 1. All-bank refresh
* 2. Per-bank refresh (only DSARP memory module has been completed to work with REFpb).
* The other modules (LPDDRx) have not been updated to pass a knob to turn on/off REFpb.
* 3. A re-implementation of DSARP from the refresh mechanisms proposed in Chang et al.,
* "Improving DRAM Performance by Parallelizing Refreshes with Accesses", HPCA 2014.
*
* Created on: Mar 17, 2015
* Author: kevincha
*/
#ifndef __REFRESH_H_
#define __REFRESH_H_
#include <stddef.h>
#include <cassert>
#include <iostream>
#include <vector>
#include "Request.h"
#include "DSARP.h"
#include "ALDRAM.h"
using namespace std;
namespace ramulator {
template <typename T>
class Controller;
// Generic refresh scheduler: issues an all-bank refresh every nREFI cycles.
// The DSARP specializations (declared below, defined in Refresh.cpp) add
// per-bank/per-subarray refresh with credit-based skipping and WRP.
template <typename T>
class Refresh {
public:
Controller<T>* ctrl; // owning controller (not owned by this class)
long clk, refreshed; // current cycle; cycle of the last injected refresh
// Per-bank refresh counter to track the refresh progress for each rank
vector<int> bank_ref_counters;
int max_rank_count, max_bank_count;
// Address-vector indices of each organization level for this DRAM type.
int level_chan, level_rank, level_bank, level_sa;
// ctor
Refresh(Controller<T>* ctrl) : ctrl(ctrl) {
clk = refreshed = 0;
max_rank_count = ctrl->channel->children.size();
max_bank_count = ctrl->channel->spec->org_entry.count[(int)T::Level::Bank];
// Init refresh counters
for (int r = 0; r < max_rank_count; r++) {
bank_ref_counters.push_back(0);
// Heap-allocated per-rank credit vectors, freed in the destructor.
// NOTE(review): raw owning pointers with no copy control — copying a
// Refresh would double-delete; instances should not be copied.
bank_refresh_backlog.push_back(new vector<int>(max_bank_count, 0));
}
level_chan = (int)T::Level::Channel;
level_rank = (int)T::Level::Rank;
level_bank = (int)T::Level::Bank;
level_sa = -1; // Most DRAM doesn't have subarray level
}
// dtor
virtual ~Refresh() {
// Clean up backlog
for (unsigned int i = 0; i < bank_refresh_backlog.size(); i++)
delete bank_refresh_backlog[i];
}
// Basic refresh scheduling for all bank refresh that is applicable to all DRAM types
void tick_ref() {
clk++;
int refresh_interval = ctrl->channel->spec->speed_entry.nREFI;
// Time to schedule a refresh
if ((clk - refreshed) >= refresh_interval) {
inject_refresh(true);
// ALDRAM: update timing parameters based on temperatures
// NOTE(review): temperature is hard-coded to COLD here; presumably a
// placeholder for a real thermal model — confirm before relying on it.
ALDRAM::Temp current_temperature = ALDRAM::Temp::COLD;
ctrl->update_temp(current_temperature);
}
}
private:
// Keeping track of refresh status of every bank: + means ahead of schedule, - means behind schedule
vector<vector<int>*> bank_refresh_backlog;
// Keeping track of which subarray to refresh next
vector<vector<int>> subarray_ref_counters;
int max_sa_count = 0; // subarrays per bank (DSARP only; 0 otherwise)
// As defined in the standards
int backlog_max = 8; // max credits a bank may bank up
int backlog_min = -8; // forced-refresh threshold (too far behind)
int backlog_early_pull_threshold = -6; // pull in early refreshes below this
bool ctrl_write_mode = false; // previous-cycle write mode (edge detection)
// Enqueue a refresh request for the given (rank, bank, subarray); -1 in a
// field means "all" at that level.
void refresh_target(Controller<T>* ctrl, int rank, int bank, int sa)
{
vector<int> addr_vec(int(T::Level::MAX), -1);
addr_vec[0] = ctrl->channel->id;
addr_vec[1] = rank;
addr_vec[2] = bank;
addr_vec[3] = sa;
Request req(addr_vec, Request::Type::REFRESH, NULL);
bool res = ctrl->enqueue(req);
// The refresh queue must never be full when a refresh is due.
assert(res);
}
// Inject refresh at either rank or bank level
void inject_refresh(bool b_ref_rank) {
// Rank-level refresh
if (b_ref_rank) {
for (auto rank : ctrl->channel->children)
refresh_target(ctrl, rank->id, -1, -1);
}
// Bank-level refresh. Simultaneously issue to all ranks (better performance than staggered refreshes).
else {
for (auto rank : ctrl->channel->children)
refresh_target(ctrl, rank->id, bank_ref_counters[rank->id], -1);
}
refreshed = clk;
}
// DSARP
void early_inject_refresh();
void wrp();
};
// Declaration of specialized constructor and tick_ref, so the compiler knows
// where to look for these definitions when controller calls them!
template<> Refresh<DSARP>::Refresh(Controller<DSARP>* ctrl);
template<> void Refresh<DSARP>::tick_ref();
} /* namespace ramulator */
#endif /* SRC_REFRESH_H_ */
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment