Commit a050c444 by lvzhengyang

train super-edge

parent d00bbe81
@@ -2,7 +2,5 @@ place_parser
data/asap7
*/__pycache__
*.log
build_model/runs
build_model/weights
cell_delay/runs
cell_delay/weights
*/runs
*/weights
@@ -1188,6 +1188,9 @@ if __name__ == '__main__':
save_dir = os.path.join(techlib_dir, "parsed_lib")
parse_libs(libs, save_dir)
# debug
blocks = "gcd".split()
for block in blocks:
print(f'-------- {block} --------')
block_dir = os.path.join(raw_data_dir, tag, block)
TODO get pin layer
DONE rebuild pin_cap, currently some pin_cap is 0
DONE get gates driving strength
\ No newline at end of file
import numpy as np
import pickle
import json
import os
from utils import *
import torch
from torch.utils.data import Dataset, DataLoader
import pdb
def load_topos(data_dir, block):
    with open(os.path.join(data_dir, f'{block}.topo.dict'), 'rb') as f:
        topos = pickle.load(f)
return topos
def build_dataset(topos, dataset):
# dataset is classified by num_fanout
for topo in topos:
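        # each topo describes one net: topo['fanout'][0] is the driving pin,
        # topo['fanin'] holds the driven (sink) pins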
num_fanout = len(topo['fanin'])
        if num_fanout not in dataset:
dataset[num_fanout] = {
'pin_features': [],
'transition': [],
'transition_incr': [],
'delay': []
}
        # pack all pins of the net into one matrix;
        # each row is a pin, with the last row being the fanout (driving) pin
        # pin_features {name: #dims (description)}
        #   dir: 1 (0 for input, 1 for output)   -> index 0
        #   caps: 4                               -> indices 1:5
        #   loc: 2                                -> indices 5:7
        #   num_fanout: 1                         -> index 7
        #   libcell_drive_strength: 1             -> index 8
        #   layer: (TO BE IMPLEMENTED)
        # * tot_dim = 1 + 4 + 2 + 1 + 1 = 9
dtype = np.float32
pin_features = np.zeros([num_fanout + 1, 9], dtype=dtype)
def get_pin_feat(pin_dict):
            pin_feat = np.zeros(9, dtype=dtype)
if pin_dict['dir'] == 'out':
pin_feat[0] = 1
else:
pin_feat[0] = 0
pin_feat[1:5] = pin_dict['caps']
pin_feat[5:7] = pin_dict['loc']
pin_feat[7] = pin_dict['num_fanout']
pin_feat[8] = pin_dict['libcell_drive_strength']
return pin_feat
for i in range(num_fanout):
pin_dict = topo['fanin'][i]
pin_features[i] = get_pin_feat(pin_dict)
pin_features[-1] = get_pin_feat(topo['fanout'][0])
transition = np.zeros([num_fanout + 1, 4], dtype=dtype)
transition_incr = np.zeros([num_fanout + 1, 4], dtype=dtype)
delay = np.zeros([num_fanout + 1, 4], dtype=dtype)
for i in range(num_fanout):
pin_dict = topo['fanin'][i]
transition[i] = pin_dict['transition']
transition_incr[i] = pin_dict['transition_incr']
delay[i] = pin_dict['delay_from_fanout']
transition[-1] = topo['fanout'][0]['transition']
# transition_incr and delay of fanout pin are set to 0
# mask them for training/testing
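        # (the model and the training loop drop this last row via [:, :-1, :],
        #  so these placeholder zeros never enter the loss)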
dataset[num_fanout]['pin_features'].append(pin_features)
dataset[num_fanout]['transition'].append(transition)
dataset[num_fanout]['transition_incr'].append(transition_incr)
dataset[num_fanout]['delay'].append(delay)
class myDataset(Dataset):
def __init__(self, data):
self.data = data
def __len__(self):
return len(self.data['pin_features'])
def __getitem__(self, idx):
return self.data['pin_features'][idx], \
self.data['transition'][idx], \
self.data['transition_incr'][idx], \
self.data['delay'][idx]
def build_loaders():
pdk = "asap7"
tag = "no_timing_opt"
raw_data_dir = f"../data/{pdk}"
techlib_dir = os.path.join(raw_data_dir, "techlib")
lib_dir = os.path.join(techlib_dir, 'parsed_lib')
blocks = "aes aes-mbff gcd ibex jpeg uart".split()
test_blocks = {'aes', 'uart'}
train_blocks = set()
for block in blocks:
        if block not in test_blocks:
train_blocks.add(block)
    rebuild_dataset = False  # set to True to re-parse the raw blocks instead of loading dataset/*.pkl
dataset_dir = 'dataset'
if rebuild_dataset:
train_dataset = dict()
for block in train_blocks:
print(f'-------- {block} --------')
block_dir = os.path.join(raw_data_dir, tag, block)
data_dir = os.path.join(block_dir, 'parsed')
topos = load_topos(data_dir, block)
build_dataset(topos, train_dataset)
test_dataset = dict()
for block in test_blocks:
print(f'-------- {block} --------')
block_dir = os.path.join(raw_data_dir, tag, block)
data_dir = os.path.join(block_dir, 'parsed')
topos = load_topos(data_dir, block)
build_dataset(topos, test_dataset)
os.makedirs(dataset_dir, exist_ok=True)
        with open(os.path.join(dataset_dir, 'train.pkl'), 'wb') as f:
            pickle.dump(train_dataset, f)
        with open(os.path.join(dataset_dir, 'test.pkl'), 'wb') as f:
            pickle.dump(test_dataset, f)
    else:
        with open(os.path.join(dataset_dir, 'train.pkl'), 'rb') as f:
            train_dataset = pickle.load(f)
        with open(os.path.join(dataset_dir, 'test.pkl'), 'rb') as f:
            test_dataset = pickle.load(f)
# build loaders
batch_size = 1024
max_fanout = 32
train_loaders = dict()
for key in train_dataset:
if key > max_fanout:
continue
train_loaders[key] = DataLoader(myDataset(train_dataset[key]), batch_size=batch_size, num_workers=4, shuffle=True)
test_loaders = dict()
for key in test_dataset:
if key > max_fanout:
continue
test_loaders[key] = DataLoader(myDataset(test_dataset[key]), batch_size=batch_size, num_workers=4)
return train_loaders, test_loaders
if __name__ == '__main__':
train_loaders, test_loaders = build_loaders()
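    # Each loader is keyed by the net's fanout count k; a batch from train_loaders[k]
    # stacks pin_features to [batch, k + 1, 9] and transition / transition_incr / delay
    # to [batch, k + 1, 4], with the last row of every sample being the driving pin.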
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
import pdb
class MLP(torch.nn.Module):
def __init__(self, *sizes, batchnorm=False, dropout=False):
super().__init__()
fcs = []
for i in range(1, len(sizes)):
fcs.append(torch.nn.Linear(sizes[i - 1], sizes[i]))
if i < len(sizes) - 1:
fcs.append(torch.nn.LeakyReLU(negative_slope=0.2))
if dropout: fcs.append(torch.nn.Dropout(p=0.2))
if batchnorm: fcs.append(torch.nn.BatchNorm1d(sizes[i]))
self.layers = torch.nn.Sequential(*fcs)
def forward(self, x):
return self.layers(x)
class SelfAtt(nn.Module):
def __init__(self, input_size, n_heads, hidden_size_per_head):
super().__init__()
self.n_heads = n_heads
self.input_size = input_size
self.hidden_size_per_head = hidden_size_per_head
self.query = MLP(input_size, n_heads * hidden_size_per_head)
self.key = MLP(input_size, n_heads * hidden_size_per_head)
self.value = MLP(input_size, n_heads * hidden_size_per_head)
self.reduce_heads = MLP(n_heads * hidden_size_per_head, hidden_size_per_head)
# @param x: [#batch, #num_inputs, #n_heads*hidden_size_per_head]
def _transpose(self, x):
x = x.view(x.shape[0], x.shape[1], self.n_heads, self.hidden_size_per_head)
return x.permute(0, 2, 1, 3)
# @param input: [#batch, #num_inputs, #features]
def forward(self, input):
query = self.query(input)
key = self.key(input)
value = self.value(input)
query = self._transpose(query)
key = self._transpose(key)
value = self._transpose(value)
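        # query / key / value are now [batch, n_heads, num_inputs, hidden_size_per_head]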
att_scores = torch.matmul(query, key.transpose(-1, -2))
att_scores = att_scores / math.sqrt(self.hidden_size_per_head)
        att_probs = F.softmax(att_scores, dim=-1)
context = torch.matmul(att_probs, value)
context = context.permute(0, 2, 1, 3).contiguous()
context = context.view(context.shape[0], context.shape[1], self.n_heads * self.hidden_size_per_head)
output = self.reduce_heads(context)
return output
class NetPred(nn.Module):
def __init__(self, input_size, output_size, n_heads, hidden_size_per_head):
super().__init__()
self.input_size = input_size
self.output_size = output_size
self.n_heads = n_heads
self.hidden_size_per_head = hidden_size_per_head
self.att0 = SelfAtt(input_size, n_heads, hidden_size_per_head)
self.att1 = SelfAtt(hidden_size_per_head, n_heads, hidden_size_per_head)
self.att2 = SelfAtt(hidden_size_per_head, n_heads, hidden_size_per_head)
self.delay_pred_mlp = MLP(output_size + hidden_size_per_head, 64, output_size)
self.transition_pred_mlp = MLP(output_size + hidden_size_per_head, 64, output_size)
def forward(self, pin_feat, fanout_transition):
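        # pin_feat:           [batch, num_fanout + 1, input_size], last row = fanout (driving) pin
        # fanout_transition:  [batch, output_size], the driving pin's transition
        # returns (delay_pred, transition_pred), each [batch, num_fanout, output_size], one row per sink pin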
x = self.att0(pin_feat)
x = self.att1(x)
x = self.att2(x)
x = x[:, :-1, :]
num_fanout = x.size(1)
trans = fanout_transition.unsqueeze(1).repeat(1, num_fanout, 1)
x = torch.cat([x, trans], dim=-1)
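        # broadcast the driver transition to every sink row and concatenate it with that row's attention context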
delay_pred = self.delay_pred_mlp(x)
transition_pred = self.transition_pred_mlp(x)
return delay_pred, transition_pred
\ No newline at end of file
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import time
from torch.utils.tensorboard import SummaryWriter
from models import NetPred
from gather_data import build_loaders
import pdb
def train(train_loaders, test_loaders):
input_dim, output_dim, n_heads, hidden_size_per_head = 9, 4, 8, 64
model = NetPred(input_dim, output_dim, n_heads, hidden_size_per_head)
model.cuda()
writer = SummaryWriter()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)
num_fanin_pins_train = len(train_loaders)
num_fanin_pins_test = len(test_loaders)
best_train_loss, best_test_loss = 1e9, 1e9
os.makedirs('weights', exist_ok=True)
start_time = time.time()
for e in range(100000):
epoch_start_time = time.time()
tot_delay_loss, tot_transition_loss = 0, 0
        model.train()
for key, loader in train_loaders.items():
for batch_idx, (pin_features, transition, transition_incr, delay) in enumerate(loader):
fanout_transition = transition[:, -1].cuda()
pin_features = pin_features.cuda()
transition_incr = transition_incr[:, :-1, :].cuda()
delay = delay[:, :-1, :].cuda()
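                # keep only the sink-pin rows; the driver row's delay / transition_incr are zero placeholders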
                # fanout_transition can be extremely large for high-fanout nets;
                # super-large-fanout nets are already excluded in build_loaders (max_fanout)
                fanout_transition.log_()
delay_pred, transition_incr_pred = model(pin_features, fanout_transition)
# delay is too small (1e-2 ~ 1e0)
delay.add_(1e-6).mul_(1e2).log_()
delay_loss = F.mse_loss(delay, delay_pred)
# transition_incr is too small (1e-3 ~ 1e0)
transition_incr.add_(1e-6).mul_(1e3).log_()
transition_loss = F.mse_loss(transition_incr, transition_incr_pred)
# tot_delay_loss += delay_loss.item() / num_fanin_pins_train
# tot_transition_loss += transition_loss.item() / num_fanin_pins_train
tot_delay_loss += delay_loss.item()
tot_transition_loss += transition_loss.item()
                optimizer.zero_grad()  # reset gradients for this batch, since step() is called per batch
                (delay_loss + transition_loss).backward()
                optimizer.step()
tot_delay_loss /= num_fanin_pins_train
tot_transition_loss /= num_fanin_pins_train
        writer.add_scalar('delay_loss/train', tot_delay_loss, e)
        writer.add_scalar('transition_loss/train', tot_transition_loss, e)
if e >= 20:
tot_loss = tot_delay_loss + tot_transition_loss
if tot_loss < best_train_loss:
best_train_loss = tot_loss
save_path = os.path.join('weights', 'e-%d-train_delay_loss-%.3f-train_transition_loss-%.3f.pt' %(e + 1, tot_delay_loss, tot_transition_loss))
torch.save(model.state_dict(), save_path)
print(f'NOTE: save model to {save_path}')
if (e + 1) % 20 == 0:
# evaluation
model.eval()
with torch.no_grad():
tot_delay_loss_test, tot_transition_loss_test = 0, 0
for key, loader in test_loaders.items():
for batch_idx, (pin_features, transition, transition_incr, delay) in enumerate(loader):
fanout_transition = transition[:, -1].cuda()
pin_features = pin_features.cuda()
transition_incr = transition_incr[:, :-1, :].cuda()
delay = delay[:, :-1, :].cuda()
                        # fanout_transition can be extremely large for high-fanout nets;
                        # super-large-fanout nets are already excluded in build_loaders (max_fanout)
                        fanout_transition.log_()
delay_pred, transition_incr_pred = model(pin_features, fanout_transition)
# delay is too small (1e-2 ~ 1e0)
delay.add_(1e-6).mul_(1e2).log_()
delay_loss = F.mse_loss(delay, delay_pred)
# transition_incr is too small (1e-3 ~ 1e0)
transition_incr.add_(1e-6).mul_(1e3).log_()
transition_loss = F.mse_loss(transition_incr, transition_incr_pred)
# tot_delay_loss_test += delay_loss.item() / num_fanin_pins_test
# tot_transition_loss_test += transition_loss.item() / num_fanin_pins_test
tot_delay_loss_test += delay_loss.item()
tot_transition_loss_test += transition_loss.item()
tot_delay_loss_test /= num_fanin_pins_test
tot_transition_loss_test /= num_fanin_pins_test
                writer.add_scalar('delay_loss/test', tot_delay_loss_test, e)
                writer.add_scalar('transition_loss/test', tot_transition_loss_test, e)
print("epoch: {}\n (Train)\t tot_loss: {:.6f}\t delay_loss: {:.6f}\t transition_loss: {:.6f}\n (Test) \t tot_loss: {:.6f}\t delay_loss: {:.6f}\t transition_loss: {:.6f}".format(
e + 1,
tot_delay_loss + tot_transition_loss, tot_delay_loss, tot_transition_loss,
tot_delay_loss_test + tot_transition_loss_test, tot_delay_loss_test, tot_transition_loss_test
))
tot_loss = tot_delay_loss_test + tot_transition_loss_test
if tot_loss < best_test_loss:
best_test_loss = tot_loss
save_path = os.path.join('weights', 'e-%d-test_delay_loss-%.3f-test_transition_loss-%.3f.pt' %(e + 1, tot_delay_loss_test, tot_transition_loss_test))
torch.save(model.state_dict(), save_path)
print(f'NOTE: save model to {save_path}')
if __name__ == '__main__':
train_loaders, test_loaders = build_loaders()
train(train_loaders, test_loaders)
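
For reference, a minimal evaluation sketch (an assumption, not part of this commit) showing how a checkpoint written by train() could be reloaded and scored; the checkpoint path is a placeholder:

import torch
import torch.nn.functional as F
from models import NetPred
from gather_data import build_loaders

# Hypothetical checkpoint path -- substitute a file produced by train() above.
ckpt_path = 'weights/<checkpoint>.pt'
model = NetPred(9, 4, 8, 64).cuda()
model.load_state_dict(torch.load(ckpt_path, map_location='cuda'))
model.eval()

_, test_loaders = build_loaders()
with torch.no_grad():
    for key, loader in test_loaders.items():
        pin_features, transition, transition_incr, delay = next(iter(loader))
        fanout_transition = transition[:, -1].log().cuda()
        delay_pred, transition_incr_pred = model(pin_features.cuda(), fanout_transition)
        # compare against the same log-scaled targets used during training
        delay_tgt = (delay[:, :-1, :].cuda() + 1e-6).mul(1e2).log()
        print(key, F.mse_loss(delay_tgt, delay_pred).item())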