Commit f4bb5ddb by lvzhengyang

train model

parent 124ce875
......@@ -4,3 +4,5 @@ data/asap7
 *.log
 build_model/runs
 build_model/weights
+cell_delay/runs
+cell_delay/weights
......@@ -1023,6 +1023,12 @@ def create_graph6(data_dir, parsed_libs_dir, parsed_sdf_dir, save_dir, block_name
         pin_y = place_node_y + place_pin_y_off
         pin_loc[i] = np.array([pin_x - xl, pin_y - yl, xh - pin_x, yh - pin_y])
+    # store node names for reference
+    node_names = valid_pin_list
+    with open(os.path.join(save_dir, f'{block_name}.graph.node_names.pkl'), 'wb') as f:
+        pickle.dump(node_names, f)
     ndata = dict()
     ndata["n_rats"] = torch.zeros(num_pins, 4)
     ndata["n_ats"] = torch.zeros(num_pins, 4)
......
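# Leave-one-block-out runs: each tag below holds out a different block as the test set.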
- tag: 1
  tensorboard: Jan23_11-57-52_gpu-t01
  description: 'aes' as test
- tag: 2
  tensorboard: Jan23_11-57-07_gpu-t01
  description: 'aes-mbff' as test
- tag: 3
  tensorboard: Jan23_11-56-29_gpu-t00
  description: 'gcd' as test
- tag: 4
  tensorboard: Jan23_11-55-38_gpu-t00
  description: 'ibex' as test
- tag: 5
  tensorboard: Jan23_11-54-37_gpu-t00
  description: 'jpeg' as test
- tag: 6
  tensorboard: Jan23_11-53-55_gpu-t00
  description: 'uart' as test
import torch
import numpy as np
import dgl
import torch.nn.functional as F
import random
import pdb
import time
import argparse
import os
from sklearn.metrics import r2_score
from model import PredModel
from load_graph import load_data
import matplotlib.pyplot as plt
import pickle
pdk = "asap7"
tag = "no_timing_opt"
dir_prefix = f"../data/{pdk}/{tag}"
# blocks = "aes aes-mbff ethmac gcd ibex jpeg mock-alu uart".split()
blocks = "aes aes-mbff gcd ibex jpeg uart".split()
blocks_test = set('uart'.split())
tag = '6'  # run tag for this split (note: rebinds `tag`; dir_prefix above already captured the dataset tag)
blocks_train = set()
for block in blocks:
    if block not in blocks_test:
blocks_train.add(block)
def load_model(model_path):
model = PredModel()
model.load_state_dict(torch.load(model_path))
model.cuda()
return model
def check(data, model):
model.eval()
with torch.no_grad():
for block, (g, ts) in data.items():
print(f'-------- {block} --------')
pred_net_delays, pred_cell_delays, pred_atslew = model(g, ts)
            # map predictions back from the shifted log domain used in training
            # (net-delay targets were stored as log(delay) + 7.6)
            pred_net_delays.sub_(7.6)
            pred_net_delays = torch.exp(pred_net_delays)
pred_cell_delays = torch.exp(pred_cell_delays)
# check delays only
            true_net_delays = g.ndata['n_net_delays'].clone()  # clone: zero entries are overwritten below
true_cell_delays = g.edges['cell_out'].data['e_delay']
# error rate
# true_net_delays[~ts['mask']] = 1 # net delay is defined on fanin pins only
# pred_net_delays[~ts['mask']] = 1
# filter out invalid net delays
mask = true_net_delays == 0
true_net_delays[mask] = 1
pred_net_delays[mask] = 1
err_net_delays = pred_net_delays.sub(true_net_delays).div(true_net_delays+1e-9) # avoid div-0
err_cell_delays = pred_cell_delays.sub(true_cell_delays).div(true_cell_delays+1e-9)
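            # Hypothetical reporting step (not in the original flow): summarize
            # the relative errors, e.g. mean/max absolute relative error.
            print('net delay rel. err:  mean {:.4f}, max {:.4f}'.format(
                err_net_delays.abs().mean().item(), err_net_delays.abs().max().item()))
            print('cell delay rel. err: mean {:.4f}, max {:.4f}'.format(
                err_cell_delays.abs().mean().item(), err_cell_delays.abs().max().item()))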
# check net delays
corner_names = {0: 'ER', 1: 'EF', 2: 'LR', 3: 'LF'}
            # assumes invalid entries cover whole pins (all 4 corners), so the
            # flattened selection can be reshaped back to [-1, 4]
            truth = true_net_delays[~mask].view(-1, 4)
            pred = pred_net_delays[~mask].view(-1, 4)
for corner in range(4):
tt = truth[:, corner].cpu().numpy()
pp = pred[:, corner].cpu().numpy()
minv = min(tt.min(), pp.min()) - 0.2
maxv = max(tt.max(), pp.max()) + 0.2
                maxv = min(2000, maxv)  # clip the axis so outliers don't dominate the plot
plt.axis("square")
plt.title(f'net delay prediction ({corner_names[corner]}) of block {block}')
plt.xlabel('Truth/ns')
plt.ylabel('Predicted/ns')
plt.xlim(minv - 10, maxv + 10)
plt.ylim(minv - 10, maxv + 10)
                plt.axline((minv, minv), (maxv, maxv), color='r')  # ideal y = x line
                plt.axline((500, minv), (500, maxv), color='black', linestyle='-.')  # large-delay marker at 500
plt.scatter(tt, pp, s=10, c='b')
save_dir = os.path.join('figures', block)
os.makedirs(save_dir, exist_ok=True)
                plt.savefig(os.path.join(save_dir, f'{block}.{corner_names[corner]}.png'))
                plt.clf()  # reset the figure so later corners/blocks don't draw over it
def extract_large_delay_nets(block, g, ts):
    with open(os.path.join(dir_prefix, block, 'parsed', f'{block}.graph.node_names.pkl'), 'rb') as f:
        node_names = pickle.load(f)
truth = g.ndata['n_net_delays']
    # inspect corner 0 (ER) only
    truth = truth[:, 0]
nodes = torch.where(truth > 500)[0]
src, dst = g.in_edges(nodes, etype='net_out') # dst == nodes
    pdb.set_trace()  # drop into pdb to inspect the high-delay nets interactively
if __name__ == '__main__':
    model_path = './weights/5/4200-0.601-0.685-392.834-31.378.pt'  # hardcoded checkpoint; not derived from `tag`
model = load_model(model_path)
data = load_data()
# test_block = 'jpeg'
# extract_large_delay_nets(test_block, data[test_block][0], data[test_block][1])
check(data, model)
......@@ -20,8 +20,8 @@ dir_prefix = f"../data/{pdk}/{tag}"
# blocks = "aes aes-mbff ethmac gcd ibex jpeg mock-alu uart".split()
blocks = "aes aes-mbff gcd ibex jpeg uart".split()
blocks_test = set('uart'.split())
tag = '6'
blocks_test = set('aes'.split())
tag = '1'
blocks_train = set()
for block in blocks:
if not block in blocks_test:
......@@ -62,7 +62,7 @@ def load_data():
     mask = torch.zeros(g.nodes().size(0), dtype=torch.bool, device='cuda')
     mask[ts['input_nodes']] = 1
     ts['mask'] = mask
-    g.ndata['n_net_delays_log'][mask] = 0
+    g.ndata['n_net_delays_log'][~mask] = 0
     data[block] = g, ts
     return data
......@@ -84,7 +84,7 @@ def train(model, data_train, data_test):
     # net delays are defined on pins
     # add mask to select fanin pins only
-    pred_net_delays[ts['mask']] = 0
+    pred_net_delays[~ts['mask']] = 0
     loss_net_delays = F.mse_loss(pred_net_delays, g.ndata['n_net_delays_log'])
     train_loss_tot_net_delays += loss_net_delays.item()
     train_loss_epoch_net_delays += loss_net_delays.item()
......@@ -108,6 +108,7 @@ def train(model, data_train, data_test):
     test_loss_tot_net_delays, test_loss_tot_cell_delays, test_loss_tot_ats = 0, 0, 0
     for k, (g, ts) in data_test.items():
         pred_net_delays, pred_cell_delays, pred_atslew = model(g, ts)
+        pred_net_delays[~ts['mask']] = 0
         test_loss_tot_net_delays += F.mse_loss(pred_net_delays, g.ndata['n_net_delays_log']).item()
         test_loss_tot_cell_delays += F.mse_loss(pred_cell_delays, g.edges['cell_out'].data['e_cell_delays_log']).item()
print('epoch: {}, net_delay_loss (train): {:.6e}, cell_delay_loss (train): {:.6e}, net_delay_loss (test): {:.6e}, cell_delay_loss (test): {:.6e}'.format(
......
import torch
import numpy as np
import dgl
import os
import pickle
from utils import *
pdk = "asap7"
tag = "no_timing_opt"
dir_prefix = f"../data/{pdk}/{tag}"
blocks = "aes aes-mbff gcd ibex jpeg uart".split()
parsed_lib_dir = f'../data/{pdk}/techlib/parsed_lib'
# load the DGL graphs built earlier by the graph-construction step
def load_dgl_graphs():
graphs = {}
for block in blocks:
graph_path = os.path.join(dir_prefix, block, "parsed", f"{block}.graph.bin")
g = dgl.load_graphs(graph_path)[0][0]
graphs[block] = g
return graphs
def load_sdf_dict(save_dir):
    with open(os.path.join(save_dir, "inst_time.dict.pkl"), 'rb') as f:
        inst_time = pickle.load(f)
    with open(os.path.join(save_dir, "pin_time.dict.pkl"), 'rb') as f:
        pin_time = pickle.load(f)
    return inst_time, pin_time
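# inst_time maps instance name -> parsed SDF record; each record's .iopath entries
# carry a source pin (path.src), a destination pin (path.dst) and delay values
# (path.value), as consumed in extract_libcell_cases below. pin_time is loaded
# alongside but appears unused in this file.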
def load_libdata(save_dir):
    with open(os.path.join(save_dir, "lib_data.pkl"), 'rb') as f:
        libdata = pickle.load(f)
    with open(os.path.join(save_dir, "pin_caps.pkl"), 'rb') as f:
        pincaps = pickle.load(f)
    return libdata, pincaps
def extract_libcell_cases(graphs):
libcell2inst_map = dict()
lib_data, pin_caps = load_libdata(parsed_lib_dir)
libcell_delays = dict()
libcell_topos = dict()
for block in blocks:
libcell2inst_map[block] = dict()
graph = graphs[block]
block_dir = os.path.join(dir_prefix, block, 'parsed')
        with open(os.path.join(block_dir, f'{block}.graph.node_names.pkl'), 'rb') as f:
            graph_node_names = pickle.load(f)
        graph_node_name2id_map = {name: i for i, name in enumerate(graph_node_names)}
inst_names = np.load(os.path.join(block_dir, 'node_names.npy'))
inst_x = np.load(os.path.join(block_dir, 'node_x.npy'))
inst_y = np.load(os.path.join(block_dir, 'node_y.npy'))
inst_pin_offset_x = np.load(os.path.join(block_dir, 'pin_offset_x.npy'))
inst_pin_offset_y = np.load(os.path.join(block_dir, 'pin_offset_y.npy'))
inst_pin_names_raw = np.load(os.path.join(block_dir, 'pin_names.npy'))
inst_pin_direct = np.load(os.path.join(block_dir, 'pin_direct.npy'))
inst2pin_map = np.load(os.path.join(block_dir, 'node2pin_map.npy'), allow_pickle=True)
net2pin_map = np.load(os.path.join(block_dir, 'net2pin_map.npy'), allow_pickle=True)
pin2inst_map = np.load(os.path.join(block_dir, 'pin2node_map.npy'))
pin2net_map = np.load(os.path.join(block_dir, 'pin2net_map.npy'))
        with open(os.path.join(block_dir, 'node_name2id_map.pkl'), 'rb') as f:
            inst_name2id_map = pickle.load(f)
        with open(os.path.join(block_dir, 'inst2libcell_map.pkl'), 'rb') as f:
            inst2libcell_map = pickle.load(f)
        for inst, libcell in inst2libcell_map.items():
            libcell2inst_map[block].setdefault(libcell, []).append(inst)
inst_time, pin_time = load_sdf_dict(block_dir)
# check libcell "INVx1_ASAP7_75t_R"
libcell = 'INVx1_ASAP7_75t_R'
insts = libcell2inst_map[block][libcell]
        if libcell not in libcell_delays:
libcell_delays[libcell] = dict()
libcell_topos[libcell] = dict()
for inst in insts:
paths = inst_time[inst].iopath
for path in paths:
key = path.src + '-' + path.dst
if key not in libcell_delays[libcell]:
libcell_delays[libcell][key] = []
libcell_topos[libcell][key] = []
libcell_delays[libcell][key].append(path.value)
# extract fanin net and fanout net
inst_id = inst_name2id_map[inst]
src_pin_id, dst_pin_id = -1, -1
inst_pins = inst2pin_map[inst_id]
for pi in range(inst_pins.size):
pin_id = inst_pins[pi]
pin_name = inst_pin_names_raw[pin_id]
if pin_name.decode() == path.src:
src_pin_id = pin_id
elif pin_name.decode() == path.dst:
dst_pin_id = pin_id
if src_pin_id >= 0 and dst_pin_id >= 0:
break
                assert src_pin_id >= 0 and dst_pin_id >= 0
fanin_net = pin2net_map[src_pin_id]
fanout_net = pin2net_map[dst_pin_id]
                def deal_net(net_id, pin_id):
                    # collect a feature row for every pin on net `net_id`;
                    # idx is pin_id's position, so pins_info[idx] is its row
                    pins = net2pin_map[net_id]
                    idx = np.where(pins == pin_id)[0][0]
                    # extract per-pin info
                    pins_info = []
                    for pi in range(pins.size):
                        p = pins[pi]  # renamed from pin_id to avoid shadowing the argument
                        inst_id = pin2inst_map[p]
                        pin_x = inst_x[inst_id] + inst_pin_offset_x[p]
                        pin_y = inst_y[inst_id] + inst_pin_offset_y[p]
                        inst_name = inst_names[inst_id].decode()
                        pin_name = inst_pin_names_raw[p].decode()
                        # 7 features per pin: x, y, two capacitance pairs, output flag
                        info = np.zeros(7)
                        info[0] = pin_x
                        info[1] = pin_y
                        info[2] = pin_caps['fast'][inst2libcell_map[inst_name]][pin_name]['rise_capacitance']
                        info[3] = pin_caps['fast'][inst2libcell_map[inst_name]][pin_name]['fall_capacitance']
                        # NOTE: slots 4-5 duplicate the 'fast' values; another corner
                        # (e.g. 'slow') may have been intended here
                        info[4] = pin_caps['fast'][inst2libcell_map[inst_name]][pin_name]['rise_capacitance']
                        info[5] = pin_caps['fast'][inst2libcell_map[inst_name]][pin_name]['fall_capacitance']
                        if inst_pin_direct[p] == b'OUTPUT':
                            info[6] = 1
                        pins_info.append(info)
pins_info = np.stack(pins_info)
return pins_info, idx
fanin_info, fanin_id = deal_net(fanin_net, src_pin_id)
fanout_info, fanout_id = deal_net(fanout_net, dst_pin_id)
libcell_topos[libcell][key].append(((fanin_info, fanin_id), (fanout_info, fanout_id)))
    # quick sanity check on the collected samples (values are unused below)
    libcell = 'INVx1_ASAP7_75t_R'
    key = 'A-Y'
    delays = np.stack(libcell_delays[libcell][key])
    delays_log = np.log(delays)
dataset = {'delays': libcell_delays, 'topos': libcell_topos}
    with open(os.path.join('.', 'libcell_delay_dataset.pkl'), 'wb') as f:
        pickle.dump(dataset, f)
if __name__ == '__main__':
graphs = load_dgl_graphs()
extract_libcell_cases(graphs)
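# A minimal sketch of how the saved dataset can be inspected afterwards
# (keys as written into `dataset` above):
#
#   with open('libcell_delay_dataset.pkl', 'rb') as f:
#       ds = pickle.load(f)
#   print(ds['delays']['INVx1_ASAP7_75t_R']['A-Y'][:5])    # first few delay samples
#   print(len(ds['topos']['INVx1_ASAP7_75t_R']['A-Y']))    # matching topology records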
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import pickle
import math
import pdb
class MLP(torch.nn.Module):
def __init__(self, *sizes, batchnorm=False, dropout=False):
super().__init__()
fcs = []
for i in range(1, len(sizes)):
fcs.append(torch.nn.Linear(sizes[i - 1], sizes[i]))
if i < len(sizes) - 1:
fcs.append(torch.nn.LeakyReLU(negative_slope=0.2))
if dropout: fcs.append(torch.nn.Dropout(p=0.2))
if batchnorm: fcs.append(torch.nn.BatchNorm1d(sizes[i]))
self.layers = torch.nn.Sequential(*fcs)
def forward(self, x):
return self.layers(x)
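# Example: MLP(7, 64, 4) builds Linear(7, 64) -> LeakyReLU -> Linear(64, 4);
# the activation (and optional Dropout/BatchNorm) is only inserted between
# layers, never after the last one.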
class SelfAtt(nn.Module):
def __init__(self, input_size, n_heads, hidden_size_per_head):
super().__init__()
self.n_heads = n_heads
self.input_size = input_size
self.hidden_size_per_head = hidden_size_per_head
self.query = MLP(input_size, n_heads * hidden_size_per_head)
self.key = MLP(input_size, n_heads * hidden_size_per_head)
self.value = MLP(input_size, n_heads * hidden_size_per_head)
self.reduce_heads = MLP(n_heads * hidden_size_per_head, hidden_size_per_head)
# @param x: [#batch, #num_inputs, #n_heads*hidden_size_per_head]
def _transpose(self, x):
x = x.view(x.shape[0], x.shape[1], self.n_heads, self.hidden_size_per_head)
return x.permute(0, 2, 1, 3)
# @param input: [#batch, #num_inputs, #features]
def forward(self, input):
query = self.query(input)
key = self.key(input)
value = self.value(input)
query = self._transpose(query)
key = self._transpose(key)
value = self._transpose(value)
att_scores = torch.matmul(query, key.transpose(-1, -2))
att_scores = att_scores / math.sqrt(self.hidden_size_per_head)
att_probs = nn.Softmax(dim=-1)(att_scores)
context = torch.matmul(att_probs, value)
context = context.permute(0, 2, 1, 3).contiguous()
context = context.view(context.shape[0], context.shape[1], self.n_heads * self.hidden_size_per_head)
output = self.reduce_heads(context)
return output
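# Shape sanity check (illustrative numbers): for input x of shape
# [batch=2, pins=5, features=7], SelfAtt(7, n_heads=4, hidden_size_per_head=32)(x)
# returns [2, 5, 32], since reduce_heads projects the concatenated heads back
# down to hidden_size_per_head.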
class CellDelayPred(nn.Module):
def __init__(self, input_size, n_heads, hidden_size_per_head):
super().__init__()
self.input_size = input_size
self.n_heads = n_heads
self.hidden_size_per_head = hidden_size_per_head
self.fanin_att0 = SelfAtt(input_size, n_heads, hidden_size_per_head)
self.fanout_att0 = SelfAtt(input_size, n_heads, hidden_size_per_head)
self.fanin_att1 = SelfAtt(hidden_size_per_head, n_heads, hidden_size_per_head)
self.fanout_att1 = SelfAtt(hidden_size_per_head, n_heads, hidden_size_per_head)
# self.fanin_att2 = SelfAtt(hidden_size_per_head, n_heads, hidden_size_per_head)
# self.fanout_att2 = SelfAtt(hidden_size_per_head, n_heads, hidden_size_per_head)
self.delay_pred_mlp = MLP(2 * hidden_size_per_head, 64, 4)
def forward(self, fanin_topo, fanout_topo, fanin_id, fanout_id):
fanin = self.fanin_att0(fanin_topo)
fanin = self.fanin_att1(fanin)
# fanin = self.fanin_att2(fanin)
fanin = fanin[:, fanin_id]
        fanout = self.fanout_att0(fanout_topo)  # fixed: previously reused the fanin attention blocks
        fanout = self.fanout_att1(fanout)
        # fanout = self.fanout_att2(fanout)
        fanout = fanout[:, fanout_id]
outer_feat = torch.cat([fanin, fanout], dim=-1)
pred = self.delay_pred_mlp(outer_feat)
return pred
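# Illustrative shapes: fanin_topo/fanout_topo are [1, #pins_on_net, 7] pin-feature
# tensors built by deal_net; indexing with fanin_id/fanout_id selects the arc's
# src/dst pin embedding, and delay_pred_mlp maps the concatenated pair to the
# 4 delay values of one IOPATH sample.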
def load_cell_delay_data():
data_path = './libcell_delay_dataset.pkl'
    with open(data_path, 'rb') as f:
        data = pickle.load(f)
return data
def preprocess(data):
    # despite its name, this function also builds and trains the delay model
libcell = 'INVx1_ASAP7_75t_R'
key = 'A-Y'
delays = np.stack(data['delays'][libcell][key])
delays = torch.from_numpy(delays).cuda().float()
delays_log = torch.log(delays)
topos = data['topos'][libcell][key]
fanin_topos, fanin_ids, fanout_topos, fanout_ids = [], [], [], []
for topo in topos:
fanin_topos.append(torch.tensor(topo[0][0]).float())
fanin_ids.append(topo[0][1])
fanout_topos.append(torch.tensor(topo[1][0]).float())
fanout_ids.append(topo[1][1])
num_data = len(topos)
model = CellDelayPred(fanin_topos[0].size(-1), 4, 32)
model.cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)
min_loss = 1e9
for e in range(1000):
model.train()
tot_train_loss = 0
for di in range(num_data):
optimizer.zero_grad()
fanin = fanin_topos[di]
fanin = fanin.cuda()
fanin.unsqueeze_(0)
fanout = fanout_topos[di]
fanout = fanout.cuda()
fanout.unsqueeze_(0)
pred = model(fanin, fanout, fanin_ids[di], fanout_ids[di])
truth = delays_log[di].unsqueeze(0)
loss = F.mse_loss(pred, truth)
loss.backward()
optimizer.step()
tot_train_loss += loss.item()
        # log every epoch; switch to the commented condition for sparser logging
        # if e == 0 or (e + 1) % 100 == 0:
        print("epoch {} loss {:.3f}".format(e + 1, tot_train_loss))
# save model
        if tot_train_loss < min_loss:
min_loss = tot_train_loss
print('-------- Save Model --------')
tag = ''
save_dir = os.path.join('weights', tag)
save_path = os.path.join(save_dir, 'e-{}-loss-{:.3f}.pt'.format(e + 1, min_loss))
os.makedirs(save_dir, exist_ok=True)
torch.save(model.state_dict(), save_path)
            # pdb.set_trace()  # leftover debug stop; it would pause training on every save
if __name__ == '__main__':
data = load_cell_delay_data()
preprocess(data)
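# Workflow note: run the extraction script above first to produce
# libcell_delay_dataset.pkl in this directory, then run this file to fit
# the per-libcell delay model.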