Commit 91cd7fac by lvzhengyang

Initial commit

Branch: place_parser
def check(debug_file):
    with open(debug_file, 'r') as f:
        lines = f.readlines()
    diff_node_cnt = 0
    for line in lines:
        words = line.split()
        src_inst = words[0].split('/')[0]
        dst_inst = words[2].split('/')[0]
        if src_inst != dst_inst:
            print(line)
            diff_node_cnt += 1
    print(diff_node_cnt)

if __name__ == '__main__':
    check('debug.log')
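# Note (added for illustration; the format is inferred from the parsing above, not
# documented in the repo): each line of debug.log is assumed to look like
# "<src_inst>/<pin> <sep> <dst_inst>/<pin> ...", e.g.
#
#   inst_a/Y -> inst_b/A
#
# which splits to src_inst='inst_a' and dst_inst='inst_b' and is counted as an arc
# whose endpoints lie on different instances.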
import numpy as np
import torch
import dgl
import os
import pickle
import re
import pdb
from utils import *
""" check the difference of timing arcs between different stages
1. dump DEF for different stages
2. parse by DREAMPlace
3. parse by this file
4. compare the timing graph
"""
def create_timing_graph(def_path, data_dir):
    parse_inst2libcell_map(def_path, data_dir)

    ######## Load Data ########
    node_names = np.load(os.path.join(data_dir, 'node_names.npy'))
    node_x = np.load(os.path.join(data_dir, 'node_x.npy'))
    node_y = np.load(os.path.join(data_dir, 'node_y.npy'))
    pin_offset_x = np.load(os.path.join(data_dir, 'pin_offset_x.npy'))
    pin_offset_y = np.load(os.path.join(data_dir, 'pin_offset_y.npy'))
    pin_names_raw = np.load(os.path.join(data_dir, 'pin_names.npy'))
    pin_direct = np.load(os.path.join(data_dir, 'pin_direct.npy'))
    node2pin_map = np.load(os.path.join(data_dir, 'node2pin_map.npy'), allow_pickle=True)
    net2pin_map = np.load(os.path.join(data_dir, 'net2pin_map.npy'), allow_pickle=True)
    pin2node_map = np.load(os.path.join(data_dir, 'pin2node_map.npy'))
    with open(os.path.join(data_dir, 'scalars.txt'), 'r') as f:
        lines = f.readlines()
    xl = float(lines[3].split()[1])
    yl = float(lines[4].split()[1])
    xh = float(lines[5].split()[1])
    yh = float(lines[6].split()[1])
    with open(os.path.join(data_dir, 'node_name2id_map.pkl'), 'rb') as f:
        node_name2id_map = pickle.load(f)
    with open(os.path.join(data_dir, 'inst2libcell_map.pkl'), 'rb') as f:
        inst2libcell_map = pickle.load(f)
    ######## End Load Data ########

    ######## build timing arcs from the placed DEF ########
    timing_graph = TimingGraph(inst2libcell_map)

    # wire arcs: one arc from each driver (OUTPUT) pin to every sink pin on the net
    num_nets = net2pin_map.size
    for ni in range(num_nets):
        pins = net2pin_map[ni]
        if pins.size <= 1:
            continue
        # split the net's pins into drivers and sinks
        output_pins = []
        input_pins = []
        for pi in range(pins.size):
            pin_idx = pins[pi]
            if pin_direct[pin_idx] == b'OUTPUT':
                output_pins.append(pin_idx)
            else:
                input_pins.append(pin_idx)
        for op in output_pins:
            # skip single-pin instances that are not ports (e.g. TAP cells)
            op_node_idx = pin2node_map[op]
            if node2pin_map[op_node_idx].size <= 1:
                if node_names[pin2node_map[op]] != pin_names_raw[op]:
                    # instance not in the timing graph, e.g. a TAP cell
                    continue
                # else: a top-level port
            if node_names[pin2node_map[op]] != pin_names_raw[op]:
                op_full_name = node_names[pin2node_map[op]].decode() + '/' + pin_names_raw[op].decode()
            else:
                op_full_name = node_names[pin2node_map[op]].decode()
            for ip in input_pins:
                # skip single-pin instances that are not ports (e.g. TAP cells)
                ip_node_idx = pin2node_map[ip]
                if node2pin_map[ip_node_idx].size <= 1:
                    if node_names[pin2node_map[ip]] != pin_names_raw[ip]:
                        # instance not in the timing graph, e.g. a TAP cell
                        continue
                    # else: a top-level port
                if node_names[pin2node_map[ip]] != pin_names_raw[ip]:
                    ip_full_name = node_names[pin2node_map[ip]].decode() + '/' + pin_names_raw[ip].decode()
                else:
                    ip_full_name = node_names[pin2node_map[ip]].decode()
                timing_graph.add_arc(TimingArc(op_full_name, ip_full_name))

    # cell arcs
    # some pins of an instance may not be connected to any net, so mark the valid ones
    pin_valid = np.zeros(pin_names_raw.size, dtype=bool)
    for ni in range(num_nets):
        pins = net2pin_map[ni]
        if pins.size <= 1:
            continue
        for pi in range(pins.size):
            pin_valid[pins[pi]] = True
    num_nodes = node_names.size
    for ni in range(num_nodes):
        node_pins = node2pin_map[ni]
        if node_pins.size <= 1:  # port
            continue
        inst = node_names[ni].decode()
        input_pins = []
        output_pins = []
        for pi in range(node_pins.size):
            pin_idx = node_pins[pi]
            if pin_valid[pin_idx]:
                if pin_direct[pin_idx] == b'OUTPUT':
                    output_pins.append(pin_idx)
                else:
                    input_pins.append(pin_idx)
        libcell = inst2libcell_map[inst]
        if 'FF' in libcell or 'LL' in libcell:
            # sequential cell: only non-data, non-set/reset inputs (i.e. the clock
            # pins) launch arcs to the outputs
            for ip in input_pins:
                ip_full_name = node_names[pin2node_map[ip]].decode() + '/' + pin_names_raw[ip].decode()
                if 'D' not in ip_full_name and 'RESET' not in ip_full_name and 'SET' not in ip_full_name:
                    for op in output_pins:
                        op_full_name = node_names[pin2node_map[op]].decode() + '/' + pin_names_raw[op].decode()
                        timing_graph.add_arc(TimingArc(ip_full_name, op_full_name))
        else:
            # combinational cell: every input pin has an arc to every output pin
            for ip in input_pins:
                ip_full_name = node_names[pin2node_map[ip]].decode() + '/' + pin_names_raw[ip].decode()
                for op in output_pins:
                    op_full_name = node_names[pin2node_map[op]].decode() + '/' + pin_names_raw[op].decode()
                    timing_graph.add_arc(TimingArc(ip_full_name, op_full_name))
    ######## End build timing arcs from the placed DEF ########
    return timing_graph
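# Worked example (hypothetical netlist, added for illustration): for a net driven by
# u1/Y with sinks u2/A and u3/B, the wire-arc loop adds TimingArc('u1/Y', 'u2/A') and
# TimingArc('u1/Y', 'u3/B'); the cell-arc loop then adds, e.g., TimingArc('u2/A', 'u2/Y')
# for a combinational u2, or only clock-to-output arcs if u2's libcell name contains
# 'FF' or 'LL'.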
if __name__ == '__main__':
    pdk = "asap7"
    # tag = "limit_timing_opt"
    tag = "no_timing_opt"
    blocks = "aes".split()
    raw_data_dir = f"/cyberpi/OpenROAD-flow-scripts/flow/parse/{pdk}"
    # stage_tag = None
    stage_tags = [None, 'final']

    graphs = []
    for block in blocks:
        print(f'-------- {block} --------')
        block_dir = os.path.join(raw_data_dir, tag, block)
        for stage_tag in stage_tags:
            if stage_tag is None:
                def_path = os.path.join(block_dir, 'place.def')
                data_dir = os.path.join(block_dir, 'parsed')
            else:
                def_path = os.path.join(block_dir, f'{stage_tag}.def')
                data_dir = os.path.join(block_dir, 'parsed', stage_tag)
            graph = create_timing_graph(def_path, data_dir)
            graphs.append(graph)

    # index the arcs of each graph by "src dst" and by src
    srcdst2arc_maps = []
    src2arc_maps = []
    for graph in graphs:
        srcdst2arc_maps.append(dict())
        src2arc_maps.append(dict())
        for arc in graph.all_arcs:
            srcdst2arc_maps[-1][arc.src + ' ' + arc.dst] = arc
            if arc.src not in src2arc_maps[-1]:
                src2arc_maps[-1][arc.src] = []
            src2arc_maps[-1][arc.src].append(arc)

    place_pins = set()
    for arc in graphs[0].all_arcs:
        place_pins.add(arc.src)
        place_pins.add(arc.dst)

    # for each placement arc, find the corresponding path in the other stage
    num_mismatch = 0
    for arc in graphs[0].all_arcs:
        chain, flag = dfs(arc.src, arc.dst, place_pins, src2arc_maps[1], True)
        if not flag:
            num_mismatch += 1
    """
    num_mismatch = 0
    for arc_key in srcdst2arc_maps[0]:
        if arc_key not in srcdst2arc_maps[1]:
            print(arc_key)
            num_mismatch += 1
    """
    print(f"num_mismatch: {num_mismatch}")
(Diffs for three more files in this commit were too large to display; view the blobs instead.)
import torch
import numpy as np
import dgl
import torch.nn.functional as F
import random
import pdb
import time
import argparse
import os
from sklearn.metrics import r2_score
# import tee
from model import PredModel
pdk = "asap7"
tag = "no_timing_opt"
dir_prefix = f"/cyberpi/OpenROAD-flow-scripts/flow/parse/{pdk}/{tag}"
# blocks = "aes aes-mbff ethmac gcd ibex jpeg mock-alu uart".split()
blocks = "aes aes-mbff gcd ibex jpeg uart".split()
# blocks_test = set(('aes', 'ibex'))
blocks_test = set()
blocks_train = set()
for block in blocks:
    if block not in blocks_test:
        blocks_train.add(block)
# train config
netdelay = True
celldelay = True
groundtruth = True
def gen_topo(g_hetero):
    torch.cuda.synchronize()
    time_s = time.time()
    # na, nb = g_hetero.edges(etype='fanout2fanin', form='uv')
    # ca, cb = g_hetero.edges(etype='in2out', form='uv')
    # ffa, ffb = g_hetero.edges(etype='clk2q', form='uv')
    # eda, edb = g_hetero.edges(etype='fanout2d', form='uv')
    # g = dgl.graph((torch.cat([na, ca, ffa, eda]).cpu(), torch.cat([nb, cb, ffb, edb]).cpu()))
    # na, nb = g_hetero.edges(etype='net_out', form='uv')
    # ca, cb = g_hetero.edges(etype='cell_out', form='uv')
    # g = dgl.graph((torch.cat([na, ca]).cpu(), torch.cat([nb, cb]).cpu()))
    na, nb = g_hetero.edges(etype='fanout2fanin', form='uv')
    ca, cb = g_hetero.edges(etype='in2out', form='uv')
    pa, pb = g_hetero.edges(etype='_2outport', form='uv')
    g = dgl.graph((torch.cat([na, ca, pa]).cpu(), torch.cat([nb, cb, pb]).cpu()))
    # levelized topological sort: one tensor of node ids per topological level
    topo = dgl.topological_nodes_generator(g)
    ret = [t.cuda() for t in topo]
    torch.cuda.synchronize()
    time_e = time.time()
    return ret, time_e - time_s
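# Illustrative only (added; not part of the original file): what
# dgl.topological_nodes_generator returns for a three-node chain 0 -> 1 -> 2.
def _topo_demo():
    g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
    return list(dgl.topological_nodes_generator(g))
    # [tensor([0]), tensor([1]), tensor([2])] -- one tensor of node ids per level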
def load_data():
    data = {}
    for block in blocks:
        graph_path = os.path.join(dir_prefix, block, "parsed", f"{block}.2.graph.bin")
        g = dgl.load_graphs(graph_path)[0][0].to('cuda')
        topo, topo_time = gen_topo(g)
        ts = {
            # 'fanin_nodes': g.edges(etype='fanout2fanin')[1].long(),
            # 'fanout_nodes': g.edges(etype='fanout2fanin')[0].long(),
            'input_nodes': (g.ndata['nf'][:, 1] < 0.5).nonzero().flatten().type(torch.int64),
            'output_nodes': (g.ndata['nf'][:, 1] > 0.5).nonzero().flatten().type(torch.int64),
            'topo': topo,
        }
        data[block] = g, ts
    return data
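# Note (interpretation, added): g.ndata['nf'][:, 1] appears to be a 0/1 flag marking
# driver/output pins, so 'input_nodes' and 'output_nodes' above partition the pins by
# direction; 'topo' holds the levelized order produced by gen_topo().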
def train(model, data_train):
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)
    # debug: run the forward pass graph by graph
    for k, (g, ts) in data_train.items():
        print(f'-------- {k} --------')
        try:
            pred_net_delays, pred_cell_delays, pred_atslew = model(g, ts)
        except Exception:
            print('error')
            pdb.set_trace()
    # batch_size = 5
    batch_size = 1
    for e in range(100000):
        model.train()
        train_loss_tot_net_delays, train_loss_tot_cell_delays, train_loss_tot_ats = 0, 0, 0
        # random.sample needs a sequence, so materialize the dict items first
        for k, (g, ts) in random.sample(list(data_train.items()), batch_size):
            pred_net_delays, pred_cell_delays, pred_atslew = model(g, ts)
            pdb.set_trace()
if __name__ == "__main__":
data = load_data()
data_train = {k: t for k, t in data.items() if k in blocks_train}
data_test = {k: t for k, t in data.items() if k in blocks_test}
model = PredModel()
model.cuda()
train(model, data_train)
import torch
import torch.nn.functional as F
import dgl
import dgl.function as fn
import functools
import pdb
class MLP(torch.nn.Module):
    def __init__(self, *sizes, batchnorm=False, dropout=False):
        super().__init__()
        fcs = []
        for i in range(1, len(sizes)):
            fcs.append(torch.nn.Linear(sizes[i - 1], sizes[i]))
            if i < len(sizes) - 1:
                fcs.append(torch.nn.LeakyReLU(negative_slope=0.2))
                if dropout: fcs.append(torch.nn.Dropout(p=0.2))
                if batchnorm: fcs.append(torch.nn.BatchNorm1d(sizes[i]))
        self.layers = torch.nn.Sequential(*fcs)

    def forward(self, x):
        return self.layers(x)
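# Illustrative (added; not part of the original file): MLP(10, 64, 4) builds
# Linear(10, 64) -> LeakyReLU -> Linear(64, 4); the activation (and optional
# Dropout/BatchNorm) is skipped after the final Linear by the `i < len(sizes) - 1` guard.
def _mlp_demo():
    mlp = MLP(10, 64, 4)
    return mlp(torch.randn(8, 10)).shape  # torch.Size([8, 4])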
class NetConv(torch.nn.Module):
    def __init__(self, in_nf, in_ef, out_nf, h1=32, h2=32):
        super().__init__()
        self.in_nf = in_nf
        self.in_ef = in_ef
        self.out_nf = out_nf
        self.h1 = h1
        self.h2 = h2
        self.MLP_msg_i2o = MLP(self.in_nf * 2 + self.in_ef, 64, 64, 64, 1 + self.h1 + self.h2)
        self.MLP_reduce_o = MLP(self.in_nf + self.h1 + self.h2, 64, 64, 64, self.out_nf)
        self.MLP_msg_o2i = MLP(self.in_nf * 2 + self.in_ef, 64, 64, 64, 64, self.out_nf)

    def edge_msg_i(self, edges):
        x = torch.cat([edges.src['nf'], edges.dst['nf'], edges.data['ef']], dim=1)
        x = self.MLP_msg_o2i(x)
        return {'efi': x}

    def edge_msg_o(self, edges):
        x = torch.cat([edges.src['nf'], edges.dst['nf'], edges.data['ef']], dim=1)
        x = self.MLP_msg_i2o(x)
        k, f1, f2 = torch.split(x, [1, self.h1, self.h2], dim=1)
        k = torch.sigmoid(k)  # learned gate applied to both message channels
        return {'efo1': f1 * k, 'efo2': f2 * k}

    def node_reduce_o(self, nodes):
        x = torch.cat([nodes.data['nf'], nodes.data['nfo1'], nodes.data['nfo2']], dim=1)
        x = self.MLP_reduce_o(x)
        return {'new_nf': x}
    def forward(self, g, ts, nf):
        with g.local_scope():
            g.ndata['nf'] = nf
            # # fanin nodes
            # g.update_all(self.edge_msg_i, fn.sum('efi', 'new_nf'), etype='fanout2fanin')
            # # fanout nodes
            # g.apply_edges(self.edge_msg_o, etype='fanin2fanout')
            # g.update_all(fn.copy_e('efo1', 'efo1'), fn.sum('efo1', 'nfo1'), etype='fanin2fanout')
            # g.update_all(fn.copy_e('efo2', 'efo2'), fn.max('efo2', 'nfo2'), etype='fanin2fanout')
            # g.apply_nodes(self.node_reduce_o, ts['fanout_nodes'])
            # input nodes: sum the messages arriving over 'net_out' edges
            g.update_all(self.edge_msg_i, fn.sum('efi', 'new_nf'), etype='net_out')
            # output nodes: aggregate 'net_in' messages with sum and max, then reduce
            g.apply_edges(self.edge_msg_o, etype='net_in')
            g.update_all(fn.copy_e('efo1', 'efo1'), fn.sum('efo1', 'nfo1'), etype='net_in')
            g.update_all(fn.copy_e('efo2', 'efo2'), fn.max('efo2', 'nfo2'), etype='net_in')
            g.apply_nodes(self.node_reduce_o, ts['output_nodes'])
            return g.ndata['new_nf']
class SignalProp(torch.nn.Module):
    def __init__(self, in_nf, in_cell_num_luts, in_cell_lut_sz, out_nf, out_cef, h1=32, h2=32, lut_dup=4):
        super().__init__()
        self.in_nf = in_nf
        self.in_cell_num_luts = in_cell_num_luts
        self.in_cell_lut_sz = in_cell_lut_sz
        self.out_nf = out_nf
        self.out_cef = out_cef
        self.h1 = h1
        self.h2 = h2
        self.lut_dup = lut_dup
        self.MLP_netprop = MLP(self.out_nf + 2 * self.in_nf, 64, 64, 64, 64, self.out_nf)
        self.MLP_lut_query = MLP(self.out_nf + 2 * self.in_nf, 64, 64, 64, self.in_cell_num_luts * lut_dup * 2)
        self.MLP_lut_attention = MLP(1 + 2 + self.in_cell_lut_sz * 2, 64, 64, 64, self.in_cell_lut_sz * 2)
        self.MLP_cellarc_msg = MLP(self.out_nf + 2 * self.in_nf + self.in_cell_num_luts * self.lut_dup, 64, 64, 64, 1 + self.h1 + self.h2 + self.out_cef)
        self.MLP_cellreduce = MLP(self.in_nf + self.h1 + self.h2, 64, 64, 64, self.out_nf)

    def edge_msg_net(self, edges, groundtruth=False):
        if groundtruth:
            last_nf = edges.src['n_atslew']
        else:
            last_nf = edges.src['new_nf']
        x = torch.cat([last_nf, edges.src['nf'], edges.dst['nf']], dim=1)
        x = self.MLP_netprop(x)
        return {'efn': x}

    def edge_msg_cell(self, edges, groundtruth=False):
        # generate the LUT axis query
        if groundtruth:
            last_nf = edges.src['n_atslew']
        else:
            last_nf = edges.src['new_nf']
        q = torch.cat([last_nf, edges.src['nf'], edges.dst['nf']], dim=1)
        q = self.MLP_lut_query(q)
        q = q.reshape(-1, 2)
        # answer the LUT axis query
        axis_len = self.in_cell_num_luts * (1 + 2 * self.in_cell_lut_sz)
        axis = edges.data['ef'][:, :axis_len]
        axis = axis.reshape(-1, 1 + 2 * self.in_cell_lut_sz)
        axis = axis.repeat(1, self.lut_dup).reshape(-1, 1 + 2 * self.in_cell_lut_sz)
        a = self.MLP_lut_attention(torch.cat([q, axis], dim=1))
        # transform the answer into an attention mask matrix
        a = a.reshape(-1, 2, self.in_cell_lut_sz)
        ax, ay = torch.split(a, [1, 1], dim=1)
        a = torch.matmul(ax.reshape(-1, self.in_cell_lut_sz, 1), ay.reshape(-1, 1, self.in_cell_lut_sz))  # batched outer product
        # look up the masked values in the LUT
        tables_len = self.in_cell_num_luts * self.in_cell_lut_sz ** 2
        tables = edges.data['ef'][:, axis_len:axis_len + tables_len]
        r = torch.matmul(tables.reshape(-1, 1, 1, self.in_cell_lut_sz ** 2), a.reshape(-1, 4, self.in_cell_lut_sz ** 2, 1))  # batched dot product
        # construct the final message
        r = r.reshape(len(edges), self.in_cell_num_luts * self.lut_dup)
        x = torch.cat([last_nf, edges.src['nf'], edges.dst['nf'], r], dim=1)
        x = self.MLP_cellarc_msg(x)
        k, f1, f2, cef = torch.split(x, [1, self.h1, self.h2, self.out_cef], dim=1)
        k = torch.sigmoid(k)
        return {'efc1': f1 * k, 'efc2': f2 * k, 'efce': cef}

    def node_reduce_o(self, nodes):
        x = torch.cat([nodes.data['nf'], nodes.data['nfc1'], nodes.data['nfc2']], dim=1)
        x = self.MLP_cellreduce(x)
        return {'new_nf': x}

    def node_skip_level_o(self, nodes):
        return {'new_nf': nodes.data['n_atslew']}
    def forward(self, g, ts, nf):
        assert len(ts['topo']) % 2 == 0, 'The number of logic levels must be even (net, cell, net, ...)'
        with g.local_scope():
            # init level 0 with ground truth features
            g.ndata['nf'] = nf
            g.ndata['new_nf'] = torch.zeros(g.num_nodes(), self.out_nf, device='cuda', dtype=nf.dtype)
            # no primary io port considered here
            # g.apply_nodes(self.node_skip_level_o, ts['pi_nodes'])

            def prop_net(nodes, groundtruth):
                g.pull(nodes, functools.partial(self.edge_msg_net, groundtruth=groundtruth), fn.sum('efn', 'new_nf'), etype='fanout2fanin')

            def prop_cell(nodes, groundtruth):
                # cell_edges = ['in2out', 'clk2q', 'fanout2d']
                # for etype in cell_edges:
                #     es = g.in_edges(nodes, etype=etype)
                #     g.apply_edges(functools.partial(self.edge_msg_cell, groundtruth=groundtruth), es, etype=etype)
                #     g.send_and_recv(es, fn.copy_e('efc1', 'efc1'), fn.sum('efc1', 'nfc1'), etype=etype)
                #     g.send_and_recv(es, fn.copy_e('efc2', 'efc2'), fn.max('efc2', 'nfc2'), etype=etype)
                etype = 'cell_out'
                es = g.in_edges(nodes, etype=etype)
                g.apply_edges(functools.partial(self.edge_msg_cell, groundtruth=groundtruth), es, etype=etype)
                g.send_and_recv(es, fn.copy_e('efc1', 'efc1'), fn.sum('efc1', 'nfc1'), etype=etype)
                g.send_and_recv(es, fn.copy_e('efc2', 'efc2'), fn.max('efc2', 'nfc2'), etype=etype)
                g.apply_nodes(self.node_reduce_o, nodes)

            # with groundtruth=True the messages would be fed ground-truth arrival/slew
            # features (n_atslew) in one shot; it is disabled here, so signals are
            # propagated level by level along the topological order instead
            groundtruth = False
            if groundtruth:
                # no level-by-level propagation needed
                prop_net(ts['input_nodes'], groundtruth)
                prop_cell(ts['output_nodes_nonpi'], groundtruth)
            else:
                # propagate, alternating net levels (odd) and cell levels (even)
                for i in range(1, len(ts['topo'])):
                    if i % 2 == 1:
                        prop_net(ts['topo'][i], groundtruth)
                    else:
                        prop_cell(ts['topo'][i], groundtruth)
            return g.ndata['new_nf'], g.edges['cell_out'].data['efce']
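# Shape walk-through of the LUT lookup in edge_msg_cell above (illustrative sketch,
# added; it uses the defaults PredModel passes below: in_cell_num_luts=8,
# in_cell_lut_sz=7, lut_dup=4, with random tensors standing in for edge data):
def _lut_mask_demo(n_edges=3, num_luts=8, lut_sz=7, dup=4):
    ax = torch.randn(n_edges * num_luts * dup, lut_sz, 1)  # per-query x-axis weights
    ay = torch.randn(n_edges * num_luts * dup, 1, lut_sz)  # per-query y-axis weights
    a = torch.matmul(ax, ay)                               # (N, 7, 7) outer-product masks
    tables = torch.randn(n_edges * num_luts, 1, 1, lut_sz ** 2)  # flattened 7x7 LUTs
    r = torch.matmul(tables, a.reshape(-1, dup, lut_sz ** 2, 1))  # dot each mask with its LUT
    return r.reshape(n_edges, num_luts * dup)  # (3, 32): one scalar per (lut, dup) pair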
class PredModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.nc1 = NetConv(10, 2, 32)
        self.nc2 = NetConv(32, 2, 32)
        self.nc3 = NetConv(32, 2, 16)  # 16 = 4x delay + 12x arbitrary (might include cap, beta)
        self.prop = SignalProp(10 + 16, 8, 7, 8, 4)

    def forward(self, g, ts):
        nf0 = g.ndata['nf']
        x = self.nc1(g, ts, nf0)
        x = self.nc2(g, ts, x)
        x = self.nc3(g, ts, x)
        net_delays = x[:, :4]
        nf1 = torch.cat([nf0, x], dim=1)
        nf2, cell_delays = self.prop(g, ts, nf1)
        return net_delays, cell_delays, nf2
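# Illustrative usage (mirrors the train script above; added, not part of the original
# file). The model expects a DGL heterograph with edge types 'net_out', 'net_in' and
# 'cell_out', 10-dim node features in g.ndata['nf'], LUT axes/tables in the 'cell_out'
# edge feature 'ef', and the `ts` dict built by load_data():
#
#   model = PredModel().cuda()
#   net_delays, cell_delays, atslew = model(g, ts)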