Commit 36da3345 by Huang Di

s1 s2 exchange

parent ed27460a
......@@ -47,9 +47,12 @@ def Probability_distribution(D1,D2,Z1,Z2):
i=i2+i3
if i1_5 <= 0 or i1_5 <= i:
i1=i1_1 + i1_2 + i1_3 + i1_4
if i1_5 > 0 and i1_5 >= i:
M_PD=i/i1
elif i1_5 > 0 and i1_5 >= i:
i1=i1_5
M_PD=i/i1
M_PD=i/i1
else:
M_PD = -1
if M_PD > 1:
M_PD=1
else:
......
......@@ -112,9 +112,9 @@ print()
print('Analyzing the minimum Z...')
# get all edges
bottom_threshold = 0
D_statistics, adjusted_D_values, triangles, redstar_indices, min_redstar, s1_record \
D_statistics, adjusted_D_values, triangles, redstar_indices, min_redstar, s2_record \
= read_data(data_path, bottom_threshold, populations)
builder = TreeBuilder(populations, triangles, s1_record)
builder = TreeBuilder(populations, triangles, s2_record)
builder.D2graph(D_statistics)
g = builder.g
scc0 = g.SCC()
......@@ -154,13 +154,13 @@ if threshold < bottom_threshold:
ValueError('the given Z is smaller than the minimum Z!')
#---------------------------------------------------------------#
D_statistics, adjusted_D_values, triangles, redstar_indices, min_redstar, s1_record \
D_statistics, adjusted_D_values, triangles, redstar_indices, min_redstar, s2_record \
= read_data(data_path, threshold, populations)
if DEBUG:
if min_redstar:
print('min red star:', min(min_redstar))
print(adjusted_D_values)
builder = TreeBuilder(populations, triangles, s1_record)
builder = TreeBuilder(populations, triangles, s2_record)
builder.D2graph(D_statistics)
g = builder.g
......@@ -444,7 +444,7 @@ print('Building the tree...')
builder.build_tree(eq_scc=eq_scc, D=None, **args)
print()
print('Plotting...')
builder.export_graph(save_path, print_gene_flow=True)
builder.export_graph(save_path, print_gene_flow=print_gene_flow)
print()
print('Saved to %s' % save_path)
......
......@@ -60,7 +60,7 @@ def adjust_D_bak(triangle, threshold):
adjusted_value.append((adjust_idx, adjust_threshold))
return adjusted_value, adjusted_data
def adjust_D(key, triangle, threshold, min_redstar, s1_record):
def adjust_D(key, triangle, threshold, min_redstar, s2_record):
adjusted_data = []
to_adjust = []
for i, value in enumerate(triangle):
......@@ -79,22 +79,38 @@ def adjust_D(key, triangle, threshold, min_redstar, s1_record):
redstar_idx = redstar_index[index]
def add_s1_record(s2, tar, s1):
if (key[s2], key[tar]) not in s1_record:
s1_record[(key[s2], key[tar])] = []
s1_record[(key[s2], key[tar])].append(key[s1])
# def add_s1_record(s2, tar, s1):
# if (key[s2], key[tar]) not in s1_record:
# s1_record[(key[s2], key[tar])] = []
# s1_record[(key[s2], key[tar])].append(key[s1])
# T = 0
# A = 1
# B = 2
# if redstar_idx == 2:
# add_s1_record(T, A, B)
# add_s1_record(B, A, T)
# elif redstar_idx == 1:
# add_s1_record(T, B, A)
# add_s1_record(A, B, T)
# elif redstar_idx == 0:
# add_s1_record(A, T, B)
# add_s1_record(B, T, A)
def add_s2_record(s1, tar, s2):
if (key[s1], key[tar]) not in s2_record:
s2_record[(key[s1], key[tar])] = []
s2_record[(key[s1], key[tar])].append(key[s2])
T = 0
A = 1
B = 2
if redstar_idx == 2:
add_s1_record(T, A, B)
add_s1_record(B, A, T)
add_s2_record(T, A, B)
# add_s2_record(B, A, T)
elif redstar_idx == 1:
add_s1_record(T, B, A)
add_s1_record(A, B, T)
# add_s2_record(T, B, A)
add_s2_record(A, B, T)
elif redstar_idx == 0:
add_s1_record(A, T, B)
add_s1_record(B, T, A)
# add_s2_record(A, T, B)
add_s2_record(B, T, A)
if DEBUG:
print('------------------------------')
print(key)
......@@ -105,7 +121,7 @@ def adjust_D(key, triangle, threshold, min_redstar, s1_record):
adjusted_data[redstar_idx] = 0
return None, adjusted_data, redstar_idx
def get_adjusted_D_statistics(D_statistics, adjusted_D_values, triangles, A, B, C, f4, value, threshold, min_redstar, redstar_indices, key_visited, s1_record):
def get_adjusted_D_statistics(D_statistics, adjusted_D_values, triangles, A, B, C, f4, value, threshold, min_redstar, redstar_indices, key_visited, s2_record):
key = tuple(sorted([A, B, C]))
if key in key_visited: return
pos = key.index(C)
......@@ -120,7 +136,7 @@ def get_adjusted_D_statistics(D_statistics, adjusted_D_values, triangles, A, B,
if None not in triangles[key]['D']:
key_visited.append(key)
adjusted_value, adjusted_data, redstar_index = adjust_D(key, triangles[key]['D'], threshold, min_redstar, s1_record)
adjusted_value, adjusted_data, redstar_index = adjust_D(key, triangles[key]['D'], threshold, min_redstar, s2_record)
redstar_indices[key] = redstar_index
# if len(adjusted_value) > 0:
# for idx, value in adjusted_value:
......
......@@ -10,12 +10,12 @@ from geneflow_P import Probability_distribution
DEBUG = False
class TreeBuilder(object):
def __init__(self, populations, triangles, s1_record):
def __init__(self, populations, triangles, s2_record):
self.triangles = triangles
self.populations = sorted(populations)
self.num_populations = len(self.populations)
self.relations = list(combinations(self.populations, 2))
self.s1_record = s1_record
self.s2_record = s2_record
self.root = None
self.gene_flow = None
......@@ -414,13 +414,13 @@ class TreeBuilder(object):
propotion = {}
print('Calculating gene flows...')
gf_cnt = 0
for s2, tar in gene_flow:
for s1, tar in gene_flow:
if tar in tar_block_list: continue
propotion[(s2, tar)] = []
if (s2, tar) not in self.s1_record:
if s1 in s1_block_list: continue
propotion[(s1, tar)] = []
if (s1, tar) not in self.s2_record:
continue
for s1 in self.s1_record[(s2, tar)]:
if s1 in s1_block_list: continue
for s2 in self.s2_record[(s1, tar)]:
if DEBUG:
print('-----------------------------------------------')
# pd = False to make it fast
......@@ -438,20 +438,20 @@ class TreeBuilder(object):
mix_bc, m_pd_bc = calculate_m1(s1, tar, s2, pd=True)
else:
m_pd_bc = float('nan')
propotion[(s2, tar)].append((s1, mix_bc, m_pd_bc))
propotion[(s1, tar)].append((s2, mix_bc, m_pd_bc))
gf_cnt += 1
print()
gf_cnt = 0
if bidirectional:
print('Calculating reversed gene flows...')
for tar, s2 in gene_flow:
for tar, s1 in gene_flow:
if s1 in s1_block_list: continue
if tar in tar_block_list: continue
propotion[(s2, tar)] = []
if (s2, tar) not in self.s1_record:
propotion[(s1, tar)] = []
if (s1, tar) not in self.s2_record:
continue
for s1 in self.s1_record[(s2, tar)]:
if s1 in s1_block_list: continue
for s2 in self.s2_record[(s1, tar)]:
mix_bc, m_pd_bc = calculate_m1(s1, tar, s2)
# mix_a1b, mix_pd_a1b = calculate_m1(s2, tar, s1)
# print('mix_bc, mix_a1b', mix_bc, mix_a1b)
......@@ -464,8 +464,60 @@ class TreeBuilder(object):
mix_bc, m_pd_bc = calculate_m1(s1, tar, s2, pd=True)
else:
m_pd_bc = float('nan')
propotion[(s2, tar)].append((s1, mix_bc, m_pd_bc))
propotion[(s1, tar)].append((s2, mix_bc, m_pd_bc))
gf_cnt += 1
# for s2, tar in gene_flow:
# if tar in tar_block_list: continue
# propotion[(s2, tar)] = []
# if (s2, tar) not in self.s1_record:
# continue
# for s1 in self.s1_record[(s2, tar)]:
# if s1 in s1_block_list: continue
# if DEBUG:
# print('-----------------------------------------------')
# # pd = False to make it fast
# mix_bc, m_pd_bc = calculate_m1(s1, tar, s2)
# if DEBUG:
# print('mix:', mix_bc)
# # mix_a1b, mix_pd_a1b = calculate_m1(s2, tar, s1)
# # print('mix:', mix_a1b)
# # if mix_bc is None or mix_a1b is None: continue
# if mix_bc is None: continue
# # if mix_bc > 1 or mix_bc < 0 or mix_a1b > 1 or mix_a1b < 0: continue
# if mix_bc > 1 or mix_bc < 0: continue
# print('Calculating gene flow %d' % gf_cnt)
# if calculate_pd:
# mix_bc, m_pd_bc = calculate_m1(s1, tar, s2, pd=True)
# else:
# m_pd_bc = float('nan')
# propotion[(s2, tar)].append((s1, mix_bc, m_pd_bc))
# gf_cnt += 1
# print()
# gf_cnt = 0
# if bidirectional:
# print('Calculating reversed gene flows...')
# for tar, s2 in gene_flow:
# if tar in tar_block_list: continue
# propotion[(s2, tar)] = []
# if (s2, tar) not in self.s1_record:
# continue
# for s1 in self.s1_record[(s2, tar)]:
# if s1 in s1_block_list: continue
# mix_bc, m_pd_bc = calculate_m1(s1, tar, s2)
# # mix_a1b, mix_pd_a1b = calculate_m1(s2, tar, s1)
# # print('mix_bc, mix_a1b', mix_bc, mix_a1b)
# # if mix_bc is None or mix_a1b is None: continue
# if mix_bc is None: continue
# if mix_bc > 1 or mix_bc < 0: continue
# # if mix_bc > 1 or mix_bc < 0 or mix_a1b > 1 or mix_a1b < 0: continue
# print('Calculating gene flow %d' % gf_cnt)
# if calculate_pd:
# mix_bc, m_pd_bc = calculate_m1(s1, tar, s2, pd=True)
# else:
# m_pd_bc = float('nan')
# propotion[(s2, tar)].append((s1, mix_bc, m_pd_bc))
# gf_cnt += 1
print()
print('---------------------------------------------------------------')
......@@ -478,16 +530,16 @@ class TreeBuilder(object):
m_pd_display_threshold = kwargs.get('gene_flow_display_confidence', 0.0)
gene_flows_checked_by_propotion = []
for gene_flow, m1s in propotion.items():
s2, tar = gene_flow
s1, tar = gene_flow
for m1 in m1s:
s1, mix, m_pd = m1
# print(s2, tar, s1, mix, m_pd)
print(mat.format(s2, tar, s1, mix, m_pd))
if (s2, tar) not in gene_flows_checked_by_propotion:
if (s1, tar) not in gene_flows_checked_by_propotion:
if not (calculate_pd and m_pd < m_pd_display_threshold):
gene_flows_checked_by_propotion.append((s2, tar))
print(',,,', gene_flows_checked_by_propotion)
print(m_pd_display_threshold)
gene_flows_checked_by_propotion.append((s1, tar))
# print(',,,', gene_flows_checked_by_propotion)
# print(m_pd_display_threshold)
return gene_flows_checked_by_propotion
def calculate_propotion_bak2(self, gene_flow, order, visited_leaves, bidirectional=True):
......
......@@ -10,7 +10,7 @@ def read_data(data_path, threshold, populations):
key_visited = []
eq_scc = None
min_redstar = []
s1_record = {}
s2_record = {}
with open(data_path) as f:
for l in f.readlines():
if not l.startswith('result'): continue
......@@ -18,10 +18,10 @@ def read_data(data_path, threshold, populations):
if A not in populations or B not in populations or C not in populations: continue
f4 = float(l.split()[5])
value = float(l.split()[6])
get_adjusted_D_statistics(D_statistics, adjusted_D_values, triangles, A, B, C, f4, value, threshold, min_redstar, redstar_indices, key_visited, s1_record)
get_adjusted_D_statistics(D_statistics, adjusted_D_values, triangles, A, B, C, f4, value, threshold, min_redstar, redstar_indices, key_visited, s2_record)
if len(D_statistics) >= len(populations) * (len(populations) - 1) * (len(populations) - 2) / 2:
break
return D_statistics, adjusted_D_values, triangles, redstar_indices, min_redstar, s1_record
return D_statistics, adjusted_D_values, triangles, redstar_indices, min_redstar, s2_record
def equal_vertices_judgement(g, vertices):
edges = list(combinations(vertices, 2))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment