Commit 36da3345 by Huang Di

s1 s2 exchange

parent ed27460a
...@@ -47,9 +47,12 @@ def Probability_distribution(D1,D2,Z1,Z2): ...@@ -47,9 +47,12 @@ def Probability_distribution(D1,D2,Z1,Z2):
i=i2+i3 i=i2+i3
if i1_5 <= 0 or i1_5 <= i: if i1_5 <= 0 or i1_5 <= i:
i1=i1_1 + i1_2 + i1_3 + i1_4 i1=i1_1 + i1_2 + i1_3 + i1_4
if i1_5 > 0 and i1_5 >= i: M_PD=i/i1
elif i1_5 > 0 and i1_5 >= i:
i1=i1_5 i1=i1_5
M_PD=i/i1 M_PD=i/i1
else:
M_PD = -1
if M_PD > 1: if M_PD > 1:
M_PD=1 M_PD=1
else: else:
......
...@@ -112,9 +112,9 @@ print() ...@@ -112,9 +112,9 @@ print()
print('Analyzing the minimum Z...') print('Analyzing the minimum Z...')
# get all edges # get all edges
bottom_threshold = 0 bottom_threshold = 0
D_statistics, adjusted_D_values, triangles, redstar_indices, min_redstar, s1_record \ D_statistics, adjusted_D_values, triangles, redstar_indices, min_redstar, s2_record \
= read_data(data_path, bottom_threshold, populations) = read_data(data_path, bottom_threshold, populations)
builder = TreeBuilder(populations, triangles, s1_record) builder = TreeBuilder(populations, triangles, s2_record)
builder.D2graph(D_statistics) builder.D2graph(D_statistics)
g = builder.g g = builder.g
scc0 = g.SCC() scc0 = g.SCC()
...@@ -154,13 +154,13 @@ if threshold < bottom_threshold: ...@@ -154,13 +154,13 @@ if threshold < bottom_threshold:
ValueError('the given Z is smaller than the minimum Z!') ValueError('the given Z is smaller than the minimum Z!')
#---------------------------------------------------------------# #---------------------------------------------------------------#
D_statistics, adjusted_D_values, triangles, redstar_indices, min_redstar, s1_record \ D_statistics, adjusted_D_values, triangles, redstar_indices, min_redstar, s2_record \
= read_data(data_path, threshold, populations) = read_data(data_path, threshold, populations)
if DEBUG: if DEBUG:
if min_redstar: if min_redstar:
print('min red star:', min(min_redstar)) print('min red star:', min(min_redstar))
print(adjusted_D_values) print(adjusted_D_values)
builder = TreeBuilder(populations, triangles, s1_record) builder = TreeBuilder(populations, triangles, s2_record)
builder.D2graph(D_statistics) builder.D2graph(D_statistics)
g = builder.g g = builder.g
...@@ -444,7 +444,7 @@ print('Building the tree...') ...@@ -444,7 +444,7 @@ print('Building the tree...')
builder.build_tree(eq_scc=eq_scc, D=None, **args) builder.build_tree(eq_scc=eq_scc, D=None, **args)
print() print()
print('Plotting...') print('Plotting...')
builder.export_graph(save_path, print_gene_flow=True) builder.export_graph(save_path, print_gene_flow=print_gene_flow)
print() print()
print('Saved to %s' % save_path) print('Saved to %s' % save_path)
......
...@@ -60,7 +60,7 @@ def adjust_D_bak(triangle, threshold): ...@@ -60,7 +60,7 @@ def adjust_D_bak(triangle, threshold):
adjusted_value.append((adjust_idx, adjust_threshold)) adjusted_value.append((adjust_idx, adjust_threshold))
return adjusted_value, adjusted_data return adjusted_value, adjusted_data
def adjust_D(key, triangle, threshold, min_redstar, s1_record): def adjust_D(key, triangle, threshold, min_redstar, s2_record):
adjusted_data = [] adjusted_data = []
to_adjust = [] to_adjust = []
for i, value in enumerate(triangle): for i, value in enumerate(triangle):
...@@ -79,22 +79,38 @@ def adjust_D(key, triangle, threshold, min_redstar, s1_record): ...@@ -79,22 +79,38 @@ def adjust_D(key, triangle, threshold, min_redstar, s1_record):
redstar_idx = redstar_index[index] redstar_idx = redstar_index[index]
def add_s1_record(s2, tar, s1): # def add_s1_record(s2, tar, s1):
if (key[s2], key[tar]) not in s1_record: # if (key[s2], key[tar]) not in s1_record:
s1_record[(key[s2], key[tar])] = [] # s1_record[(key[s2], key[tar])] = []
s1_record[(key[s2], key[tar])].append(key[s1]) # s1_record[(key[s2], key[tar])].append(key[s1])
# T = 0
# A = 1
# B = 2
# if redstar_idx == 2:
# add_s1_record(T, A, B)
# add_s1_record(B, A, T)
# elif redstar_idx == 1:
# add_s1_record(T, B, A)
# add_s1_record(A, B, T)
# elif redstar_idx == 0:
# add_s1_record(A, T, B)
# add_s1_record(B, T, A)
def add_s2_record(s1, tar, s2):
if (key[s1], key[tar]) not in s2_record:
s2_record[(key[s1], key[tar])] = []
s2_record[(key[s1], key[tar])].append(key[s2])
T = 0 T = 0
A = 1 A = 1
B = 2 B = 2
if redstar_idx == 2: if redstar_idx == 2:
add_s1_record(T, A, B) add_s2_record(T, A, B)
add_s1_record(B, A, T) # add_s2_record(B, A, T)
elif redstar_idx == 1: elif redstar_idx == 1:
add_s1_record(T, B, A) # add_s2_record(T, B, A)
add_s1_record(A, B, T) add_s2_record(A, B, T)
elif redstar_idx == 0: elif redstar_idx == 0:
add_s1_record(A, T, B) # add_s2_record(A, T, B)
add_s1_record(B, T, A) add_s2_record(B, T, A)
if DEBUG: if DEBUG:
print('------------------------------') print('------------------------------')
print(key) print(key)
...@@ -105,7 +121,7 @@ def adjust_D(key, triangle, threshold, min_redstar, s1_record): ...@@ -105,7 +121,7 @@ def adjust_D(key, triangle, threshold, min_redstar, s1_record):
adjusted_data[redstar_idx] = 0 adjusted_data[redstar_idx] = 0
return None, adjusted_data, redstar_idx return None, adjusted_data, redstar_idx
def get_adjusted_D_statistics(D_statistics, adjusted_D_values, triangles, A, B, C, f4, value, threshold, min_redstar, redstar_indices, key_visited, s1_record): def get_adjusted_D_statistics(D_statistics, adjusted_D_values, triangles, A, B, C, f4, value, threshold, min_redstar, redstar_indices, key_visited, s2_record):
key = tuple(sorted([A, B, C])) key = tuple(sorted([A, B, C]))
if key in key_visited: return if key in key_visited: return
pos = key.index(C) pos = key.index(C)
...@@ -120,7 +136,7 @@ def get_adjusted_D_statistics(D_statistics, adjusted_D_values, triangles, A, B, ...@@ -120,7 +136,7 @@ def get_adjusted_D_statistics(D_statistics, adjusted_D_values, triangles, A, B,
if None not in triangles[key]['D']: if None not in triangles[key]['D']:
key_visited.append(key) key_visited.append(key)
adjusted_value, adjusted_data, redstar_index = adjust_D(key, triangles[key]['D'], threshold, min_redstar, s1_record) adjusted_value, adjusted_data, redstar_index = adjust_D(key, triangles[key]['D'], threshold, min_redstar, s2_record)
redstar_indices[key] = redstar_index redstar_indices[key] = redstar_index
# if len(adjusted_value) > 0: # if len(adjusted_value) > 0:
# for idx, value in adjusted_value: # for idx, value in adjusted_value:
......
...@@ -10,12 +10,12 @@ from geneflow_P import Probability_distribution ...@@ -10,12 +10,12 @@ from geneflow_P import Probability_distribution
DEBUG = False DEBUG = False
class TreeBuilder(object): class TreeBuilder(object):
def __init__(self, populations, triangles, s1_record): def __init__(self, populations, triangles, s2_record):
self.triangles = triangles self.triangles = triangles
self.populations = sorted(populations) self.populations = sorted(populations)
self.num_populations = len(self.populations) self.num_populations = len(self.populations)
self.relations = list(combinations(self.populations, 2)) self.relations = list(combinations(self.populations, 2))
self.s1_record = s1_record self.s2_record = s2_record
self.root = None self.root = None
self.gene_flow = None self.gene_flow = None
...@@ -414,13 +414,13 @@ class TreeBuilder(object): ...@@ -414,13 +414,13 @@ class TreeBuilder(object):
propotion = {} propotion = {}
print('Calculating gene flows...') print('Calculating gene flows...')
gf_cnt = 0 gf_cnt = 0
for s2, tar in gene_flow: for s1, tar in gene_flow:
if tar in tar_block_list: continue if tar in tar_block_list: continue
propotion[(s2, tar)] = [] if s1 in s1_block_list: continue
if (s2, tar) not in self.s1_record: propotion[(s1, tar)] = []
if (s1, tar) not in self.s2_record:
continue continue
for s1 in self.s1_record[(s2, tar)]: for s2 in self.s2_record[(s1, tar)]:
if s1 in s1_block_list: continue
if DEBUG: if DEBUG:
print('-----------------------------------------------') print('-----------------------------------------------')
# pd = False to make it fast # pd = False to make it fast
...@@ -438,20 +438,20 @@ class TreeBuilder(object): ...@@ -438,20 +438,20 @@ class TreeBuilder(object):
mix_bc, m_pd_bc = calculate_m1(s1, tar, s2, pd=True) mix_bc, m_pd_bc = calculate_m1(s1, tar, s2, pd=True)
else: else:
m_pd_bc = float('nan') m_pd_bc = float('nan')
propotion[(s2, tar)].append((s1, mix_bc, m_pd_bc)) propotion[(s1, tar)].append((s2, mix_bc, m_pd_bc))
gf_cnt += 1 gf_cnt += 1
print() print()
gf_cnt = 0 gf_cnt = 0
if bidirectional: if bidirectional:
print('Calculating reversed gene flows...') print('Calculating reversed gene flows...')
for tar, s2 in gene_flow: for tar, s1 in gene_flow:
if s1 in s1_block_list: continue
if tar in tar_block_list: continue if tar in tar_block_list: continue
propotion[(s2, tar)] = [] propotion[(s1, tar)] = []
if (s2, tar) not in self.s1_record: if (s1, tar) not in self.s2_record:
continue continue
for s1 in self.s1_record[(s2, tar)]: for s2 in self.s2_record[(s1, tar)]:
if s1 in s1_block_list: continue
mix_bc, m_pd_bc = calculate_m1(s1, tar, s2) mix_bc, m_pd_bc = calculate_m1(s1, tar, s2)
# mix_a1b, mix_pd_a1b = calculate_m1(s2, tar, s1) # mix_a1b, mix_pd_a1b = calculate_m1(s2, tar, s1)
# print('mix_bc, mix_a1b', mix_bc, mix_a1b) # print('mix_bc, mix_a1b', mix_bc, mix_a1b)
...@@ -464,8 +464,60 @@ class TreeBuilder(object): ...@@ -464,8 +464,60 @@ class TreeBuilder(object):
mix_bc, m_pd_bc = calculate_m1(s1, tar, s2, pd=True) mix_bc, m_pd_bc = calculate_m1(s1, tar, s2, pd=True)
else: else:
m_pd_bc = float('nan') m_pd_bc = float('nan')
propotion[(s2, tar)].append((s1, mix_bc, m_pd_bc)) propotion[(s1, tar)].append((s2, mix_bc, m_pd_bc))
gf_cnt += 1 gf_cnt += 1
# for s2, tar in gene_flow:
# if tar in tar_block_list: continue
# propotion[(s2, tar)] = []
# if (s2, tar) not in self.s1_record:
# continue
# for s1 in self.s1_record[(s2, tar)]:
# if s1 in s1_block_list: continue
# if DEBUG:
# print('-----------------------------------------------')
# # pd = False to make it fast
# mix_bc, m_pd_bc = calculate_m1(s1, tar, s2)
# if DEBUG:
# print('mix:', mix_bc)
# # mix_a1b, mix_pd_a1b = calculate_m1(s2, tar, s1)
# # print('mix:', mix_a1b)
# # if mix_bc is None or mix_a1b is None: continue
# if mix_bc is None: continue
# # if mix_bc > 1 or mix_bc < 0 or mix_a1b > 1 or mix_a1b < 0: continue
# if mix_bc > 1 or mix_bc < 0: continue
# print('Calculating gene flow %d' % gf_cnt)
# if calculate_pd:
# mix_bc, m_pd_bc = calculate_m1(s1, tar, s2, pd=True)
# else:
# m_pd_bc = float('nan')
# propotion[(s2, tar)].append((s1, mix_bc, m_pd_bc))
# gf_cnt += 1
# print()
# gf_cnt = 0
# if bidirectional:
# print('Calculating reversed gene flows...')
# for tar, s2 in gene_flow:
# if tar in tar_block_list: continue
# propotion[(s2, tar)] = []
# if (s2, tar) not in self.s1_record:
# continue
# for s1 in self.s1_record[(s2, tar)]:
# if s1 in s1_block_list: continue
# mix_bc, m_pd_bc = calculate_m1(s1, tar, s2)
# # mix_a1b, mix_pd_a1b = calculate_m1(s2, tar, s1)
# # print('mix_bc, mix_a1b', mix_bc, mix_a1b)
# # if mix_bc is None or mix_a1b is None: continue
# if mix_bc is None: continue
# if mix_bc > 1 or mix_bc < 0: continue
# # if mix_bc > 1 or mix_bc < 0 or mix_a1b > 1 or mix_a1b < 0: continue
# print('Calculating gene flow %d' % gf_cnt)
# if calculate_pd:
# mix_bc, m_pd_bc = calculate_m1(s1, tar, s2, pd=True)
# else:
# m_pd_bc = float('nan')
# propotion[(s2, tar)].append((s1, mix_bc, m_pd_bc))
# gf_cnt += 1
print() print()
print('---------------------------------------------------------------') print('---------------------------------------------------------------')
...@@ -478,16 +530,16 @@ class TreeBuilder(object): ...@@ -478,16 +530,16 @@ class TreeBuilder(object):
m_pd_display_threshold = kwargs.get('gene_flow_display_confidence', 0.0) m_pd_display_threshold = kwargs.get('gene_flow_display_confidence', 0.0)
gene_flows_checked_by_propotion = [] gene_flows_checked_by_propotion = []
for gene_flow, m1s in propotion.items(): for gene_flow, m1s in propotion.items():
s2, tar = gene_flow s1, tar = gene_flow
for m1 in m1s: for m1 in m1s:
s1, mix, m_pd = m1 s1, mix, m_pd = m1
# print(s2, tar, s1, mix, m_pd) # print(s2, tar, s1, mix, m_pd)
print(mat.format(s2, tar, s1, mix, m_pd)) print(mat.format(s2, tar, s1, mix, m_pd))
if (s2, tar) not in gene_flows_checked_by_propotion: if (s1, tar) not in gene_flows_checked_by_propotion:
if not (calculate_pd and m_pd < m_pd_display_threshold): if not (calculate_pd and m_pd < m_pd_display_threshold):
gene_flows_checked_by_propotion.append((s2, tar)) gene_flows_checked_by_propotion.append((s1, tar))
print(',,,', gene_flows_checked_by_propotion) # print(',,,', gene_flows_checked_by_propotion)
print(m_pd_display_threshold) # print(m_pd_display_threshold)
return gene_flows_checked_by_propotion return gene_flows_checked_by_propotion
def calculate_propotion_bak2(self, gene_flow, order, visited_leaves, bidirectional=True): def calculate_propotion_bak2(self, gene_flow, order, visited_leaves, bidirectional=True):
......
...@@ -10,7 +10,7 @@ def read_data(data_path, threshold, populations): ...@@ -10,7 +10,7 @@ def read_data(data_path, threshold, populations):
key_visited = [] key_visited = []
eq_scc = None eq_scc = None
min_redstar = [] min_redstar = []
s1_record = {} s2_record = {}
with open(data_path) as f: with open(data_path) as f:
for l in f.readlines(): for l in f.readlines():
if not l.startswith('result'): continue if not l.startswith('result'): continue
...@@ -18,10 +18,10 @@ def read_data(data_path, threshold, populations): ...@@ -18,10 +18,10 @@ def read_data(data_path, threshold, populations):
if A not in populations or B not in populations or C not in populations: continue if A not in populations or B not in populations or C not in populations: continue
f4 = float(l.split()[5]) f4 = float(l.split()[5])
value = float(l.split()[6]) value = float(l.split()[6])
get_adjusted_D_statistics(D_statistics, adjusted_D_values, triangles, A, B, C, f4, value, threshold, min_redstar, redstar_indices, key_visited, s1_record) get_adjusted_D_statistics(D_statistics, adjusted_D_values, triangles, A, B, C, f4, value, threshold, min_redstar, redstar_indices, key_visited, s2_record)
if len(D_statistics) >= len(populations) * (len(populations) - 1) * (len(populations) - 2) / 2: if len(D_statistics) >= len(populations) * (len(populations) - 1) * (len(populations) - 2) / 2:
break break
return D_statistics, adjusted_D_values, triangles, redstar_indices, min_redstar, s1_record return D_statistics, adjusted_D_values, triangles, redstar_indices, min_redstar, s2_record
def equal_vertices_judgement(g, vertices): def equal_vertices_judgement(g, vertices):
edges = list(combinations(vertices, 2)) edges = list(combinations(vertices, 2))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment