Commit ed27460a by Huang Di

gene flow proportion block list, display threshold

parent f47a1c00
...@@ -17,17 +17,22 @@ args = { ...@@ -17,17 +17,22 @@ args = {
'threshold2': 0, # gene flow display threshold 'threshold2': 0, # gene flow display threshold
'threshold3': 3, # gene flow merge threshold 'threshold3': 3, # gene flow merge threshold
'bidirectional': True, # calculate the reversed gene flow 'bidirectional': True, # calculate the reversed gene flow
'calculate_pd': False, # calculate the gene flow confidence 'calculate_pd': True, # calculate the gene flow confidence
'data_path': 'data/test.log', 'data_path': 'data/test.log',
'save_path': 'pictures/tree2.png', 'save_path': 'pictures/tree.png',
'draw_gene_flow': True, 'draw_gene_flow': True,
'gene_flow_display_confidence': 0.7,
'target_block_list': ['Tianyuan'],
's1_block_list': ['Boshan'],
# 'target_block_list': [],
# 's1_block_list': [],
} }
populations = ['Kostenki14', populations = ['Kostenki14',
'Tianyuan', 'Tianyuan',
'AR19K', 'AR19K',
'AR14K', 'AR14K',
'Zongri5.1k', 'Zongri5_1k',
'Boshan', 'Boshan',
'Qihe3', 'Qihe3',
'Donghulin', 'Donghulin',
......
from itertools import combinations from itertools import combinations
from data_structures import UnionFindSet, Graph from data_structures import UnionFindSet, Graph
from anytree import Node, PreOrderIter from anytree import Node, PreOrderIter, RenderTree
from anytree.exporter import DotExporter from anytree.exporter import DotExporter
from collections import defaultdict from collections import defaultdict
# from sklearn.linear_model import LinearRegression # from sklearn.linear_model import LinearRegression
...@@ -405,6 +405,8 @@ class TreeBuilder(object): ...@@ -405,6 +405,8 @@ class TreeBuilder(object):
bidirectional = kwargs.get('bidirectional', True) bidirectional = kwargs.get('bidirectional', True)
calculate_pd = kwargs.get('calculate_pd', False) calculate_pd = kwargs.get('calculate_pd', False)
tar_block_list = kwargs.get('target_block_list', [])
s1_block_list = kwargs.get('s1_block_list', [])
# calculate_m1('Dushan', 'Baojianshan') # calculate_m1('Dushan', 'Baojianshan')
# calculate_m1('Baojianshan', 'Dushan') # calculate_m1('Baojianshan', 'Dushan')
# calculate_m1('Boshan', 'Dushan') # calculate_m1('Boshan', 'Dushan')
...@@ -413,10 +415,12 @@ class TreeBuilder(object): ...@@ -413,10 +415,12 @@ class TreeBuilder(object):
print('Calculating gene flows...') print('Calculating gene flows...')
gf_cnt = 0 gf_cnt = 0
for s2, tar in gene_flow: for s2, tar in gene_flow:
if tar in tar_block_list: continue
propotion[(s2, tar)] = [] propotion[(s2, tar)] = []
if (s2, tar) not in self.s1_record: if (s2, tar) not in self.s1_record:
continue continue
for s1 in self.s1_record[(s2, tar)]: for s1 in self.s1_record[(s2, tar)]:
if s1 in s1_block_list: continue
if DEBUG: if DEBUG:
print('-----------------------------------------------') print('-----------------------------------------------')
# pd = False to make it fast # pd = False to make it fast
...@@ -442,10 +446,12 @@ class TreeBuilder(object): ...@@ -442,10 +446,12 @@ class TreeBuilder(object):
if bidirectional: if bidirectional:
print('Calculating reversed gene flows...') print('Calculating reversed gene flows...')
for tar, s2 in gene_flow: for tar, s2 in gene_flow:
if tar in tar_block_list: continue
propotion[(s2, tar)] = [] propotion[(s2, tar)] = []
if (s2, tar) not in self.s1_record: if (s2, tar) not in self.s1_record:
continue continue
for s1 in self.s1_record[(s2, tar)]: for s1 in self.s1_record[(s2, tar)]:
if s1 in s1_block_list: continue
mix_bc, m_pd_bc = calculate_m1(s1, tar, s2) mix_bc, m_pd_bc = calculate_m1(s1, tar, s2)
# mix_a1b, mix_pd_a1b = calculate_m1(s2, tar, s1) # mix_a1b, mix_pd_a1b = calculate_m1(s2, tar, s1)
# print('mix_bc, mix_a1b', mix_bc, mix_a1b) # print('mix_bc, mix_a1b', mix_bc, mix_a1b)
...@@ -468,6 +474,8 @@ class TreeBuilder(object): ...@@ -468,6 +474,8 @@ class TreeBuilder(object):
print(mat.format('s2', 'tar', 's1', 'mix', 'm_pd')) print(mat.format('s2', 'tar', 's1', 'mix', 'm_pd'))
mat = "{:10}\t{:10}\t{:10}\t{:.3f}\t{:.3f}" mat = "{:10}\t{:10}\t{:10}\t{:.3f}\t{:.3f}"
m_pd_display_threshold = kwargs.get('gene_flow_display_confidence', 0.0)
gene_flows_checked_by_propotion = [] gene_flows_checked_by_propotion = []
for gene_flow, m1s in propotion.items(): for gene_flow, m1s in propotion.items():
s2, tar = gene_flow s2, tar = gene_flow
...@@ -476,7 +484,10 @@ class TreeBuilder(object): ...@@ -476,7 +484,10 @@ class TreeBuilder(object):
# print(s2, tar, s1, mix, m_pd) # print(s2, tar, s1, mix, m_pd)
print(mat.format(s2, tar, s1, mix, m_pd)) print(mat.format(s2, tar, s1, mix, m_pd))
if (s2, tar) not in gene_flows_checked_by_propotion: if (s2, tar) not in gene_flows_checked_by_propotion:
if not (calculate_pd and m_pd < m_pd_display_threshold):
gene_flows_checked_by_propotion.append((s2, tar)) gene_flows_checked_by_propotion.append((s2, tar))
print(',,,', gene_flows_checked_by_propotion)
print(m_pd_display_threshold)
return gene_flows_checked_by_propotion return gene_flows_checked_by_propotion
def calculate_propotion_bak2(self, gene_flow, order, visited_leaves, bidirectional=True): def calculate_propotion_bak2(self, gene_flow, order, visited_leaves, bidirectional=True):
...@@ -693,6 +704,9 @@ class TreeBuilder(object): ...@@ -693,6 +704,9 @@ class TreeBuilder(object):
if print_gene_flow: if print_gene_flow:
for l1, l2 in self.gene_flow: for l1, l2 in self.gene_flow:
options.append("%s -> %s [color=red]" % (l1, l2)) options.append("%s -> %s [color=red]" % (l1, l2))
print(self.populations)
print(self.relations)
print(RenderTree(self.root))
return DotExporter(self.root, options=options) return DotExporter(self.root, options=options)
def export_dot(self, filename, print_gene_flow=True): def export_dot(self, filename, print_gene_flow=True):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment