Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
T
treebuilder
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Huang Di
treebuilder
Commits
36da3345
Commit
36da3345
authored
Jul 11, 2023
by
Huang Di
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
s1 s2 exchange
parent
ed27460a
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
113 additions
and
42 deletions
+113
-42
geneflow_P.py
+5
-2
main.py
+5
-5
preprocess.py
+29
-13
treebuilder.py
+71
-19
utils.py
+3
-3
No files found.
geneflow_P.py
View file @
36da3345
...
@@ -47,9 +47,12 @@ def Probability_distribution(D1,D2,Z1,Z2):
...
@@ -47,9 +47,12 @@ def Probability_distribution(D1,D2,Z1,Z2):
i
=
i2
+
i3
i
=
i2
+
i3
if
i1_5
<=
0
or
i1_5
<=
i
:
if
i1_5
<=
0
or
i1_5
<=
i
:
i1
=
i1_1
+
i1_2
+
i1_3
+
i1_4
i1
=
i1_1
+
i1_2
+
i1_3
+
i1_4
if
i1_5
>
0
and
i1_5
>=
i
:
M_PD
=
i
/
i1
elif
i1_5
>
0
and
i1_5
>=
i
:
i1
=
i1_5
i1
=
i1_5
M_PD
=
i
/
i1
M_PD
=
i
/
i1
else
:
M_PD
=
-
1
if
M_PD
>
1
:
if
M_PD
>
1
:
M_PD
=
1
M_PD
=
1
else
:
else
:
...
...
main.py
View file @
36da3345
...
@@ -112,9 +112,9 @@ print()
...
@@ -112,9 +112,9 @@ print()
print
(
'Analyzing the minimum Z...'
)
print
(
'Analyzing the minimum Z...'
)
# get all edges
# get all edges
bottom_threshold
=
0
bottom_threshold
=
0
D_statistics
,
adjusted_D_values
,
triangles
,
redstar_indices
,
min_redstar
,
s
1
_record
\
D_statistics
,
adjusted_D_values
,
triangles
,
redstar_indices
,
min_redstar
,
s
2
_record
\
=
read_data
(
data_path
,
bottom_threshold
,
populations
)
=
read_data
(
data_path
,
bottom_threshold
,
populations
)
builder
=
TreeBuilder
(
populations
,
triangles
,
s
1
_record
)
builder
=
TreeBuilder
(
populations
,
triangles
,
s
2
_record
)
builder
.
D2graph
(
D_statistics
)
builder
.
D2graph
(
D_statistics
)
g
=
builder
.
g
g
=
builder
.
g
scc0
=
g
.
SCC
()
scc0
=
g
.
SCC
()
...
@@ -154,13 +154,13 @@ if threshold < bottom_threshold:
...
@@ -154,13 +154,13 @@ if threshold < bottom_threshold:
ValueError
(
'the given Z is smaller than the minimum Z!'
)
ValueError
(
'the given Z is smaller than the minimum Z!'
)
#---------------------------------------------------------------#
#---------------------------------------------------------------#
D_statistics
,
adjusted_D_values
,
triangles
,
redstar_indices
,
min_redstar
,
s
1
_record
\
D_statistics
,
adjusted_D_values
,
triangles
,
redstar_indices
,
min_redstar
,
s
2
_record
\
=
read_data
(
data_path
,
threshold
,
populations
)
=
read_data
(
data_path
,
threshold
,
populations
)
if
DEBUG
:
if
DEBUG
:
if
min_redstar
:
if
min_redstar
:
print
(
'min red star:'
,
min
(
min_redstar
))
print
(
'min red star:'
,
min
(
min_redstar
))
print
(
adjusted_D_values
)
print
(
adjusted_D_values
)
builder
=
TreeBuilder
(
populations
,
triangles
,
s
1
_record
)
builder
=
TreeBuilder
(
populations
,
triangles
,
s
2
_record
)
builder
.
D2graph
(
D_statistics
)
builder
.
D2graph
(
D_statistics
)
g
=
builder
.
g
g
=
builder
.
g
...
@@ -444,7 +444,7 @@ print('Building the tree...')
...
@@ -444,7 +444,7 @@ print('Building the tree...')
builder
.
build_tree
(
eq_scc
=
eq_scc
,
D
=
None
,
**
args
)
builder
.
build_tree
(
eq_scc
=
eq_scc
,
D
=
None
,
**
args
)
print
()
print
()
print
(
'Plotting...'
)
print
(
'Plotting...'
)
builder
.
export_graph
(
save_path
,
print_gene_flow
=
True
)
builder
.
export_graph
(
save_path
,
print_gene_flow
=
print_gene_flow
)
print
()
print
()
print
(
'Saved to
%
s'
%
save_path
)
print
(
'Saved to
%
s'
%
save_path
)
...
...
preprocess.py
View file @
36da3345
...
@@ -60,7 +60,7 @@ def adjust_D_bak(triangle, threshold):
...
@@ -60,7 +60,7 @@ def adjust_D_bak(triangle, threshold):
adjusted_value
.
append
((
adjust_idx
,
adjust_threshold
))
adjusted_value
.
append
((
adjust_idx
,
adjust_threshold
))
return
adjusted_value
,
adjusted_data
return
adjusted_value
,
adjusted_data
def
adjust_D
(
key
,
triangle
,
threshold
,
min_redstar
,
s
1
_record
):
def
adjust_D
(
key
,
triangle
,
threshold
,
min_redstar
,
s
2
_record
):
adjusted_data
=
[]
adjusted_data
=
[]
to_adjust
=
[]
to_adjust
=
[]
for
i
,
value
in
enumerate
(
triangle
):
for
i
,
value
in
enumerate
(
triangle
):
...
@@ -79,22 +79,38 @@ def adjust_D(key, triangle, threshold, min_redstar, s1_record):
...
@@ -79,22 +79,38 @@ def adjust_D(key, triangle, threshold, min_redstar, s1_record):
redstar_idx
=
redstar_index
[
index
]
redstar_idx
=
redstar_index
[
index
]
def
add_s1_record
(
s2
,
tar
,
s1
):
# def add_s1_record(s2, tar, s1):
if
(
key
[
s2
],
key
[
tar
])
not
in
s1_record
:
# if (key[s2], key[tar]) not in s1_record:
s1_record
[(
key
[
s2
],
key
[
tar
])]
=
[]
# s1_record[(key[s2], key[tar])] = []
s1_record
[(
key
[
s2
],
key
[
tar
])]
.
append
(
key
[
s1
])
# s1_record[(key[s2], key[tar])].append(key[s1])
# T = 0
# A = 1
# B = 2
# if redstar_idx == 2:
# add_s1_record(T, A, B)
# add_s1_record(B, A, T)
# elif redstar_idx == 1:
# add_s1_record(T, B, A)
# add_s1_record(A, B, T)
# elif redstar_idx == 0:
# add_s1_record(A, T, B)
# add_s1_record(B, T, A)
def
add_s2_record
(
s1
,
tar
,
s2
):
if
(
key
[
s1
],
key
[
tar
])
not
in
s2_record
:
s2_record
[(
key
[
s1
],
key
[
tar
])]
=
[]
s2_record
[(
key
[
s1
],
key
[
tar
])]
.
append
(
key
[
s2
])
T
=
0
T
=
0
A
=
1
A
=
1
B
=
2
B
=
2
if
redstar_idx
==
2
:
if
redstar_idx
==
2
:
add_s
1
_record
(
T
,
A
,
B
)
add_s
2
_record
(
T
,
A
,
B
)
add_s1
_record
(
B
,
A
,
T
)
# add_s2
_record(B, A, T)
elif
redstar_idx
==
1
:
elif
redstar_idx
==
1
:
add_s1
_record
(
T
,
B
,
A
)
# add_s2
_record(T, B, A)
add_s
1
_record
(
A
,
B
,
T
)
add_s
2
_record
(
A
,
B
,
T
)
elif
redstar_idx
==
0
:
elif
redstar_idx
==
0
:
add_s1
_record
(
A
,
T
,
B
)
# add_s2
_record(A, T, B)
add_s
1
_record
(
B
,
T
,
A
)
add_s
2
_record
(
B
,
T
,
A
)
if
DEBUG
:
if
DEBUG
:
print
(
'------------------------------'
)
print
(
'------------------------------'
)
print
(
key
)
print
(
key
)
...
@@ -105,7 +121,7 @@ def adjust_D(key, triangle, threshold, min_redstar, s1_record):
...
@@ -105,7 +121,7 @@ def adjust_D(key, triangle, threshold, min_redstar, s1_record):
adjusted_data
[
redstar_idx
]
=
0
adjusted_data
[
redstar_idx
]
=
0
return
None
,
adjusted_data
,
redstar_idx
return
None
,
adjusted_data
,
redstar_idx
def
get_adjusted_D_statistics
(
D_statistics
,
adjusted_D_values
,
triangles
,
A
,
B
,
C
,
f4
,
value
,
threshold
,
min_redstar
,
redstar_indices
,
key_visited
,
s
1
_record
):
def
get_adjusted_D_statistics
(
D_statistics
,
adjusted_D_values
,
triangles
,
A
,
B
,
C
,
f4
,
value
,
threshold
,
min_redstar
,
redstar_indices
,
key_visited
,
s
2
_record
):
key
=
tuple
(
sorted
([
A
,
B
,
C
]))
key
=
tuple
(
sorted
([
A
,
B
,
C
]))
if
key
in
key_visited
:
return
if
key
in
key_visited
:
return
pos
=
key
.
index
(
C
)
pos
=
key
.
index
(
C
)
...
@@ -120,7 +136,7 @@ def get_adjusted_D_statistics(D_statistics, adjusted_D_values, triangles, A, B,
...
@@ -120,7 +136,7 @@ def get_adjusted_D_statistics(D_statistics, adjusted_D_values, triangles, A, B,
if
None
not
in
triangles
[
key
][
'D'
]:
if
None
not
in
triangles
[
key
][
'D'
]:
key_visited
.
append
(
key
)
key_visited
.
append
(
key
)
adjusted_value
,
adjusted_data
,
redstar_index
=
adjust_D
(
key
,
triangles
[
key
][
'D'
],
threshold
,
min_redstar
,
s
1
_record
)
adjusted_value
,
adjusted_data
,
redstar_index
=
adjust_D
(
key
,
triangles
[
key
][
'D'
],
threshold
,
min_redstar
,
s
2
_record
)
redstar_indices
[
key
]
=
redstar_index
redstar_indices
[
key
]
=
redstar_index
# if len(adjusted_value) > 0:
# if len(adjusted_value) > 0:
# for idx, value in adjusted_value:
# for idx, value in adjusted_value:
...
...
treebuilder.py
View file @
36da3345
...
@@ -10,12 +10,12 @@ from geneflow_P import Probability_distribution
...
@@ -10,12 +10,12 @@ from geneflow_P import Probability_distribution
DEBUG
=
False
DEBUG
=
False
class
TreeBuilder
(
object
):
class
TreeBuilder
(
object
):
def
__init__
(
self
,
populations
,
triangles
,
s
1
_record
):
def
__init__
(
self
,
populations
,
triangles
,
s
2
_record
):
self
.
triangles
=
triangles
self
.
triangles
=
triangles
self
.
populations
=
sorted
(
populations
)
self
.
populations
=
sorted
(
populations
)
self
.
num_populations
=
len
(
self
.
populations
)
self
.
num_populations
=
len
(
self
.
populations
)
self
.
relations
=
list
(
combinations
(
self
.
populations
,
2
))
self
.
relations
=
list
(
combinations
(
self
.
populations
,
2
))
self
.
s
1_record
=
s1
_record
self
.
s
2_record
=
s2
_record
self
.
root
=
None
self
.
root
=
None
self
.
gene_flow
=
None
self
.
gene_flow
=
None
...
@@ -414,13 +414,13 @@ class TreeBuilder(object):
...
@@ -414,13 +414,13 @@ class TreeBuilder(object):
propotion
=
{}
propotion
=
{}
print
(
'Calculating gene flows...'
)
print
(
'Calculating gene flows...'
)
gf_cnt
=
0
gf_cnt
=
0
for
s
2
,
tar
in
gene_flow
:
for
s
1
,
tar
in
gene_flow
:
if
tar
in
tar_block_list
:
continue
if
tar
in
tar_block_list
:
continue
propotion
[(
s2
,
tar
)]
=
[]
if
s1
in
s1_block_list
:
continue
if
(
s2
,
tar
)
not
in
self
.
s1_record
:
propotion
[(
s1
,
tar
)]
=
[]
if
(
s1
,
tar
)
not
in
self
.
s2_record
:
continue
continue
for
s1
in
self
.
s1_record
[(
s2
,
tar
)]:
for
s2
in
self
.
s2_record
[(
s1
,
tar
)]:
if
s1
in
s1_block_list
:
continue
if
DEBUG
:
if
DEBUG
:
print
(
'-----------------------------------------------'
)
print
(
'-----------------------------------------------'
)
# pd = False to make it fast
# pd = False to make it fast
...
@@ -438,20 +438,20 @@ class TreeBuilder(object):
...
@@ -438,20 +438,20 @@ class TreeBuilder(object):
mix_bc
,
m_pd_bc
=
calculate_m1
(
s1
,
tar
,
s2
,
pd
=
True
)
mix_bc
,
m_pd_bc
=
calculate_m1
(
s1
,
tar
,
s2
,
pd
=
True
)
else
:
else
:
m_pd_bc
=
float
(
'nan'
)
m_pd_bc
=
float
(
'nan'
)
propotion
[(
s
2
,
tar
)]
.
append
((
s1
,
mix_bc
,
m_pd_bc
))
propotion
[(
s
1
,
tar
)]
.
append
((
s2
,
mix_bc
,
m_pd_bc
))
gf_cnt
+=
1
gf_cnt
+=
1
print
()
print
()
gf_cnt
=
0
gf_cnt
=
0
if
bidirectional
:
if
bidirectional
:
print
(
'Calculating reversed gene flows...'
)
print
(
'Calculating reversed gene flows...'
)
for
tar
,
s2
in
gene_flow
:
for
tar
,
s1
in
gene_flow
:
if
s1
in
s1_block_list
:
continue
if
tar
in
tar_block_list
:
continue
if
tar
in
tar_block_list
:
continue
propotion
[(
s
2
,
tar
)]
=
[]
propotion
[(
s
1
,
tar
)]
=
[]
if
(
s
2
,
tar
)
not
in
self
.
s1
_record
:
if
(
s
1
,
tar
)
not
in
self
.
s2
_record
:
continue
continue
for
s1
in
self
.
s1_record
[(
s2
,
tar
)]:
for
s2
in
self
.
s2_record
[(
s1
,
tar
)]:
if
s1
in
s1_block_list
:
continue
mix_bc
,
m_pd_bc
=
calculate_m1
(
s1
,
tar
,
s2
)
mix_bc
,
m_pd_bc
=
calculate_m1
(
s1
,
tar
,
s2
)
# mix_a1b, mix_pd_a1b = calculate_m1(s2, tar, s1)
# mix_a1b, mix_pd_a1b = calculate_m1(s2, tar, s1)
# print('mix_bc, mix_a1b', mix_bc, mix_a1b)
# print('mix_bc, mix_a1b', mix_bc, mix_a1b)
...
@@ -464,8 +464,60 @@ class TreeBuilder(object):
...
@@ -464,8 +464,60 @@ class TreeBuilder(object):
mix_bc
,
m_pd_bc
=
calculate_m1
(
s1
,
tar
,
s2
,
pd
=
True
)
mix_bc
,
m_pd_bc
=
calculate_m1
(
s1
,
tar
,
s2
,
pd
=
True
)
else
:
else
:
m_pd_bc
=
float
(
'nan'
)
m_pd_bc
=
float
(
'nan'
)
propotion
[(
s
2
,
tar
)]
.
append
((
s1
,
mix_bc
,
m_pd_bc
))
propotion
[(
s
1
,
tar
)]
.
append
((
s2
,
mix_bc
,
m_pd_bc
))
gf_cnt
+=
1
gf_cnt
+=
1
# for s2, tar in gene_flow:
# if tar in tar_block_list: continue
# propotion[(s2, tar)] = []
# if (s2, tar) not in self.s1_record:
# continue
# for s1 in self.s1_record[(s2, tar)]:
# if s1 in s1_block_list: continue
# if DEBUG:
# print('-----------------------------------------------')
# # pd = False to make it fast
# mix_bc, m_pd_bc = calculate_m1(s1, tar, s2)
# if DEBUG:
# print('mix:', mix_bc)
# # mix_a1b, mix_pd_a1b = calculate_m1(s2, tar, s1)
# # print('mix:', mix_a1b)
# # if mix_bc is None or mix_a1b is None: continue
# if mix_bc is None: continue
# # if mix_bc > 1 or mix_bc < 0 or mix_a1b > 1 or mix_a1b < 0: continue
# if mix_bc > 1 or mix_bc < 0: continue
# print('Calculating gene flow %d' % gf_cnt)
# if calculate_pd:
# mix_bc, m_pd_bc = calculate_m1(s1, tar, s2, pd=True)
# else:
# m_pd_bc = float('nan')
# propotion[(s2, tar)].append((s1, mix_bc, m_pd_bc))
# gf_cnt += 1
# print()
# gf_cnt = 0
# if bidirectional:
# print('Calculating reversed gene flows...')
# for tar, s2 in gene_flow:
# if tar in tar_block_list: continue
# propotion[(s2, tar)] = []
# if (s2, tar) not in self.s1_record:
# continue
# for s1 in self.s1_record[(s2, tar)]:
# if s1 in s1_block_list: continue
# mix_bc, m_pd_bc = calculate_m1(s1, tar, s2)
# # mix_a1b, mix_pd_a1b = calculate_m1(s2, tar, s1)
# # print('mix_bc, mix_a1b', mix_bc, mix_a1b)
# # if mix_bc is None or mix_a1b is None: continue
# if mix_bc is None: continue
# if mix_bc > 1 or mix_bc < 0: continue
# # if mix_bc > 1 or mix_bc < 0 or mix_a1b > 1 or mix_a1b < 0: continue
# print('Calculating gene flow %d' % gf_cnt)
# if calculate_pd:
# mix_bc, m_pd_bc = calculate_m1(s1, tar, s2, pd=True)
# else:
# m_pd_bc = float('nan')
# propotion[(s2, tar)].append((s1, mix_bc, m_pd_bc))
# gf_cnt += 1
print
()
print
()
print
(
'---------------------------------------------------------------'
)
print
(
'---------------------------------------------------------------'
)
...
@@ -478,16 +530,16 @@ class TreeBuilder(object):
...
@@ -478,16 +530,16 @@ class TreeBuilder(object):
m_pd_display_threshold
=
kwargs
.
get
(
'gene_flow_display_confidence'
,
0.0
)
m_pd_display_threshold
=
kwargs
.
get
(
'gene_flow_display_confidence'
,
0.0
)
gene_flows_checked_by_propotion
=
[]
gene_flows_checked_by_propotion
=
[]
for
gene_flow
,
m1s
in
propotion
.
items
():
for
gene_flow
,
m1s
in
propotion
.
items
():
s
2
,
tar
=
gene_flow
s
1
,
tar
=
gene_flow
for
m1
in
m1s
:
for
m1
in
m1s
:
s1
,
mix
,
m_pd
=
m1
s1
,
mix
,
m_pd
=
m1
# print(s2, tar, s1, mix, m_pd)
# print(s2, tar, s1, mix, m_pd)
print
(
mat
.
format
(
s2
,
tar
,
s1
,
mix
,
m_pd
))
print
(
mat
.
format
(
s2
,
tar
,
s1
,
mix
,
m_pd
))
if
(
s
2
,
tar
)
not
in
gene_flows_checked_by_propotion
:
if
(
s
1
,
tar
)
not
in
gene_flows_checked_by_propotion
:
if
not
(
calculate_pd
and
m_pd
<
m_pd_display_threshold
):
if
not
(
calculate_pd
and
m_pd
<
m_pd_display_threshold
):
gene_flows_checked_by_propotion
.
append
((
s
2
,
tar
))
gene_flows_checked_by_propotion
.
append
((
s
1
,
tar
))
print
(
',,,'
,
gene_flows_checked_by_propotion
)
#
print(',,,', gene_flows_checked_by_propotion)
print
(
m_pd_display_threshold
)
#
print(m_pd_display_threshold)
return
gene_flows_checked_by_propotion
return
gene_flows_checked_by_propotion
def
calculate_propotion_bak2
(
self
,
gene_flow
,
order
,
visited_leaves
,
bidirectional
=
True
):
def
calculate_propotion_bak2
(
self
,
gene_flow
,
order
,
visited_leaves
,
bidirectional
=
True
):
...
...
utils.py
View file @
36da3345
...
@@ -10,7 +10,7 @@ def read_data(data_path, threshold, populations):
...
@@ -10,7 +10,7 @@ def read_data(data_path, threshold, populations):
key_visited
=
[]
key_visited
=
[]
eq_scc
=
None
eq_scc
=
None
min_redstar
=
[]
min_redstar
=
[]
s
1
_record
=
{}
s
2
_record
=
{}
with
open
(
data_path
)
as
f
:
with
open
(
data_path
)
as
f
:
for
l
in
f
.
readlines
():
for
l
in
f
.
readlines
():
if
not
l
.
startswith
(
'result'
):
continue
if
not
l
.
startswith
(
'result'
):
continue
...
@@ -18,10 +18,10 @@ def read_data(data_path, threshold, populations):
...
@@ -18,10 +18,10 @@ def read_data(data_path, threshold, populations):
if
A
not
in
populations
or
B
not
in
populations
or
C
not
in
populations
:
continue
if
A
not
in
populations
or
B
not
in
populations
or
C
not
in
populations
:
continue
f4
=
float
(
l
.
split
()[
5
])
f4
=
float
(
l
.
split
()[
5
])
value
=
float
(
l
.
split
()[
6
])
value
=
float
(
l
.
split
()[
6
])
get_adjusted_D_statistics
(
D_statistics
,
adjusted_D_values
,
triangles
,
A
,
B
,
C
,
f4
,
value
,
threshold
,
min_redstar
,
redstar_indices
,
key_visited
,
s
1
_record
)
get_adjusted_D_statistics
(
D_statistics
,
adjusted_D_values
,
triangles
,
A
,
B
,
C
,
f4
,
value
,
threshold
,
min_redstar
,
redstar_indices
,
key_visited
,
s
2
_record
)
if
len
(
D_statistics
)
>=
len
(
populations
)
*
(
len
(
populations
)
-
1
)
*
(
len
(
populations
)
-
2
)
/
2
:
if
len
(
D_statistics
)
>=
len
(
populations
)
*
(
len
(
populations
)
-
1
)
*
(
len
(
populations
)
-
2
)
/
2
:
break
break
return
D_statistics
,
adjusted_D_values
,
triangles
,
redstar_indices
,
min_redstar
,
s
1
_record
return
D_statistics
,
adjusted_D_values
,
triangles
,
redstar_indices
,
min_redstar
,
s
2
_record
def
equal_vertices_judgement
(
g
,
vertices
):
def
equal_vertices_judgement
(
g
,
vertices
):
edges
=
list
(
combinations
(
vertices
,
2
))
edges
=
list
(
combinations
(
vertices
,
2
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment