• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

js51 / SplitP / 7894856040

14 Feb 2024 12:52AM UTC coverage: 46.404% (-5.7%) from 52.085%
7894856040

push

github

web-flow
Merge pull request #38 from js51/SplitP-rewrite

Re-organise modules

403 of 880 new or added lines in 12 files covered. (45.8%)

413 of 890 relevant lines covered (46.4%)

1.39 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

13.19
/splitp/simulation.py
1
import itertools
3✔
2
import numpy as np
3✔
3
from warnings import warn
3✔
4
from random import choices
3✔
5
from networkx import dfs_tree
3✔
6
from splitp import constants
3✔
7

8

9
def evolve_pattern(tree, model=None):
3✔
NEW
10
    def __evolve_on_subtree(subtree, state):
×
NEW
11
        root_node = [n for n, d in subtree.in_degree() if d == 0][0]
×
NEW
12
        children = list(subtree.successors(root_node))
×
NEW
13
        if model == None: # Get the transition matrix from the phylogeny
×
NEW
14
                M = (tree.networkx_graph.nodes[root_node])["transition_matrix"]
×
15
        else:
NEW
16
            branch_length = (tree.networkx_graph.nodes[root_node])["branch_length"]
×
NEW
17
            M = model.transition_matrix(branch_length)
×
NEW
18
        probs = list(M[:, constants.DNA_state_space.index(state)])
×
NEW
19
        new_state = choices(constants.DNA_state_space, probs)[0]
×
NEW
20
        if len(children) == 0:
×
NEW
21
            return f"{str(root_node)}:{new_state}"
×
22
        else:
NEW
23
            subtrees = [dfs_tree(tree.networkx_graph, child) for child in children]
×
NEW
24
            return ",".join(
×
25
                __evolve_on_subtree(subtree, new_state) for subtree in subtrees
26
            )
27

NEW
28
    root_state = choices(constants.DNA_state_space, (1/4, 1/4, 1/4, 1/4))[0]
×
NEW
29
    root_node = [n for n, d in tree.networkx_graph.in_degree() if d == 0][0]
×
NEW
30
    children = list(tree.networkx_graph.successors(root_node))
×
NEW
31
    subtrees = [dfs_tree(tree.networkx_graph, child) for child in children]
×
NEW
32
    result_string = ",".join(
×
33
        __evolve_on_subtree(subtree, root_state) for subtree in subtrees
34
    )
NEW
35
    result = {
×
36
        pair.split(":")[0]: pair.split(":")[1] for pair in result_string.split(",")
37
    }
NEW
38
    taxa = tree.get_taxa()
×
NEW
39
    return "".join(result[k] for k in sorted(result.keys(), key=taxa.index))
×
40

41

42
def generate_alignment(tree, model, sequence_length):
3✔
NEW
43
    counts = {}
×
NEW
44
    for i in range(sequence_length):
×
NEW
45
        pattern = evolve_pattern(tree, model)
×
NEW
46
        if pattern not in counts:
×
NEW
47
            counts[pattern] = float(1)
×
48
        else:
NEW
49
            counts[pattern] += 1
×
NEW
50
    probs = {}
×
NEW
51
    for k in sorted(
×
52
        counts.keys(), key=lambda p: [constants.DNA_state_space.index(c) for c in p]
53
    ):
NEW
54
        probs[k] = counts[k] / float(sequence_length)
×
NEW
55
    return probs
×
56

57

58
def draw_from_multinomial(pattern_probabilities, n):
3✔
59
    """Use a given table of probabilities from get_pattern_probabilities() and draw from its distribution"""
NEW
60
    patterns, probs = zip(*pattern_probabilities.items())
×
NEW
61
    data = np.random.multinomial(n, probs)
×
NEW
62
    data = [d / n for d in data]
×
NEW
63
    results = {}
×
NEW
64
    for p, pattern in enumerate(patterns):
×
NEW
65
        if data[p] != 0:
×
NEW
66
            results[pattern] = data[p]
×
NEW
67
    return
×
68

69

70
def get_pattern_probabilities(tree, model=None):
3✔
71
    """Returns a full table of site-pattern probabilities (binary character set)"""
72
    # Creating a table with binary labels and calling likelihood_start() to fill it in with probabilities
NEW
73
    combinations = list(
×
74
        itertools.product(
75
            "".join(s for s in constants.DNA_state_space), repeat=tree.get_num_taxa()
76
        )
77
    )
NEW
78
    combinations = ["".join(c) for c in combinations]
×
NEW
79
    emptyArray = {
×
80
        combination: __likelihood_start(tree, combination, model) for combination in combinations
81
    }
NEW
82
    return emptyArray
×
83

84

85
def __likelihood(tree, n, likelihood_table, model):
3✔
86
    """Recursive part of the likelihood algorithm"""
NEW
87
    for b in range(4):
×
NEW
88
        L = 1
×
NEW
89
        for d in tree.get_descendants(n, return_iter=True):
×
NEW
90
            d_index = tree.node_index(d)
×
NEW
91
            if model == None: # Get the transition matrix from the phylogeny
×
NEW
92
                M = (tree.networkx_graph.nodes[d])["transition_matrix"]
×
93
            else:
NEW
94
                branch_length = (tree.networkx_graph.nodes[d])["branch_length"]
×
NEW
95
                M = model.transition_matrix(branch_length)
×
NEW
96
            s = 0
×
NEW
97
            for b2 in range(4):
×
NEW
98
                if likelihood_table[d_index, b2] == None:
×
NEW
99
                    __likelihood(tree, d, likelihood_table, model)
×
NEW
100
                s += M[b2, b] * likelihood_table[d_index, b2]
×
NEW
101
            L *= s
×
NEW
102
        likelihood_table[tree.node_index(n), b] = L
×
NEW
103
    if not tree.is_root(n):
×
NEW
104
        __likelihood(tree, tree.get_parent(n), likelihood_table, model)
×
105

106

107
def __likelihood_start(tree, pattern, model):
3✔
108
    """Starts the likelihood algorithm.
109

110
    Starts the likelihood algorithm to determine the probability of seeing a given site pattern.
111

112
    Args:
113
        pattern: The site pattern to obtain the probability for.
114

115
    Returns:
116
        A probability (range 0-1) of observing the given site pattern given the tree.
117
    """
118

NEW
119
    def _to_int(p):
×
NEW
120
        return constants.DNA_state_space.index(p)
×
121

NEW
122
    pattern = [
×
123
        _to_int(p) for p in pattern
124
    ]  # A list of indices which correspond to taxa.
NEW
125
    taxa = tree.get_taxa()  # The list of taxa.
×
126
    # Likelihood table for dynamic prog alg ~ lTable[node_index, character]
NEW
127
    likelihood_table = np.array(
×
128
        [[None for _ in range(4)] for _ in range(tree.get_num_nodes())]
129
    )
130

131
    # Encoding pattern into likelihood table
NEW
132
    for i, p in enumerate(pattern):
×
NEW
133
        likelihood_table[tree.node_index(taxa[i]), :] = [
×
134
            0 for _ in range(4)
135
        ]
NEW
136
        likelihood_table[tree.node_index(taxa[i]), p] = 1
×
137

138
    # Starting with node which has all descendants as leaves
NEW
139
    initNode = None
×
NEW
140
    for n in tree.networkx_graph.nodes:
×
NEW
141
        desc = tree.get_descendants(n)
×
NEW
142
        if desc and all(
×
143
            tree.is_leaf(d) for d in desc
144
        ):  # If the node has descendants and they are all leaves
NEW
145
            initNode = n
×
146

NEW
147
    __likelihood(tree, initNode, likelihood_table, model)  # Should fill in the entire table
×
148

NEW
149
    root_index = tree.node_index(tree.root(return_index=False))
×
NEW
150
    L = 0
×
NEW
151
    for i in range(4):
×
NEW
152
        L += (1/4) * likelihood_table[root_index, i]
×
153

NEW
154
    return L
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc