• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

js51 / SplitP / 7894856040

14 Feb 2024 12:52AM UTC coverage: 46.404% (-5.7%) from 52.085%
7894856040

push

github

web-flow
Merge pull request #38 from js51/SplitP-rewrite

Re-organise modules

403 of 880 new or added lines in 12 files covered. (45.8%)

413 of 890 relevant lines covered (46.4%)

1.39 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.13
/splitp/constructions.py
1
import numpy as np
3✔
2
from splitp.enums import FlatFormat
3✔
3
from scipy.sparse import dok_matrix, coo_matrix
3✔
4
import splitp.constants as constants
3✔
5

6

7
def flattening(
    split,
    pattern_probabilities,
    flattening_format=FlatFormat.sparse
):
    """
    Compute the flattening of a split given a pattern probability dictionary.

    Args:
        split (str or list): The split to compute the flattening of. A string is
            cut at ``"|"``; only the first two parts are used as the two sides.
        pattern_probabilities (dict): A dictionary of pattern probabilities.
        flattening_format (FlatFormat): The format to return the flattening in.

    Returns:
        The flattening of the split in the specified format.

    Raises:
        ValueError: If ``flattening_format`` is neither ``FlatFormat.sparse`` nor
            ``FlatFormat.reduced``.
    """
    if isinstance(split, str):
        split = split.split("|")
    # The sorted union of both sides fixes the position of every taxon inside a
    # pattern; the helpers index patterns through this ordering.
    taxa = sorted(set(split[0]) | set(split[1]))
    if flattening_format is FlatFormat.sparse:
        return __sparse_flattening(split, pattern_probabilities, taxa)
    if flattening_format is FlatFormat.reduced:
        return __reduced_flattening(split, pattern_probabilities, taxa)
    # Fail loudly instead of silently returning None for an unknown format.
    raise ValueError(f"Unsupported flattening format: {flattening_format!r}")
30

31

32
def __reduced_flattening(split, pattern_probabilities, taxa):
    """
    Build a dense flattening that keeps only the rows and columns in use.

    Every pattern in ``pattern_probabilities`` (including ones whose value is
    zero) contributes one row index and one column index. Rows and columns
    that no pattern maps to are left out, and the remaining ones are arranged
    in ascending index order.

    Args:
        split (str or list): The split, as a ``"left|right"`` string or a
            sequence whose first two items are the sides.
        pattern_probabilities (dict): Maps each pattern to its probability.
        taxa (list): Ordered taxa; the position of a taxon in this list is the
            position of its state inside each pattern.

    Returns:
        numpy.ndarray: Array of shape (number of used rows, number of used
        columns), zero wherever no pattern supplied a value.
    """
    if isinstance(split, str):
        split = split.split("|")
    taxa_indexer = {taxon: i for i, taxon in enumerate(taxa)}
    # The pattern positions of each side do not depend on the pattern, so they
    # are resolved once rather than once per pattern.
    left_positions = [taxa_indexer[s] for s in split[0]]
    right_positions = [taxa_indexer[s] for s in split[1]]

    flattening_data = {}  # row index -> {column index: probability}
    used_cols = set()
    for pattern, probability in pattern_probabilities.items():
        row = __index_of("".join(str(pattern[i]) for i in left_positions))
        col = __index_of("".join(str(pattern[i]) for i in right_positions))
        used_cols.add(col)
        flattening_data.setdefault(row, {})[col] = probability

    # Compress the used column indices into consecutive positions.
    column_sort_order = {col: i for i, col in enumerate(sorted(used_cols))}

    matrix = np.zeros((len(flattening_data), len(used_cols)))
    for i, row_index in enumerate(sorted(flattening_data)):
        for col_index, prob in flattening_data[row_index].items():
            matrix[i, column_sort_order[col_index]] = prob
    return matrix
57

58

59
def __sparse_flattening(split, pattern_probabilities, taxa, matrix_format="dok"):
    """
    Build the flattening of a split as a SciPy sparse matrix.

    The matrix has ``4 ** len(split[0])`` rows and ``4 ** len(split[1])``
    columns. Each pattern is mapped to a (row, column) cell by reading the
    states of the left-side and right-side taxa and converting each string of
    states to an index.

    Args:
        split (str or list): The split, as a ``"left|right"`` string or a
            sequence whose first two items are the sides.
        pattern_probabilities (dict): Maps each pattern to its probability.
        taxa (list): Ordered taxa; the position of a taxon in this list is the
            position of its state inside each pattern.
        matrix_format (str): ``"dok"`` (default, the previously hard-coded
            choice) or ``"coo"``. The COO variant skips patterns whose
            probability equals zero.

    Returns:
        scipy.sparse.dok_matrix or scipy.sparse.coo_matrix: The flattening.

    Raises:
        ValueError: If ``matrix_format`` is not ``"dok"`` or ``"coo"``.
    """
    if matrix_format not in ("dok", "coo"):
        raise ValueError(f"Unsupported sparse matrix format: {matrix_format!r}")
    if isinstance(split, str):
        split = split.split("|")
    taxa_indexer = {taxon: i for i, taxon in enumerate(taxa)}
    # Resolved once: which pattern positions belong to each side of the split.
    left_positions = [taxa_indexer[s] for s in split[0]]
    right_positions = [taxa_indexer[s] for s in split[1]]
    shape = (4 ** len(split[0]), 4 ** len(split[1]))

    def cell_of(pattern):
        # Row index from the left-side states, column index from the right.
        row = __index_of("".join(str(pattern[i]) for i in left_positions))
        col = __index_of("".join(str(pattern[i]) for i in right_positions))
        return row, col

    if matrix_format == "coo":
        rows = []
        cols = []
        values = []
        for pattern, probability in pattern_probabilities.items():
            if probability != 0:
                row, col = cell_of(pattern)
                rows.append(row)
                cols.append(col)
                values.append(probability)
        return coo_matrix((values, (rows, cols)), shape=shape)

    matrix = dok_matrix(shape)
    for pattern, probability in pattern_probabilities.items():
        row, col = cell_of(pattern)
        matrix[row, col] = probability
    return matrix
91

92

93
def subflattening(split, pattern_probabilities, data=None):
    """
    A faster version of signed sum subflattening.

    Can be supplied with a bundle of re-usable information (``data``) so that
    label lists and sign coefficients computed in one call are reused by
    later calls.

    Args:
        split (str or list): The split, as a ``"left|right"`` string or a
            sequence whose first two items are the sides.
        pattern_probabilities (dict): Maps each pattern to its probability.
        data (dict, optional): Cache bundle, updated in place. Label lists are
            stored under ``data["labels"]`` keyed by side size, and sign
            coefficients under ``data["coeffs"]`` keyed by
            ``(pattern, table_pattern)``. A fresh bundle is used when omitted.

    Returns:
        numpy.ndarray: Array built from ``3 * len(split[0]) + 1`` rows and
        ``3 * len(split[1]) + 1`` columns of signed sums.
    """
    state_space = constants.DNA_state_space
    if data is None:
        data = {}
    # Fetch the two caches independently: a bundle that holds only one of the
    # keys keeps what it already has instead of being reset wholesale.
    coeffs = data.setdefault("coeffs", {})
    labels = data.setdefault("labels", {})

    if isinstance(split, str):
        split = split.split("|")
    sp1, sp2 = len(split[0]), len(split[1])

    # Label lists depend only on the side size, so they are cached by size.
    for size in (sp1, sp2):
        if size not in labels:
            labels[size] = list(__subflattening_labels_generator(size))
    row_labels = labels[sp1]
    col_labels = labels[sp2]

    # State pairs that leave the sign unchanged; every other pair flips it.
    banned = (
        {("C", "C"), ("G", "G"), ("A", "T")}
        | {(x, "A") for x in state_space}
        | {("T", x) for x in state_space}
    )

    matrix = [[0 for _ in range(3 * sp2 + 1)] for _ in range(3 * sp1 + 1)]
    for r, row in enumerate(row_labels):
        for c, col in enumerate(col_labels):
            pattern = __reconstruct_pattern(split, row, col)
            signed_sum = 0
            for table_pattern, value in pattern_probabilities.items():
                key = (pattern, table_pattern)
                product = coeffs.get(key)
                if product is None:
                    # Sign is -1 raised to the number of non-banned pairs.
                    product = 1
                    for pair in zip(pattern, table_pattern):
                        if pair not in banned:
                            product *= -1
                    coeffs[key] = product
                signed_sum += product * value
            matrix[r][c] = signed_sum
    return np.array(matrix)
143

144

145
def __index_of(string):
    """
    Convert a string of states to its base-4 index.

    Each character is looked up in ``constants.DNA_state_space_dict`` to get
    its digit value; the first character is the most significant digit.

    Args:
        string (str): The states to convert.

    Returns:
        int: The index; ``0`` for an empty string.

    Raises:
        KeyError: If a character is not a key of
            ``constants.DNA_state_space_dict``.
    """
    state_values = constants.DNA_state_space_dict
    index = 0
    # Horner evaluation: same value as summing 4**k * digit from the right.
    for state in string:
        index = 4 * index + state_values[state]
    return index
151

152

153
def __subflattening_labels_generator(length):
    """
    Yield the row/column labels of a subflattening for a side of given size.

    For every position, and for every state except the last one of
    ``constants.DNA_state_space``, a label is produced that has that state at
    the position and ``"T"`` everywhere else. A final label consisting of the
    last state repeated ``length`` times is yielded at the end.

    Args:
        length (int): Number of taxa on the side of the split.

    Yields:
        str: Labels of ``length`` characters each.
    """
    state_space = constants.DNA_state_space
    other_states = state_space[0:-1]
    special_state = state_space[-1]
    # NOTE(review): the padding character is hard-coded as "T" while the final
    # label uses state_space[-1]; presumably these are the same state - confirm.
    for position in range(length):
        prefix = "T" * position
        suffix = "T" * (length - position - 1)
        for c in other_states:
            yield f"{prefix}{c}{suffix}"
    yield "".join([special_state] * length)
169

170

171
def __reconstruct_pattern(split, row_label, col_label):
    """
    Interleave a row label and a column label into one full pattern.

    Each taxon label in the split is converted to a position in the pattern:
    a single-character label is parsed as one digit in base ``n`` (``n`` being
    the total number of taxa), while a longer label has its first character
    dropped and the remainder parsed as a decimal integer. The state at that
    position is taken from ``row_label`` for left-side taxa and from
    ``col_label`` for right-side taxa.

    Args:
        split (list): Two-sided split; each side is a sequence of taxon labels.
        row_label (str): States for the taxa in ``split[0]``, in order.
        col_label (str): States for the taxa in ``split[1]``, in order.

    Returns:
        str: The pattern of length ``n``, ordered by position.

    Raises:
        ValueError: If a taxon label cannot be parsed as described above.
        KeyError: If the parsed positions do not cover ``0 .. n - 1``.
    """
    n = len(split[0]) + len(split[1])

    def position_of(loc):
        # One-character labels are base-n digits (so letters work as digits
        # above 9); longer labels carry a one-character prefix before a number.
        text = str(loc)
        if len(text) == 1:
            return int(text, n)
        return int(text[1:])

    states = {}
    for side, label in ((split[0], row_label), (split[1], col_label)):
        for splindex, loc in enumerate(side):
            states[position_of(loc)] = label[splindex]
    return "".join(states[i] for i in range(n))
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc