• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Edinburgh-Genome-Foundry / DnaChisel / 5190565251

pending completion
5190565251

push

github

veghp
Bump to v3.2.11

1 of 1 new or added line in 1 file covered. (100.0%)

2966 of 3299 relevant lines covered (89.91%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.44
/dnachisel/SequencePattern/SequencePattern.py
1
"""Implements the SequencePattern, DnaNotationPattern classes.
2

3
These classes are responsible for looking for a pattern in a sequence
4
(including overlapping patterns!), separating patterns with fixed size
5
and patterns with maximal size (for problem localization purposes).
6

7
The module also implements functions to specify common DNA patterns:
8
homopolymers, repeats, enzymatic restriction sites.
9

10

11
"""
12

13
import re
1✔
14
from ..biotools import reverse_complement
1✔
15
from ..Location import Location
1✔
16

17

18
class SequencePattern:
    """Pattern that will be looked for in a DNA sequence.

    Use this class for matching regular expression patterns, and
    DnaNotationPattern for matching explicit sequences or sequences using Ns
    etc.

    Examples
    --------
    >>> expression = "A[ATGC]{3,}"
    >>> pattern = SequencePattern(expression)
    >>> constraint = AvoidPattern(pattern)

    Parameters
    ----------

    expression
      Any string or regular expression (regex) for matching ATGC nucleotides.
      Note that multi-nucleotide symbols such as "N" (for A-T-G-C), or "K"
      are not supported by this class, see DnaNotationPattern instead.

    size
      Size of the pattern, in number of characters. The ``size`` is used to
      determine the size of windows when performing local optimization and
      constraint solving. It can be important to provide the size when the
      ``pattern`` string provided represents a complex regular expression whose
      maximal matching size cannot be easily evaluated.
      For example, if a regex is used to actively remove sites, then a size
      should be provided to inform DNA Chisel during optimization.

    name
      Name of the pattern (will be displayed e.g. when the pattern is printed).

    lookahead
      Strategy used by ``find_matches_in_string``. With ``"loop"`` (default)
      the search is restarted one character after the start of each match, so
      that overlapping matches are reported. With ``"re"`` the expression is
      wrapped into a regex lookahead, ``(?=(expression))``, and the matches
      are collected with ``re.finditer``. Any other value also uses
      ``re.finditer``, but on the expression without the lookahead wrapper.

    is_palyndromic
      If True, ``find_matches`` only searches the forward strand of the
      location, even when the location's strand is -1 or 0 (the reverse
      complement search is skipped).
    """

    # Classes whose ``from_string`` is tried, in order, by
    # ``SequencePattern.from_string`` before defaulting to a SequencePattern.
    registered_string_pattern_classes = []

    def __init__(
        self,
        expression,
        size=None,
        name=None,
        lookahead="loop",
        is_palyndromic=False,
    ):
        self.expression = expression
        self.lookahead = lookahead
        if lookahead == "re":
            # A lookahead consumes no characters, which lets re.finditer
            # report overlapping occurrences; the inner group holds the match.
            expression = "(?=(%s))" % expression
        if "(" not in expression:
            # Guarantee that at least one capturing group exists, since the
            # finditer branch of find_matches_in_string reads groups()[0].
            expression = "(%s)" % expression
        self.lookahead_expression = expression
        self.compiled_expression = re.compile(self.lookahead_expression)
        self.size = size
        self.name = name
        self.is_palyndromic = is_palyndromic

    def find_matches(self, sequence, location=None, forced_strand=None):
        """Return the locations where the sequence matches the expression.

        Parameters
        ----------

        sequence
          A string of "ATGC..."

        location
          Location indicating a segment to which to restrict
          the search. Only patterns entirely included in the segment will be
          returned. Its ``strand`` selects the searched strand(s): 1 for the
          forward strand, -1 for the reverse complement, 0 for both.

        forced_strand
          Strand (1 or -1) on which to search, regardless of the strand of
          ``location``. A ``location`` must be provided with it. Used
          internally by the recursive calls.

        Returns
        -------

        matches
          A list of ``Location`` objects, one per match. Matches found on the
          reverse complement are expressed in the coordinates of ``sequence``
          and carry ``strand=-1``.

        """

        # THE FUNCTION HAS BEEN CALLED WITH A LOCATION AND A FORCED STRAND
        if forced_strand is not None:
            subsequence = sequence[location.start : location.end]
            if forced_strand == 1:
                # Shift the matches from subsequence to sequence coordinates.
                return [
                    (loc + location.start) for loc in self.find_matches(subsequence)
                ]
            if forced_strand == -1:
                subsequence = reverse_complement(subsequence)
                # Positions in the reverse complement are counted from the
                # end of the segment, hence the mirrored coordinates.
                return [
                    Location(
                        location.end - loc.end,
                        location.end - loc.start,
                        strand=-1,
                    )
                    for loc in self.find_matches(subsequence)
                ]

        # THE FUNCTION HAS BEEN CALLED WITH A LOCATION ONLY

        if location is not None:
            strand = location.strand
            if strand == 1:
                return self.find_matches(sequence, location, 1)
            if strand == -1:
                if self.is_palyndromic:
                    return self.find_matches(sequence, location, 1)
                else:
                    return self.find_matches(sequence, location, -1)
            if strand == 0:
                matches = self.find_matches(sequence, location, 1)
                if not self.is_palyndromic:
                    matches += self.find_matches(sequence, location, -1)
                return matches
            # NOTE: any other strand value falls through to the
            # whole-sequence search below, ignoring the location.

        # THE FUNCTION HAS BEEN CALLED WITH NO LOCATION/STRAND: WHOLE SEQUENCE

        matches = self.find_matches_in_string(sequence)
        return [Location(start, end, strand) for start, end, strand in matches]

    def find_matches_in_string(self, sequence):
        """Return all matches of the expression in the string ``sequence``.

        Parameters
        ----------

        sequence
          A string of "ATGC..."

        Returns
        -------

        matches
          A list of tuples ``(start, end, 1)`` giving the span of each match
          in ``sequence``, the last element being the strand (always 1 here).
          Overlapping matches are reported when ``lookahead`` is "loop" or
          "re".
        """
        if self.lookahead == "loop":
            search = self.compiled_expression.search
            matches = []
            position = 0  # offset of the current ``sequence`` in the original
            while True:
                result = search(sequence)
                if result is None:
                    return matches
                start, end = result.start(), result.end()
                matches.append((start + position, end + position, 1))
                if start >= len(sequence):
                    # Zero-width match at the very end of the sequence: the
                    # slice below could not shrink the sequence any further,
                    # so searching again would loop forever on that match.
                    return matches
                # Restart just after the match start to catch overlaps.
                sequence = sequence[start + 1 :]
                position += start + 1
        else:
            # The first group holds the matched text (the match itself has
            # zero width when the expression is wrapped in a lookahead).
            return [
                (match.start(), match.start() + len(match.groups()[0]), 1)
                for match in re.finditer(self.compiled_expression, sequence)
            ]

    def __str__(self):
        """Return the expression, followed by the name in parentheses if any."""
        return self.expression + ("" if self.name is None else " (%s)" % self.name)

    @classmethod
    def from_string(cls, string):
        """Build a pattern from a string.

        Each class in ``registered_string_pattern_classes`` is given a chance
        to interpret the string through its own ``from_string``; the first
        result that is not None is returned. If none applies, the string is
        used as the expression of a plain ``SequencePattern``.
        """
        for myclass in cls.registered_string_pattern_classes:
            pattern = myclass.from_string(string)
            if pattern is not None:
                return pattern
        return SequencePattern(string)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc