• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

openvax / isovar / 7893503243

13 Feb 2024 10:21PM UTC coverage: 98.446% (+5.7%) from 92.704%
7893503243

push

github

iskandr
updated tests

2977 of 3024 relevant lines covered (98.45%)

2.94 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.29
/isovar/genetic_code.py
1
# Licensed under the Apache License, Version 2.0 (the "License");
2
# you may not use this file except in compliance with the License.
3
# You may obtain a copy of the License at
4
#
5
#     http://www.apache.org/licenses/LICENSE-2.0
6
#
7
# Unless required by applicable law or agreed to in writing, software
8
# distributed under the License is distributed on an "AS IS" BASIS,
9
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
# See the License for the specific language governing permissions and
11
# limitations under the License.
12

13
from __future__ import print_function, division, absolute_import
3✔
14

15
"""
16
GeneticCode objects contain the rules for translating cDNA into a protein
17
sequence: the set of valid start and stop codons, as well as which
18
amino acid each DNA triplet is translated into.
19
"""
20

21

22
class GeneticCode(object):
    """
    Represents distinct translation tables to go from cDNA triplets to amino
    acids.

    Attributes
    ----------
    name : str
        Label identifying this genetic code.

    start_codons : set of str
        Codons at which translation may be initiated.

    stop_codons : set of str
        Codons which terminate translation.

    codon_table : dict
        Maps every DNA triplet to a single-letter amino acid code, with
        "*" used for stop codons.
    """
    def __init__(self, name, start_codons, stop_codons, codon_table):
        self.name = name
        # Copy every collection so this object never shares mutable state
        # with the caller (or with the GeneticCode it was copied from).
        self.start_codons = set(start_codons)
        self.stop_codons = set(stop_codons)
        self.codon_table = dict(codon_table)
        self._check_codons()

    def _check_codons(self):
        """
        Validate the codon table against the start and stop codons.

        If the codon table is missing stop codons, then add them (mapped
        to "*").

        Raises
        ------
        ValueError
            If a stop codon is mapped to anything other than "*", a start
            codon is absent from the codon table, a codon which is not a
            stop codon is mapped to "*", or the table does not contain
            exactly 64 codons.
        """
        for stop_codon in self.stop_codons:
            # setdefault fills in a missing stop codon and otherwise returns
            # whatever the table already says about it.
            amino_acid = self.codon_table.setdefault(stop_codon, "*")
            if amino_acid != "*":
                raise ValueError(
                    ("Codon '%s' is in stop_codons, but codon table "
                     "translates it to '%s' instead of '*'") % (
                        stop_codon, amino_acid))

        for start_codon in self.start_codons:
            if start_codon not in self.codon_table:
                raise ValueError(
                    "Start codon '%s' missing from codon table" % (
                        start_codon,))

        for codon, amino_acid in self.codon_table.items():
            if amino_acid == "*" and codon not in self.stop_codons:
                raise ValueError(
                    "Non-stop codon '%s' can't translate to '*'" % (
                        codon,))

        if len(self.codon_table) != 64:
            raise ValueError(
                "Expected 64 codons but found %d in codon table" % (
                    len(self.codon_table),))

    def translate(self, cdna_sequence, first_codon_is_start=False):
        """
        Given a cDNA sequence which is aligned to a reading frame, returns
        the translated protein sequence and a boolean flag indicating whether
        the translated sequence ended on a stop codon (or just ran out of codons).

        Parameters
        ----------
        cdna_sequence : str
            cDNA sequence which is expected to start and end on complete codons.
            Values which are not str are converted with str().

        first_codon_is_start : bool
            Is the first codon of the sequence a start codon? If so, and the
            first codon is one of this code's start codons, it is translated
            as 'M' regardless of its entry in the codon table.

        Returns
        -------
        (str, bool)
            The amino acid sequence (the stop codon itself is not included)
            and whether translation terminated at a stop codon.

        Raises
        ------
        KeyError
            If a codon of the sequence is not present in the codon table.
        """
        if not isinstance(cdna_sequence, str):
            cdna_sequence = str(cdna_sequence)
        n = len(cdna_sequence)

        # trim to multiple of 3 length, if there are 1 or 2 nucleotides
        # dangling at the end of an mRNA they will not affect translation
        # since ribosome will fall off at that point
        end_idx = 3 * (n // 3)

        codon_table = self.codon_table
        if first_codon_is_start and cdna_sequence[:3] in self.start_codons:
            # any start codon initiates translation with methionine
            amino_acid_list = ['M']
            start_index = 3
        else:
            start_index = 0
            amino_acid_list = []

        ends_with_stop_codon = False
        for i in range(start_index, end_idx, 3):
            codon = cdna_sequence[i:i + 3]
            aa = codon_table[codon]

            if aa == "*":
                ends_with_stop_codon = True
                break
            amino_acid_list.append(aa)

        amino_acids = "".join(amino_acid_list)
        return amino_acids, ends_with_stop_codon

    def copy(
            self,
            name,
            start_codons=None,
            stop_codons=None,
            codon_table=None,
            codon_table_changes=None):
        """
        Make copy of this GeneticCode object with optional replacement
        values for all fields.

        Parameters
        ----------
        name : str
            Name of the new genetic code.

        start_codons : collection of str, optional
            Replacement start codons; defaults to this object's start codons.

        stop_codons : collection of str, optional
            Replacement stop codons; defaults to this object's stop codons.

        codon_table : dict, optional
            Replacement codon table; defaults to this object's codon table.

        codon_table_changes : dict, optional
            Entries applied on top of the chosen codon table.

        Returns
        -------
        GeneticCode
            A new object; neither this object nor any argument is modified.

        Raises
        ------
        ValueError
            If the resulting combination of codons fails validation.
        """
        new_start_codons = set(
            self.start_codons if start_codons is None else start_codons)

        new_stop_codons = set(
            self.stop_codons if stop_codons is None else stop_codons)

        # Always work on a private dict so that applying codon_table_changes
        # can never modify a codon_table passed in by the caller.
        new_codon_table = dict(
            self.codon_table if codon_table is None else codon_table)

        if codon_table_changes is not None:
            new_codon_table.update(codon_table_changes)

        return GeneticCode(
            name=name,
            start_codons=new_start_codons,
            stop_codons=new_stop_codons,
            codon_table=new_codon_table)
3✔
142

143
# The codon table is written in the conventional compact layout: codons are
# enumerated with each position cycling through the bases in T, C, A, G order
# (first base slowest, third base fastest) and paired positionally with one
# amino acid letter per codon, where "*" marks a stop codon.
standard_genetic_code = GeneticCode(
    name="standard",
    start_codons={'ATG', 'CTG', 'TTG'},
    stop_codons={'TAA', 'TAG', 'TGA'},
    codon_table=dict(zip(
        (first + second + third
         for first in "TCAG"
         for second in "TCAG"
         for third in "TCAG"),
        "FFLLSSSSYY**CC*W"      # codons starting with T
        "LLLLPPPPHHQQRRRR"      # codons starting with C
        "IIIMTTTTNNKKSSRR"      # codons starting with A
        "VVVVAAAADDEEGGGG",     # codons starting with G
    )),
)
166

167
# Standard genetic code extended with non-canonical start sites, based on
# figure 2 of
#   "Global mapping of translation initiation sites in mammalian
#   cells at single-nucleotide resolution"
standard_genetic_code_with_extra_start_codons = standard_genetic_code.copy(
    name="standard-with-extra-start-codons",
    start_codons=standard_genetic_code.start_codons | {
        'GTG', 'AGG', 'ACG', 'AAG', 'ATC', 'ATA', 'ATT',
    },
)
180

181
# Vertebrate mitochondrial genetic code, derived from the standard code by
# replacing the start/stop codons and re-assigning two codon table entries.
vertebrate_mitochondrial_genetic_code = standard_genetic_code.copy(
    name="vertebrate-mitochondrial",
    # "For thirty years AGA and AGG were considered terminators instead
    #  of coding for arginine. However, Temperley (2010) has recently shown
    #  that human mitochondria use only UAA and UAG stop codons."
    # (http://mitomap.org/bin/view.pl/MITOMAP/HumanMitoCode)
    stop_codons={'TAA', 'TAG'},
    # "AUU codes for isoleucine during elongation but can code for
    #  methionine for initiation (ND2) See Fearnley & Walker (1987) and
    #  Peabody (1989)."
    # (http://mitomap.org/bin/view.pl/MITOMAP/HumanMitoCode)
    start_codons={'ATT', 'ATC', 'ATA', 'ATG', 'GTG'},
    # "UGA codes for tryptophan instead of termination and AUA codes for
    #  methionine instead of isoleucine."
    # (http://mitomap.org/bin/view.pl/MITOMAP/HumanMitoCode)
    codon_table_changes={'TGA': 'W', 'ATA': 'M'},
)
198

199

200
def translate_cdna(
        cdna_sequence,
        first_codon_is_start=False,
        mitochondrial=False):
    """
    Translate a cDNA sequence which is aligned to a reading frame, using
    either the standard genetic code (with extra non-canonical start
    codons) or the vertebrate mitochondrial genetic code.

    Returns the translated protein sequence and a boolean flag indicating
    whether the translated sequence ended on a stop codon (or just ran out
    of codons).

    Parameters
    ----------
    cdna_sequence : str
        cDNA sequence which is expected to start and end on complete codons.

    first_codon_is_start : bool
        Is the first codon of the sequence a start codon? Passed through
        unchanged to the selected genetic code's translate method.

    mitochondrial : bool
        Use the mitochondrial codon table instead of standard
        codon to amino acid table.
    """
    genetic_code = (
        vertebrate_mitochondrial_genetic_code
        if mitochondrial
        else standard_genetic_code_with_extra_start_codons)

    return genetic_code.translate(
        cdna_sequence=cdna_sequence,
        first_codon_is_start=first_codon_is_start)
3✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc