• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Edinburgh-Genome-Foundry / DnaChisel / 5190565251

pending completion
5190565251

push

github

veghp
Bump to v3.2.11

1 of 1 new or added line in 1 file covered. (100.0%)

2966 of 3299 relevant lines covered (89.91%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

91.07
/dnachisel/builtin_specifications/codon_optimization/MatchTargetCodonUsage.py
1
import numpy as np
1✔
2
from ...Specification.SpecEvaluation import SpecEvaluation
1✔
3
from ...biotools import dict_to_pretty_string
1✔
4

5
from .BaseCodonOptimizationClass import BaseCodonOptimizationClass
1✔
6

7

8
class MatchTargetCodonUsage(BaseCodonOptimizationClass):
1✔
9
    """Codon-optimize a sequence so it has the same codon usage as a target.
10

11
    The objective minimized here is the sum of the discrepancies, over every
12
    possible triplet ATG, CCG, etc. between the codon frequency of this triplet
13
    in the sequence, and its frequency in the target organism.
14

15
    This method has had several names through the ages. It may have been first
16
    proposed by Hale and Thompson, 1998. It is called Individual Codon Usage
17
    Optimization in Chung 2012, Global CAI Harmonization in Mignon 2018, and
18
    Codon Harmonization in Jayaraj 2005. We didn't call it "harmonization"
19
    in DNA Chisel to avoid any confusion with the now more common
20
    host-to-target codon harmonization. See DnaChisel's HarmonizeRCA class
21
    for Codon Harmonization.
22

23
    Parameters
24
    ----------
25

26
    species
27
      Species for which the sequence will be codon-optimized.
28
      Either a TaxID (this requires a web connection as the corresponding table
29
      will be downloaded from the internet) or the name of the species to
30
      codon-optimize for (the name must be supported by ``python_codon_tables``
31
      e.g. ``e_coli``, ``s_cerevisiae``, ``h_sapiens``, ``c_elegans``,
32
      ``b_subtilis``, ``d_melanogaster``).
33
      Note that a ``codon_usage_table`` can be provided instead, or even in
34
      addition, for species whose codon usage table cannot be auto-imported.
35

36
    location
37
      Either a DnaChisel Location or a tuple of the form (start, end, strand)
38
      or just (start, end), with strand defaulting to +1, indicating the
39
      position of the gene to codon-optimize. If not provided, the whole
40
      sequence is considered as the gene. The location should have a length
41
      that is a multiple of 3. The location strand is either 1 if the gene is
42
      encoded on the (+) strand, or -1 for antisense.
43

44
    codon_usage_table
45
      A dict of the form ``{'*': {"TGA": 0.112, "TAA": 0.68}, 'K': ...}``
46
      giving the codon frequency table (relative usage of each codon;
47
      frequencies add up to 1, separately for each amino acid). Only
48
      provide if no ``species`` parameter was provided.
49

50
    boost
51
      Score multiplier (=weight) for when the specification is used as an
52
      optimization objective alongside competing objectives.
53

54
    References
55
    ----------
56
    Hale and Thompson, Codon Optimization of the Gene Encoding a
57
    Domain from Human Type 1 Neurofibromin Protein... Protein Expression and
58
    Purification 1998.
59

60
    Jayaraj et al. GeMS: an advanced software package for designing synthetic
61
    genes, Nucleic Acids Research, 2005
62

63
    Mignon et al. Codon harmonization – going beyond the speed limit for
64
    protein expression. FEBS Lett, 2018
65

66
    Chung BK, Lee DY. Computational codon optimization of synthetic gene for
67
    protein expression. BMC Syst Biol. 2012
68

69

70
    """
71

72
    shorthand_name = "match_codon_usage"
1✔
73

74
    def __init__(
1✔
75
        self, species=None, location=None, codon_usage_table=None, boost=1.0
76
    ):
77
        BaseCodonOptimizationClass.__init__(
1✔
78
            self,
79
            species=species,
80
            location=location,
81
            codon_usage_table=codon_usage_table,
82
            boost=boost,
83
        )
84
        self.codons_translations = self.get_codons_translations()
1✔
85

86
    def codon_usage_matching_stats(self, problem):
1✔
87
        """Return a codon harmonisation score and a suboptimal locations list.
88

89
        Parameters
90
        ----------
91

92
        problem
93
          The problem to evaluate; its codons are read via ``get_codons``.
94

95
        (The target frequencies come from this specification's own
96
        ``codon_usage_table``; no species is passed to this method.)
97

98
        Returns
99
        -------
100
        score, list_of_over_represented_codons_positions
101
          ``score`` is a non-positive number equal to -sum(ni * |fi - ei|) where,
102
          for each codon i, fi is its relative frequency in the sequence, ei its
103
          relative frequency in the reference table and ni the number of codons
104
          for the same amino acid in the sequence. The second item is a list of
105
          the form [1, 4, 5, 6...]: a number k in that list indicates that the
106
          k-th codon is over-represented, and that a synonymous mutation of
107
          this codon can improve the harmonization score.
108

109
        """
110
        codons = self.get_codons(problem)
1✔
111
        codons_positions, aa_comparisons = self.compare_frequencies(codons)
1✔
112
        score = 0
1✔
113
        nonoptimal_aa_indices = []
1✔
114
        for aa, data in aa_comparisons.items():
1✔
115
            total = data.pop("total")
1✔
116
            for codon, codon_freq in data.items():
1✔
117
                frequency_diff = codon_freq["sequence"] - codon_freq["table"]
1✔
118
                score -= total * abs(frequency_diff)
1✔
119
                if codon_freq["sequence"] > codon_freq["table"]:
1✔
120
                    nonoptimal_aa_indices += codons_positions[codon]
1✔
121
        return score, nonoptimal_aa_indices
1✔
122

123
    def evaluate(self, problem):
1✔
124
        """Evaluate on a problem"""
125
        score, nonoptimal_indices = self.codon_usage_matching_stats(problem)
1✔
126
        locations = self.codons_indices_to_locations(nonoptimal_indices)
1✔
127
        np.random.shuffle(locations)
1✔
128
        return SpecEvaluation(
1✔
129
            self,
130
            problem,
131
            score=score,
132
            locations=locations,
133
            message="Codon opt. on window %s scored %.02E"
134
            % (self.location, score),
135
        )
136

137
    def localized_on_window(self, new_location, start_codon, end_codon):
1✔
138
        """Relocate without changing much."""
139
        return self
1✔
140

141
    def label_parameters(self):
1✔
142
        return ["(custom table)" if self.species is None else self.species]
×
143

144
    def compare_frequencies(self, codons, text_mode=False):
1✔
145
        """Return a dict indicating differences between codons frequencies in
146
        the sequence and in this specification's codon usage table.
147

148
        Examples
149
        --------
150

151
        >>> codons = spec.get_codons(problem)
152
        >>> print(spec.compare_frequencies(codons))
153

154
        Returns
155
        -------
156

157
        positions, comparisons
158
          (if text_mode = False)
159

160
        a formatted print-ready string
161
          (if text_mode = True)
162

163
        >>> {
164
        >>>   "K": {
165
        >>>     "total": 6,
166
        >>>     "AAA": {
167
        >>>         "sequence": 1.0,
168
        >>>         "table": 0.7
169
        >>>     },
170
        >>>     ...
171
        >>>   },
172
        >>>   "D": ...
173
        >>> }
174

175
        """
176
        codons_positions = {cod: [] for cod in self.codons_translations}
1✔
177
        for i, codon in enumerate(codons):
1✔
178
            codons_positions[codon].append(i)
1✔
179
        # aa: amino-acid
180
        codons_frequencies = {
1✔
181
            aa: {"total": 0} for aa in self.codon_usage_table
182
        }
183
        for codon, positions in codons_positions.items():
1✔
184
            count = len(positions)
1✔
185
            aa = self.codons_translations[codon]
1✔
186
            codons_frequencies[aa][codon] = count
1✔
187
            codons_frequencies[aa]["total"] += count
1✔
188
        for aa, data in codons_frequencies.items():
1✔
189
            total = max(1, data["total"])
1✔
190
            for codon, value in data.items():
1✔
191
                if codon != "total":
1✔
192
                    data[codon] = 1.0 * value / total
1✔
193
        codons_frequencies = {
1✔
194
            aa: data
195
            for aa, data in codons_frequencies.items()
196
            if data["total"]
197
        }
198
        comparisons = {
1✔
199
            aa: {
200
                "total": seq_data["total"],
201
                **{
202
                    codon: {"sequence": seq_data[codon], "table": table_data}
203
                    for codon, table_data in self.codon_usage_table[aa].items()
204
                },
205
            }
206
            for aa, seq_data in codons_frequencies.items()
207
        }
208
        if text_mode:
1✔
209
            return dict_to_pretty_string(comparisons)
1✔
210
        else:
211
            return codons_positions, comparisons
1✔
212
    def short_label(self):
1✔
213
        result = "match-codon-usage"
×
214
        if self.species is not None:
×
215
            result += " (%s)" % self.species
×
216
        return result
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc