• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Edinburgh-Genome-Foundry / DnaChisel / 14225235949

02 Apr 2025 04:56PM UTC coverage: 90.508% (+0.5%) from 90.054%
14225235949

push

github

veghp
Bump to v3.2.14

1 of 1 new or added line in 1 file covered. (100.0%)

101 existing lines in 35 files now uncovered.

2994 of 3308 relevant lines covered (90.51%)

0.91 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

91.07
/dnachisel/builtin_specifications/codon_optimization/MatchTargetCodonUsage.py
1
import numpy as np
1✔
2
from ...Specification.SpecEvaluation import SpecEvaluation
1✔
3
from ...biotools import dict_to_pretty_string
1✔
4

5
from .BaseCodonOptimizationClass import BaseCodonOptimizationClass
1✔
6

7

8
class MatchTargetCodonUsage(BaseCodonOptimizationClass):
1✔
9
    """Codon-optimize a sequence so it has the same codon usage as a target.
10

11
    The objective minimized here is the sum of the discrepancies, over every
12
    possible triplet ATG, CCG, etc. between the codon frequency of this triplet
13
    in the sequence, and its frequency in the target organism.
14

15
    This method has had several names through the ages. It may have been first
16
    proposed by Hale and Thompson, 1998. It is called Individual Codon Usage
17
    Optimization in Chung 2012, Global CAI Harmonization in Mignon 2018, and
18
    Codon Harmonization in Jayaraj 2005. We didn't call it "harmonization"
19
    in DNA Chisel to avoid any confusion with the now more common
20
    host-to-target codon harmonization. See DnaChisel's HarmonizeRCA class
21
    for Codon Harmonization.
22

23
    Warning: always use this specification with an EnforceTranslation constraint
24
    defined over the same location, to preserve the amino acid sequence.
25

26

27
    Parameters
28
    ----------
29

30
    species
31
      Species for which the sequence will be codon-optimized.
32
      Either a TaxID (this requires a web connection as the corresponding table
33
      will be downloaded from the internet) or the name of the species to
34
      codon-optimize for (the name must be supported by ``python_codon_tables``
35
      e.g. ``e_coli``, ``s_cerevisiae``, ``h_sapiens``, ``c_elegans``,
36
      ``b_subtilis``, ``d_melanogaster``).
37
      Note that a ``codon_usage_table`` can be provided instead, or even in
38
      addition, for species whose codon usage table cannot be auto-imported.
39

40
    location
41
      Either a DnaChisel Location or a tuple of the form (start, end, strand)
42
      or just (start, end), with strand defaulting to +1, indicating the
43
      position of the gene to codon-optimize. If not provided, the whole
44
      sequence is considered as the gene. The location should have a length
45
      that is a multiple of 3. The location strand is either 1 if the gene is
46
      encoded on the (+) strand, or -1 for antisense.
47

48
    codon_usage_table
49
      A dict of the form ``{'*': {"TGA": 0.112, "TAA": 0.68}, 'K': ...}``
50
      giving the codon frequency table (relative usage of each codon;
51
      frequencies add up to 1, separately for each amino acid). Only
52
      provide if no ``species`` parameter was provided.
53

54
    boost
55
      Score multiplier (=weight) for when the specification is used as an
56
      optimization objective alongside competing objectives.
57

58
    References
59
    ----------
60
    Hale and Thompson, Codon Optimization of the Gene Encoding a
61
    Domain from Human Type 1 Neurofibromin Protein... Protein Expression and
62
    Purification 1998.
63

64
    Jayaraj et al. GeMS: an advanced software package for designing synthetic
65
    genes, Nucleic Acids Research, 2005
66

67
    Mignon et al. Codon harmonization – going beyond the speed limit for
68
    protein expression. FEBS Lett, 2018
69

70
    Chung BK, Lee DY. Computational codon optimization of synthetic gene for
71
    protein expression. BMC Syst Biol. 2012
72

73

74
    """
75

76
    shorthand_name = "match_codon_usage"
1✔
77

78
    def __init__(self, species=None, location=None, codon_usage_table=None, boost=1.0):
1✔
79
        BaseCodonOptimizationClass.__init__(
1✔
80
            self,
81
            species=species,
82
            location=location,
83
            codon_usage_table=codon_usage_table,
84
            boost=boost,
85
        )
86
        self.codons_translations = self.get_codons_translations()
1✔
87

88
    def codon_usage_matching_stats(self, problem):
1✔
89
        """Return a codon harmonisation score and a suboptimal locations list.
90

91
        Parameters
92
        ----------
93

94
        sequence
95
          An ATGC string
96

97
        species
98
          Any species name from the DnaChisel codon tables, such as ``e_coli``.
99

100
        Returns
101
        -------
102
        score, list_of_over_represented_codons_positions
103
          ``score`` is a negative number equal to -sum(|fi - ei|) where for the
104
          i-th codon in the sequence fi is the relative frequency of this
105
          triplet in the sequence and ei is the relative frequency in the
106
          reference species. The ``list_of_over_represented_codons_positions`` is
107
          of the form [1, 4, 5, 6...]; a number k in that list indicates that
108
          the k-th codon is over-represented, and that a synonymous mutation
109
          of this codon can improve the harmonization score.
110

111
        """
112
        codons = self.get_codons(problem)
1✔
113
        codons_positions, aa_comparisons = self.compare_frequencies(codons)
1✔
114
        score = 0
1✔
115
        nonoptimal_aa_indices = []
1✔
116
        for aa, data in aa_comparisons.items():
1✔
117
            total = data.pop("total")
1✔
118
            for codon, codon_freq in data.items():
1✔
119
                frequency_diff = codon_freq["sequence"] - codon_freq["table"]
1✔
120
                score -= total * abs(frequency_diff)
1✔
121
                if codon_freq["sequence"] > codon_freq["table"]:
1✔
122
                    nonoptimal_aa_indices += codons_positions[codon]
1✔
123
        return score, nonoptimal_aa_indices
1✔
124

125
    def evaluate(self, problem):
1✔
126
        """Evaluate on a problem"""
127
        score, nonoptimal_indices = self.codon_usage_matching_stats(problem)
1✔
128
        locations = self.codons_indices_to_locations(nonoptimal_indices)
1✔
129
        np.random.shuffle(locations)
1✔
130
        return SpecEvaluation(
1✔
131
            self,
132
            problem,
133
            score=score,
134
            locations=locations,
135
            message="Codon opt. on window %s scored %.02E" % (self.location, score),
136
        )
137

138
    def localized_on_window(self, new_location, start_codon, end_codon):
1✔
139
        """Relocate without changing much."""
140
        return self
1✔
141

142
    def label_parameters(self):
1✔
UNCOV
143
        return ["(custom table)" if self.species is None else self.species]
×
144

145
    def compare_frequencies(self, codons, text_mode=False):
1✔
146
        """Return a dict indicating differences between codons frequencies in
147
        the sequence and in this specification's codon usage table.
148

149
        Examples
150
        --------
151

152
        >>> codons = spec.get_codons(problem)
153
        >>> print(spec.compare_frequencies(codons))
154

155
        Returns
156
        -------
157

158
        positions, comparisons
159
          (if text_mode = False)
160

161
        a formatted print-ready string
162
          (if text_mode = True)
163

164
        >>> {
165
        >>>   "K": {
166
        >>>     "total": 6,
167
        >>>     "AAA": {
168
        >>>         "sequence": 1.0,
169
        >>>         "table": 0.7
170
        >>>     },
171
        >>>     ...
172
        >>>   },
173
        >>>   "D": ...
174
        >>> }
175

176
        """
177
        codons_positions = {cod: [] for cod in self.codons_translations}
1✔
178
        for i, codon in enumerate(codons):
1✔
179
            codons_positions[codon].append(i)
1✔
180
        # aa: amino-acid
181
        codons_frequencies = {aa: {"total": 0} for aa in self.codon_usage_table}
1✔
182
        for codon, positions in codons_positions.items():
1✔
183
            count = len(positions)
1✔
184
            aa = self.codons_translations[codon]
1✔
185
            codons_frequencies[aa][codon] = count
1✔
186
            codons_frequencies[aa]["total"] += count
1✔
187
        for aa, data in codons_frequencies.items():
1✔
188
            total = max(1, data["total"])
1✔
189
            for codon, value in data.items():
1✔
190
                if codon != "total":
1✔
191
                    data[codon] = 1.0 * value / total
1✔
192
        codons_frequencies = {
1✔
193
            aa: data for aa, data in codons_frequencies.items() if data["total"]
194
        }
195
        comparisons = {
1✔
196
            aa: {
197
                "total": seq_data["total"],
198
                **{
199
                    codon: {"sequence": seq_data[codon], "table": table_data}
200
                    for codon, table_data in self.codon_usage_table[aa].items()
201
                },
202
            }
203
            for aa, seq_data in codons_frequencies.items()
204
        }
205
        if text_mode:
1✔
206
            return dict_to_pretty_string(comparisons)
1✔
207
        else:
208
            return codons_positions, comparisons
1✔
209

210
    def short_label(self):
1✔
UNCOV
211
        result = "match-codon-usage"
×
UNCOV
212
        if self.species is not None:
×
213
            result += " (%s)" % self.species
×
214
        return result
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc