• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Edinburgh-Genome-Foundry / DnaChisel / 5190565251

pending completion
5190565251

push

github

veghp
Bump to v3.2.11

1 of 1 new or added line in 1 file covered. (100.0%)

2966 of 3299 relevant lines covered (89.91%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

90.0
/dnachisel/builtin_specifications/codon_optimization/HarmonizeRCA.py
1
import numpy as np
1✔
2

3
from ...Specification.SpecEvaluation import SpecEvaluation
1✔
4
from .BaseCodonOptimizationClass import BaseCodonOptimizationClass
1✔
5

6

7
class HarmonizeRCA(BaseCodonOptimizationClass):
    """Codon-harmonize a native sequence for a new host (Claassens method).

    This specification will optimize a Sequence 1 from Host 1 into a Sequence
    2 for target Host 2.

    In short, rare Host 1 codons will be replaced by rare Host 2 codons, and
    high-frequency Host 1 codons will get replaced by codons that are
    high-frequency in Host 2.

    More specifically, each codon along Sequence 1 gets replaced by the codon
    whose Relative Codon Adaptiveness (RCA) in Host 2 is the closest to the
    RCA of the original codon in Host 1. A codon's RCA in a given organism is
    defined by f/fmax where f is the codon's frequency in the organism and fmax
    is the highest frequency of all synonymous codons.

    The minimized quantity is sum_i abs(RCA(c_i, H1) - RCA(c'_i, H2))
    where c_i, c'_i represent the i-th codon before and after optimization.

    This method is taken from Claassens 2017, where they simplify a previous
    algorithm (Angov 2008), which was much more complicated as it involved
    predicting "ribosome pausing" sites in the sequence.

    Warning: always use with an EnforceTranslation constraint.


    Parameters
    ----------
    species
      Name or TaxID of the species for which to optimize the sequence. A custom
      codon_usage_table can be provided instead (or in addition, for species
      names whose codon usage table cannot be imported). A string of the form
      ``"original -> target"`` sets both ``original_species`` and ``species``.

    codon_usage_table
      Optional - can be provided instead of ``species``. A dict of the form
      ``{'*': {"TGA": 0.112, "TAA": 0.68}, 'K': ...}`` giving the codon
      frequency table (relative usage of each codon; frequencies add up to 1,
      separately for each amino acid).

    original_species
      Name or TaxID of the species the original sequence was taken from. This
      information will be used to spot codons which are supposed to be rare
      or common. A codon_usage_table can be provided instead (or in addition,
      for species names whose codon usage table cannot be imported).

    original_codon_usage_table
      A dict of the form ``{'*': {"TGA": 0.112, "TAA": 0.68}, 'K': ...}``
      giving the codon frequency table (relative usage of each codon;
      frequencies add up to 1, separately for each amino acid).

    location
      Location on which the specification applies.

    boost
      Score multiplier (=weight) for when the specification is used as an
      optimization objective alongside competing objectives.


    References
    ----------
    Claassens et al., Improving heterologous membrane protein
    production in Escherichia coli by combining transcriptional tuning and
    codon usage algorithms. PLOS One, 2017
    """

    shorthand_name = "harmonize_rca"

    def __init__(
        self,
        species=None,
        codon_usage_table=None,
        original_species=None,
        original_codon_usage_table=None,
        location=None,
        boost=1,
    ):
        # Shorthand form: species="original -> target" provides both hosts.
        if isinstance(species, str) and "->" in species:
            original_species, species = species.split("->")
            species = species.strip()
            original_species = original_species.strip()
        BaseCodonOptimizationClass.__init__(
            self,
            species=species,
            codon_usage_table=codon_usage_table,
            location=location,
            boost=boost,
        )
        self.codons_synonyms = self.get_codons_synonyms()
        self.original_species = original_species
        self.original_codon_usage_table = self.get_codons_table(
            original_species, original_codon_usage_table
        )
        # The RCA values are stored in the usage tables themselves, under the
        # "RCA" key, and are only computed when a table does not have them yet.
        for table in [self.codon_usage_table, self.original_codon_usage_table]:
            if "RCA" not in table:
                table["RCA"] = self._compute_rca(table)

    @staticmethod
    def _compute_rca(table):
        """Return ``{codon: f / fmax}`` for a codon usage table.

        Only single-character keys (amino-acid letters and ``*``) are read;
        any other entry of the table is skipped without being inspected.
        """
        rca = {}
        for aa, codons_frequencies in table.items():
            if len(aa) != 1 or not codons_frequencies:
                continue
            # fmax is shared by all synonymous codons: compute it once.
            max_frequency = max(codons_frequencies.values())
            for codon, frequency in codons_frequencies.items():
                rca[codon] = frequency / max_frequency
        return rca

    def initialized_on_problem(self, problem, role):
        """Return a copy of the specification bound to ``problem``.

        The copy records the codons of the problem's sequence at
        initialization time (``original_codons``) and, for each of them, the
        smallest discrepancy any synonymous codon can achieve
        (``smallest_possible_discrepancies``). ``role`` is not used here.
        """
        new_spec = self._copy_with_full_span_if_no_location(problem)
        new_spec.original_codons = new_spec.get_codons(problem)
        rca = new_spec.codon_usage_table["RCA"]
        rca_o = new_spec.original_codon_usage_table["RCA"]
        # The reference value is the RCA of the codon present in the original
        # sequence (in the original host); each synonym is compared to it
        # through its RCA in the target host, exactly as ``evaluate`` does.
        new_spec.smallest_possible_discrepancies = [
            min(abs(rca_o[codon] - rca[c]) for c in self.codons_synonyms[codon])
            for codon in new_spec.original_codons
        ]
        return new_spec

    def evaluate(self, problem):
        """Return the harmonization evaluation of the problem's sequence.

        The score is minus the sum, over all codons, of the absolute
        difference between the RCA of the original codon in the original host
        and the RCA of the current codon in the target host. The returned
        locations are the codons whose discrepancy differs from the smallest
        possible one.
        """
        codons = self.get_codons(problem)

        if len(codons) == 1:
            # Single-codon window: no array arithmetic needed.
            codon = codons[0]
            original = self.original_codons[0]
            rca_codon = self.codon_usage_table["RCA"][codon]
            rca_original = self.original_codon_usage_table["RCA"][original]
            score = -abs(rca_codon - rca_original)
            return SpecEvaluation(
                self,
                problem,
                score=score,
                locations=[] if (score == 0) else [self.location],
                message="Codon harmonization on window %s scored %.02E"
                % (self.location, score),
            )

        rca_in_original_species = np.array(
            [
                self.original_codon_usage_table["RCA"][original_codon]
                for original_codon in self.original_codons
            ]
        )
        rca_in_target_species = np.array(
            [self.codon_usage_table["RCA"][codon] for codon in codons]
        )
        discrepancies = np.abs(rca_in_original_species - rca_in_target_species)
        # A codon is optimal when its discrepancy equals the precomputed
        # minimum for its position; every other index is reported.
        non_optimality = (
            np.asarray(self.smallest_possible_discrepancies) - discrepancies
        )
        nonoptimal_indices = np.nonzero(non_optimality)[0]
        locations = self.codons_indices_to_locations(nonoptimal_indices)
        score = -discrepancies.sum()
        return SpecEvaluation(
            self,
            problem,
            score=score,
            locations=locations,
            message="Codon harmonization on %s scored %.02E" % (self.location, score),
        )

    def label_parameters(self):
        """Return the label parameters: ``["<original> -> <target>"]``.

        ``["(custom table)"]`` is returned when no target species is set. An
        unset original species is shown as ``(custom table)``.
        """
        if self.species is None:
            return ["(custom table)"]
        original = self.original_species
        if original is None:
            original = "(custom table)"
        # %-formatting also accepts TaxIDs given as integers.
        return ["%s -> %s" % (original, self.species)]

    def short_label(self):
        """Return a short label, with the target species when it is set."""
        result = "harmonize-rca"
        if self.species is not None:
            result += " (%s)" % self.species
        return result
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc