• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Edinburgh-Genome-Foundry / DnaChisel / 5190565251

pending completion
5190565251

push

github

veghp
Bump to v3.2.11

1 of 1 new or added line in 1 file covered. (100.0%)

2966 of 3299 relevant lines covered (89.91%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.57
/dnachisel/builtin_specifications/codon_optimization/MaximizeCAI.py
1
import numpy as np
1✔
2

3
from .BaseCodonOptimizationClass import BaseCodonOptimizationClass
1✔
4
from ...Specification.SpecEvaluation import SpecEvaluation
1✔
5

6

7
class MaximizeCAI(BaseCodonOptimizationClass):
    """Codon-optimize a coding sequence for a given species. Maximizes the CAI.

    To be precise, the score computed by this specification is N*log(CAI) where
    N is the number of codons. Maximizing this score also maximizes the CAI.

    CAI stands for Codon Adaptation Index. For a sequence with N codons, the
    CAI is the geometric mean of the Relative Codon Adaptiveness (RCA) of the
    different codons. The RCA of a codon is (f_i/fmax_i) where f_i is the
    frequency of the codon in the codon usage table, and fmax_i is the maximal
    frequency of the synonymous codons.

    So N*log(CAI) = sum_i ( log(f_i) - log(fmax_i) )

    This score is between -inf. and 0 (0 meaning a perfectly optimal sequence).
    Note that a frequency of 0 in the table is replaced by 0.001 before taking
    the logarithm, so that the score remains finite.

    Parameters
    ----------

    species
      Species for which the sequence will be codon-optimized.
      Either a TaxID (this requires a web connection as the corresponding table
      will be downloaded from the internet) or the name of the species to
      codon-optimize for (the name must be supported by ``python_codon_tables``
      e.g. ``e_coli``, ``s_cerevisiae``, ``h_sapiens``, ``c_elegans``,
      ``b_subtilis``, ``d_melanogaster``).
      Note that a ``codon_usage_table`` can be provided instead, or even in
      addition, for species whose codon usage table cannot be auto-imported.

    location
      Either a DnaChisel Location or a tuple of the form (start, end, strand)
      or just (start, end), with strand defaulting to +1, indicating the
      position of the gene to codon-optimize. If not provided, the whole
      sequence is considered as the gene. The location should have a length
      that is a multiple of 3. The location strand is either 1 if the gene is
      encoded on the (+) strand, or -1 for antisense.

    codon_usage_table
      A dict of the form ``{'*': {"TGA": 0.112, "TAA": 0.68}, 'K': ...}``
      giving the codon frequency table (relative usage of each codon;
      frequencies add up to 1, separately for each amino acid). Only
      provide if no ``species`` parameter was provided.
      The constructor caches two extra entries in this dict
      (``"log_best_frequencies"`` and ``"log_codons_frequencies"``).

    boost
      Score multiplicator (=weight) for when the specification is used as an
      optimization objective alongside competing objectives.

    Examples
    --------

    >>> objective = MaximizeCAI(
    >>>     species = "e_coli",
    >>>     location = (150, 300, -1), # coordinates and strand of a gene
    >>> )


    """

    shorthand_name = "use_best_codon"

    def __init__(
        self, species=None, location=None, codon_usage_table=None, boost=1.0
    ):
        """Initialize the specification and cache log-frequency lookups.

        The parameters are forwarded unchanged to
        ``BaseCodonOptimizationClass.__init__``; see the class docstring for
        their meaning.
        """
        BaseCodonOptimizationClass.__init__(
            self,
            species=species,
            location=location,
            codon_usage_table=codon_usage_table,
            boost=boost,
        )
        self.codons_translations = self.get_codons_translations()
        table = self.codon_usage_table
        # The log tables are stored in the usage table itself, under
        # multi-character keys, and are only computed when absent. Amino-acid
        # entries are recognized by their one-character key.
        if "log_best_frequencies" not in table:
            table["log_best_frequencies"] = {
                # Same zero-guard as for the individual codons below, so an
                # amino acid whose frequencies are all 0 does not yield -inf.
                aa: np.log(max(aa_data.values()) or 0.001)
                for aa, aa_data in table.items()
                if len(aa) == 1
            }
        if "log_codons_frequencies" not in table:
            table["log_codons_frequencies"] = {
                codon: np.log(frequency or 0.001)
                for aa, frequencies in table.items()
                # Filter before the inner loop so that non-amino-acid entries
                # (e.g. the cache above) are never iterated.
                if len(aa) == 1
                for codon, frequency in frequencies.items()
            }

    def evaluate(self, problem):
        """Score the codons of ``problem`` against the best synonymous codons.

        The score is the sum over the codons returned by
        ``self.get_codons(problem)`` of ``log(f_i) - log(fmax_i)``, i.e. 0 when
        every codon is the most frequent one for its amino acid and negative
        otherwise.

        Returns
        -------
        SpecEvaluation
          Carries the score, the locations of the non-optimal codons, and a
          summary message.
        """
        codons = self.get_codons(problem)
        ct = self.codons_translations
        log_frequencies = self.codon_usage_table["log_codons_frequencies"]
        log_best = self.codon_usage_table["log_best_frequencies"]
        if len(codons) == 1:
            # We are evaluating a single codon. Easy! No array needed, and the
            # only possible sub-optimal location is the whole window.
            codon = codons[0]
            freq = log_frequencies[codon]
            optimal = log_best[ct[codon]]
            score = freq - optimal
            return SpecEvaluation(
                self,
                problem,
                score=score,
                locations=[] if (freq == optimal) else [self.location],
                message="Codon opt. on window %s scored %.02E"
                % (self.location, score),
            )
        current_usage = [log_frequencies[codon] for codon in codons]
        optimal_usage = [log_best[ct[codon]] for codon in codons]
        # Per-codon gap to the best synonymous codon (0 for optimal codons).
        non_optimality = np.array(optimal_usage) - np.array(current_usage)
        nonoptimal_indices = np.nonzero(non_optimality)[0]
        locations = self.codons_indices_to_locations(nonoptimal_indices)
        score = -non_optimality.sum()
        return SpecEvaluation(
            self,
            problem,
            score=score,
            locations=locations,
            message="Codon opt. on window %s scored %.02E"
            % (self.location, score),
        )

    def label_parameters(self):
        """Return the species, or "(custom table)" if none, in a list."""
        return ["(custom table)" if self.species is None else self.species]

    def short_label(self):
        """Return "best-codon-optimize", followed by the species if set."""
        result = "best-codon-optimize"
        if self.species is not None:
            result += " (%s)" % self.species
        return result
140

STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc