• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Edinburgh-Genome-Foundry / DnaWeaver / 14138433357

28 Mar 2025 09:46PM UTC coverage: 81.455% (+0.1%) from 81.358%
14138433357

push

github

veghp
Set up automated documentation generation

1836 of 2254 relevant lines covered (81.46%)

0.81 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.25
/dnaweaver/DnaAssemblyMethod/GoldenGateAssemblyMethod.py
1
import itertools
1✔
2
from ..biotools import (
1✔
3
    reverse_complement,
4
    gc_content_to_tm,
5
    find_enzyme_sites,
6
    get_sequence_topology,
7
)
8

9
from ..tools import memoize
1✔
10
from ..SegmentSelector import TmSegmentSelector
1✔
11
from .OverlapingAssemblyMethod import OverlapingAssemblyMethod
1✔
12

13

14
class GoldenGateAssemblyMethod(OverlapingAssemblyMethod):
    """The Golden Gate Assembly Method.

    This method adds overhangs with a Type IIS REase site to the segments.

    Parameters
    ----------

    enzyme
      The enzyme used in the cloning. Must be one of the keys of
      ``enzymes_dict`` ("BsaI", "BsmBI", "BbsI", "SapI"), otherwise a
      ValueError is raised. Overhangs are 3bp long for SapI and 4bp long
      for the other enzymes.

    wildcard_basepair
      The base to use between the enzyme recognition site and the restriction
      site (overhang). For example "A" in ggtctcAgaag.

    left_addition
      An ATGC DNA sequence, in 5'-3', placed upstream of the enzyme site in
      the left flank. The full left flank is
      ``left_addition + enzyme_site + wildcard_basepair``.
      NOTE(review): earlier documentation stated that "[BsaI]", "[BsmBI]",
      "[BbsI]" placeholders are replaced by restriction sites; no such
      substitution is performed in this class -- confirm before relying on it.

    right_addition
      An ATGC DNA sequence, in 5'-3', placed downstream of the enzyme site in
      the right flank. The full right flank is
      ``reverse_complement(enzyme_site + wildcard_basepair) + right_addition``.
      Be careful, it has to be 5'-3' !!!
      Defaults to the empty string (``None`` is not supported).

    refuse_sequences_with_enzyme_site
      If True, refuse sequences with this enzyme's recognition site.

    min_overhangs_gc
      Minimum GC content of overhangs, expressed as a fraction (between 0 and 1).

    max_overhangs_gc
      Maximum GC content of overhangs, expressed as a fraction (between 0 and 1).

    min_overhangs_differences
      Ensure that overhang sequences differ by at least that many nucleotides.
      The same threshold is applied between an overhang and the reverse
      complement of any overhang (itself included).

    **props
      Extra keyword arguments forwarded to ``OverlapingAssemblyMethod.__init__``.
    """

    name = "Golden Gate Assembly"

    # Recognition site (5'-3') of each supported Type IIS enzyme.
    enzymes_dict = {
        "BsaI": "GGTCTC",
        "BsmBI": "CGTCTC",
        "BbsI": "GAAGAC",
        "SapI": "GCTCTTC",
    }

    @staticmethod
    def _count_differences(seq_a, seq_b):
        """Return the number of positions at which two sequences differ.

        Positions are compared pairwise with ``zip``, so any trailing part of
        the longer sequence is ignored.
        """
        return sum(1 for a, b in zip(seq_a, seq_b) if a != b)

    def __init__(
        self,
        enzyme="BsaI",
        wildcard_basepair="A",
        left_addition="",
        right_addition="",
        refuse_sequences_with_enzyme_site=True,
        min_overhangs_gc=0,
        max_overhangs_gc=1,
        min_overhangs_differences=1,
        **props
    ):
        if enzyme not in self.enzymes_dict:
            # Join the names so the message lists the enzymes instead of
            # showing a raw ``dict_keys([...])`` representation.
            raise ValueError(
                "Enzyme should be one of %s" % ", ".join(self.enzymes_dict)
            )

        self.min_overhangs_gc = min_gc = min_overhangs_gc
        self.max_overhangs_gc = max_gc = max_overhangs_gc
        self.min_overhangs_differences = min_overhangs_differences

        self.enzyme = enzyme
        self.enzyme_site = self.enzymes_dict[enzyme]
        enzyme_site_plus_basepair = self.enzyme_site + wildcard_basepair
        self.left_addition = left_addition + enzyme_site_plus_basepair
        self.right_addition = (
            reverse_complement(enzyme_site_plus_basepair) + right_addition
        )
        self.refuse_sequences_with_enzyme_site = refuse_sequences_with_enzyme_site
        self.overhang_size = 3 if self.enzyme == "SapI" else 4

        # Overhangs have a fixed size; the GC bounds are expressed to the
        # selector as melting-temperature bounds for that size.
        overhang_selector = TmSegmentSelector(
            min_size=self.overhang_size,
            max_size=self.overhang_size,
            min_tm=gc_content_to_tm(self.overhang_size, min_gc),
            max_tm=gc_content_to_tm(self.overhang_size, max_gc),
            left_addition=self.left_addition,
            right_addition=self.right_addition,
        )
        # The constraint lists appended to below are expected to exist once
        # the parent initializer has run, so this call must come first.
        OverlapingAssemblyMethod.__init__(self, overhang_selector, **props)

        # SEQUENCE CONSTRAINT: NO ENZYME RECOGNITION SITE IN THE SEQUENCE

        if refuse_sequences_with_enzyme_site:

            def no_site_in_sequence(sequence):
                sites = find_enzyme_sites(sequence, enzyme_name=self.enzyme)
                return sites == []

            self.sequence_constraints.append(no_site_in_sequence)

        # DO NOT CUT AT PALINDROMIC REGIONS

        def no_cut_at_palyndromic_locations(sequence):
            def no_palyndrom_filter(i):
                # Accept location i only if the overhang around it differs
                # enough from its own reverse complement.
                s = overhang_selector.compute_segment_around_index(sequence, i)
                rev_s = reverse_complement(s)
                rev_diffs = self._count_differences(s, rev_s)
                assert len(s) == self.overhang_size
                return rev_diffs >= self.min_overhangs_differences

            return no_palyndrom_filter

        self.cut_location_constraints.append(no_cut_at_palyndromic_locations)

        # CUTS SET CONSTRAINT: ALL OVERHANGS MUST BE COMPATIBLE

        def overhangs_are_compatible(o1, o2):
            # o1 must differ enough from both o2 and o2's reverse complement.
            if self._count_differences(o1, o2) < self.min_overhangs_differences:
                return False
            rev_diffs = self._count_differences(o1, reverse_complement(o2))
            return rev_diffs >= self.min_overhangs_differences

        overhangs_are_compatible = memoize(overhangs_are_compatible)

        def all_overhangs_are_compatible(sequence):
            topology = get_sequence_topology(sequence, "linear")

            def constraint(cut_locations):
                cut_overhangs = {
                    cut_location: overhang_selector.compute_segment_around_index(
                        sequence, cut_location
                    )
                    for cut_location in cut_locations
                }
                cut_pairs = list(itertools.combinations(cut_locations, 2))
                if topology == "circular":
                    # Locations 0 and len(sequence) are not compared with each
                    # other (presumably they denote the same junction of the
                    # circular sequence). Filtering, rather than calling
                    # list.remove, avoids a ValueError when that pair is
                    # absent or appears in the reverse order.
                    end_points = {0, len(sequence)}
                    cut_pairs = [
                        pair for pair in cut_pairs if set(pair) != end_points
                    ]

                return all(
                    overhangs_are_compatible(cut_overhangs[c1], cut_overhangs[c2])
                    for c1, c2 in cut_pairs
                )

            return constraint

        self.cuts_set_constraints.append(all_overhangs_are_compatible)

    def additional_dict_description(self):
        """Return a dict summarizing the Golden Gate-specific settings.

        The GC bounds are reported as rounded integer percentages. Rounding
        (rather than truncating with a bare ``%d``) keeps e.g. a bound of 0.29
        from being displayed as 28 because of floating-point error.
        """
        return {
            "enzyme": self.enzyme,
            "left addition": self.left_addition,
            "right addition": self.right_addition,
            "refuse sequences with enzyme site": str(
                self.refuse_sequences_with_enzyme_site
            ),
            "overhangs gc content": "%d-%d%%"
            % (
                round(100 * self.min_overhangs_gc),
                round(100 * self.max_overhangs_gc),
            ),
            "overhangs differences": self.min_overhangs_differences,
        }
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc