• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Edinburgh-Genome-Foundry / DnaCauldron / 14247436103

03 Apr 2025 03:55PM UTC coverage: 87.595% (-1.0%) from 88.623%
14247436103

push

github

veghp
Update docs generation

2182 of 2491 relevant lines covered (87.6%)

0.88 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.68
/dnacauldron/SequenceRepository.py
1
from .biotools import (
1✔
2
    load_records_from_files,
3
    set_record_topology,
4
    sequence_to_biopython_record,
5
)
6
from fuzzywuzzy import process
1✔
7

8

9
class NotInRepositoryError(Exception):
    """Error raised when requested parts are absent from a repository.

    Parameters
    ----------

    parts
      Iterable of the part names that could not be found.

    repository
      The repository that was searched. Its ``name`` attribute appears in the
      message and its ``suggest_part_names`` method provides the "did you
      mean" hints.
    """

    def __init__(self, parts, repository):
        self.parts = parts
        self.repository = repository

        # Describe every missing part (with suggestions when there are any),
        # then hand the assembled message over to Exception.
        described_parts = map(self.create_part_suggestion_string, parts)
        listing = ", ".join(described_parts)
        super().__init__(
            "Parts not found in %s: %s" % (repository.name, listing)
        )

    def create_part_suggestion_string(self, part_name):
        """Return the part name, followed by close matches if there are any."""
        candidates = self.repository.suggest_part_names(part_name)
        if candidates:
            alternatives = " or ".join(candidates)
            return "%s (did you mean %s ?)" % (part_name, alternatives)
        return part_name
×
28

29

30
class RepositoryDuplicateError(Exception):
    """Error raised when part IDs are already present in a repository.

    Parameters
    ----------

    parts
      List of the duplicated part IDs (strings).

    repository
      The repository in which the duplication happened. Only its ``name``
      attribute is read here; a falsy name is left out of the message.
    """

    def __init__(self, parts, repository):
        self.parts = parts
        self.repository = repository
        parts_list = ", ".join(parts)
        if len(parts_list) > 150:
            # Keep the message readable when many IDs are duplicated.
            parts_list = parts_list[:150] + "..."
        plural = "s" if len(parts) > 1 else ""
        # The " in " separator is only added when the repository has a name,
        # so the message never contains a doubled or dangling "in".
        location = (" in " + repository.name) if repository.name else ""
        message = "Part ID%s %s duplicated%s" % (plural, parts_list, location)
        super().__init__(message)
1✔
41

42

43
class SequenceRepository:
    """Sequence repositories store and provide sequence records.

    The records are organized into collections, for instance "parts" to host
    parts, "constructs" for records created during assembly plan simulation,
    or any other collection name like "emma_connectors" to store EMMA
    connectors.

    The suggested initialization of a sequence repository is:

    >>> repository = SequenceRepository()
    >>> repository.import_records(files=['part.fa', 'records.zip', etc.])



    Parameters
    ----------

    collections
      A dict {'collection_name': {'record_id': record, ...}, ...} giving for
      each collection a dict of Biopython records.

    name
      The name of the repository as it may appear in error messages and other
      reports.
    """

    def __init__(self, collections=None, name="repo"):
        self.collections = collections or {}
        self.name = name

    def add_record(self, record, collection="parts"):
        """Add one record to a collection, using its record.id as key.

        The collection is created if it doesn't exist.

        The record can also be a pair (id, "ATGTGCC...").

        Raises ``RepositoryDuplicateError`` if any collection of the
        repository already holds a record with the same ID.
        """
        if isinstance(record, (tuple, list)):
            _id, _sequence = record
            record = sequence_to_biopython_record(_sequence, id=_id)
        # IDs must be unique across the whole repository, not only within
        # the target collection.
        if self.contains_record(record.id):
            raise RepositoryDuplicateError([record.id], repository=self)
        if collection not in self.collections:
            self.collections[collection] = {}
        self.collections[collection][record.id] = record

    def add_records(self, records, collection="parts"):
        """Add several records to a collection.

        Parameters
        ----------

        records
          Any iterable (list, tuple, generator...) of items accepted by
          ``add_record``: records, or pairs (id, "ATGTGCC...").

        collection
          Name of the collection under which to add the records. It is only
          created once a first record is added, so an empty ``records``
          leaves the repository untouched.

        Records are added one by one: if ``add_record`` raises
        ``RepositoryDuplicateError``, the records added before the duplicate
        remain in the repository.
        """
        # No emptiness check is needed (iterating over nothing is a no-op),
        # and not calling len() lets unsized iterables such as generators in.
        for record in records:
            self.add_record(record, collection=collection)

    def contains_record(self, record_id):
        """Return whether the repo has a record corresponding to the given id"""
        collections = self.collections.values()
        return any(record_id in collection for collection in collections)

    def get_record(self, record_id):
        """Return the record from the repository from its ID.

        Collections are searched in turn and the first match is returned.
        Raises ``NotInRepositoryError`` if no collection has this ID.
        """
        for collection in self.collections.values():
            if record_id in collection:
                return collection[record_id]
        raise NotInRepositoryError([record_id], self)

    def get_records(self, record_ids):
        """Get a list of records from a list of record IDs.

        The returned records follow the order of ``record_ids``. All missing
        IDs are collected first so that a single ``NotInRepositoryError``
        reports every one of them.
        """
        records = []
        not_in_repository = []
        for name in record_ids:
            if self.contains_record(name):
                records.append(self.get_record(name))
            else:
                not_in_repository.append(name)
        if not_in_repository:
            raise NotInRepositoryError(not_in_repository, repository=self)
        return records

    def import_records(
        self,
        files=None,
        folder=None,
        collection="parts",
        use_file_names_as_ids=True,
        topology="default_to_linear",
    ):
        """Import records into the repository, from files and zips and folders.

        Parameters
        ----------

        files
          A list of file paths, either Genbank, Fasta, Snapgene (.dna), or zips
          containing any of these formats.

        folder
          Path to a folder which can be provided instead of ``files``. When
          both are provided, ``folder`` is used and ``files`` is ignored.

        collection
          Name of the collection under which to import the new records.

        use_file_names_as_ids
          If True, the file name will be used as ID for any record obtained
          from a single-record file (fasta files with many records will still
          use the internal ID).

        topology
          Can be "circular", "linear", "default_to_circular" (will default
          to circular if ``annotations['topology']`` is not already set) or
          "default_to_linear".

        Raises ``ValueError`` if neither ``files`` nor ``folder`` is given.
        """
        if folder is not None:
            records = load_records_from_files(
                folder=folder, use_file_names_as_ids=use_file_names_as_ids
            )
        elif files is not None:
            records = load_records_from_files(
                files=files,
                use_file_names_as_ids=use_file_names_as_ids,
            )
        else:
            raise ValueError("Provide either ``files`` or ``folder``")
        for r in records:
            set_record_topology(r, topology)

        self.add_records(records, collection=collection)

    def get_part_names_by_collection(self, format="dict"):
        """Return a dictionary or a string representing the repo's content.

        Format: "dict" or "string"

        With "dict", the result is {collection_name: [part_name, ...]}, with
        names in insertion order. Any other value gives a multi-line string
        where each collection name is followed by its sorted part names, one
        per line, prefixed with "- ".
        """
        result = {
            collection_name: list(parts.keys())
            for collection_name, parts in self.collections.items()
        }
        if format == "dict":
            return result
        else:
            return "\n".join(
                "\n".join([name] + ["- " + part for part in sorted(parts)])
                for name, parts in result.items()
            )

    def get_all_part_names(self):
        """Return the sorted list of all part names, across all collections."""
        parts = [
            part for collection in self.collections.values() for part in collection
        ]
        return sorted(parts)

    def suggest_part_names(self, query, cutoff=90, limit=3):
        """Suggest part names in the repo close to the given query.

        Parameters
        ----------

        query
          The (possibly misspelled) part name to find close matches for.

        cutoff
          Minimal fuzzy-matching score for a name to be suggested.

        limit
          Maximal number of suggestions returned.

        Returns the matching names as a list, best score first.
        """
        search = process.extract(query, self.get_all_part_names())
        return [
            name
            for (name, score) in sorted(search, key=lambda e: -e[1])
            if score >= cutoff
        ][:limit]
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc