• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

rmcar17 / cogent3 / 20125103811

10 Dec 2025 11:02PM UTC coverage: 90.654% (+0.02%) from 90.631%
20125103811

push

github

web-flow
Merge pull request #2545 from GavinHuttley/develop

TST: add explicit test for to_list() with empty table, relates to #721

28245 of 31157 relevant lines covered (90.65%)

5.44 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.15
/src/cogent3/parse/sequence.py
1
"""Delegator for sequence data format parsers."""
2

3
import abc
6✔
4
import functools
6✔
5
import os
6✔
6
import pathlib
6✔
7
import typing
6✔
8

9
from cogent3.parse import (
6✔
10
    clustal,
11
    fasta,
12
    gbseq,
13
    gcg,
14
    genbank,
15
    nexus,
16
    paml,
17
    phylip,
18
    tinyseq,
19
)
20
from cogent3.util.io import iter_splitlines
6✔
21

22
ParserOutputType = typing.Iterable[tuple[str, str] | dict]
6✔
23

24
SeqParserInputTypes = str | pathlib.Path | os.PathLike | tuple | list
6✔
25

26

27
class SequenceParserBase(abc.ABC):
    """Base class for sequence format parsers.

    Subclasses must implement the ``name``, ``supported_suffixes`` and
    ``loader`` properties. The remaining properties have default values
    that subclasses may override.
    """

    @property
    @abc.abstractmethod
    def name(self) -> str:
        """name of the format"""
        ...

    @property
    @abc.abstractmethod
    def supported_suffixes(self) -> set[str]:
        """set of file suffixes this parser supports"""
        ...

    @property
    def result_is_storage(self) -> bool:
        """True if the loader directly returns SeqsDataABC or AlignedSeqsDataABC"""
        return False

    @property
    def supports_unaligned(self) -> bool:
        """True if the loader supports unaligned sequences"""
        return True

    @property
    def supports_aligned(self) -> bool:
        """True if the loader supports aligned sequences"""
        return True

    @property
    @abc.abstractmethod
    def loader(self) -> typing.Callable[[SeqParserInputTypes], ParserOutputType]:
        """a callable for loading from a file"""
        ...
62

63

64
class LineBasedParser:
    """wrapper class to standardise input for line-based sequence format parsers

    Calling an instance dispatches on the type of ``data``:

    - ``str`` is passed through ``iter_splitlines`` before parsing
    - ``os.PathLike`` (including ``pathlib.Path``) must exist on disk and
      is passed through ``iter_splitlines`` before parsing
    - ``tuple`` and ``list`` are handed to the parser unchanged
    - any other type raises ``TypeError``

    Keyword arguments are forwarded to the wrapped parser.
    """

    def __init__(self, parser: typing.Callable[[typing.Any], ParserOutputType]) -> None:
        self._parse = parser

    @functools.singledispatchmethod
    def __call__(self, data, **kwargs: typing.Any) -> ParserOutputType:
        # fallback for types with no registered handler; not a generator,
        # so the error is raised at call time
        msg = f"Unsupported data type {type(data)}"
        raise TypeError(msg)

    @__call__.register(str)
    def _from_str(self, data: str, **kwargs: typing.Any) -> ParserOutputType:
        yield from self._parse(iter_splitlines(data), **kwargs)

    @__call__.register(os.PathLike)
    def _from_path(self, data: os.PathLike, **kwargs: typing.Any) -> ParserOutputType:
        # os.PathLike is part of SeqParserInputTypes, so accept any path-like
        # object and normalise it to pathlib.Path
        path = pathlib.Path(data)
        if not path.exists():
            msg = f"File '{data}' does not exist"
            raise FileNotFoundError(msg)
        yield from self._parse(iter_splitlines(path), **kwargs)

    @__call__.register(tuple)
    @__call__.register(list)
    def _from_lines(
        self, data: tuple | list, **kwargs: typing.Any
    ) -> ParserOutputType:
        # we're assuming this is already split by lines
        yield from self._parse(data, **kwargs)
6✔
97

98

99
# Maps a format name / file suffix to the callable that parses it.
PARSERS = {
    # phylip
    "phylip": LineBasedParser(phylip.MinimalPhylipParser),
    "phy": LineBasedParser(phylip.MinimalPhylipParser),
    # paml
    "paml": LineBasedParser(paml.PamlParser),
    # fasta
    "fasta": fasta.iter_fasta_records,
    "mfa": fasta.iter_fasta_records,
    "fa": fasta.iter_fasta_records,
    "faa": fasta.iter_fasta_records,
    "fna": fasta.iter_fasta_records,
    # gde
    "gde": LineBasedParser(fasta.MinimalGdeParser),
    # clustal
    "aln": LineBasedParser(clustal.ClustalParser),
    "clustal": LineBasedParser(clustal.ClustalParser),
    # genbank
    "gb": genbank.rich_parser,
    "gbk": genbank.rich_parser,
    "gbff": genbank.rich_parser,
    "genbank": genbank.rich_parser,
    # msf
    "msf": LineBasedParser(gcg.MsfParser),
    # nexus
    "nex": LineBasedParser(nexus.MinimalNexusAlignParser),
    "nxs": LineBasedParser(nexus.MinimalNexusAlignParser),
    "nexus": LineBasedParser(nexus.MinimalNexusAlignParser),
}
120

121

122
class FastaParser(SequenceParserBase):
    """Parser for FASTA format sequence files."""

    @property
    def name(self) -> str:
        """name of the format"""
        return "fasta"

    @property
    def supported_suffixes(self) -> set[str]:
        """set of file suffixes this parser supports"""
        return {"fasta", "fa", "fna", "faa", "mfa"}

    @property
    def loader(self) -> typing.Callable[[SeqParserInputTypes], ParserOutputType]:
        """the callable used to load data, fasta.iter_fasta_records"""
        return fasta.iter_fasta_records
6✔
136

137

138
class GdeParser(SequenceParserBase):
    """Parser for GDE format sequence files."""

    @property
    def name(self) -> str:
        """name of the format"""
        return "gde"

    @property
    def supported_suffixes(self) -> set[str]:
        """set of file suffixes this parser supports"""
        return {"gde"}

    @property
    def loader(self) -> typing.Callable[[SeqParserInputTypes], ParserOutputType]:
        """a new LineBasedParser wrapping fasta.MinimalGdeParser"""
        return LineBasedParser(fasta.MinimalGdeParser)
×
152

153

154
class PhylipParser(SequenceParserBase):
    """Parser for PHYLIP format sequence files."""

    @property
    def name(self) -> str:
        """name of the format"""
        return "phylip"

    @property
    def supported_suffixes(self) -> set[str]:
        """set of file suffixes this parser supports"""
        return {"phylip", "phy"}

    @property
    def loader(self) -> typing.Callable[[SeqParserInputTypes], ParserOutputType]:
        """a new LineBasedParser wrapping phylip.MinimalPhylipParser"""
        return LineBasedParser(phylip.MinimalPhylipParser)
6✔
168

169

170
class ClustalParser(SequenceParserBase):
    """Parser for Clustal format sequence files."""

    @property
    def name(self) -> str:
        """name of the format"""
        return "clustal"

    @property
    def supported_suffixes(self) -> set[str]:
        """set of file suffixes this parser supports"""
        return {"clustal", "aln"}

    @property
    def loader(self) -> typing.Callable[[SeqParserInputTypes], ParserOutputType]:
        """a new LineBasedParser wrapping clustal.ClustalParser"""
        return LineBasedParser(clustal.ClustalParser)
×
184

185

186
class PamlParser(SequenceParserBase):
    """Parser for PAML format sequence files."""

    @property
    def name(self) -> str:
        """name of the format"""
        return "paml"

    @property
    def supported_suffixes(self) -> set[str]:
        """set of file suffixes this parser supports"""
        return {"paml"}

    @property
    def loader(self) -> typing.Callable[[SeqParserInputTypes], ParserOutputType]:
        """a new LineBasedParser wrapping paml.PamlParser"""
        return LineBasedParser(paml.PamlParser)
6✔
200

201

202
class NexusParser(SequenceParserBase):
    """Parser for Nexus format sequence files."""

    @property
    def name(self) -> str:
        """name of the format"""
        return "nexus"

    @property
    def supported_suffixes(self) -> set[str]:
        """set of file suffixes this parser supports"""
        return {"nexus", "nex", "nxs"}

    @property
    def loader(self) -> typing.Callable[[SeqParserInputTypes], ParserOutputType]:
        """a new LineBasedParser wrapping nexus.MinimalNexusAlignParser"""
        return LineBasedParser(nexus.MinimalNexusAlignParser)
6✔
216

217

218
class GenbankParser(SequenceParserBase):
    """Parser for GenBank format sequence files."""

    @property
    def name(self) -> str:
        """name of the format"""
        return "genbank"

    @property
    def supported_suffixes(self) -> set[str]:
        """set of file suffixes this parser supports"""
        return {"gb", "gbk", "gbff", "genbank"}

    @property
    def loader(self) -> typing.Callable[[SeqParserInputTypes], ParserOutputType]:
        """the callable used to load data, genbank.rich_parser"""
        return genbank.rich_parser
×
232

233

234
class MsfParser(SequenceParserBase):
    """Parser for MSF format sequence files."""

    @property
    def name(self) -> str:
        """name of the format"""
        return "msf"

    @property
    def supported_suffixes(self) -> set[str]:
        """set of file suffixes this parser supports"""
        return {"msf"}

    @property
    def loader(self) -> typing.Callable[[SeqParserInputTypes], ParserOutputType]:
        """a new LineBasedParser wrapping gcg.MsfParser"""
        return LineBasedParser(gcg.MsfParser)
×
248

249

250
class TinyseqParser(SequenceParserBase):
    """Parser for Tinyseq format sequence files."""

    @property
    def name(self) -> str:
        """name of the format"""
        return "tinyseq"

    @property
    def supported_suffixes(self) -> set[str]:
        """set of file suffixes this parser supports"""
        return {"tinyseq"}

    @property
    def loader(self) -> typing.Callable[[SeqParserInputTypes], ParserOutputType]:
        """a new LineBasedParser wrapping tinyseq.TinyseqParser"""
        # NOTE(review): XML_PARSERS maps "tseq" to tinyseq.TinyseqParser
        # directly (no LineBasedParser) -- confirm the wrapping is intended.
        return LineBasedParser(tinyseq.TinyseqParser)
×
264

265

266
class GbSeqParser(SequenceParserBase):
    """Parser for GbSeq format sequence files."""

    @property
    def name(self) -> str:
        """name of the format"""
        return "gbseq"

    @property
    def supported_suffixes(self) -> set[str]:
        """set of file suffixes this parser supports"""
        return {"gbseq"}

    @property
    def loader(self) -> typing.Callable[[SeqParserInputTypes], ParserOutputType]:
        """the callable used to load data, gbseq.GbSeqXmlParser"""
        return gbseq.GbSeqXmlParser
×
280

281

282
# Parsers for the XML-based formats, keyed by format name.
# NOTE(review): the key here is "tseq" while TinyseqParser.supported_suffixes
# is {"tinyseq"} -- confirm the difference is intended.
XML_PARSERS = {
    "gbseq": gbseq.GbSeqXmlParser,
    "tseq": tinyseq.TinyseqParser,
}
6✔
283

284

285
def get_parser(fmt: str) -> typing.Callable[[SeqParserInputTypes], ParserOutputType]:
    """returns a sequence format parser

    Parameters
    ----------
    fmt
        a key of PARSERS

    Raises
    ------
    ValueError
        if fmt is not a key of PARSERS
    """
    try:
        return PARSERS[fmt]
    except KeyError:
        msg = f"Unsupported format {fmt!r}"
        # the KeyError adds nothing beyond the message, so drop it from the
        # traceback chain
        raise ValueError(msg) from None
6✔
292

293

294
def is_genbank(fmt: str | None) -> bool:
6✔
295
    """whether the provided format is a genbank format"""
296
    if fmt is None:
6✔
297
        return False
6✔
298
    try:
6✔
299
        return get_parser(fmt).__module__.endswith("genbank")
6✔
300
    except ValueError:
6✔
301
        return False
6✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc