• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

rmcar17 / cogent3 / 17756080244

15 Sep 2025 11:25PM UTC coverage: 90.658% (+0.2%) from 90.467%
17756080244

push

github

web-flow
Merge pull request #2466 from cogent3/dependabot/pip/ruff-0.13.0

Bump ruff from 0.12.12 to 0.13.0

28210 of 31117 relevant lines covered (90.66%)

5.44 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.06
/src/cogent3/parse/sequence.py
1
"""Delegator for sequence data format parsers."""
2

3
import abc
6✔
4
import functools
6✔
5
import pathlib
6✔
6
import typing
6✔
7

8
from cogent3.parse import (
6✔
9
    clustal,
10
    fasta,
11
    gbseq,
12
    gcg,
13
    genbank,
14
    nexus,
15
    paml,
16
    phylip,
17
    tinyseq,
18
)
19
from cogent3.util.io import iter_splitlines
6✔
20

21
# what a parser yields: an iterable of 2-tuples of str, or of dicts
# (presumably (name, sequence) pairs -- confirm against the individual parsers)
ParserOutputType = typing.Iterable[tuple[str, str] | dict]
6✔
22

23
# the kinds of input a parser callable is annotated as accepting
SeqParserInputTypes = str | pathlib.Path | tuple | list
6✔
24

25

26
class SequenceParserBase(abc.ABC):
    """Abstract interface implemented by every sequence format parser.

    Subclasses must define ``name``, ``supported_suffixes`` and ``loader``.
    The remaining properties have default values that a subclass may
    override.
    """

    # -- members every subclass must provide --

    @property
    @abc.abstractmethod
    def name(self) -> str:
        """The name of the format."""

    @property
    @abc.abstractmethod
    def supported_suffixes(self) -> set[str]:
        """The set of file suffixes this parser supports."""

    @property
    @abc.abstractmethod
    def loader(self) -> typing.Callable[[SeqParserInputTypes], ParserOutputType]:
        """A callable for loading from a file."""

    # -- capability flags with defaults --

    @property
    def result_is_storage(self) -> bool:
        """Whether the loader directly returns SeqsDataABC or AlignedSeqsDataABC.

        False unless overridden.
        """
        return False

    @property
    def supports_unaligned(self) -> bool:
        """Whether the loader supports unaligned sequences.

        True unless overridden.
        """
        return True

    @property
    def supports_aligned(self) -> bool:
        """Whether the loader supports aligned sequences.

        True unless overridden.
        """
        return True
61

62

63
class LineBasedParser:
    """Wrapper class to standardise input for line-based sequence format parsers.

    Calling an instance dispatches on the type of ``data``:

    - ``str``: ``data`` is passed through ``iter_splitlines``
    - ``pathlib.Path``: checked for existence, then passed through
      ``iter_splitlines``
    - ``tuple`` / ``list``: handed to the wrapped parser unchanged, on the
      assumption it is already split into lines

    Any other type raises ``TypeError`` immediately. Keyword arguments are
    forwarded to the wrapped parser.

    Notes
    -----
    The handlers for the supported types are generators. Nothing is parsed,
    and a missing file is not reported, until the result is iterated.
    """

    def __init__(self, parser: typing.Callable[..., ParserOutputType]) -> None:
        """
        Parameters
        ----------
        parser
            callable taking an iterable of lines, plus optional keyword
            arguments, and returning an iterable of records
        """
        self._parse = parser

    @functools.singledispatchmethod
    def __call__(self, data, **kwargs: typing.Any) -> ParserOutputType:
        """Fallback for types with no registered handler.

        Raises
        ------
        TypeError
            always
        """
        msg = f"Unsupported data type {type(data)}"
        raise TypeError(msg)

    @__call__.register
    def _(self, data: str, **kwargs: typing.Any) -> ParserOutputType:
        yield from self._parse(iter_splitlines(data), **kwargs)

    @__call__.register
    def _(self, data: pathlib.Path, **kwargs: typing.Any) -> ParserOutputType:
        # runs on first iteration of the returned generator, not at call time
        if not data.exists():
            msg = f"File '{data}' does not exist"
            raise FileNotFoundError(msg)
        yield from self._parse(iter_splitlines(data), **kwargs)

    # explicit registration lets one handler serve both sequence types
    @__call__.register(tuple)
    @__call__.register(list)
    def _(self, data: tuple | list, **kwargs: typing.Any) -> ParserOutputType:
        # we're assuming this is already split by lines
        yield from self._parse(data, **kwargs)
6✔
96

97

98
# Maps a format name / file suffix to the callable that parses it.
# Line-based parsers are wrapped in LineBasedParser; the fasta and genbank
# entries are used as-is.
PARSERS = {
    # phylip
    "phylip": LineBasedParser(phylip.MinimalPhylipParser),
    "phy": LineBasedParser(phylip.MinimalPhylipParser),
    # paml
    "paml": LineBasedParser(paml.PamlParser),
    # fasta
    "fasta": fasta.iter_fasta_records,
    "mfa": fasta.iter_fasta_records,
    "fa": fasta.iter_fasta_records,
    "faa": fasta.iter_fasta_records,
    "fna": fasta.iter_fasta_records,
    # gde
    "gde": LineBasedParser(fasta.MinimalGdeParser),
    # clustal
    "aln": LineBasedParser(clustal.ClustalParser),
    "clustal": LineBasedParser(clustal.ClustalParser),
    # genbank
    "gb": genbank.rich_parser,
    "gbk": genbank.rich_parser,
    "gbff": genbank.rich_parser,
    "genbank": genbank.rich_parser,
    # msf
    "msf": LineBasedParser(gcg.MsfParser),
    # nexus
    "nex": LineBasedParser(nexus.MinimalNexusAlignParser),
    "nxs": LineBasedParser(nexus.MinimalNexusAlignParser),
    "nexus": LineBasedParser(nexus.MinimalNexusAlignParser),
}
119

120

121
class FastaParser(SequenceParserBase):
    """Sequence parser for files in FASTA format."""

    @property
    def name(self) -> str:
        """The format name, ``"fasta"``."""
        return "fasta"

    @property
    def supported_suffixes(self) -> set[str]:
        """File suffixes handled by this parser."""
        return {"fa", "faa", "fasta", "fna", "mfa"}

    @property
    def loader(self) -> typing.Callable[[SeqParserInputTypes], ParserOutputType]:
        """The ``fasta.iter_fasta_records`` callable."""
        parser = fasta.iter_fasta_records
        return parser
6✔
135

136

137
class GdeParser(SequenceParserBase):
    """Sequence parser for files in GDE format."""

    @property
    def name(self) -> str:
        """The format name, ``"gde"``."""
        return "gde"

    @property
    def supported_suffixes(self) -> set[str]:
        """File suffixes handled by this parser."""
        return {"gde"}

    @property
    def loader(self) -> typing.Callable[[SeqParserInputTypes], ParserOutputType]:
        """A new ``LineBasedParser`` wrapping ``fasta.MinimalGdeParser``."""
        wrapped = LineBasedParser(fasta.MinimalGdeParser)
        return wrapped
×
151

152

153
class PhylipParser(SequenceParserBase):
    """Sequence parser for files in PHYLIP format."""

    @property
    def name(self) -> str:
        """The format name, ``"phylip"``."""
        return "phylip"

    @property
    def supported_suffixes(self) -> set[str]:
        """File suffixes handled by this parser."""
        return {"phy", "phylip"}

    @property
    def loader(self) -> typing.Callable[[SeqParserInputTypes], ParserOutputType]:
        """A new ``LineBasedParser`` wrapping ``phylip.MinimalPhylipParser``."""
        wrapped = LineBasedParser(phylip.MinimalPhylipParser)
        return wrapped
6✔
167

168

169
class ClustalParser(SequenceParserBase):
    """Sequence parser for files in Clustal format."""

    @property
    def name(self) -> str:
        """The format name, ``"clustal"``."""
        return "clustal"

    @property
    def supported_suffixes(self) -> set[str]:
        """File suffixes handled by this parser."""
        return {"aln", "clustal"}

    @property
    def loader(self) -> typing.Callable[[SeqParserInputTypes], ParserOutputType]:
        """A new ``LineBasedParser`` wrapping ``clustal.ClustalParser``."""
        wrapped = LineBasedParser(clustal.ClustalParser)
        return wrapped
×
183

184

185
class PamlParser(SequenceParserBase):
    """Sequence parser for files in PAML format."""

    @property
    def name(self) -> str:
        """The format name, ``"paml"``."""
        return "paml"

    @property
    def supported_suffixes(self) -> set[str]:
        """File suffixes handled by this parser."""
        return {"paml"}

    @property
    def loader(self) -> typing.Callable[[SeqParserInputTypes], ParserOutputType]:
        """A new ``LineBasedParser`` wrapping ``paml.PamlParser``."""
        wrapped = LineBasedParser(paml.PamlParser)
        return wrapped
6✔
199

200

201
class NexusParser(SequenceParserBase):
    """Sequence parser for files in Nexus format."""

    @property
    def name(self) -> str:
        """The format name, ``"nexus"``."""
        return "nexus"

    @property
    def supported_suffixes(self) -> set[str]:
        """File suffixes handled by this parser."""
        return {"nex", "nexus", "nxs"}

    @property
    def loader(self) -> typing.Callable[[SeqParserInputTypes], ParserOutputType]:
        """A new ``LineBasedParser`` wrapping ``nexus.MinimalNexusAlignParser``."""
        wrapped = LineBasedParser(nexus.MinimalNexusAlignParser)
        return wrapped
6✔
215

216

217
class GenbankParser(SequenceParserBase):
    """Sequence parser for files in GenBank format."""

    @property
    def name(self) -> str:
        """The format name, ``"genbank"``."""
        return "genbank"

    @property
    def supported_suffixes(self) -> set[str]:
        """File suffixes handled by this parser."""
        return {"gb", "gbff", "gbk", "genbank"}

    @property
    def loader(self) -> typing.Callable[[SeqParserInputTypes], ParserOutputType]:
        """The ``genbank.rich_parser`` callable."""
        parser = genbank.rich_parser
        return parser
×
231

232

233
class MsfParser(SequenceParserBase):
    """Sequence parser for files in MSF format."""

    @property
    def name(self) -> str:
        """The format name, ``"msf"``."""
        return "msf"

    @property
    def supported_suffixes(self) -> set[str]:
        """File suffixes handled by this parser."""
        return {"msf"}

    @property
    def loader(self) -> typing.Callable[[SeqParserInputTypes], ParserOutputType]:
        """A new ``LineBasedParser`` wrapping ``gcg.MsfParser``."""
        wrapped = LineBasedParser(gcg.MsfParser)
        return wrapped
×
247

248

249
class TinyseqParser(SequenceParserBase):
    """Sequence parser for files in Tinyseq format."""

    @property
    def name(self) -> str:
        """The format name, ``"tinyseq"``."""
        return "tinyseq"

    @property
    def supported_suffixes(self) -> set[str]:
        """File suffixes handled by this parser."""
        return {"tinyseq"}

    @property
    def loader(self) -> typing.Callable[[SeqParserInputTypes], ParserOutputType]:
        """A new ``LineBasedParser`` wrapping ``tinyseq.TinyseqParser``."""
        # NOTE(review): elsewhere in this module tinyseq.TinyseqParser is
        # registered unwrapped alongside the gbseq XML parser -- confirm that
        # feeding it split lines via LineBasedParser is intended.
        wrapped = LineBasedParser(tinyseq.TinyseqParser)
        return wrapped
×
263

264

265
class GbSeqParser(SequenceParserBase):
    """Sequence parser for files in GbSeq format."""

    @property
    def name(self) -> str:
        """The format name, ``"gbseq"``."""
        return "gbseq"

    @property
    def supported_suffixes(self) -> set[str]:
        """File suffixes handled by this parser."""
        return {"gbseq"}

    @property
    def loader(self) -> typing.Callable[[SeqParserInputTypes], ParserOutputType]:
        """The ``gbseq.GbSeqXmlParser`` callable."""
        parser = gbseq.GbSeqXmlParser
        return parser
×
279

280

281
# Parsers for the XML formats, keyed by format name; kept separate from PARSERS.
# NOTE(review): the tinyseq key here is "tseq", whereas
# TinyseqParser.supported_suffixes uses "tinyseq" -- confirm which is intended.
XML_PARSERS = {"gbseq": gbseq.GbSeqXmlParser, "tseq": tinyseq.TinyseqParser}
6✔
282

283

284
def get_parser(fmt: str) -> typing.Callable[[SeqParserInputTypes], ParserOutputType]:
    """Return the sequence format parser registered for ``fmt``.

    Parameters
    ----------
    fmt
        a key of ``PARSERS``

    Raises
    ------
    ValueError
        if ``fmt`` is not a key of ``PARSERS``
    """
    try:
        return PARSERS[fmt]
    except KeyError:
        msg = f"Unsupported format {fmt!r}"
        # the KeyError adds nothing beyond the message, so drop it from the
        # traceback rather than showing it as a second exception
        raise ValueError(msg) from None
6✔
291

292

293
def is_genbank(fmt: str) -> bool:
    """Report whether the provided format is a genbank format.

    The parser for ``fmt`` is looked up with ``get_parser``. The result is
    True when the name of the module defining that parser ends with
    ``"genbank"``. A format rejected by ``get_parser`` gives False.
    """
    try:
        parser = get_parser(fmt)
    except ValueError:
        # unknown formats are not genbank
        return False

    module_name = parser.__module__
    return module_name.endswith("genbank")
6✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc