• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

cogent3 / cogent3 / 16072458076

04 Jul 2025 11:11AM UTC coverage: 90.83% (+0.005%) from 90.825%
16072458076

push

github

web-flow
Merge pull request #2390 from GavinHuttley/develop

Finalise the migration to new types 🎉

4044 of 4283 new or added lines in 43 files covered. (94.42%)

44 existing lines in 11 files now uncovered.

30101 of 33140 relevant lines covered (90.83%)

5.45 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.0
/src/cogent3/parse/tinyseq.py
1
"""Parser for NCBI Tiny Seq XML format.
2
DOCTYPE TSeqSet PUBLIC "-//NCBI//NCBI TSeq/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_TSeq.dtd"
3
"""
4

5
import io
6✔
6
import xml.dom.minidom
6✔
7

8
from cogent3.core import moltype as c3_moltype
6✔
9

10
"""
6✔
11
CAUTION:
12
This XML parser uses minidom. This means poor performance for
13
big files (>5MB), and huge XML files will almost certainly crash the program!
14
"""
15

16

17
def TinyseqParser(doc):
    """Parse NCBI Tiny Seq XML, yielding one sequence per TSeq record.

    DOCTYPE TSeqSet PUBLIC "-//NCBI//NCBI TSeq/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_TSeq.dtd"

    Parameters
    ----------
    doc
        An xml.dom.minidom.Document, an open file object (io.IOBase
        instance), or the XML content itself as a str or bytes.

    Yields
    ------
    name, cogent sequence
        name is the text of the record's TSeq_accver element. The sequence
        is made with the PROTEIN moltype when the TSeq_seqtype "value"
        attribute equals "protein", otherwise with DNA. Two features
        spanning the full sequence are added: "genbank_id" (named after
        the accession) and "organism" (named after TSeq_orgname).

    Raises
    ------
    TypeError
        If doc is not one of the supported input types.

    Notes
    -----
    This is a generator, so nothing (including the TypeError) happens until
    the result is iterated.

    CAUTION:
    This XML PARSER uses minidom. This means a bad performance for
    big files (>5MB), and huge XML files will for sure crash the program!
    """
    if isinstance(doc, xml.dom.minidom.Document):
        dom_obj = doc
    elif isinstance(doc, io.IOBase):
        dom_obj = xml.dom.minidom.parse(doc)
    elif isinstance(doc, (str, bytes)):
        # parseString handles both text and raw (encoded) XML content
        dom_obj = xml.dom.minidom.parseString(doc)
    else:
        msg = (
            "doc must be an xml.dom.minidom.Document, a file object, "
            f"str or bytes, not {type(doc).__name__!r}"
        )
        raise TypeError(msg)

    for record in dom_obj.getElementsByTagName("TSeq"):
        raw_string = _first_child_text(record, "TSeq_sequence").upper()
        name = _first_child_text(record, "TSeq_accver")

        seqtype = record.getElementsByTagName("TSeq_seqtype")[0].getAttribute("value")
        # anything that is not explicitly flagged as protein is treated as DNA
        moltype = c3_moltype.PROTEIN if seqtype == "protein" else c3_moltype.DNA

        seq = moltype.make_seq(seq=raw_string, name=name)
        seq.add_feature(biotype="genbank_id", name=name, spans=[(0, len(seq))])

        organism = _first_child_text(record, "TSeq_orgname")
        seq.add_feature(biotype="organism", name=organism, spans=[(0, len(seq))])

        yield (name, seq)


def _first_child_text(record, tag):
    """Return, as a str, the value of the first child node of the first `tag` element in record."""
    return str(record.getElementsByTagName(tag)[0].childNodes[0].nodeValue)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc