14177235573

Committed 31 Mar 2025 05:07PM UTC coverage: 96.97% (+0.03%) from 96.939%

Build # 14177235573

Build Type

push

github

Committed by

veghp

Commit Message

Update build

Run Details

192 of 198 relevant lines covered (96.97%)

0.97 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.92

/snapgene_reader/snapgene_reader.py

"""
snapgene reader main file
"""

import struct

# import json
import xmltodict

from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

# Feature-detect the legacy Bio.Alphabet module at import time. The resulting
# flag is read by snapgene_file_to_seqrecord() to decide whether to pass an
# explicit alphabet when building the Seq object.
try:
    # Biopython <1.78: Bio.Alphabet is importable, so DNAAlphabet can be used.
    from Bio.Alphabet import DNAAlphabet

    has_dna_alphabet = True
except ImportError:
    # Biopython >=1.78: the import fails; DNAAlphabet stays undefined and must
    # not be referenced while the flag is False.
    has_dna_alphabet = False
from Bio.SeqFeature import SeqFeature, FeatureLocation
import html2text

# Single module-level html2text converter, configured once at import time and
# reused by parse() for every HTML string found in a SnapGene file.
# NOTE(review): the option meanings below are taken from the attribute names
# (drop emphasis markup, drop link markup, no line wrapping, single line
# breaks) -- confirm against the html2text documentation.
HTML_PARSER = html2text.HTML2Text()
HTML_PARSER.ignore_emphasis = True
HTML_PARSER.ignore_links = True
HTML_PARSER.body_width = 0
HTML_PARSER.single_line_break = True


def parse(val):
    """Turn an HTML string into single-line plain text.

    Strings are run through the shared ``HTML_PARSER``, stripped, have their
    newlines replaced by spaces and their double quotes replaced by single
    quotes. Any value that is not a ``str`` is returned untouched.
    """
    if not isinstance(val, str):
        return val
    plain_text = HTML_PARSER.handle(val).strip()
    single_line = plain_text.replace("\n", " ")
    return single_line.replace('"', "'")


def parse_dict(obj):
    """Apply ``parse`` in place to every string value of a nested dict.

    String values are replaced by their parsed form and nested dicts are
    processed recursively; all other values (lists included) are left as
    they are. A non-dict ``obj`` is returned unchanged. The (mutated) input
    object itself is returned.
    """
    if not isinstance(obj, dict):
        return obj
    # Only existing keys are reassigned, so the dict size never changes while
    # it is being iterated.
    for key, value in obj.items():
        if isinstance(value, str):
            obj[key] = parse(value)
        elif isinstance(value, dict):
            parse_dict(value)
    return obj


def snapgene_file_to_dict(filepath=None, fileobject=None):
    """Return a dictionary containing the data from a ``*.dna`` file.

    Parameters
    ----------
    filepath
        Path to a .dna file created with SnapGene. When given, it takes
        precedence over ``fileobject``.

    fileobject
        A binary file-like object pointing to the data of a .dna file created
        with SnapGene. It is closed once the file has been parsed
        successfully.

    Returns
    -------
    dict
        Always contains ``isDNA``, ``exportVersion``, ``importVersion`` and
        ``features`` (a list of feature dicts). ``dna`` and ``seq`` are
        present when the file has a sequence block, ``notes`` when it has a
        notes block.

    Raises
    ------
    ValueError
        If neither ``filepath`` nor ``fileobject`` is given, or if the data
        does not start with a valid SnapGene header.
    struct.error
        If the data ends in the middle of a fixed-size field.
    """
    opened_here = filepath is not None
    if opened_here:
        fileobject = open(filepath, "rb")
    elif fileobject is None:
        raise ValueError("Either filepath or fileobject must be provided.")

    def unpack(size, mode):
        """Read ``size`` bytes and decode them as one big-endian value."""
        return struct.unpack(">" + mode, fileobject.read(size))[0]

    try:
        if fileobject.read(1) != b"\t":
            raise ValueError("Wrong format for a SnapGene file!")

        # READ THE DOCUMENT PROPERTIES
        length = unpack(4, "I")
        title = fileobject.read(8).decode("ascii")
        if length != 14 or title != "SnapGene":
            raise ValueError("Wrong format for a SnapGene file !")

        # Keyword arguments are evaluated left to right, which matches the
        # order of the three fields in the file.
        data = dict(
            isDNA=unpack(2, "H"),
            exportVersion=unpack(2, "H"),
            importVersion=unpack(2, "H"),
            features=[],
        )

        while True:
            # READ THE WHOLE FILE, BLOCK BY BLOCK, UNTIL THE END
            next_byte = fileobject.read(1)

            # next_byte table
            # 0: dna sequence
            # 1: compressed DNA
            # 2: unknown
            # 3: unknown
            # 5: primers
            # 6: notes
            # 7: history tree
            # 8: additional sequence properties segment
            # 9: file Description
            # 10: features
            # 11: history node
            # 13: unknown
            # 16: alignable sequence
            # 17: alignable sequence
            # 18: sequence trace
            # 19: Uracil Positions
            # 20: custom DNA colors

            if next_byte == b"":
                # END OF FILE
                break

            block_type = ord(next_byte)
            block_size = unpack(4, "I")

            if block_type == 0:
                # READ THE SEQUENCE AND ITS PROPERTIES
                # The first byte of the block is a bit field of properties;
                # the remaining block_size - 1 bytes are the sequence.
                props = unpack(1, "b")
                data["dna"] = dict(
                    topology="circular" if props & 0x01 else "linear",
                    strandedness="double" if props & 0x02 > 0 else "single",
                    damMethylated=props & 0x04 > 0,
                    dcmMethylated=props & 0x08 > 0,
                    ecoKIMethylated=props & 0x10 > 0,
                    length=block_size - 1,
                )
                data["seq"] = fileobject.read(block_size - 1).decode("ascii")

            elif block_type == 6:
                # READ THE NOTES
                block_content = fileobject.read(block_size).decode("utf-8")
                note_data = parse_dict(xmltodict.parse(block_content))
                data["notes"] = note_data["Notes"]

            elif block_type == 10:
                # READ THE FEATURES
                strand_dict = {"0": ".", "1": "+", "2": "-", "3": "="}
                format_dict = {"@text": parse, "@int": int}
                features_data = xmltodict.parse(fileobject.read(block_size))
                features = features_data["Features"]["Feature"]
                # xmltodict yields a bare dict instead of a list when there
                # is a single child element; normalise to a list.
                if not isinstance(features, list):
                    features = [features]
                for feature in features:
                    segments = feature["Segment"]
                    if not isinstance(segments, list):
                        segments = [segments]
                    # Each range is "a-b"; sort so that (start, end) is
                    # always in ascending order.
                    segments_ranges = [
                        sorted(int(e) for e in segment["@range"].split("-"))
                        for segment in segments
                    ]
                    qualifiers = feature.get("Q", [])
                    if not isinstance(qualifiers, list):
                        qualifiers = [qualifiers]
                    parsed_qualifiers = {}
                    for qualifier in qualifiers:
                        # A qualifier without any value (missing or empty
                        # <V>) carries no information and is skipped.
                        q_value = qualifier.get("V")
                        if q_value is None:
                            continue
                        q_name = qualifier["@name"]
                        if isinstance(q_value, list):
                            if len(q_value[0].items()) == 1:
                                # List of single-attribute values -> list.
                                parsed_qualifiers[q_name] = l_v = []
                                for e_v in q_value:
                                    fmt, value = e_v.popitem()
                                    fmt = format_dict.get(fmt, parse)
                                    l_v.append(fmt(value))
                            else:
                                # List of two-attribute values -> dict keyed
                                # by the second attribute's value.
                                parsed_qualifiers[q_name] = d_v = {}
                                for e_v in q_value:
                                    (fmt1, value1), (_, value2) = e_v.items()
                                    fmt = format_dict.get(fmt1, parse)
                                    d_v[value2] = fmt(value1)
                        else:
                            fmt, value = q_value.popitem()
                            fmt = format_dict.get(fmt, parse)
                            parsed_qualifiers[q_name] = fmt(value)

                    if "label" not in parsed_qualifiers:
                        parsed_qualifiers["label"] = feature["@name"]
                    if "note" not in parsed_qualifiers:
                        parsed_qualifiers["note"] = []
                    if not isinstance(parsed_qualifiers["note"], list):
                        parsed_qualifiers["note"] = [parsed_qualifiers["note"]]
                    # The feature colour is taken from its first segment.
                    color = segments[0]["@color"]
                    parsed_qualifiers["note"].append("color: " + color)

                    data["features"].append(
                        dict(
                            # start is converted to a 0-based offset; end is
                            # kept as read from the file.
                            start=min(start - 1 for (start, end) in segments_ranges),
                            end=max(end for (start, end) in segments_ranges),
                            strand=strand_dict[feature.get("@directionality", "0")],
                            type=feature["@type"],
                            name=feature["@name"],
                            color=color,
                            textColor="black",
                            segments=segments,
                            row=0,
                            isOrf=False,
                            qualifiers=parsed_qualifiers,
                        )
                    )

            else:
                # WE IGNORE THE WHOLE BLOCK
                fileobject.read(block_size)
    except Exception:
        # Do not leak the handle we opened ourselves when parsing fails. A
        # caller-supplied file object is left open on error, as before.
        if opened_here:
            fileobject.close()
        raise

    fileobject.close()
    return data


def snapgene_file_to_seqrecord(filepath=None, fileobject=None):
    """Return a BioPython SeqRecord from the data of a ``*.dna`` file.

    Parameters
    ----------
    filepath
        Path to a .dna file created with SnapGene.

    fileobject
        A file-like object pointing to the data of a .dna file created with
        SnapGene.

    Returns
    -------
    SeqRecord
        Record holding the sequence, one ``SeqFeature`` per SnapGene feature,
        and annotations made of the file's notes plus ``topology`` and
        ``molecule_type``.
    """
    data = snapgene_file_to_dict(filepath=filepath, fileobject=fileobject)
    strand_dict = {"+": 1, "-": -1, ".": 0, "=": 0}

    if has_dna_alphabet:
        seq = Seq(data["seq"], alphabet=DNAAlphabet())
    else:
        seq = Seq(data["seq"])

    # The notes block is optional, and an empty one is parsed as None; fall
    # back to no extra annotations in both cases instead of failing.
    notes = data.get("notes") or {}

    seqrecord = SeqRecord(
        seq=seq,
        features=[
            SeqFeature(
                location=FeatureLocation(
                    start=feature["start"],
                    end=feature["end"],
                    strand=strand_dict[feature["strand"]],
                ),
                type=feature["type"],
                qualifiers=feature["qualifiers"],
            )
            for feature in data["features"]
        ],
        annotations=dict(topology=data["dna"]["topology"], **notes),
    )

    seqrecord.annotations["molecule_type"] = "DNA"

    return seqrecord


def snapgene_file_to_gbk(read_file_object, write_file_object):
    """Convert a SnapGene ``*.dna`` file object into GenBank-formatted text.

    Parameters
    ----------
    read_file_object
        Binary file-like object holding the SnapGene data. It is handed to
        ``snapgene_file_to_dict``.

    write_file_object
        Text file-like object; the GenBank record is emitted through its
        ``write`` method.

    Notes
    -----
    The dates in the LOCUS and JOURNAL lines are fixed placeholder strings,
    not the actual export date.
    """

    def analyse_gs(dic, *args, **kwargs):
        """Follow the keys ``args`` through nested dicts.

        Returns the value reached, or ``kwargs["default"]`` (``None`` when
        not given) as soon as a key is missing or a non-dict is encountered.
        """
        default = kwargs.get("default")
        for arg in args:
            if isinstance(dic, dict) and arg in dic:
                dic = dic[arg]
            else:
                return default
        return dic

    data = snapgene_file_to_dict(fileobject=read_file_object)
    wfo = write_file_object

    def note_text(key):
        """Return the note stored under ``key``, or "." if absent or empty."""
        value = analyse_gs(data, "notes", key, default=".")
        return "." if value is None else value

    # Continuation lines of multi-line fields are indented by 12 spaces.
    continuation = "\n            "
    # Qualifier lines are indented by 21 spaces.
    qualifier_indent = 21 * " "
    seq_length = analyse_gs(data, "dna", "length")

    wfo.write(
        (
            "LOCUS       Exported              {0:>6} bp ds-DNA     {1:>8} SYN "
            "15-APR-2012\n"
        ).format(len(data["seq"]), data["dna"]["topology"])
    )
    wfo.write(
        "DEFINITION  {}\n".format(note_text("Description").replace("\n", continuation))
    )
    wfo.write("ACCESSION   .\n")
    wfo.write("VERSION     .\n")
    wfo.write("KEYWORDS    {}\n".format(note_text("CustomMapLabel")))
    wfo.write("SOURCE      .\n")
    wfo.write("  ORGANISM  .\n")

    references = analyse_gs(data, "notes", "References")

    reference_count = 0
    if references:
        for key in references:
            reference_count += 1
            ref = references[key]
            wfo.write(
                "REFERENCE   {}  (bases 1 to {} )\n".format(reference_count, seq_length)
            )
            for key2 in ref:
                # xmltodict attribute keys start with "@"; GenBank keywords
                # are upper case.
                gb_key = key2.replace("@", "").upper()
                wfo.write("  {}   {}\n".format(gb_key, ref[key2]))

    # generate special reference
    reference_count += 1
    wfo.write("REFERENCE   {}  (bases 1 to {} )\n".format(reference_count, seq_length))
    wfo.write("  AUTHORS   SnapGeneReader\n")
    wfo.write("  TITLE     Direct Submission\n")
    wfo.write(
        "  JOURNAL   Exported Monday, Sep 05, 2020 from SnapGene File Reader\n"
    )
    wfo.write(
        "            https://github.com/Edinburgh-Genome-Foundry/SnapGeneReader\n"
    )

    wfo.write(
        "COMMENT     {}\n".format(
            note_text("Comments").replace("\n", continuation).replace("\\", "")
        )
    )
    wfo.write("FEATURES             Location/Qualifiers\n")

    for feature in analyse_gs(data, "features"):
        strand = analyse_gs(feature, "strand", default="")

        # Only "standard" segments contribute to the location.
        segments = [
            segment
            for segment in analyse_gs(feature, "segments", default=[])
            if analyse_gs(segment, "@type") == "standard"
        ]
        if len(segments) > 1:
            # Segment ranges are written as stored in the file (1-based).
            location = "join({})".format(
                ",".join(segment["@range"].replace("-", "..") for segment in segments)
            )
        else:
            # The feature dict stores a 0-based start, whereas GenBank
            # locations are 1-based inclusive: shift the start back by one
            # so it agrees with the join(...) form above.
            start = analyse_gs(feature, "start", default=" ")
            if isinstance(start, int):
                start += 1
            location = "{}..{}".format(start, analyse_gs(feature, "end", default=" "))

        if strand == "-":
            location = "complement({})".format(location)
        wfo.write(
            "     {} {}\n".format(
                analyse_gs(feature, "type", default=" ").ljust(15), location
            )
        )

        # name
        wfo.write(
            qualifier_indent
            + '/note="{}"\n'.format(analyse_gs(feature, "name", default="feature"))
        )
        # qualifiers
        for q_key in analyse_gs(feature, "qualifiers", default={}):
            # do not write label, because it has been written at first.
            if q_key == "label":
                continue
            if q_key == "note":
                for note in analyse_gs(feature, "qualifiers", q_key, default=[]):
                    # do not write color, because it will be written later
                    if note[:6] != "color:":
                        wfo.write(qualifier_indent + '/note="{}"\n'.format(note))
            else:
                wfo.write(
                    qualifier_indent
                    + '/{}="{}"\n'.format(
                        q_key, analyse_gs(feature, "qualifiers", q_key, default="")
                    )
                )

        if len(segments) > 1:
            # One multi-line note listing every segment's range and colour.
            wfo.write(
                qualifier_indent
                + '/note="This feature has {} segments:'.format(len(segments))
            )
            for seg_i, seg in enumerate(segments):
                segment_name = analyse_gs(seg, "@name", default="")
                if segment_name:
                    segment_name = " / {}".format(segment_name)
                wfo.write(
                    "\n                        {}:  {} / {}{}".format(
                        seg_i,
                        seg["@range"].replace("-", " .. "),
                        seg["@color"],
                        segment_name,
                    )
                )
            wfo.write('"\n')
        else:
            # write colors and direction
            wfo.write(
                qualifier_indent
                + '/note="color: {}'.format(
                    analyse_gs(feature, "color", default="#ffffff")
                )
            )
            if strand == "-":
                wfo.write('; direction: LEFT"\n')
            elif strand == "+":
                wfo.write('; direction: RIGHT"\n')
            else:
                wfo.write('"\n')

    # sequence
    wfo.write("ORIGIN\n")
    seq = analyse_gs(data, "seq")
    # divide rows: 60 bases per line, in groups of 10, each line prefixed
    # with the 1-based position of its first base.
    for i in range(0, len(seq), 60):
        wfo.write(str(i + 1).rjust(9))
        for j in range(i, min(i + 60, len(seq)), 10):
            wfo.write(" {}".format(seq[j : j + 10]))
        wfo.write("\n")
    wfo.write("//\n")

1	"""
2	snapgene reader main file
3	"""
4
5	import struct	1✔
6
7	# import json
8	import xmltodict	1✔
9
10	from Bio.Seq import Seq	1✔
11	from Bio.SeqRecord import SeqRecord	1✔
12
13	try:	1✔
14	# Biopython <1.78
15	from Bio.Alphabet import DNAAlphabet	1✔
16
17	has_dna_alphabet = True	×
18	except ImportError:	1✔
19	# Biopython >=1.78
20	has_dna_alphabet = False	1✔
21	from Bio.SeqFeature import SeqFeature, FeatureLocation	1✔
22	import html2text	1✔
23
24	HTML_PARSER = html2text.HTML2Text()	1✔
25	HTML_PARSER.ignore_emphasis = True	1✔
26	HTML_PARSER.ignore_links = True	1✔
27	HTML_PARSER.body_width = 0	1✔
28	HTML_PARSER.single_line_break = True	1✔
29
30
31	def parse(val):	1✔
32	"""Parse html."""
33	if isinstance(val, str):	1✔
34	return HTML_PARSER.handle(val).strip().replace("\n", " ").replace('"', "'")	1✔
35	else:
36	return val	×
37
38
39	def parse_dict(obj):	1✔
40	"""Parse dict in the obj."""
41	if isinstance(obj, dict):	1✔
42	for key in obj:	1✔
43	if isinstance(obj[key], str):	1✔
44	obj[key] = parse(obj[key])	1✔
45	elif isinstance(obj[key], dict):	1✔
46	parse_dict(obj[key])	1✔
47	return obj	1✔
48
49
50	def snapgene_file_to_dict(filepath=None, fileobject=None):	1✔
51	"""Return a dictionary containing the data from a ``*.dna`` file.
52
53	Parameters
54	----------
55	filepath
56	Path to a .dna file created with SnapGene.
57
58	fileobject
59	On object-like pointing to the data of a .dna file created with
60	SnapGene.
61	"""
62	if filepath is not None:	1✔
63	fileobject = open(filepath, "rb")	1✔
64
65	if fileobject.read(1) != b"\t":	1✔
66	raise ValueError("Wrong format for a SnapGene file!")	×
67
68	def unpack(size, mode):	1✔
69	"""Unpack the fileobject."""
70	return struct.unpack(">" + mode, fileobject.read(size))[0]	1✔
71
72	# READ THE DOCUMENT PROPERTIES
73	length = unpack(4, "I")	1✔
74	title = fileobject.read(8).decode("ascii")	1✔
75	if length != 14 or title != "SnapGene":	1✔
76	raise ValueError("Wrong format for a SnapGene file !")	×
77
78	data = dict(	1✔
79	isDNA=unpack(2, "H"),
80	exportVersion=unpack(2, "H"),
81	importVersion=unpack(2, "H"),
82	features=[],
83	)
84
85	while True:	1✔
86	# READ THE WHOLE FILE, BLOCK BY BLOCK, UNTIL THE END
87	next_byte = fileobject.read(1)	1✔
88
89	# next_byte table
90	# 0: dna sequence
91	# 1: compressed DNA
92	# 2: unknown
93	# 3: unknown
94	# 5: primers
95	# 6: notes
96	# 7: history tree
97	# 8: additional sequence properties segment
98	# 9: file Description
99	# 10: features
100	# 11: history node
101	# 13: unknown
102	# 16: alignable sequence
103	# 17: alignable sequence
104	# 18: sequence trace
105	# 19: Uracil Positions
106	# 20: custom DNA colors
107
108	if next_byte == b"":	1✔
109	# END OF FILE
110	break	1✔
111
112	block_size = unpack(4, "I")	1✔
113
114	if ord(next_byte) == 0:	1✔
115	# READ THE SEQUENCE AND ITS PROPERTIES
116	props = unpack(1, "b")	1✔
117	data["dna"] = dict(	1✔
118	topology="circular" if props & 0x01 else "linear",
119	strandedness="double" if props & 0x02 > 0 else "single",
120	damMethylated=props & 0x04 > 0,
121	dcmMethylated=props & 0x08 > 0,
122	ecoKIMethylated=props & 0x10 > 0,
123	length=block_size - 1,
124	)
125	data["seq"] = fileobject.read(block_size - 1).decode("ascii")	1✔
126
127	elif ord(next_byte) == 6:	1✔
128	# READ THE NOTES
129	block_content = fileobject.read(block_size).decode("utf-8")	1✔
130	note_data = parse_dict(xmltodict.parse(block_content))	1✔
131	data["notes"] = note_data["Notes"]	1✔
132
133	elif ord(next_byte) == 10:	1✔
134	# READ THE FEATURES
135	strand_dict = {"0": ".", "1": "+", "2": "-", "3": "="}	1✔
136	format_dict = {"@text": parse, "@int": int}	1✔
137	features_data = xmltodict.parse(fileobject.read(block_size))	1✔
138	features = features_data["Features"]["Feature"]	1✔
139	if not isinstance(features, list):	1✔
140	features = [features]	1✔
141	for feature in features:	1✔
142	segments = feature["Segment"]	1✔
143	if not isinstance(segments, list):	1✔
144	segments = [segments]	1✔
145	segments_ranges = [	1✔
146	sorted([int(e) for e in segment["@range"].split("-")])
147	for segment in segments
148	]
149	qualifiers = feature.get("Q", [])	1✔
150	if not isinstance(qualifiers, list):	1✔
151	qualifiers = [qualifiers]	1✔
152	parsed_qualifiers = {}	1✔
153	for qualifier in qualifiers:	1✔
154	if qualifier["V"] is None:	1✔
155	pass	×
156	elif isinstance(qualifier["V"], list):	1✔
157	if len(qualifier["V"][0].items()) == 1:	1✔
158	parsed_qualifiers[qualifier["@name"]] = l_v = []	1✔
159	for e_v in qualifier["V"]:	1✔
160	fmt, value = e_v.popitem()	1✔
161	fmt = format_dict.get(fmt, parse)	1✔
162	l_v.append(fmt(value))	1✔
163	else:
164	parsed_qualifiers[qualifier["@name"]] = d_v = {}	1✔
165	for e_v in qualifier["V"]:	1✔
166	(fmt1, value1), (_, value2) = e_v.items()	1✔
167	fmt = format_dict.get(fmt1, parse)	1✔
168	d_v[value2] = fmt(value1)	1✔
169	else:
170	fmt, value = qualifier["V"].popitem()	1✔
171	fmt = format_dict.get(fmt, parse)	1✔
172	parsed_qualifiers[qualifier["@name"]] = fmt(value)	1✔
173
174	if "label" not in parsed_qualifiers:	1✔
175	parsed_qualifiers["label"] = feature["@name"]	1✔
176	if "note" not in parsed_qualifiers:	1✔
177	parsed_qualifiers["note"] = []	1✔
178	if not isinstance(parsed_qualifiers["note"], list):	1✔
179	parsed_qualifiers["note"] = [parsed_qualifiers["note"]]	1✔
180	color = segments[0]["@color"]	1✔
181	parsed_qualifiers["note"].append("color: " + color)	1✔
182
183	data["features"].append(	1✔
184	dict(
185	start=min([start - 1 for (start, end) in segments_ranges]),
186	end=max([end for (start, end) in segments_ranges]),
187	strand=strand_dict[feature.get("@directionality", "0")],
188	type=feature["@type"],
189	name=feature["@name"],
190	color=segments[0]["@color"],
191	textColor="black",
192	segments=segments,
193	row=0,
194	isOrf=False,
195	qualifiers=parsed_qualifiers,
196	)
197	)
198
199	else:
200	# WE IGNORE THE WHOLE BLOCK
201	fileobject.read(block_size)	1✔
202	pass	1✔
203
204	fileobject.close()	1✔
205	return data	1✔
206
207
208	def snapgene_file_to_seqrecord(filepath=None, fileobject=None):	1✔
209	"""Return a BioPython SeqRecord from the data of a ``*.dna`` file.
210
211	Parameters
212	----------
213	filepath
214	Path to a .dna file created with SnapGene.
215
216	fileobject
217	On object-like pointing to the data of a .dna file created with
218	SnapGene.
219	"""
220	data = snapgene_file_to_dict(filepath=filepath, fileobject=fileobject)	1✔
221	strand_dict = {"+": 1, "-": -1, ".": 0, "=": 0}	1✔
222
223	if has_dna_alphabet:	1✔
224	seq = Seq(data["seq"], alphabet=DNAAlphabet())	×
225	else:
226	seq = Seq(data["seq"])	1✔
227
228	seqrecord = SeqRecord(	1✔
229	seq=seq,
230	features=[
231	SeqFeature(
232	location=FeatureLocation(
233	start=feature["start"],
234	end=feature["end"],
235	strand=strand_dict[feature["strand"]],
236	),
237	type=feature["type"],
238	qualifiers=feature["qualifiers"],
239	)
240	for feature in data["features"]
241	],
242	annotations=dict(topology=data["dna"]["topology"], **data["notes"]),
243	)
244
245	seqrecord.annotations["molecule_type"] = "DNA"	1✔
246
247	return seqrecord	1✔
248
249
250	def snapgene_file_to_gbk(read_file_object, write_file_object):	1✔
251	"""Convert a file object."""
252
253	def analyse_gs(dic, *args, **kwargs):	1✔
254	"""Extract gs block in the document."""
255	if "default" not in kwargs:	1✔
256	kwargs["default"] = None	1✔
257
258	for arg in args:	1✔
259	if arg in dic:	1✔
260	dic = dic[arg]	1✔
261	else:
262	return kwargs["default"]	1✔
263	return dic	1✔
264
265	data = snapgene_file_to_dict(fileobject=read_file_object)	1✔
266	wfo = write_file_object	1✔
267	wfo.write(	1✔
268	(
269	"LOCUS Exported {0:>6} bp ds-DNA {1:>8} SYN \
270	15-APR-2012\n"
271	).format(len(data["seq"]), data["dna"]["topology"])
272	)
273	definition = analyse_gs(data, "notes", "Description", default=".").replace(	1✔
274	"\n", "\n "
275	)
276	wfo.write("DEFINITION {}\n".format(definition))	1✔
277	wfo.write("ACCESSION .\n")	1✔
278	wfo.write("VERSION .\n")	1✔
279	wfo.write(	1✔
280	"KEYWORDS {}\n".format(
281	analyse_gs(data, "notes", "CustomMapLabel", default=".")
282	)
283	)
284	wfo.write("SOURCE .\n")	1✔
285	wfo.write(" ORGANISM .\n")	1✔
286
287	references = analyse_gs(data, "notes", "References")	1✔
288
289	reference_count = 0	1✔
290	if references:	1✔
291	for key in references:	1✔
292	reference_count += 1	1✔
293	ref = references[key]	1✔
294	wfo.write(	1✔
295	"REFERENCE {} (bases 1 to {} )\n".format(
296	reference_count, analyse_gs(data, "dna", "length")
297	)
298	)
299	for key2 in ref:	1✔
300	gb_key = key2.replace("@", "").upper()	1✔
301	wfo.write(" {} {}\n".format(gb_key, ref[key2]))	1✔
302
303	# generate special reference
304	reference_count += 1	1✔
305	wfo.write(	1✔
306	"REFERENCE {} (bases 1 to {} )\n".format(
307	reference_count, analyse_gs(data, "dna", "length")
308	)
309	)
310	wfo.write(" AUTHORS SnapGeneReader\n")	1✔
311	wfo.write(" TITLE Direct Submission\n")	1✔
312	wfo.write(	1✔
313	(
314	" JOURNAL Exported Monday, Sep 05, 2020 from SnapGene File\
315	Reader\n"
316	)
317	)
318	wfo.write(	1✔
319	" https://github.com/Edinburgh-Genome-Foundry/SnapGeneReader\n"
320	)
321
322	wfo.write(	1✔
323	"COMMENT {}\n".format(
324	analyse_gs(data, "notes", "Comments", default=".")
325	.replace("\n", "\n ")
326	.replace("\\", "")
327	)
328	)
329	wfo.write("FEATURES Location/Qualifiers\n")	1✔
330
331	features = analyse_gs(data, "features")	1✔
332	for feature in features:	1✔
333	strand = analyse_gs(feature, "strand", default="")	1✔
334
335	segments = analyse_gs(feature, "segments", default=[])	1✔
336	segments = [x for x in segments if x["@type"] == "standard"]	1✔
337	if len(segments) > 1:	1✔
338	line = "join("	1✔
339	for segment in segments:	1✔
340	segment_range = analyse_gs(segment, "@range").replace("-", "..")	1✔
341	if analyse_gs(segment, "@type") == "standard":	1✔
342	line += segment_range	1✔
343	line += ","	1✔
344	line = line[:-1] + ")"	1✔
345	else:
346	line = "{}..{}".format(	1✔
347	analyse_gs(feature, "start", default=" "),
348	analyse_gs(feature, "end", default=" "),
349	)
350
351	if strand == "-":	1✔
352	wfo.write(	1✔
353	" {} complement({})\n".format(
354	analyse_gs(feature, "type", default=" ").ljust(15),
355	line,
356	)
357	)
358	else:
359	wfo.write(	1✔
360	" {} {}\n".format(
361	analyse_gs(feature, "type", default=" ").ljust(15),
362	line,
363	)
364	)
365	strand = analyse_gs(feature, "strand", default="")	1✔
366	# if strand == '-':
367	# wfo.write(' /direction=LEFT\n')
368	# name
369	wfo.write(	1✔
370	' /note="{}"\n'.format(
371	analyse_gs(feature, "name", default="feature")
372	)
373	)
374	# qualifiers
375	for q_key in analyse_gs(feature, "qualifiers", default={}):	1✔
376	# do not write label, because it has been written at first.
377	if q_key == "label":	1✔
378	pass	1✔
379	elif q_key == "note":	1✔
380	for note in analyse_gs(feature, "qualifiers", q_key, default=[]):	1✔
381	# do note write color, because it will be written later
382	if note[:6] != "color:":	1✔
383	wfo.write(' /note="{}"\n'.format(note))	1✔
384	else:
385	wfo.write(	1✔
386	' /{}="{}"\n'.format(
387	q_key, analyse_gs(feature, "qualifiers", q_key, default="")
388	)
389	)
390	if len(segments) > 1:	1✔
391	wfo.write(	1✔
392	(
393	' /note="This feature \
394	has {} segments:'
395	).format(len(segments))
396	)
397	for seg_i, seg in enumerate(segments):	1✔
398	segment_name = analyse_gs(seg, "@name", default="")	1✔
399	if segment_name:	1✔
400	segment_name = " / {}".format(segment_name)	1✔
401	wfo.write(	1✔
402	"\n {}: {} / {}{}".format(
403	seg_i,
404	seg["@range"].replace("-", " .. "),
405	seg["@color"],
406	segment_name,
407	)
408	)
409	wfo.write('"\n')	1✔
410	else:
411	# write colors and direction
412	wfo.write(	1✔
413	21 * " "
414	+ '/note="color: {}'.format(
415	analyse_gs(feature, "color", default="#ffffff")
416	)
417	)
418	if strand == "-":	1✔
419	wfo.write('; direction: LEFT"\n')	1✔
420	# wfo.write('"\n')
421	elif strand == "+":	1✔
422	wfo.write('; direction: RIGHT"\n')	1✔
423	else:
424	wfo.write('"\n')	1✔
425
426	# sequence
427	wfo.write("ORIGIN\n")	1✔
428	seq = analyse_gs(data, "seq")	1✔
429	# divide rows
430	for i in range(0, len(seq), 60):	1✔
431	wfo.write(str(i).rjust(9))	1✔
432	for j in range(i, min(i + 60, len(seq)), 10):	1✔
433	wfo.write(" {}".format(seq[j : j + 10]))	1✔
434	wfo.write("\n")	1✔
435	wfo.write("//\n")	1✔

Edinburgh-Genome-Foundry / SnapGeneReader / 14177235573

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous