19136907997

Committed 06 Nov 2025 12:56PM UTC coverage: 68.835% (-0.4%) from 69.186%

Build # 19136907997

Build Type

push

github

Committed by

web-flow

Commit Message

Merge pull request #875 from cokelaer/dev

update HTML variant calling + others

Run Details

157 of 332 new or added lines in 14 files covered. (47.29%)

5 existing lines in 3 files now uncovered.

14551 of 21139 relevant lines covered (68.83%)

2.07 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

12.35

/sequana/scripts/main/summary.py

#  This file is part of Sequana software
#
#  Copyright (c) 2016-2020 - Sequana Development Team
#
#  Distributed under the terms of the 3-clause BSD license.
#  The full license is in the LICENSE file, distributed with this software.
#
#  website: https://github.com/sequana/sequana
#  documentation: http://sequana.readthedocs.io
#
##############################################################################
import sys

import colorlog
import rich_click as click

from sequana.scripts.utils import CONTEXT_SETTINGS

logger = colorlog.getLogger(__name__)


# Ordered (suffixes, module) pairs used to guess the module from a filename.
# The suffix strings reproduce the historical checks exactly (some of them
# deliberately have no leading dot); the first matching entry wins.
_SUFFIX_TO_MODULE = (
    (("fastq.gz", ".fastq"), "fastq"),
    ((".bam",), "bam"),
    ((".sam",), "sam"),
    ((".gff", "gff3"), "gff"),
    (("fasta.gz", ".fasta", "fa.gz", ".fa"), "fasta"),
    (("vcf",), "vcf"),
)


def _guess_module(filename):
    """Return the module name matching the suffix of *filename*, or None."""
    for suffixes, module in _SUFFIX_TO_MODULE:
        # str.endswith accepts a tuple: True if any of the suffixes matches.
        if filename.endswith(suffixes):
            return module
    return None


@click.command(context_settings=CONTEXT_SETTINGS)
@click.argument("name", type=click.Path(exists=True), nargs=-1)
@click.option(
    "--module",
    required=False,
    type=click.Choice(["bamqc", "bam", "fasta", "fastq", "gff", "vcf", "sam"]),
)
@click.option("--output-file", required=False, type=click.Path())
def summary(**kwargs):
    """Summarise one or more NGS files of various formats.

    \b
    * bamqc
    * bam
    * sam
    * fasta
    * fastq
    * gff
    * vcf

    Unless --module is provided, the module is guessed from the extension
    of the first input file. All files in the given pattern (in back quotes)
    are then processed sequentially with that module.

    All modules work in the same way. For example, for FastQ files::

        sequana summary one_input.fastq
        sequana summary `ls *fastq`


    """
    names = kwargs["name"]
    module = kwargs["module"]

    if module is None:
        # nargs=-1 accepts zero arguments, so guard before reading names[0].
        if not names:
            logger.error("No input file provided; please provide at least one input file.")
            sys.exit(1)

        # Only the first file is inspected; all inputs are assumed to share its type.
        module = _guess_module(names[0])
        if module is None:
            logger.error(
                "Only extensions fastq, fasta, bam, sam, gff, gff3 and vcf are recognised. please use --module to tell us about the type of the input files"
            )
            sys.exit(1)

    # Heavy dependencies are imported lazily, only in the branch that needs them.
    if module == "bamqc":
        from sequana.modules_report.bamqc import BAMQCModule

        for name in names:
            print(f"Processing {name}")
            # NOTE(review): the same "bamqc.html" name is passed for every
            # input; with several inputs the report is presumably overwritten
            # each time -- confirm whether a per-input name is wanted.
            BAMQCModule(name, "bamqc.html")
    elif module == "fasta":  # there is no module per se. Here we just call FastA.summary()
        from sequana.fasta import FastA

        for name in names:
            FastA(name).summary()
    elif module == "fastq":  # there is no module per se. Here we just print FastQC.get_stats()
        from sequana import FastQC

        for filename in names:
            ff = FastQC(filename, max_sample=1e6, verbose=False)
            print(ff.get_stats())
    elif module == "bam":
        import pandas as pd

        from sequana import BAM

        for filename in names:
            stats = BAM(filename).get_stats()
            # Wrap the stats in a Series, then transpose to a one-row table.
            print(pd.Series(stats).to_frame().T)
    elif module == "sam":
        import pandas as pd

        from sequana import SAM

        for filename in names:
            stats = SAM(filename).get_stats()
            # Wrap the stats in a Series, then transpose to a one-row table.
            print(pd.Series(stats).to_frame().T)
    elif module == "gff":
        from sequana import GFF3

        for filename in names:
            ff = GFF3(filename)
            print(f"#filename: {filename}")
            print("#Number of entries per genetic type:")
            print(ff.df.value_counts("genetic_type").to_string())
            print("#Number of duplicated attribute (if any) per attribute:")
            ff.get_duplicated_attributes_per_genetic_type()
    elif module == "vcf":
        from sequana.variants import VariantFile

        for filename in names:
            print(f"#filename: {filename}")
            df = VariantFile(filename, progress=True).df
            print(df.groupby("type").count())
            # NOTE(review): with several inputs the same --output-file is
            # rewritten at each iteration, so only the last table is kept.
            if kwargs["output_file"]:
                df.to_csv(kwargs["output_file"], index=False)

1	# This file is part of Sequana software
2	#
3	# Copyright (c) 2016-2020 - Sequana Development Team
4	#
5	# Distributed under the terms of the 3-clause BSD license.
6	# The full license is in the LICENSE file, distributed with this software.
7	#
8	# website: https://github.com/sequana/sequana
9	# documentation: http://sequana.readthedocs.io
10	#
11	##############################################################################
12	import sys	3✔
13
14	import colorlog	3✔
15	import rich_click as click	3✔
16
17	from sequana.scripts.utils import CONTEXT_SETTINGS	3✔
18
19	logger = colorlog.getLogger(__name__)	3✔
20
21
22	@click.command(context_settings=CONTEXT_SETTINGS)	3✔
23	@click.argument("name", type=click.Path(exists=True), nargs=-1)	3✔
24	@click.option(	3✔
25	"--module",
26	required=False,
27	type=click.Choice(["bamqc", "bam", "fasta", "fastq", "gff", "vcf", "sam"]),
28	)
29	@click.option("--output-file", required=False, type=click.Path())	3✔
30	def summary(**kwargs):	3✔
31	"""Create a HTML report for various type of NGS formats.
32
33	\b
34	* bamqc
35	* fastq
36
37	This will process all files in the given pattern (in back quotes)
38	sequentially and produce one HTML file per input file.
39
40
41	Other module all work in the same way. For example, for FastQ files::
42
43	sequana summary one_input.fastq
44	sequana summary `ls *fastq`
45
46
47	"""
48	names = kwargs["name"]	×
49	module = kwargs["module"]	×
50
51	if module is None:	×
52	if names[0].endswith("fastq.gz") or names[0].endswith(".fastq"):	×
53	module = "fastq"	×
54	elif names[0].endswith(".bam"):	×
55	module = "bam"	×
56	elif names[0].endswith(".sam"):	×
57	module = "bam"	×
58	elif names[0].endswith(".gff") or names[0].endswith("gff3"):	×
59	module = "gff"	×
60	elif names[0].endswith("fasta.gz") or names[0].endswith(".fasta"):	×
61	module = "fasta"	×
62	elif names[0].endswith("fa.gz") or names[0].endswith(".fa"):	×
63	module = "fasta"	×
64	elif names[0].endswith("vcf"):	×
65	module = "vcf"	×
66	else:
NEW 67	logger.error(	×
68	"Only extensions fastq, fasta, bam, sam, gff, gff3 and vcf are recognised. please use --module to tell us about the type of the input files"
69	)
UNCOV 70	sys.exit(1)	×
71
72	if module == "bamqc":	×
73	for name in names:	×
74	print(f"Processing {name}")	×
75	from sequana.modules_report.bamqc import BAMQCModule	×
76
77	BAMQCModule(name, "bamqc.html")	×
78	elif module == "fasta": # there is no module per se. HEre we just call FastA.summary()	×
79	from sequana.fasta import FastA	×
80
81	for name in names:	×
82	f = FastA(name)	×
83	f.summary()	×
84	elif module == "fastq": # there is no module per se. HEre we just call FastA.summary()	×
85	from sequana import FastQC	×
86
87	for filename in names:	×
88	ff = FastQC(filename, max_sample=1e6, verbose=False)	×
89	stats = ff.get_stats()	×
90	print(stats)	×
91	elif module == "bam":	×
92	import pandas as pd	×
93
94	from sequana import BAM	×
95
96	for filename in names:	×
97	ff = BAM(filename)	×
98	stats = ff.get_stats()	×
99	df = pd.Series(stats).to_frame().T	×
100	print(df)	×
101	elif module == "sam":	×
102	import pandas as pd	×
103
104	from sequana import SAM	×
105
106	for filename in names:	×
107	ff = SAM(filename)	×
108	stats = ff.get_stats()	×
109	df = pd.Series(stats).to_frame().T	×
110	print(df)	×
111	elif module == "gff":	×
112	import pandas as pd	×
113
114	from sequana import GFF3	×
115
116	for filename in names:	×
117	ff = GFF3(filename)	×
118	print(f"#filename: {filename}")	×
119	print("#Number of entries per genetic type:")	×
120	print(ff.df.value_counts("genetic_type").to_string())	×
121	print("#Number of duplicated attribute (if any) per attribute:")	×
122	ff.get_duplicated_attributes_per_genetic_type()	×
123	elif module == "vcf":	×
124	from sequana.variants import VariantFile	×
125
126	for filename in names:	×
127	print(f"#filename: {filename}")	×
128	vcf = VariantFile(filename, progress=True)	×
129	df = vcf.df	×
130	columns = (	×
131	"chr",
132	"position",
133	"depth",
134	"reference",
135	"alternative",
136	"freebayes_score",
137	"strand_balance",
138	"frequency",
139	)
NEW 140	print(df.groupby("type").count())	×
NEW 141	if kwargs["output_file"]:	×
NEW 142	df.to_csv(kwargs["output_file"], index=False)	×

sequana / sequana / 19136907997

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous