7117302237

Committed 06 Dec 2023 04:23PM UTC coverage: 75.482% (+1.8%) from 73.729%

Build # 7117302237

Build Type

push

github

Committed by

cokelaer

Commit Message

Update version

Run Details

13709 of 18162 relevant lines covered (75.48%)

2.26 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

14.93

/sequana/scripts/main/summary.py

#  This file is part of Sequana software
#
#  Copyright (c) 2016-2020 - Sequana Development Team
#
#  Distributed under the terms of the 3-clause BSD license.
#  The full license is in the LICENSE file, distributed with this software.
#
#  website: https://github.com/sequana/sequana
#  documentation: http://sequana.readthedocs.io
#
##############################################################################
import sys

import click
import colorlog
import rich_click as click

from sequana.scripts.utils import CONTEXT_SETTINGS

logger = colorlog.getLogger(__name__)


# Filename suffixes used to guess the module when --module is not given.
# Entries are tried in order and the first match wins.
_MODULE_SUFFIXES = (
    ("fastq", ("fastq.gz", ".fastq")),
    ("bam", (".bam",)),
    ("gff", (".gff", "gff3")),
    ("fasta", ("fasta.gz", ".fasta")),
    ("vcf", (".vcf",)),
)


def _guess_module(filename):
    """Return the module name matching the suffix of *filename*, or None."""
    for module, suffixes in _MODULE_SUFFIXES:
        # str.endswith accepts a tuple of candidate suffixes.
        if filename.endswith(suffixes):
            return module
    return None


@click.command(context_settings=CONTEXT_SETTINGS)
@click.argument("name", type=click.Path(exists=True), nargs=-1)
@click.option(
    "--module",
    required=False,
    type=click.Choice(["bamqc", "bam", "fasta", "fastq", "gff", "vcf"]),
)
def summary(**kwargs):
    """Create a HTML report or print a summary for various NGS formats.

    \b
    * bamqc
    * bam
    * fasta
    * fastq
    * gff
    * vcf

    All input files (for instance a pattern expanded in back quotes) are
    processed sequentially with the same module.

    If --module is not provided, it is guessed from the extension of the
    first input file.

    All modules work in the same way. For example, for FastQ files::

        sequana summary one_input.fastq
        sequana summary `ls *fastq`


    """
    names = kwargs["name"]
    module = kwargs["module"]

    if module is None:
        # nargs=-1 accepts zero arguments, so names may be an empty tuple;
        # indexing it would otherwise raise an IndexError.
        if not names:
            logger.error("please provide at least one input file")
            sys.exit(1)

        # The module is inferred from the first file only and then applied
        # to every input file.
        module = _guess_module(names[0])
        if module is None:
            logger.error("please use --module to tell us about the input fimes")
            sys.exit(1)

    if module == "bamqc":
        from sequana.modules_report.bamqc import BAMQCModule

        for name in names:
            print(f"Processing {name}")
            # NOTE(review): every input is given the same "bamqc.html" argument;
            # if this is the output filename, later inputs overwrite earlier
            # ones -- confirm against BAMQCModule.
            BAMQCModule(name, "bamqc.html")
    elif module == "fasta":  # there is no module per se. Here we just call FastA.summary()
        from sequana.fasta import FastA

        for name in names:
            f = FastA(name)
            f.summary()
    elif module == "fastq":  # there is no module per se. Here we just print FastQC.get_stats()
        from sequana import FastQC

        for filename in names:
            ff = FastQC(filename, max_sample=1e6, verbose=False)
            stats = ff.get_stats()
            print(stats)
    elif module == "bam":
        import pandas as pd

        from sequana import BAM

        for filename in names:
            ff = BAM(filename)
            stats = ff.get_stats()
            # One row per file, one column per statistic.
            df = pd.Series(stats).to_frame().T
            print(df)
    elif module == "gff":
        from sequana import GFF3

        for filename in names:
            ff = GFF3(filename)
            print(f"#filename: {filename}")
            print("#Number of entries per genetic type:")
            print(ff.df.value_counts("genetic_type").to_string())
            print("#Number of duplicated attribute (if any) per attribute:")
            # The return value is not used here; presumably the method reports
            # its findings itself -- TODO confirm.
            ff.get_duplicated_attributes_per_genetic_type()
    elif module == "vcf":
        from sequana.freebayes_vcf_filter import VCF_freebayes

        # Columns of the CSV output, in order.
        columns = (
            "chr",
            "position",
            "depth",
            "reference",
            "alternative",
            "freebayes_score",
            "strand_balance",
            "frequency",
        )

        for filename in names:
            print(f"#filename: {filename}")
            vcf = VCF_freebayes(filename)
            # CSV header followed by one line per variant.
            print(",".join(columns))
            for variant in vcf.get_variants():
                resume = variant.resume
                print(",".join(str(resume[col]) for col in columns))

1	# This file is part of Sequana software
2	#
3	# Copyright (c) 2016-2020 - Sequana Development Team
4	#
5	# Distributed under the terms of the 3-clause BSD license.
6	# The full license is in the LICENSE file, distributed with this software.
7	#
8	# website: https://github.com/sequana/sequana
9	# documentation: http://sequana.readthedocs.io
10	#
11	##############################################################################
12	import sys	3✔
13
14	import click	3✔
15	import colorlog	3✔
16	import rich_click as click	3✔
17
18	from sequana.scripts.utils import CONTEXT_SETTINGS	3✔
19
20	logger = colorlog.getLogger(__name__)	3✔
21
22
23	@click.command(context_settings=CONTEXT_SETTINGS)	3✔
24	@click.argument("name", type=click.Path(exists=True), nargs=-1)	3✔
25	@click.option(	3✔
26	"--module",
27	required=False,
28	type=click.Choice(["bamqc", "bam", "fasta", "fastq", "gff", "vcf"]),
29	)
30	def summary(**kwargs):	3✔
31	"""Create a HTML report for various type of NGS formats.
32
33	\b
34	* bamqc
35	* fastq
36
37	This will process all files in the given pattern (in back quotes)
38	sequentially and procude one HTML file per input file.
39
40
41	Other module all work in the same way. For example, for FastQ files::
42
43	sequana summary one_input.fastq
44	sequana summary `ls *fastq`
45
46
47	"""
48	names = kwargs["name"]	×
49	module = kwargs["module"]	×
50
51	if module is None:	×
52	if names[0].endswith("fastq.gz") or names[0].endswith(".fastq"):	×
53	module = "fastq"	×
54	elif names[0].endswith(".bam"):	×
55	module = "bam"	×
56	elif names[0].endswith(".gff") or names[0].endswith("gff3"):	×
57	module = "gff"	×
58	elif names[0].endswith("fasta.gz") or names[0].endswith(".fasta"):	×
59	module = "fasta"	×
60	else:
61	logger.error("please use --module to tell us about the input fimes")	×
62	sys.exit(1)	×
63
64	if module == "bamqc":	×
65	for name in names:	×
66	print(f"Processing {name}")	×
67	from sequana.modules_report.bamqc import BAMQCModule	×
68
69	BAMQCModule(name, "bamqc.html")	×
70	elif module == "fasta": # there is no module per se. HEre we just call FastA.summary()	×
71	from sequana.fasta import FastA	×
72
73	for name in names:	×
74	f = FastA(name)	×
75	f.summary()	×
76	elif module == "fastq": # there is no module per se. HEre we just call FastA.summary()	×
77	from sequana import FastQC	×
78
79	for filename in names:	×
80	ff = FastQC(filename, max_sample=1e6, verbose=False)	×
81	stats = ff.get_stats()	×
82	print(stats)	×
83	elif module == "bam":	×
84	import pandas as pd	×
85
86	from sequana import BAM	×
87
88	for filename in names:	×
89	ff = BAM(filename)	×
90	stats = ff.get_stats()	×
91	df = pd.Series(stats).to_frame().T	×
92	print(df)	×
93	elif module == "gff":	×
94	import pandas as pd	×
95
96	from sequana import GFF3	×
97
98	for filename in names:	×
99	ff = GFF3(filename)	×
100	print(f"#filename: {filename}")	×
101	print("#Number of entries per genetic type:")	×
102	print(ff.df.value_counts("genetic_type").to_string())	×
103	print("#Number of duplicated attribute (if any) per attribute:")	×
104	ff.get_duplicated_attributes_per_genetic_type()	×
105	elif module == "vcf":	×
106	from sequana.freebayes_vcf_filter import VCF_freebayes	×
107
108	for filename in names:	×
109	print(f"#filename: {filename}")	×
110	vcf = VCF_freebayes(filename)	×
111	columns = (	×
112	"chr",
113	"position",
114	"depth",
115	"reference",
116	"alternative",
117	"freebayes_score",
118	"strand_balance",
119	"frequency",
120	)
121	print(",".join(columns))	×
122	for variant in vcf.get_variants():	×
123	resume = variant.resume	×
124	print(",".join([str(resume[col]) for col in columns]))	×

cokelaer / sequana / 7117302237

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous