• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

sequana / sequana / 19136907997

06 Nov 2025 12:56PM UTC coverage: 68.835% (-0.4%) from 69.186%
19136907997

push

github

web-flow
Merge pull request #875 from cokelaer/dev

update HTML variant calling + others

157 of 332 new or added lines in 14 files covered. (47.29%)

5 existing lines in 3 files now uncovered.

14551 of 21139 relevant lines covered (68.83%)

2.07 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

12.35
/sequana/scripts/main/summary.py
1
#  This file is part of Sequana software
2
#
3
#  Copyright (c) 2016-2020 - Sequana Development Team
4
#
5
#  Distributed under the terms of the 3-clause BSD license.
6
#  The full license is in the LICENSE file, distributed with this software.
7
#
8
#  website: https://github.com/sequana/sequana
9
#  documentation: http://sequana.readthedocs.io
10
#
11
##############################################################################
12
import sys
3✔
13

14
import colorlog
3✔
15
import rich_click as click
3✔
16

17
from sequana.scripts.utils import CONTEXT_SETTINGS
3✔
18

19
# Module-level logger named after this module; the summary command uses it
# to report input files whose extension is not recognised.
logger = colorlog.getLogger(__name__)
3✔
20

21

22
def _guess_module(filename):
    """Guess the summary module from the extension of *filename*.

    Returns the module name (one of the ``--module`` choices) or None when
    the extension is not recognised.
    """
    # Checked in order; str.endswith accepts a tuple of candidate suffixes.
    suffixes_to_module = (
        (("fastq.gz", ".fastq"), "fastq"),
        ((".bam",), "bam"),
        # Route SAM files to the dedicated "sam" branch rather than "bam".
        ((".sam",), "sam"),
        ((".gff", "gff3"), "gff"),
        (("fasta.gz", ".fasta", "fa.gz", ".fa"), "fasta"),
        (("vcf",), "vcf"),
    )
    for suffixes, module in suffixes_to_module:
        if filename.endswith(suffixes):
            return module
    return None


@click.command(context_settings=CONTEXT_SETTINGS)
@click.argument("name", type=click.Path(exists=True), nargs=-1)
@click.option(
    "--module",
    required=False,
    type=click.Choice(["bamqc", "bam", "fasta", "fastq", "gff", "vcf", "sam"]),
)
@click.option("--output-file", required=False, type=click.Path())
def summary(**kwargs):
    """Create a HTML report for various type of NGS formats.

    \b
    * bamqc
    * fastq

    This will process all files in the given pattern (in back quotes)
    sequentially and produce one HTML file per input file.


    Other modules all work in the same way. For example, for FastQ files::

        sequana summary one_input.fastq
        sequana summary `ls *fastq`


    """
    # kwargs keys come from the click decorators above:
    #   name        -> tuple of existing paths (possibly empty, nargs=-1)
    #   module      -> one of the --module choices, or None
    #   output_file -> path or None (only read by the vcf branch)
    # Heavy sequana/pandas imports are done lazily inside each branch.
    names = kwargs["name"]
    module = kwargs["module"]

    if module is None:
        # Without --module the type is inferred from the first file only;
        # with no file at all there is nothing to infer from (names[0]
        # would raise IndexError), so fail with an explicit message.
        if not names:
            logger.error("No input file provided. Please provide at least one input file.")
            sys.exit(1)

        module = _guess_module(names[0])
        if module is None:
            logger.error(
                "Only extensions fastq, fasta, bam, sam, gff, gff3 and vcf are recognised. please use --module to tell us about the type of the input files"
            )
            sys.exit(1)

    if module == "bamqc":
        from sequana.modules_report.bamqc import BAMQCModule

        for name in names:
            print(f"Processing {name}")
            # NOTE(review): the same "bamqc.html" literal is passed for every
            # input; if it is the output filename, several inputs overwrite
            # each other -- confirm against BAMQCModule.
            BAMQCModule(name, "bamqc.html")
    elif module == "fasta":  # there is no module per se. Here we just call FastA.summary()
        from sequana.fasta import FastA

        for name in names:
            f = FastA(name)
            f.summary()
    elif module == "fastq":  # there is no module per se. Here we just print FastQC.get_stats()
        from sequana import FastQC

        for filename in names:
            ff = FastQC(filename, max_sample=1e6, verbose=False)
            stats = ff.get_stats()
            print(stats)
    elif module == "bam":
        import pandas as pd

        from sequana import BAM

        for filename in names:
            ff = BAM(filename)
            stats = ff.get_stats()
            # Wrap the stats in a Series and transpose to get a one-row frame.
            df = pd.Series(stats).to_frame().T
            print(df)
    elif module == "sam":
        import pandas as pd

        from sequana import SAM

        for filename in names:
            ff = SAM(filename)
            stats = ff.get_stats()
            # Wrap the stats in a Series and transpose to get a one-row frame.
            df = pd.Series(stats).to_frame().T
            print(df)
    elif module == "gff":
        from sequana import GFF3

        for filename in names:
            ff = GFF3(filename)
            print(f"#filename: {filename}")
            print("#Number of entries per genetic type:")
            print(ff.df.value_counts("genetic_type").to_string())
            print("#Number of duplicated attribute (if any) per attribute:")
            ff.get_duplicated_attributes_per_genetic_type()
    elif module == "vcf":
        from sequana.variants import VariantFile

        for filename in names:
            print(f"#filename: {filename}")
            vcf = VariantFile(filename, progress=True)
            df = vcf.df
            print(df.groupby("type").count())
            # NOTE(review): with several inputs the same --output-file is
            # rewritten on each iteration, so only the last file's table
            # remains on disk.
            if kwargs["output_file"]:
                df.to_csv(kwargs["output_file"], index=False)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc