• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

sequana / sequana / 16568275773

28 Jul 2025 11:51AM UTC coverage: 69.115% (-0.6%) from 69.722%
16568275773

push

github

web-flow
Merge pull request #870 from cokelaer/dev

add threshold parameter in G4 module

1 of 2 new or added lines in 1 file covered. (50.0%)

725 existing lines in 17 files now uncovered.

14389 of 20819 relevant lines covered (69.11%)

2.07 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

48.21
/sequana/scripts/main/somy_score.py
1
#  This file is part of Sequana software
2
#
3
#  Copyright (c) 2016-2020 - Sequana Development Team
4
#
5
#  Distributed under the terms of the 3-clause BSD license.
6
#  The full license is in the LICENSE file, distributed with this software.
7
#
8
#  website: https://github.com/sequana/sequana
9
#  documentation: http://sequana.readthedocs.io
10
#
11
##############################################################################
12
import os
3✔
13
import sys
3✔
14
from pathlib import Path
3✔
15

16
import colorlog
3✔
17
import rich_click as click
3✔
18
from tqdm import tqdm
3✔
19

20
from sequana.scripts.common import teardown
3✔
21
from sequana.scripts.utils import CONTEXT_SETTINGS, common_logger
3✔
22
from sequana.utils import config
3✔
23

24
logger = colorlog.getLogger(__name__)
3✔
25

26

27
click.rich_click.OPTION_GROUPS = {
3✔
28
    "somy score": [
29
        {
30
            "name": "mosdepth",
31
            "options": [
32
                "--window-size",
33
                "--fast",
34
            ],
35
        },
36
    ],
37
}
38

39

40
@click.command(context_settings=CONTEXT_SETTINGS)
3✔
41
@click.argument("filename", type=click.STRING)
3✔
42
@click.option(
3✔
43
    "--window-size",
44
    type=click.INT,
45
    default=1000,
46
    show_default=True,
47
    help="""The reference to test DGE against. If provided, conditions not
48
            involving the reference are ignored. Otherwise all combinations are
49
            tested""",
50
)
51
@click.option(
3✔
52
    "--fast",
53
    is_flag=True,
54
    help="""fast option""",
55
)
56
@click.option(
3✔
57
    "--method",
58
    type=click.Choice(["em", "median", "mean"]),
59
    default="median",
60
    show_default=True,
61
    help="""Method to estimate the main somy (in principle diploid). Median and mean methods simply compute those statistics from the chunks (windows). The EM is more complex, and compute distribution, estimate mixture model and therefore the mean of the diploid distribution based on those estimates""",
62
)
63
@click.option(
3✔
64
    "--estimated-diploy-coverage",
65
    default=None,
66
    required=False,
67
    help="""If not provided, data normalisation is based on --method. If provided, this is the estimated coverage""",
68
)
69
@click.option(
3✔
70
    "--chromosomes",
71
    type=click.STRING,
72
    help="""list of chromosomes to restrict to """,
73
)
74
@click.option(
3✔
75
    "--mapq",
76
    type=click.INT,
77
    default=0,
78
    show_default=True,
79
    help="""list of chromosomes to restrict to """,
80
)
81
@click.option(
3✔
82
    "--telomeric-span",
83
    type=click.INT,
84
    default=10,
85
    show_default=True,
86
    help="""region to ignore in kb. This suppose that contigs are circularised correctly. If not, set to 0""",
87
)
88
@click.option(
3✔
89
    "-k",
90
    type=click.INT,
91
    default=4,
92
    show_default=True,
93
    help="""Model for gaussin mixture (k=4 is suppose to capture di + tri + tetraploidy)""",
94
)
95
@click.option(
3✔
96
    "--minimum-depth", type=click.INT, default=10, help="""coverage with depth below this value are removed."""
97
)
98
@click.option(
3✔
99
    "--flag",
100
    type=click.INT,
101
    default=3844,
102
    show_default=True,
103
    help="""3844 means that it removes unmapped reads, but also secondary and supplementary reads.""",
104
)
105
@click.option("--threads", type=click.INT, default=4, help="""use 4 threads .""")
3✔
106
@click.option("--exclude-chromosomes", type=click.STRING, default="", help="""list of chromosomes to exclude""")
3✔
107
@common_logger
3✔
108
def somy_score(**kwargs):
3✔
109
    """**Somy score on polyploid (or not)**"""
110
    import pandas as pd
×
111
    from easydev import cmd_exists
×
112

113
    from sequana import logger
×
114
    from sequana.modules_report.rnadiff import RNAdiffModule
×
115

116
    logger.setLevel(kwargs["logger"])
×
117

118
    if not cmd_exists("mosdepth"):
×
119
        logger.critical(
×
120
            """mosdepth not found. You may install it yourself or use damona using 'damona install mosdepth' """
121
        )
122
        sys.exit(1)
×
123

124
    exclude_chromosomes = kwargs.get("exclude_chromosomes", "").split(",")
×
125

126
    from sequana.somy import SomyScore
×
127

128
    ss = SomyScore(kwargs["filename"], window_size=kwargs["window_size"])
×
129
    if kwargs["chromosomes"]:
×
130
        ss.compute_coverage(
×
131
            use_median=True,
132
            fast=True,
133
            mapq=kwargs["mapq"],
134
            flag=kwargs["flag"],
135
            chromosomes=kwargs["chromosomes"].split(","),
136
            threads=kwargs["threads"],
137
            exclude_chromosomes=exclude_chromosomes,
138
        )
139
    else:
140
        ss.compute_coverage(
×
141
            use_median=True,
142
            fast=True,
143
            mapq=kwargs["mapq"],
144
            flag=kwargs["flag"],
145
            threads=kwargs["threads"],
146
            exclude_chromosomes=exclude_chromosomes,
147
        )
148
    # save results before filtering so that we can introspect the data later on
149
    logger.info(f"length input coverage file: {len(ss.df)}. Saving data into data.csv")
×
150
    ss.df.to_csv("data.csv", index=False)
×
151

152
    ss.remove_outliers()
×
153
    logger.info(f"Removed outliers. length input coverage file: {len(ss.df)}")
×
154

155
    ss.remove_low_depth(kwargs["minimum_depth"])
×
156
    logger.info(f"Removed low depth. length input coverage file: {len(ss.df)}")
×
157

158
    flanks = kwargs.get("telomeric_span")
×
159
    ss.remove_flanks(remove_flanking_regions_kb=flanks)
×
160
    logger.info(f"Removing flanks ({flanks}kb). length input coverage file: {len(ss.df)}")
×
161

162
    from matplotlib import rcParams
×
163

164
    rcParams["figure.figsize"] = (12, 8)
×
165

166
    if kwargs["estimated_diploy_coverage"]:
×
UNCOV
167
        ss.boxplot(
×
168
            k=kwargs["k"], method=kwargs["method"], hybrid=True, muhat=float(kwargs["estimated_diploy_coverage"])
169
        )
170
    else:
UNCOV
171
        ss.boxplot(k=kwargs["k"], method=kwargs["method"], hybrid=True)
×
172

UNCOV
173
    print(ss.info)
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc