• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

jlab / marbel / 15907532099

26 Jun 2025 04:38PM UTC coverage: 39.513% (-0.1%) from 39.62%
15907532099

push

github

tensulin
implement warning instead of error

0 of 3 new or added lines in 1 file covered. (0.0%)

292 of 739 relevant lines covered (39.51%)

0.4 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

19.49
/src/marbel/cli.py
1
from typing import Optional, Tuple
1✔
2
from typing_extensions import Annotated
1✔
3

4
import typer
1✔
5
import os
1✔
6
import sys
1✔
7
import shutil
1✔
8

9

10
from marbel.presets import __version__, MAX_SPECIES, MAX_ORTHO_GROUPS, rank_distance, LibrarySizeDistribution, Rank, ErrorModel, DESEQ2_FITTED_A0, DESEQ2_FITTED_A1, OrthologyLevel
1✔
11
from marbel.core import generate_dataset
1✔
12

13
# Typer application object; the CLI command below is registered on it via @app.command().
app = typer.Typer()
1✔
14

15

16
def version_callback(value: bool):
    """Handle the ``--version`` flag.

    When ``value`` is truthy, print the marbel version and stop the program
    by raising ``typer.Exit``; otherwise do nothing.
    """
    if not value:
        return
    print(f"marbel Version: {__version__}")
    raise typer.Exit()
×
20

21

22
def species_callback(value: Optional[int]):
    """Validate the requested number of species.

    Args:
        value: Number of species given on the command line, or ``None``.

    Returns:
        ``value`` unchanged when it lies in ``[1, MAX_SPECIES]`` (or is ``None``).

    Raises:
        typer.BadParameter: If ``value`` is above ``MAX_SPECIES`` or below 1.
    """
    # The annotation allows None; comparing None with an int would raise TypeError.
    if value is None:
        return value
    if value > MAX_SPECIES:
        raise typer.BadParameter(f"The current maximum number of species is {MAX_SPECIES}")
    if value < 1:
        # The previous message ("The number of species is 1") did not say this is a lower bound.
        raise typer.BadParameter("The minimum number of species is 1")
    return value
×
28

29

30
def orthogroups_callback(value: Optional[int]):
    """Validate the requested number of orthologous groups.

    Args:
        value: Number of orthologous groups given on the command line, or ``None``.

    Returns:
        ``value`` unchanged when it lies in ``[1, MAX_ORTHO_GROUPS]`` (or is ``None``).

    Raises:
        typer.BadParameter: If ``value`` is above ``MAX_ORTHO_GROUPS`` or below 1.
    """
    # The annotation allows None; comparing None with an int would raise TypeError.
    if value is None:
        return value
    if value > MAX_ORTHO_GROUPS:
        raise typer.BadParameter(f"The current maximum number of orthologous groups is {MAX_ORTHO_GROUPS}")
    if value < 1:
        # The previous message ("The number of orthologous groups is 1") did not say this is a lower bound.
        raise typer.BadParameter("The minimum number of orthologous groups is 1")
    return value
×
36

37

38
def checknegative(value: float):
    """Reject negative DESeq2 dispersion values.

    Returns ``value`` unchanged unless it compares below zero, in which case
    ``typer.BadParameter`` is raised.
    """
    is_negative = value < 0
    if is_negative:
        raise typer.BadParameter("Deseq2 dispersion values cannot be negative.")
    return value
×
42

43

44
def sample_callback(value: Optional[Tuple[int, int]]):
    """Ensure both entries of the sample-count pair are at least 1.

    Returns ``value`` unchanged; raises ``typer.BadParameter`` if either of
    the first two entries is below 1.
    """
    # The generator is evaluated lazily: entry 0 is checked first, entry 1 only if needed.
    if any(value[position] < 1 for position in (0, 1)):
        raise typer.BadParameter("The minimum number of samples has to be 1")
    return value
×
48

49

50
def check_error_multiplier(value, error_model):
    """Validate the error multiplier and report questionable settings on stderr.

    Args:
        value: Error multiplier given on the command line.
        error_model: The selected ``ErrorModel`` member.

    Returns:
        ``value`` unchanged.

    Raises:
        typer.BadParameter: If ``value`` is negative.
    """
    if value < 0:
        raise typer.BadParameter("Error multiplier cannot be negative")
    # Previously joined with `and`, which no number can satisfy, so the notice never appeared.
    if value < 0.01 or value > 100:
        print(f"Info: Error multiplier: {value} is very large or small, might distort error introduction.", file=sys.stderr)
    # Warn only when the multiplier was changed from its neutral value of 1.0; the former
    # `== 1.0` test warned on the untouched default and stayed silent for custom values.
    if value != 1.0 and error_model == ErrorModel.perfect:
        print("Warning: The error multiplier will be ignored, as perfect does not introduce read errors.", file=sys.stderr)
    return value
×
58

59

60
def dge_ratio_callback(value: float):
    """Check that the ratio is not negative and is smaller than 1.

    Returns ``value`` unchanged; raises ``typer.BadParameter`` otherwise.
    """
    problem = None
    if value < 0:
        problem = "DGE ratio cannot be negative"
    elif value >= 1:
        problem = "DGE ratio must be smaller than 1"

    if problem is not None:
        raise typer.BadParameter(problem)
    return value
×
66

67

68
def library_size_distribution_callback(value):
    """Parse a ``name[,param...]`` string into a ``LibrarySizeDistribution``.

    Accepted forms, as implemented below:
      * ``negative_binomial`` or ``negative_binomial,<n>,<p>`` (``n`` integer >= 1, ``p`` in [0, 1])
      * ``poisson`` or ``poisson,<int>``
      * any other name listed in ``LibrarySizeDistribution.possible_distributions``
        (extra comma-separated parts are ignored for those)

    Args:
        value: Raw comma-separated option string.

    Returns:
        A ``LibrarySizeDistribution`` built from the name and, when given, its parameters.

    Raises:
        typer.BadParameter: If the name is unknown, or the parameters have the
            wrong count, type or range.
    """
    name, *params = value.split(",")
    if name not in LibrarySizeDistribution.possible_distributions:
        raise typer.BadParameter(f"Library size distribution {name} is not a valid distribution. Choose from {LibrarySizeDistribution.possible_distributions}")

    if name == "negative_binomial":
        if not params:
            return LibrarySizeDistribution(name)
        if len(params) != 2:
            raise typer.BadParameter("Negative binomial distribution requires two parameters")
        # The original compared the parameter *list* with the integer 2, which is never
        # equal, so user-supplied n and p were silently discarded.
        try:
            nbin_n = int(params[0])
            nbin_p = float(params[1])
        except ValueError:
            raise typer.BadParameter(f"Negative binomial distribution requires n to be an integer and p to be a float. Given: n: {params[0]} and p: {params[1]}") from None
        if nbin_n < 1:
            raise typer.BadParameter(f"Negative binomial distribution requires n to be at least 1. Given: {nbin_n}")
        # Written as a chained comparison so that NaN is rejected as well.
        if not 0 <= nbin_p <= 1:
            raise typer.BadParameter(f"Negative binomial distribution requires p to be between 0 and 1. Given: {nbin_p}")
        return LibrarySizeDistribution(name, nbin_n=nbin_n, nbin_p=nbin_p)

    if name == "poisson":
        # Check the parameter count before indexing; a bare "poisson" used to raise IndexError.
        if not params:
            return LibrarySizeDistribution(name)
        if len(params) > 1:
            raise typer.BadParameter("Poisson distribution requires one parameter")
        try:
            poisson = int(params[0])
        except ValueError:
            raise typer.BadParameter(f"Poisson distribution requires an integer parameter. Given: {params[0]}") from None
        return LibrarySizeDistribution(name, poisson=poisson)

    return LibrarySizeDistribution(name)
×
98

99

100
def rank_species_callback(value: Optional[str]):
    """Translate a rank name or a numeric string into a distance value.

    ``None`` is passed through. Otherwise ``value`` is first looked up in
    ``rank_distance``; if it is not a key there, it is converted with
    ``float``. A value that is neither raises ``typer.BadParameter``.
    """
    if value is None:
        return None

    try:
        distance = rank_distance[value]
    except KeyError:
        pass
    else:
        return distance

    # Not a known rank name: fall back to interpreting the value as a number.
    try:
        return float(value)
    except ValueError:
        raise typer.BadParameter(f"Rank {value} is not a valid rank or a valid float. Choose from {list(rank_distance.keys())} or specify a float.")
×
110

111

112
def limitthreads(value: int):
    """Resolve the thread count to a value between 1 and 128.

    ``0`` and ``-1`` select automatic detection via ``os.cpu_count()`` (falling
    back to 1 and capped at 128). Other values below 1 raise
    ``typer.BadParameter``; values above 128 are reduced to 128 with a notice
    on stderr.
    """
    if value in (0, -1):
        detected = min(os.cpu_count() or 1, 128)
        print(f"Info: Automatic thread detection, detected: {detected} threads.", file=sys.stderr)
        return detected

    if value < 1:
        raise typer.BadParameter("The number of threads must be at least 1. Use 0 or -1 for automatic thread detection.")

    if value > 128:
        print("Info: The number of threads is set to 128, which is the upper limit.", file=sys.stderr)
        return 128

    return value
×
122

123

124
def check_outdir(outdir, force_creation):
    """Guard against writing into an already existing output directory.

    Does nothing when ``outdir`` is not an existing directory. When it is,
    the directory tree is removed if ``force_creation`` is truthy (with a
    notice on stderr); otherwise ``typer.BadParameter`` is raised.
    """
    # os.path.isdir is already False for paths that do not exist.
    if not os.path.isdir(outdir):
        return

    if not force_creation:
        raise typer.BadParameter(f"The output directory {outdir} already exists. Please choose a different directory or remove the existing one.")

    print(f"Info: The output directory {outdir} already exists. Overwriting the existing directory because of force.", file=sys.stderr)
    shutil.rmtree(outdir)
×
131

132

133
def _min_sparsity_callback(value: float):
    """Validate ``--min-sparsity``: accepts the same range as the DGE ratio, with its own message."""
    if value < 0 or value >= 1:
        raise typer.BadParameter("Minimum sparsity must be at least 0 and smaller than 1")
    return value


@app.command()
def main(n_species: Annotated[int, typer.Option(callback=species_callback,
                                                help=f"Number of species to be drawn for the metatranscriptomic in silico dataset. Maximum value: {MAX_SPECIES}.")] = 20,
         n_orthogroups: Annotated[int,
                                  typer.Option(callback=orthogroups_callback,
                                               help=f"Number of orthologous groups to be drawn for the metatranscriptomic in silico dataset. Maximum value: {MAX_ORTHO_GROUPS}.")] = 1000,
         n_samples: Annotated[Tuple[int, int],
                              typer.Option(callback=sample_callback,
                                           help="Number of samples to be created for the metatranscriptomic in silico dataset: "
                                           "the first number is the number of samples for group 1 and "
                                           "the second number is the number of samples for group 2."
                                           )] = (10, 10),
         outdir: Annotated[str, typer.Option(help="Output directory for the metatranscriptomic in silico dataset")] = "simulated_reads",
         max_phylo_distance: Annotated[Optional[Rank], typer.Option(callback=rank_species_callback,
                                                                    help="Maximum mean phylogenetic distance for orthologous groups. "
                                                                    "Specify a stricter limit if you want to avoid orthologous groups "
                                                                    "with a more diverse phylogenetic distance.")] = None,
         # TODO(review): the second sentence of this help text is truncated in the original source; complete or remove it.
         min_identity: Annotated[Optional[float], typer.Option(help="Minimum mean sequence identity score for an orthologous group. "
                                                               "Specify for more ")] = None,
         dge_ratio: Annotated[float, typer.Option(callback=dge_ratio_callback, help="Ratio of up and down regulated genes. Must be between 0 and 1. "
                                                  "This is a random drawing process from normal distribution, so the actual ratio might vary.")] = 0.2,
         seed: Annotated[Optional[int], typer.Option(help="Seed for the sampling. Set for reproducibility")] = None,
         error_model: Annotated[ErrorModel, typer.Option(help="Sequencer model for the reads, use basic or perfect (no errors) for custom read length. Note that read length must be set when using basic or perfect.")] = ErrorModel.HiSeq,
         compressed: Annotated[bool, typer.Option(help="Compress the output fastq files")] = True,
         read_length: Annotated[Optional[int], typer.Option(help="Read length for the reads. Only available when using error_model basic or perfect")] = None,
         library_size: Annotated[int, typer.Option(help="Library size for the reads.")] = 100000,
         library_size_distribution: Annotated[str, typer.Option(help=f"Distribution for the library size. Select from: {LibrarySizeDistribution.possible_distributions}.")] = "uniform",
         group_orthology_level: Annotated[OrthologyLevel, typer.Option(help="Determines the level of orthology in groups. If you use this, use it with a lot of threads. Takes a long time.")] = OrthologyLevel.normal,
         threads: Annotated[int, typer.Option(callback=limitthreads, help="Number of threads to be used. Use 0 or -1 for auto detection. Upper limit: 128.")] = 10,
         deseq_dispersion_parameter_a0: Annotated[float, typer.Option(callback=checknegative, help="For generating sampling: General dispersion estimation of DESeq2. Only set when you have knowledge of DESeq2 dispersion.")] = DESEQ2_FITTED_A0,
         deseq_dispersion_parameter_a1: Annotated[float, typer.Option(callback=checknegative, help="For generating sampling: Gene mean dependent dispersion of DESeq2. Only set when you have knowledge of DESeq2 dispersion.")] = DESEQ2_FITTED_A1,
         min_sparsity: Annotated[float, typer.Option(callback=_min_sparsity_callback, help="Will achieve the minimum specified sparsity by zeroing count values randomly.")] = 0,
         force_creation: Annotated[bool, typer.Option(help="Force the creation of the dataset, even if available orthogroups do not suffice for specified number of orthogroups.")] = False,
         min_overlap: Annotated[int, typer.Option(help="Minimum overlap for the blocks. Use this to evaluate overlap blocks, i.e. uninterrupted parts covered with reads that overlap on the genome. Accounts for kmer size.")] = 16,
         error_multiplier: Annotated[float, typer.Option(help="Error multiplier for the error model. This is a multiplier for the error rate of the sequencing model. Accepting values between 0.01 and 100.")] = 1.0,
         _: Annotated[Optional[bool], typer.Option("--version", callback=version_callback)] = None,):
    """Generate a metatranscriptomic in silico dataset."""
    # NOTE: Typer shows this docstring as the command description in --help, so it is kept to one line.

    # The basic and perfect error models have no built-in read length, so one must be supplied
    # (or defaulted to 100 when --force-creation is set).
    if error_model == ErrorModel.basic or error_model == ErrorModel.perfect:
        if read_length is None:
            if force_creation:
                print("Info: Read length is not specified. Using default read length of 100, because --force-creation is set.", file=sys.stderr)
                read_length = 100
            else:
                raise typer.BadParameter('Read length must be specified when using --error-model "basic" or "perfect".')

    check_error_multiplier(error_multiplier, error_model)

    # Finish all pure validation before check_outdir, which may delete an existing directory
    # when --force-creation is set; a bad distribution string must not cost the user their data.
    library_size_distribution = library_size_distribution_callback(library_size_distribution)

    check_outdir(outdir, force_creation)

    generate_dataset(n_species, n_orthogroups, n_samples, outdir, max_phylo_distance, min_identity, dge_ratio, seed,
                     error_model, compressed, read_length, library_size, library_size_distribution,
                     group_orthology_level, threads, deseq_dispersion_parameter_a0, deseq_dispersion_parameter_a1,
                     min_sparsity, force_creation, min_overlap, error_multiplier)
186

187

188
if __name__ == "__main__":
1✔
189
    app()
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc