• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

jlab / marbel / 15446708427

04 Jun 2025 03:35PM UTC coverage: 40.056% (-0.3%) from 40.311%
15446708427

push

github

tensulin
fix: large number of threads may cause error

1 of 7 new or added lines in 1 file covered. (14.29%)

286 of 714 relevant lines covered (40.06%)

0.4 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

18.95
/src/marbel/cli.py
1
from typing import Optional, Tuple
1✔
2
from typing_extensions import Annotated
1✔
3

4
import typer
1✔
5

6
from marbel.presets import __version__, MAX_SPECIES, MAX_ORTHO_GROUPS, rank_distance, LibrarySizeDistribution, Rank, ErrorModel, DESEQ2_FITTED_A0, DESEQ2_FITTED_A1, OrthologyLevel
1✔
7
from marbel.core import generate_dataset
1✔
8

9
# Typer application object; `main` below is registered on it via @app.command().
app = typer.Typer()
1✔
10

11

12
def version_callback(value: bool):
    """Print the marbel version and exit when the version flag is set.

    Used as the callback of the ``--version`` option.

    Args:
        value: Parsed value of the flag; a falsy value means nothing to do.

    Raises:
        typer.Exit: After printing the version, so no further processing runs.
    """
    if value:
        print(f"marbel Version: {__version__}")
        raise typer.Exit()
×
16

17

18
def species_callback(value: Optional[int]):
    """Validate the requested number of species.

    Args:
        value: Number of species given on the command line. ``None`` is
            passed through unchanged, matching the ``Optional`` annotation.

    Returns:
        The unchanged value when it lies in ``1..MAX_SPECIES``.

    Raises:
        typer.BadParameter: If the value is below 1 or above ``MAX_SPECIES``.
    """
    if value is None:
        # Nothing to validate; comparing None with an int would raise TypeError.
        return value
    if value > MAX_SPECIES:
        raise typer.BadParameter(f"The current maximum number of species is {MAX_SPECIES}")
    if value < 1:
        raise typer.BadParameter("The number of species must be at least 1")
    return value
×
24

25

26
def orthogroups_callback(value: Optional[int]):
    """Validate the requested number of orthologous groups.

    Args:
        value: Number of orthologous groups given on the command line.
            ``None`` is passed through unchanged, matching the ``Optional``
            annotation.

    Returns:
        The unchanged value when it lies in ``1..MAX_ORTHO_GROUPS``.

    Raises:
        typer.BadParameter: If the value is below 1 or above
            ``MAX_ORTHO_GROUPS``.
    """
    if value is None:
        # Nothing to validate; comparing None with an int would raise TypeError.
        return value
    if value > MAX_ORTHO_GROUPS:
        raise typer.BadParameter(f"The current maximum number of orthologous groups is {MAX_ORTHO_GROUPS}")
    if value < 1:
        raise typer.BadParameter("The number of orthologous groups must be at least 1")
    return value
×
32

33

34
def checknegative(value: float):
    """Reject negative DESeq2 dispersion parameters.

    Used as the callback of both ``deseq_dispersion_parameter_*`` options.

    Args:
        value: Dispersion parameter given on the command line.

    Returns:
        The unchanged value when it is zero or positive.

    Raises:
        typer.BadParameter: If the value is negative.
    """
    if value < 0:
        raise typer.BadParameter("Deseq2 dispersion values cannot be negative.")
    return value
×
38

39

40
def sample_callback(value: Optional[Tuple[int, int]]):
    """Validate the per-group sample counts.

    Args:
        value: Pair ``(group 1 samples, group 2 samples)``. ``None`` is passed
            through unchanged, matching the ``Optional`` annotation.

    Returns:
        The unchanged value when both counts are at least 1.

    Raises:
        typer.BadParameter: If either count is below 1.
    """
    if value is None:
        # Nothing to validate; subscripting None would raise TypeError.
        return value
    group_one, group_two = value[0], value[1]
    if group_one < 1 or group_two < 1:
        raise typer.BadParameter("The minimum number of samples has to be 1")
    return value
×
44

45

46
def dge_ratio_callback(value: float):
    """Validate that a ratio lies in the half-open interval ``[0, 1)``.

    Used as the callback of ``--dge-ratio`` and also of ``--min-sparsity``;
    the error messages mention the DGE ratio in both cases.

    Args:
        value: Ratio given on the command line.

    Returns:
        The unchanged value when ``0 <= value < 1``.

    Raises:
        typer.BadParameter: If the value is negative, or is 1 or larger.
    """
    if value < 0:
        raise typer.BadParameter("DGE ratio cannot be negative")
    if value >= 1:
        raise typer.BadParameter("DGE ratio must be smaller than 1")
    return value
×
52

53

54
def library_size_distribution_callback(value):
    """Parse a library size distribution specification.

    The specification is a comma separated string of the form
    ``name[,param[,param]]``. The name must be one of
    ``LibrarySizeDistribution.possible_distributions``.

    * ``negative_binomial`` takes either no parameters or exactly two:
      ``n`` (integer, at least 1) and ``p`` (float between 0 and 1).
    * ``poisson`` takes either no parameters or exactly one integer.
    * Any other valid name is used without parameters; extra fields are
      ignored.

    Args:
        value: The raw specification string.

    Returns:
        A ``LibrarySizeDistribution`` built from the name and, when given,
        its parsed parameters.

    Raises:
        typer.BadParameter: If the name is unknown, or the parameters have
            the wrong count, type or range.
    """
    fields = value.split(",")
    name, params = fields[0], fields[1:]

    if name not in LibrarySizeDistribution.possible_distributions:
        raise typer.BadParameter(f"Library size distribution {name} is not a valid distribution. Choose from {LibrarySizeDistribution.possible_distributions}")

    if name == "negative_binomial":
        if not params:
            return LibrarySizeDistribution(name)
        if len(params) != 2:
            raise typer.BadParameter("Negative binomial distribution requires two parameters")
        raw_n, raw_p = params
        try:
            nbin_n = int(raw_n)
            nbin_p = float(raw_p)
        except ValueError:
            raise typer.BadParameter(f"Negative binomial distribution requires n to be an integer and p to be a float. Given: n: {raw_n} and p: {raw_p}") from None
        if nbin_n < 1:
            raise typer.BadParameter(f"Negative binomial distribution requires n to be at least 1. Given: {nbin_n}")
        # Written as a chained comparison so that NaN is rejected as well.
        if not 0 <= nbin_p <= 1:
            raise typer.BadParameter(f"Negative binomial distribution requires p to be between 0 and 1. Given: {nbin_p}")
        return LibrarySizeDistribution(name, nbin_n=nbin_n, nbin_p=nbin_p)

    if name == "poisson":
        # Check the parameter count before indexing, so a bare "poisson"
        # falls back to the default instead of raising IndexError.
        if not params:
            return LibrarySizeDistribution(name)
        if len(params) > 1:
            raise typer.BadParameter("Poisson distribution requires one parameter")
        try:
            poisson = int(params[0])
        except ValueError:
            raise typer.BadParameter(f"Poisson distribution requires an integer parameter. Given: {params[0]}") from None
        return LibrarySizeDistribution(name, poisson=poisson)

    return LibrarySizeDistribution(name)
×
84

85

86
def rank_species_callback(value: Optional[str]):
    """Translate a rank name or a numeric string into a distance.

    Args:
        value: A key of ``rank_distance``, a string parseable as a float,
            or ``None``.

    Returns:
        ``None`` for ``None``; the ``rank_distance`` entry when ``value`` is
        one of its keys; otherwise ``float(value)``.

    Raises:
        typer.BadParameter: If ``value`` is neither a known rank nor a valid
            float.
    """
    if value is None:
        return None
    try:
        return rank_distance[value]
    except KeyError:
        pass  # Not a named rank; try to read it as a number instead.
    try:
        return float(value)
    except ValueError:
        # ``from None``: the internal ValueError adds nothing to the message.
        raise typer.BadParameter(f"Rank {value} is not a valid rank or a valid float. Choose from {list(rank_distance.keys())} or specify a float.") from None
×
96

97

98
def limitthreads(value: int):
    """Validate the thread count and cap it at the upper limit of 128.

    Args:
        value: Number of threads given on the command line.

    Returns:
        ``value`` if it is between 1 and 128, otherwise 128 (an info line is
        printed when the value is capped).

    Raises:
        typer.BadParameter: If ``value`` is below 1.
    """
    max_threads = 128
    if value < 1:
        raise typer.BadParameter("The number of threads must be at least 1")
    if value > max_threads:
        # Too many threads are capped rather than rejected.
        print(f"Info: The number of threads is set to {max_threads}, which is the upper limit.")
        value = max_threads
    return value
×
105

106

107
# Command line entry point: checks the error-model / read-length combination,
# parses the library size distribution string and passes every option on to
# ``generate_dataset``.
# NOTE: documented with comments rather than a docstring on purpose. Typer
# shows a command function's docstring as the command help text, so adding
# one would change the ``--help`` output.
@app.command()
def main(
    n_species: Annotated[
        int,
        typer.Option(
            callback=species_callback,
            help=f"Number of species to be drawn for the metatranscriptomic in silico dataset. Maximum value: {MAX_SPECIES}.",
        ),
    ] = 20,
    n_orthogroups: Annotated[
        int,
        typer.Option(
            callback=orthogroups_callback,
            help=f"Number of orthologous groups to be drawn for the metatranscriptomic in silico dataset. Maximum value: {MAX_ORTHO_GROUPS}.",
        ),
    ] = 1000,
    # Tuple default instead of a list: avoids a mutable default argument and
    # matches the declared Tuple[int, int] type.
    n_samples: Annotated[
        Tuple[int, int],
        typer.Option(
            callback=sample_callback,
            help="Number of samples to be created for the metatranscriptomic in silico dataset. "
                 "The first number is the number of samples for group 1 and "
                 "the second number is the number of samples for group 2.",
        ),
    ] = (10, 10),
    outdir: Annotated[
        str,
        typer.Option(help="Output directory for the metatranscriptomic in silico dataset"),
    ] = "simulated_reads",
    max_phylo_distance: Annotated[
        Rank,
        typer.Option(
            callback=rank_species_callback,
            help="Maximum mean phylogenetic distance for orthologous groups. "
                 "Specify a stricter limit if you want to avoid orthologous groups "
                 "with a more diverse phylogenetic distance.",
        ),
    ] = None,
    # TODO(review): this help text ends mid-sentence ("Specify for more ");
    # the intended wording is not recoverable from this file.
    min_identity: Annotated[
        float,
        typer.Option(
            help="Minimum mean sequence identity score for an orthologous group. "
                 "Specify for more ",
        ),
    ] = None,
    dge_ratio: Annotated[
        float,
        typer.Option(
            callback=dge_ratio_callback,
            help="Ratio of up and down regulated genes. Must be between 0 and 1. "
                 "This is a random drawing process from normal distribution, so the actual ratio might vary.",
        ),
    ] = 0.2,
    seed: Annotated[
        int,
        typer.Option(help="Seed for the sampling. Set for reproducibility"),
    ] = None,
    error_model: Annotated[
        ErrorModel,
        typer.Option(
            help="Sequencer model for the reads, use basic or perfect (no errors) for custom read length. Note that read length must be set when using basic or perfect.",
        ),
    ] = ErrorModel.HiSeq,
    compressed: Annotated[
        bool,
        typer.Option(help="Compress the output fastq files"),
    ] = True,
    read_length: Annotated[
        int,
        typer.Option(help="Read length for the reads. Only available when using error_model basic or perfect"),
    ] = None,
    library_size: Annotated[
        int,
        typer.Option(help="Library size for the reads."),
    ] = 100000,
    library_size_distribution: Annotated[
        str,
        typer.Option(help=f"Distribution for the library size. Select from: {LibrarySizeDistribution.possible_distributions}."),
    ] = "uniform",
    group_orthology_level: Annotated[
        OrthologyLevel,
        typer.Option(help="Determines the level of orthology in groups. If you use this, use it with a lot of threads. Takes a long time."),
    ] = OrthologyLevel.normal,
    threads: Annotated[
        int,
        typer.Option(callback=limitthreads, help="Number of threads to be used. Upper limit: 128."),
    ] = 10,
    deseq_dispersion_parameter_a0: Annotated[
        float,
        typer.Option(
            callback=checknegative,
            help="For generating sampling: General dispersion estimation of DESeq2. Only set when you have knowledge of DESeq2 dispersion.",
        ),
    ] = DESEQ2_FITTED_A0,
    deseq_dispersion_parameter_a1: Annotated[
        float,
        typer.Option(
            callback=checknegative,
            help="For generating sampling: Gene mean dependent dispersion of DESeq2. Only set when you have knowledge of DESeq2 dispersion.",
        ),
    ] = DESEQ2_FITTED_A1,
    # NOTE(review): reuses dge_ratio_callback for its [0, 1) range check, so
    # validation errors for --min-sparsity mention "DGE ratio".
    min_sparsity: Annotated[
        float,
        typer.Option(
            callback=dge_ratio_callback,
            help="Will achieve the minimum specified sparsity by zeroing count values randomly.",
        ),
    ] = 0,
    force_creation: Annotated[
        bool,
        typer.Option(help="Force the creation of the dataset, even if available orthogroups do not suffice for specified number of orthogroups."),
    ] = False,
    min_overlap: Annotated[
        int,
        typer.Option(help="Minimum overlap for the blocks. Use this to evaluate overlap blocks, i.e. uninterrupted parts covered with reads that overlap on the genome. Accounts for kmer size."),
    ] = 16,
    _: Annotated[Optional[bool], typer.Option("--version", callback=version_callback)] = None,
):
    # The "basic" and "perfect" error models need an explicit read length
    # (see the --error-model help); fall back to 100 only when forced.
    if error_model in (ErrorModel.basic, ErrorModel.perfect) and read_length is None:
        if not force_creation:
            raise typer.BadParameter('Read length must be specified when using --error-model "basic" or "perfect".')
        print("Warning: Read length is not specified. Using default read length of 100, because --force-creation is set.")
        read_length = 100

    # The distribution string is parsed here, not through a Typer callback.
    library_size_distribution = library_size_distribution_callback(library_size_distribution)
    generate_dataset(n_species, n_orthogroups, n_samples, outdir, max_phylo_distance, min_identity, dge_ratio, seed,
                     error_model, compressed, read_length, library_size, library_size_distribution,
                     group_orthology_level, threads, deseq_dispersion_parameter_a0, deseq_dispersion_parameter_a1,
                     min_sparsity, force_creation, min_overlap)
155

156

157
# Run the Typer application when the module is executed as a script.
if __name__ == "__main__":
    app()
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc