• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

jlab / marbel / 15168495052

21 May 2025 05:18PM UTC coverage: 55.975% (+21.8%) from 34.142%
15168495052

push

github

web-flow
Merge pull request #51 from jlab/dev

Dev

975 of 1606 new or added lines in 19 files covered. (60.71%)

2 existing lines in 1 file now uncovered.

1096 of 1958 relevant lines covered (55.98%)

0.56 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

19.32
/src/marbel/cli.py
1
from typing import Optional, Tuple
1✔
2
from typing_extensions import Annotated
1✔
3

4
import typer
1✔
5

6
from marbel.presets import __version__, MAX_SPECIES, MAX_ORTHO_GROUPS, rank_distance, LibrarySizeDistribution, Rank, ErrorModel, DESEQ2_FITTED_A0, DESEQ2_FITTED_A1, OrthologyLevel
1✔
7
from marbel.core import generate_dataset
1✔
8

9
# Typer application object; the `main` command is registered on it via `@app.command()`.
app = typer.Typer()
1✔
10

11

12
def version_callback(value: bool):
    """Print the marbel version and exit when the ``--version`` flag is set.

    Args:
        value: Truthy when the flag was passed on the command line.

    Raises:
        typer.Exit: After printing the version, to stop further processing.
    """
    if value:
        print(f"marbel Version: {__version__}")
        raise typer.Exit()
×
16

17

18
def species_callback(value: Optional[int]):
    """Validate the requested number of species.

    Args:
        value: Number of species given on the command line, or ``None``.

    Returns:
        The unchanged value when it lies in ``[1, MAX_SPECIES]``; ``None`` is
        passed through untouched.

    Raises:
        typer.BadParameter: If the value is above ``MAX_SPECIES`` or below 1.
    """
    # The parameter is declared Optional, so do not compare None against ints.
    if value is None:
        return value
    if value > MAX_SPECIES:
        raise typer.BadParameter(f"The current maximum number of species is {MAX_SPECIES}")
    if value < 1:
        raise typer.BadParameter("The minimum number of species is 1")
    return value
×
24

25

26
def orthogroups_callback(value: Optional[int]):
    """Validate the requested number of orthologous groups.

    Args:
        value: Number of orthologous groups given on the command line, or ``None``.

    Returns:
        The unchanged value when it lies in ``[1, MAX_ORTHO_GROUPS]``; ``None``
        is passed through untouched.

    Raises:
        typer.BadParameter: If the value is above ``MAX_ORTHO_GROUPS`` or below 1.
    """
    # The parameter is declared Optional, so do not compare None against ints.
    if value is None:
        return value
    if value > MAX_ORTHO_GROUPS:
        raise typer.BadParameter(f"The current maximum number of orthologous groups is {MAX_ORTHO_GROUPS}")
    if value < 1:
        raise typer.BadParameter("The minimum number of orthologous groups is 1")
    return value
×
32

33

34
def checknegative(value: float):
    """Reject negative DESeq2 dispersion parameters.

    Args:
        value: Dispersion parameter given on the command line.

    Returns:
        The unchanged value when it is zero or positive.

    Raises:
        typer.BadParameter: If the value is negative.
    """
    if value < 0:
        raise typer.BadParameter("Deseq2 dispersion values cannot be negative.")
    return value
×
38

39

40
def sample_callback(value: Optional[Tuple[int, int]]):
    """Validate the per-group sample counts.

    Args:
        value: Pair ``(group 1 samples, group 2 samples)``, or ``None``.

    Returns:
        The unchanged pair when both counts are at least 1; ``None`` is passed
        through untouched.

    Raises:
        typer.BadParameter: If either count is below 1.
    """
    # The parameter is declared Optional, so do not subscript None.
    if value is None:
        return value
    if value[0] < 1 or value[1] < 1:
        raise typer.BadParameter("The minimum number of samples has to be 1")
    return value
×
44

45

46
def dge_ratio_callback(value: float):
    """Validate the ratio of differentially expressed genes.

    Args:
        value: Ratio given on the command line.

    Returns:
        The unchanged value when it lies in ``[0, 1)``.

    Raises:
        typer.BadParameter: If the ratio is negative, or is 1 or larger.
    """
    if value < 0:
        raise typer.BadParameter("Ratio cannot be negative")
    if value >= 1:
        raise typer.BadParameter("DGE ratio must be smaller than 1")
    return value
×
52

53

54
def library_size_distribution_callback(value):
    """Parse a library size distribution specification.

    The specification is a comma-separated string: the distribution name,
    optionally followed by its parameters, e.g. ``"uniform"``,
    ``"poisson,50"`` or ``"negative_binomial,20,0.3"``.

    Args:
        value: The raw specification string from the command line.

    Returns:
        A ``LibrarySizeDistribution`` built from the name and, when given,
        the parsed parameters.

    Raises:
        typer.BadParameter: If the name is unknown, the number of parameters
            does not fit the distribution, or a parameter cannot be parsed
            or is out of range.
    """
    name, *params = value.split(",")
    if name not in LibrarySizeDistribution.possible_distributions:
        raise typer.BadParameter(f"Library size distribution {name} is not a valid distribution. Choose from {LibrarySizeDistribution.possible_distributions}")

    if name == "negative_binomial":
        # Without parameters the distribution's own defaults apply.
        if not params:
            return LibrarySizeDistribution(name)
        if len(params) != 2:
            raise typer.BadParameter("Negative binomial distribution requires two parameters")
        raw_n, raw_p = params
        try:
            nbin_n = int(raw_n)
            nbin_p = float(raw_p)
        except ValueError:
            raise typer.BadParameter(f"Negative binomial distribution requires n to be an integer and p to be a float. Given: n: {raw_n} and p: {raw_p}") from None
        if nbin_n < 1:
            raise typer.BadParameter(f"Negative binomial distribution requires n to be at least 1. Given: {nbin_n}")
        # Written as a chained comparison so that NaN is rejected as well.
        if not 0 <= nbin_p <= 1:
            raise typer.BadParameter(f"Negative binomial distribution requires p to be between 0 and 1. Given: {nbin_p}")
        return LibrarySizeDistribution(name, nbin_n=nbin_n, nbin_p=nbin_p)

    if name == "poisson":
        # Check the parameter count before touching params[0]; a bare
        # "poisson" must fall back to the defaults instead of raising.
        if not params:
            return LibrarySizeDistribution(name)
        if len(params) > 1:
            raise typer.BadParameter("Poisson distribution requires one parameter")
        try:
            poisson_param = int(params[0])
        except ValueError:
            raise typer.BadParameter(f"Poisson distribution requires an integer parameter. Given: {params[0]}") from None
        return LibrarySizeDistribution(name, poisson=poisson_param)

    # All other distributions take no parameters here; extras are ignored.
    return LibrarySizeDistribution(name)
×
84

85

86
def rank_species_callback(value: Optional[str]):
    """Translate a rank name or a numeric string into a distance value.

    Args:
        value: A key of ``rank_distance``, a string parseable as float, or
            ``None``.

    Returns:
        ``None`` when no value was given, the ``rank_distance`` entry when
        the value is a known key, otherwise the value converted to ``float``.

    Raises:
        typer.BadParameter: If the value is neither a known key nor
            convertible to a float.
    """
    if value is None:
        return None
    try:
        return rank_distance[value]
    except KeyError:
        pass  # not a named rank; fall back to interpreting it as a number
    try:
        return float(value)
    except ValueError:
        # `from None` keeps the internal lookup failure out of the report.
        raise typer.BadParameter(f"Rank {value} is not a valid rank or a valid float. Choose from {list(rank_distance.keys())} or specify a float.") from None
×
96

97

98
@app.command()
def main(
    n_species: Annotated[
        int,
        typer.Option(
            callback=species_callback,
            help=f"Number of species to be drawn for the metatranscriptomic in silico dataset. Maximum value: {MAX_SPECIES}.",
        ),
    ] = 20,
    n_orthogroups: Annotated[
        int,
        typer.Option(
            callback=orthogroups_callback,
            help=f"Number of orthologous groups to be drawn for the metatranscriptomic in silico dataset. Maximum value: {MAX_ORTHO_GROUPS}.",
        ),
    ] = 1000,
    # Immutable tuple default matching the declared Tuple[int, int] type.
    n_samples: Annotated[
        Tuple[int, int],
        typer.Option(
            callback=sample_callback,
            help="Number of samples to be created for the metatranscriptomic in silico dataset, "
                 "the first number is the number of samples for group 1 and "
                 "the second number is the number of samples for group 2",
        ),
    ] = (10, 10),
    outdir: Annotated[
        str,
        typer.Option(help="Output directory for the metatranscriptomic in silico dataset"),
    ] = "simulated_reads",
    max_phylo_distance: Annotated[
        Rank,
        typer.Option(
            callback=rank_species_callback,
            help="Maximum mean phylogenetic distance for orthologous groups. "
                 "specify stricter limit, if you want to avoid orthologous groups "
                 "with a more diverse phylogenetic distance.",
        ),
    ] = None,
    # TODO(review): the second help sentence is truncated in the original; complete it.
    min_identity: Annotated[
        float,
        typer.Option(
            help="Minimum mean sequence identity score for an orthologous groups. "
                 "Specify for more ",
        ),
    ] = None,
    dge_ratio: Annotated[
        float,
        typer.Option(
            callback=dge_ratio_callback,
            help="Ratio of up and down regulated genes. Must be between 0 and 1. "
                 "This is a random drawing process from normal distribution, so the actual ratio might vary.",
        ),
    ] = 0.2,
    seed: Annotated[
        int,
        typer.Option(help="Seed for the sampling. Set for reproducibility"),
    ] = None,
    error_model: Annotated[
        ErrorModel,
        typer.Option(help="Sequencer model for the reads, use basic or perfect (no errors) for custom read length. Note that read length must be set when using basic or perfect."),
    ] = ErrorModel.HiSeq,
    compressed: Annotated[
        bool,
        typer.Option(help="Compress the output fastq files"),
    ] = True,
    read_length: Annotated[
        int,
        typer.Option(help="Read length for the reads. Only available when using error_model basic or perfect"),
    ] = None,
    library_size: Annotated[
        int,
        typer.Option(help="Library size for the reads."),
    ] = 100000,
    library_size_distribution: Annotated[
        str,
        typer.Option(help=f"Distribution for the library size. Select from: {LibrarySizeDistribution.possible_distributions}."),
    ] = "uniform",
    group_orthology_level: Annotated[
        OrthologyLevel,
        typer.Option(help="Determines the level of orthology in groups. If you use this, use it with a lot of threads. Takes a long time."),
    ] = OrthologyLevel.normal,
    threads: Annotated[
        int,
        typer.Option(help="Number of threads to be used"),
    ] = 10,
    deseq_dispersion_parameter_a0: Annotated[
        float,
        typer.Option(
            callback=checknegative,
            help="For generating sampling: General dispersion estimation of DESeq2. Only set when you have knowledge of DESeq2 dispersion.",
        ),
    ] = DESEQ2_FITTED_A0,
    deseq_dispersion_parameter_a1: Annotated[
        float,
        typer.Option(
            callback=checknegative,
            help="For generating sampling: Gene mean dependent dispersion of DESeq2. Only set when you have knowledge of DESeq2 dispersion.",
        ),
    ] = DESEQ2_FITTED_A1,
    min_sparsity: Annotated[
        float,
        typer.Option(help="Will achieve the minimum specified sparsity by zeroing count values randomly."),
    ] = 0,
    force_creation: Annotated[
        bool,
        typer.Option(help="Force the creation of the dataset, even if available orthogroups do not suffice for specified number of orthogroups."),
    ] = False,
    min_overlap: Annotated[
        int,
        typer.Option(help="Minimum overlap for the blocks. Use this to evaluate overlap blocks, i.e. uninterrupted parts covered with reads that overlap on the genome. Accounts for kmer size."),
    ] = 16,
    # Eager so that --version is handled before the other options are validated.
    _: Annotated[
        Optional[bool],
        typer.Option("--version", callback=version_callback, is_eager=True),
    ] = None,
):
    """Generate a metatranscriptomic in silico dataset."""
    # The basic and perfect error models need an explicit read length.
    if error_model in (ErrorModel.basic, ErrorModel.perfect) and read_length is None:
        if not force_creation:
            raise typer.BadParameter('Read length must be specified when using --error-model "basic" or "perfect".')
        print("Warning: Read length is not specified. Using default read length of 100, because --force-creation is set.")
        read_length = 100

    # Parsed here rather than as an option callback: the string specification
    # is turned into a LibrarySizeDistribution object.
    library_size_distribution = library_size_distribution_callback(library_size_distribution)
    generate_dataset(n_species, n_orthogroups, n_samples, outdir, max_phylo_distance, min_identity, dge_ratio, seed,
                     error_model, compressed, read_length, library_size, library_size_distribution,
                     group_orthology_level, threads, deseq_dispersion_parameter_a0, deseq_dispersion_parameter_a1,
                     min_sparsity, force_creation, min_overlap)
146

147

148
if __name__ == "__main__":
    # Run the Typer application when this file is executed as a script.
    app()
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc