• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

clintval / krak / 25939513200

15 May 2026 08:21PM UTC coverage: 79.419% (-5.0%) from 84.378%
25939513200

push

github

web-flow
feat: support multiple files for FASTX IO (#3)

1 of 152 new or added lines in 2 files covered. (0.66%)

1 existing line in 1 file now uncovered.

1995 of 2512 relevant lines covered (79.42%)

1.77 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/main.rs
1
//! An addicting set of Kraken-enhancing tools.
2
use std::path::PathBuf;
3
use std::process;
4

5
use anyhow::{Error, Result};
6
use clap::{CommandFactory, FromArgMatches, Parser, Subcommand};
7
use env_logger::Env;
8
use log::*;
9

10
use clap::builder::styling::{AnsiColor, Effects, Style, Styles};
11

12
use kraklib::annotate::AnnotateArgs;
13
use kraklib::filter::FilterArgs;
14
use kraklib::n2ref::N2RefArgs;
15
use kraklib::prep::PrepArgs;
16
use kraklib::report2tsv::Report2TsvArgs;
17

18
pub(crate) const HEADER: Style = AnsiColor::Green.on_default().effects(Effects::BOLD);
19
pub(crate) const USAGE: Style = AnsiColor::Green.on_default().effects(Effects::BOLD);
20
pub(crate) const LITERAL: Style = AnsiColor::Cyan.on_default().effects(Effects::BOLD);
21
pub(crate) const PLACEHOLDER: Style = AnsiColor::Cyan.on_default();
22
pub(crate) const ERROR: Style = AnsiColor::Red.on_default().effects(Effects::BOLD);
23
pub(crate) const VALID: Style = AnsiColor::Cyan.on_default().effects(Effects::BOLD);
24
pub(crate) const INVALID: Style = AnsiColor::Yellow.on_default().effects(Effects::BOLD);
25

26
/// Cargo's color style.
27
/// [source](https://github.com/crate-ci/clap-cargo/blob/master/src/style.rs)
28
pub(crate) const CARGO_STYLING: Styles = Styles::styled()
29
    .header(HEADER)
30
    .usage(USAGE)
31
    .literal(LITERAL)
32
    .placeholder(PLACEHOLDER)
33
    .error(ERROR)
34
    .valid(VALID)
35
    .invalid(INVALID);
36

37
/// An addicting set of Kraken-enhancing tools.
38
#[derive(Debug, Parser)]
39
#[command(
40
    author,
41
    version,
42
    color = clap::ColorChoice::Always,
43
    term_width = 80
44
)]
45
#[clap(styles = CARGO_STYLING)]
46
struct Cli {
47
    #[command(subcommand)]
48
    command: Commands,
49
}
50

51
#[derive(Debug, Subcommand)]
52
enum Commands {
53
    /// Convert FASTX/SAM/BAM/CRAM for Kraken classification. Single-end and
54
    /// interleaved FASTA/FASTQ are both accepted; interleaved is auto-detected
55
    /// from read names (`/1`/`/2` suffixes or matching mate names, including
56
    /// Casava 1.8+). Query-grouped SAM/BAM/CRAM is auto-detected from the
57
    /// `@HD` header. Pass `--per-record` to disable auto-detection.
58
    Prep(PrepCmd),
59
    /// Annotate SAM/BAM/CRAM records with Kraken classifications.
60
    Annotate(AnnotateCmd),
61
    /// Filter FASTX/SAM/BAM/CRAM records by Kraken classifications.
62
    Filter(FilterCmd),
63
    /// Revert aligned N-calls in SAM/BAM/CRAM to reference bases.
64
    #[command(name = "n2ref")]
65
    N2Ref(N2RefCmd),
66
    /// Convert a Kraken report to a flat TSV.
67
    #[command(name = "report2tsv")]
68
    Report2Tsv(Report2TsvCmd),
69
}
70

71
/// Arguments for the `prep` subcommand.
72
#[derive(Debug, Parser)]
73
#[command(about, rename_all = "kebab-case")]
74
struct PrepCmd {
75
    /// Input file(s). For FASTA/FASTQ, one file (single-end or interleaved) or
76
    /// two files (R1, R2 paired-end). For SAM/BAM/CRAM, exactly one file.
77
    /// Accepted positionally or as -i/--input. Use `-` or omit for stdin
78
    /// (single-input only).
79
    #[arg(index = 1, value_name = "FILE", num_args = 0..=2)]
80
    input_positional: Vec<PathBuf>,
81

82
    /// Input file(s) (flag form; equivalent to the positional arguments).
83
    /// Accepts 1 file (single-end / interleaved FASTA/FASTQ or SAM/BAM/CRAM)
84
    /// or 2 files (paired-end FASTA/FASTQ R1, R2). Use `-` for stdin
85
    /// (single-input only).
86
    #[arg(
87
        short = 'i',
88
        long = "input",
89
        value_name = "FILE",
90
        num_args = 1..=2,
91
        conflicts_with = "input_positional"
92
    )]
93
    input_flag: Vec<PathBuf>,
94

95
    /// Disable auto pair-detection. Each FASTQ/FASTA record (or SAM/BAM/CRAM
96
    /// primary record) is emitted as its own single-end template, even when
97
    /// the input looks interleaved (`/1`/`/2` suffixes or matching mate names)
98
    /// or query-grouped. Mutually exclusive with two-file input. Secondary
99
    /// (0x100) and supplementary (0x800) alignments are always dropped,
100
    /// regardless of this flag.
101
    #[arg(long)]
102
    per_record: bool,
103

104
    /// Output FASTA file. Use `-` or omit for stdout.
105
    #[arg(short = 'o', long, default_value = "-")]
106
    output: PathBuf,
107

108
    /// Reference FASTA for CRAM decompression (requires a `.fai` index alongside
109
    /// it). Not needed for CRAM files with embedded references or for
110
    /// FASTX/SAM/BAM input.
111
    #[arg(long)]
112
    cram_reference: Option<PathBuf>,
113
}
114

115
/// Arguments for the `annotate` subcommand.
116
#[derive(Debug, Parser)]
117
#[command(about, rename_all = "kebab-case")]
118
struct AnnotateCmd {
119
    /// Input SAM/BAM/CRAM file. Use `-` or omit for stdin.
120
    #[arg(short = 'i', long, default_value = "-")]
121
    input: PathBuf,
122

123
    /// Kraken classification output file (tab-delimited, 5 columns).
124
    /// Use `-` for stdin.
125
    #[arg(short = 'a', long)]
126
    assignments: PathBuf,
127

128
    /// Output SAM/BAM/CRAM file with `ti` tags added. Use `-` or omit for stdout.
129
    #[arg(short = 'o', long, default_value = "-")]
130
    output: PathBuf,
131

132
    /// Kraken report file. When provided, the taxonomy tree is embedded in the
133
    /// output header as a `@CO krak:report:` line, making `filter --kraken-report`
134
    /// unnecessary for downstream filtering. Mutually exclusive with --kraken-db.
135
    #[arg(short = 'R', long, conflicts_with = "kraken_db")]
136
    kraken_report: Option<PathBuf>,
137

138
    /// Kraken database directory. Reads the Kraken DB files in the directory
139
    /// and embeds the taxonomy tree in the output header, replacing the need
140
    /// for both `kraken2 --report` and `--kraken-report`. Mutually exclusive
141
    /// with --kraken-report.
142
    #[arg(short = 'd', long, conflicts_with = "kraken_report")]
143
    kraken_db: Option<PathBuf>,
144

145
    /// Load all assignments into memory before reading the input file. Use when
146
    /// the assignments file is substantially out of QNAME order relative to the
147
    /// input. By default, assignments are streamed record-by-record with a
148
    /// lookahead buffer: modest disorder (such as Kraken v1 multi-threaded
149
    /// output which flushes work-unit buffers in completion order) is handled
150
    /// automatically and the buffer grows only as deep as the actual disorder.
151
    /// Use `--unordered` when disorder is large or unpredictable (e.g. a
152
    /// completely unsorted file) which will load all assignments into memory
153
    /// upfront.
154
    #[arg(long)]
155
    unordered: bool,
156

157
    /// Reference FASTA for CRAM decompression (requires a `.fai` index alongside
158
    /// it). Not needed for CRAM files with embedded references or for SAM/BAM
159
    /// input.
160
    #[arg(long)]
161
    cram_reference: Option<PathBuf>,
162

163
    /// Number of bgzf compression worker threads for BAM output. At 1
164
    /// (default), one compressor + one writer thread pipeline with the
165
    /// annotation loop. Ignored for SAM (no compression) and CRAM (per-block
166
    /// codecs).
167
    #[arg(long, default_value_t = 1, value_parser = clap::value_parser!(u16).range(1..))]
168
    threads: u16,
169

170
    /// bgzf compression level (0-9) for BAM output. Ignored for SAM and CRAM.
171
    #[arg(long, default_value_t = 5, value_parser = clap::value_parser!(u8).range(0..=9))]
172
    compression_level: u8,
173
}
174

175
/// Arguments for the `filter` subcommand.
176
#[derive(Debug, Parser)]
177
#[command(about, rename_all = "kebab-case")]
178
struct FilterCmd {
179
    /// Input file(s). For SAM/BAM/CRAM, one file annotated with `ti` tags
180
    /// (via `krak annotate`). For FASTA/FASTQ, one file (single-end /
181
    /// interleaved) or two files (R1, R2 paired-end); supply taxon IDs with
182
    /// `--classifications` (-c). Accepted positionally or as -i/--input.
183
    /// Use `-` or omit for stdin (single-input only).
184
    #[arg(index = 1, value_name = "FILE", num_args = 0..=2)]
185
    input_positional: Vec<PathBuf>,
186

187
    /// Input file(s) (flag form; equivalent to the positional arguments).
188
    /// Accepts 1 file (SAM/BAM/CRAM or single-end / interleaved FASTA/FASTQ)
189
    /// or 2 files (paired-end FASTA/FASTQ R1, R2). Use `-` for stdin
190
    /// (single-input only).
191
    #[arg(
192
        short = 'i',
193
        long = "input",
194
        value_name = "FILE",
195
        num_args = 1..=2,
196
        conflicts_with = "input_positional"
197
    )]
198
    input_flag: Vec<PathBuf>,
199

200
    /// Output file(s). Format matches input: SAM/BAM/CRAM for alignment input,
201
    /// FASTA/FASTQ for FASTX input. One file for single-input mode, two files
202
    /// for paired-end FASTA/FASTQ. Use `-` or omit for stdout (single-output
203
    /// only).
204
    #[arg(short = 'o', long = "output", value_name = "FILE", num_args = 1..=2)]
205
    output: Vec<PathBuf>,
206

207
    /// Kraken report file. For SAM/BAM/CRAM, serves as fallback when no
208
    /// taxonomy tree is embedded in the header (embed one via `krak annotate
209
    /// --kraken-report` or `--kraken-db`). For FASTA/FASTQ, required when
210
    /// using `--allow-ancestors` or `--include-descendants`.
211
    #[arg(short = 'R', long)]
212
    kraken_report: Option<PathBuf>,
213

214
    /// TSV metrics output file. If omitted, metrics are only logged.
215
    #[arg(short = 'm', long)]
216
    metrics: Option<PathBuf>,
217

218
    /// Taxon IDs to retain (repeat for multiple).
219
    #[arg(short = 't', long = "taxon-id", required = true)]
220
    taxon_ids: Vec<u32>,
221

222
    /// Output file(s) for rejected records. Format matches input: SAM/BAM/CRAM
223
    /// for alignment input, FASTA/FASTQ for FASTX input. One file for
224
    /// single-input mode, two files for paired-end FASTA/FASTQ.
225
    #[arg(short = 'r', long, value_name = "FILE", num_args = 1..=2)]
226
    rejects: Vec<PathBuf>,
227

228
    /// Also keep reads assigned to ancestors of target taxon IDs.
229
    #[arg(short = 'a', long)]
230
    allow_ancestors: bool,
231

232
    /// Maximum edit distance for rescuing off-taxa reads. Not applicable to
233
    /// FASTA/FASTQ input (requires MD tag and CIGAR from alignment).
234
    #[arg(long)]
235
    rescue_max_edit_distance: Option<u32>,
236

237
    /// Maximum number of indel events allowed in off-taxa rescue. Not
238
    /// applicable to FASTA/FASTQ input.
239
    #[arg(long)]
240
    rescue_max_indels: Option<u32>,
241

242
    /// Maximum length of any single indel in off-taxa rescue. Not applicable
243
    /// to FASTA/FASTQ input.
244
    #[arg(long)]
245
    rescue_max_indel_length: Option<u32>,
246

247
    /// For every COUNT 'N' bases in a read, reduce the rescue-max-edit-distance
248
    /// threshold by 1 (integer division: 4 Ns with `--rescue-n-adjustment 5`
249
    /// reduces by 0; 5 Ns reduces by 1; 10 Ns reduces by 2). Must be >= 1 if
250
    /// set. Not applicable to FASTA/FASTQ input.
251
    #[arg(long)]
252
    rescue_n_adjustment: Option<u32>,
253

254
    /// Disable auto template grouping. Each FASTQ/FASTA record (or SAM/BAM/CRAM
255
    /// primary record) is filtered as its own single-record template, even when
256
    /// the input looks interleaved (`/1`/`/2` suffixes or matching mate names)
257
    /// or query-grouped. Required when the input BAM/SAM/CRAM is
258
    /// coordinate-sorted (use `samtools sort -n` to sort by queryname instead).
259
    /// Secondary (0x100) and supplementary (0x800) alignments; which normally
260
    /// share their primary's keep/reject decision and whose `ti` tags are
261
    /// ignored for classification; are instead classified independently by
262
    /// their own `ti` tag under this mode.
263
    #[arg(long)]
264
    per_record: bool,
265

266
    /// Kraken2 per-read classification output file. Required with FASTA/FASTQ input;
267
    /// mutually exclusive with SAM/BAM/CRAM input.
268
    #[arg(short = 'c', long)]
269
    classifications: Option<PathBuf>,
270

271
    /// Also keep reads classified at any taxon in the clade of each target
272
    /// (expands the target set to all descendants). Requires a Kraken report
273
    /// via `--kraken-report`; for SAM/BAM/CRAM a report embedded in the header
274
    /// (via `krak annotate`) also suffices.
275
    #[arg(short = 'd', long)]
276
    include_descendants: bool,
277

278
    /// Also keep reads classified as unclassified (taxon ID 0).
279
    #[arg(short = 'u', long)]
280
    include_unclassified: bool,
281

282
    /// Reference FASTA for CRAM decompression (requires a `.fai` index alongside
283
    /// it). Not needed for CRAM files with embedded references, SAM/BAM input,
284
    /// or FASTA/FASTQ input.
285
    #[arg(long)]
286
    cram_reference: Option<PathBuf>,
287

288
    /// Keep records that lack a `ti` tag (unannotated reads). By default,
289
    /// records with no `ti` tag are rejected. Not applicable to FASTA/FASTQ
290
    /// input (taxon IDs come from `--classifications`, not from SAM tags).
291
    #[arg(long)]
292
    keep_unannotated: bool,
293

294
    /// Load all assignments into memory before reading the input file. Use
295
    /// when the `--classifications` file is substantially out of QNAME order
296
    /// relative to the input. By default, assignments are streamed
297
    /// record-by-record with a lookahead buffer: modest disorder (such as
298
    /// Kraken v1 multi-threaded output which flushes work-unit buffers in
299
    /// completion order) is handled automatically and the buffer grows only
300
    /// as deep as the actual disorder. Use `--unordered` when disorder is
301
    /// large or unpredictable (e.g. a completely unsorted file) which will
302
    /// load all assignments into memory upfront. Only applies to FASTA/FASTQ
303
    /// input; SAM/BAM/CRAM input reads taxon IDs from `ti` tags.
304
    #[arg(long)]
305
    unordered: bool,
306

307
    /// Number of bgzf compression worker threads for `.gz` outputs. At 1
308
    /// (default), one compressor + one writer thread pipeline with the main
309
    /// filter loop; higher values fan compression out across more workers.
310
    /// SAM and CRAM outputs ignore this value.
311
    #[arg(long, default_value_t = 1, value_parser = clap::value_parser!(u16).range(1..))]
312
    threads: u16,
313

314
    /// bgzf compression level (0-9) for `.gz` outputs.
315
    #[arg(long, default_value_t = 5, value_parser = clap::value_parser!(u8).range(0..=9))]
316
    compression_level: u8,
317
}
318

319
/// Arguments for the `n2ref` subcommand.
320
#[derive(Debug, Parser)]
321
#[command(about, rename_all = "kebab-case")]
322
struct N2RefCmd {
323
    /// Input SAM/BAM/CRAM file. Accepted positionally (first argument) or as
324
    /// -i. Use `-` or omit for stdin.
325
    #[arg(index = 1, value_name = "INPUT")]
326
    input_positional: Option<PathBuf>,
327

328
    /// Output SAM/BAM/CRAM file. Accepted positionally (second argument) or
329
    /// as -o. Use `-` or omit for stdout.
330
    #[arg(index = 2, value_name = "OUTPUT")]
331
    output_positional: Option<PathBuf>,
332

333
    /// Input SAM/BAM/CRAM file (flag form; equivalent to the first positional
334
    /// argument). Use `-` for stdin.
335
    #[arg(short = 'i', long = "input", value_name = "FILE")]
336
    input_flag: Option<PathBuf>,
337

338
    /// Output SAM/BAM/CRAM file (flag form; equivalent to the second
339
    /// positional argument). Use `-` for stdout.
340
    #[arg(short = 'o', long = "output", value_name = "FILE")]
341
    output_flag: Option<PathBuf>,
342

343
    /// Reference FASTA file (must match SAM/BAM/CRAM reference dictionary).
344
    #[arg(short = 'r', long)]
345
    reference: PathBuf,
346

347
    /// Replacement base quality score for converted N-calls (0–93). Defaults to original quality.
348
    #[arg(short = 'q', long)]
349
    qual: Option<u8>,
350

351
    /// Number of bgzf compression worker threads for BAM output. At 1
352
    /// (default), one compressor + one writer thread pipeline with the
353
    /// n2ref loop. Ignored for SAM (no compression) and CRAM (per-block
354
    /// codecs).
355
    #[arg(long, default_value_t = 1, value_parser = clap::value_parser!(u16).range(1..))]
356
    threads: u16,
357

358
    /// bgzf compression level (0-9) for BAM output. Ignored for SAM and CRAM.
359
    #[arg(long, default_value_t = 5, value_parser = clap::value_parser!(u8).range(0..=9))]
360
    compression_level: u8,
361
}
362

363
/// Arguments for the `report2tsv` subcommand.
364
#[derive(Debug, Parser)]
365
#[command(about, rename_all = "kebab-case")]
366
struct Report2TsvCmd {
367
    /// Input Kraken report file. Accepted positionally (first argument) or as
368
    /// -i. Use `-` or omit for stdin.
369
    #[arg(index = 1, value_name = "INPUT")]
370
    input_positional: Option<PathBuf>,
371

372
    /// Output TSV file. Accepted positionally (second argument) or as -o.
373
    /// Use `-` or omit for stdout.
374
    #[arg(index = 2, value_name = "OUTPUT")]
375
    output_positional: Option<PathBuf>,
376

377
    /// Input Kraken report file (flag form; equivalent to the first positional
378
    /// argument). Use `-` for stdin.
379
    #[arg(short = 'i', long = "input", value_name = "FILE")]
380
    input_flag: Option<PathBuf>,
381

382
    /// Output TSV file (flag form; equivalent to the second positional
383
    /// argument). Use `-` for stdout.
384
    #[arg(short = 'o', long = "output", value_name = "FILE")]
385
    output_flag: Option<PathBuf>,
386
}
387

388
/// Substitute the pseudo-path `default` (`/dev/stdin` or `/dev/stdout`) for a
/// path that is exactly `-`; any other path is returned unchanged.
fn resolve_dash(path: PathBuf, default: &'static str) -> PathBuf {
    // Anything other than a lone dash is a real path: hand it back untouched.
    if path.as_os_str() != "-" {
        return path;
    }
    PathBuf::from(default)
}
396

397
fn resolve_input(path: PathBuf) -> PathBuf {
×
398
    resolve_dash(path, "/dev/stdin")
×
399
}
400

401
fn resolve_output(path: PathBuf) -> PathBuf {
×
402
    resolve_dash(path, "/dev/stdout")
×
403
}
404

405
/// Pick exactly one of a positional arg or its `--flag` equivalent. Defaults
406
/// to the given pseudo-path (`/dev/stdin` / `/dev/stdout`) if both are absent;
407
/// errors out the process if both are provided.
408
fn pick_one(
×
409
    positional: Option<PathBuf>,
410
    flag: Option<PathBuf>,
411
    flag_label: &str,
412
    default: &'static str,
413
) -> PathBuf {
414
    match (positional, flag) {
×
415
        (Some(p), None) | (None, Some(p)) => resolve_dash(p, default),
×
416
        (None, None) => PathBuf::from(default),
×
417
        (Some(_), Some(_)) => {
418
            error!("cannot specify {flag_label} both positionally and as a flag");
×
419
            process::exit(1);
×
420
        }
421
    }
422
}
423

424
/// Resolve a 1..=2 output list to a `(output, output2)` pair. Errors out if
425
/// `paired` and the list has 1 element (or vice versa). Defaults to a single
426
/// stdout pseudo-path when the list is empty.
NEW
427
fn pick_output_pair(output: Vec<PathBuf>, paired: bool) -> (PathBuf, Option<PathBuf>) {
×
NEW
428
    match (output.len(), paired) {
×
NEW
429
        (0, false) => (PathBuf::from("/dev/stdout"), None),
×
430
        (0, true) => {
NEW
431
            error!("paired input requires two output paths via -o/--output");
×
NEW
432
            process::exit(1);
×
433
        }
NEW
434
        (1, false) => (resolve_output(output.into_iter().next().unwrap()), None),
×
435
        (1, true) => {
NEW
436
            error!("paired input requires two output paths via -o/--output");
×
NEW
437
            process::exit(1);
×
438
        }
439
        (2, false) => {
NEW
440
            error!("two output paths supplied to -o/--output but input is single-end");
×
NEW
441
            process::exit(1);
×
442
        }
443
        (2, true) => {
NEW
444
            let mut it = output.into_iter();
×
NEW
445
            let o1 = resolve_output(it.next().unwrap());
×
NEW
446
            let o2 = it.next().unwrap();
×
NEW
447
            if o2.as_os_str() == "-" {
×
NEW
448
                error!("R2 output cannot be stdout (`-`)");
×
NEW
449
                process::exit(1);
×
450
            }
NEW
451
            (o1, Some(o2))
×
452
        }
453
        _ => unreachable!("clap enforces num_args = 1..=2"),
454
    }
455
}
456

457
/// Resolve a 0..=2 rejects list to `(rejects, rejects2)`. Same matching rules
458
/// as outputs, except an empty list means "no rejects file".
NEW
459
fn pick_rejects_pair(rejects: Vec<PathBuf>, paired: bool) -> (Option<PathBuf>, Option<PathBuf>) {
×
NEW
460
    match (rejects.len(), paired) {
×
NEW
461
        (0, _) => (None, None),
×
NEW
462
        (1, false) => (
×
NEW
463
            Some(resolve_output(rejects.into_iter().next().unwrap())),
×
NEW
464
            None,
×
465
        ),
466
        (1, true) => {
NEW
467
            error!("paired input requires two reject paths via -r/--rejects");
×
NEW
468
            process::exit(1);
×
469
        }
470
        (2, false) => {
NEW
471
            error!("two reject paths supplied to -r/--rejects but input is single-end");
×
NEW
472
            process::exit(1);
×
473
        }
474
        (2, true) => {
NEW
475
            let mut it = rejects.into_iter();
×
NEW
476
            let r1 = resolve_output(it.next().unwrap());
×
NEW
477
            let r2 = it.next().unwrap();
×
NEW
478
            if r2.as_os_str() == "-" {
×
NEW
479
                error!("R2 rejects cannot be stdout (`-`)");
×
NEW
480
                process::exit(1);
×
481
            }
NEW
482
            (Some(r1), Some(r2))
×
483
        }
484
        _ => unreachable!("clap enforces num_args = 1..=2"),
485
    }
486
}
487

488
/// Resolve a 1..=2 input list (positional or `--input`/`-i` flag form) to a
489
/// `(input, input2)` pair. `clap`'s `conflicts_with` guarantees the two
490
/// sources are mutually exclusive, so we pick whichever is non-empty.
491
/// Defaults to a single stdin pseudo-path when both are empty. Errors out
492
/// if `per_record` is set with two inputs.
NEW
493
fn pick_input_pair(
×
494
    positional: Vec<PathBuf>,
495
    flag: Vec<PathBuf>,
496
    flag_label: &str,
497
    per_record: bool,
498
) -> (PathBuf, Option<PathBuf>) {
NEW
499
    let files = if !positional.is_empty() {
×
NEW
500
        positional
×
501
    } else {
NEW
502
        flag
×
503
    };
NEW
504
    match files.len() {
×
NEW
505
        0 => (PathBuf::from("/dev/stdin"), None),
×
NEW
506
        1 => (resolve_input(files.into_iter().next().unwrap()), None),
×
507
        2 => {
NEW
508
            if per_record {
×
NEW
509
                error!(
×
510
                    "cannot use --per-record with two-file paired input \
511
                     (positional or {flag_label})"
512
                );
NEW
513
                process::exit(1);
×
514
            }
NEW
515
            let mut it = files.into_iter();
×
NEW
516
            let r1 = resolve_input(it.next().unwrap());
×
NEW
517
            let r2 = it.next().unwrap();
×
NEW
518
            if r2.as_os_str() == "-" {
×
NEW
519
                error!("R2 input cannot be stdin (`-`)");
×
NEW
520
                process::exit(1);
×
521
            }
NEW
522
            (r1, Some(r2))
×
523
        }
524
        _ => unreachable!("clap enforces num_args = 0..=2 / 1..=2"),
525
    }
526
}
527

528
/// Main binary entrypoint.
529
#[cfg(not(tarpaulin_include))]
530
fn main() -> Result<(), Error> {
531
    let env = Env::default().default_filter_or("info");
532
    env_logger::Builder::from_env(env).init();
533

534
    let matches = Cli::command().term_width(80).get_matches();
535
    let cli = Cli::from_arg_matches(&matches).unwrap_or_else(|e| e.exit());
536

537
    let result = match cli.command {
538
        Commands::Prep(cmd) => {
539
            let (input, input2) =
540
                pick_input_pair(cmd.input_positional, cmd.input_flag, "-i", cmd.per_record);
541
            kraklib::run_prep(PrepArgs {
542
                input,
543
                input2,
544
                per_record: cmd.per_record,
545
                output: resolve_output(cmd.output),
546
                cram_reference: cmd.cram_reference,
547
            })
548
        }
549
        Commands::Annotate(cmd) => kraklib::run_annotate(AnnotateArgs {
550
            input: resolve_input(cmd.input),
551
            assignments: resolve_input(cmd.assignments),
552
            output: resolve_output(cmd.output),
553
            kraken_report: cmd.kraken_report,
554
            kraken_db: cmd.kraken_db,
555
            unordered: cmd.unordered,
556
            cram_reference: cmd.cram_reference,
557
            threads: cmd.threads as usize,
558
            compression_level: cmd.compression_level as u32,
559
        }),
560
        Commands::Filter(cmd) => {
561
            let (input, input2) =
562
                pick_input_pair(cmd.input_positional, cmd.input_flag, "-i", cmd.per_record);
563
            let (output, output2) = pick_output_pair(cmd.output, input2.is_some());
564
            let (rejects, rejects2) = pick_rejects_pair(cmd.rejects, input2.is_some());
565
            kraklib::run_filter(FilterArgs {
566
                input,
567
                input2,
568
                output,
569
                output2,
570
                kraken_report: cmd.kraken_report,
571
                metrics: cmd.metrics,
572
                taxon_ids: cmd.taxon_ids.into_iter().collect(),
573
                rejects,
574
                rejects2,
575
                allow_ancestors: cmd.allow_ancestors,
576
                rescue_max_edit_distance: cmd.rescue_max_edit_distance,
577
                rescue_max_indels: cmd.rescue_max_indels,
578
                rescue_max_indel_length: cmd.rescue_max_indel_length,
579
                rescue_n_adjustment: cmd.rescue_n_adjustment,
580
                per_record: cmd.per_record,
581
                classifications: cmd.classifications,
582
                include_descendants: cmd.include_descendants,
583
                include_unclassified: cmd.include_unclassified,
584
                cram_reference: cmd.cram_reference,
585
                keep_unannotated: cmd.keep_unannotated,
586
                unordered: cmd.unordered,
587
                threads: cmd.threads as usize,
588
                compression_level: cmd.compression_level as u32,
589
            })
590
        }
591
        Commands::N2Ref(cmd) => {
592
            let input = pick_one(cmd.input_positional, cmd.input_flag, "-i", "/dev/stdin");
593
            let output = pick_one(cmd.output_positional, cmd.output_flag, "-o", "/dev/stdout");
594
            kraklib::run_n2ref(N2RefArgs {
595
                input,
596
                output,
597
                reference: cmd.reference,
598
                qual: cmd.qual,
599
                threads: cmd.threads as usize,
600
                compression_level: cmd.compression_level as u32,
601
            })
602
        }
603
        Commands::Report2Tsv(cmd) => {
604
            let input = pick_one(cmd.input_positional, cmd.input_flag, "-i", "/dev/stdin");
605
            let output = pick_one(cmd.output_positional, cmd.output_flag, "-o", "/dev/stdout");
606
            kraklib::run_report2tsv(Report2TsvArgs { input, output })
607
        }
608
    };
609

610
    match result {
611
        Ok(()) => process::exit(0),
612
        Err(e) => {
613
            error!("{e:#}");
614
            process::exit(1);
615
        }
616
    }
617
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc