• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

clintval / krak / 25939513200

15 May 2026 08:21PM UTC coverage: 79.419% (-5.0%) from 84.378%
25939513200

push

github

web-flow
feat: support multiple files for FASTX IO (#3)

1 of 152 new or added lines in 2 files covered. (0.66%)

1 existing line in 1 file now uncovered.

1995 of 2512 relevant lines covered (79.42%)

1.77 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

78.36
/src/lib/annotate.rs
1
//! Annotate SAM/BAM/CRAM records with Kraken classifications.
2

3
use std::io::{BufReader, Write};
4
use std::path::Path;
5

6
use ahash::AHashMap;
7
use anyhow::{Context, Result};
8
use log::info;
9
use noodles::sam::alignment::io::Write as AlignmentWrite;
10

11
use crate::kraken_report::KrakenReportEntry;
12
use crate::kraken_report_embed::entries_to_header_comment;
13
use crate::kraken_result::{KrakenResult, StreamingLookup};
14
use crate::kraken_taxonomy::{read_taxo_k2d, read_taxonomy_dmp};
15
use crate::AlignmentFormat;
16

17
/// Arguments for the `annotate` command.
///
/// Derives `Debug`, `Clone`, `PartialEq`, and `Eq` so the parsed arguments can
/// be logged, duplicated, and compared in tests. `Default` is intentionally not
/// derived: the documented defaults for `threads` (1) and `compression_level`
/// (5) differ from the zero values a derived `Default` would produce.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AnnotateArgs {
    /// Input SAM/BAM/CRAM file.
    pub input: std::path::PathBuf,
    /// Kraken classification output file.
    pub assignments: std::path::PathBuf,
    /// Output SAM/BAM/CRAM file with `ti` tags.
    pub output: std::path::PathBuf,
    /// Optional Kraken report file; embeds the taxonomy tree in the output
    /// header as a `@CO krak:report:<base64>` line. Mutually exclusive with
    /// `kraken_db`.
    pub kraken_report: Option<std::path::PathBuf>,
    /// Optional Kraken database directory; reads the DB and embeds the
    /// full taxonomy tree in the output header. Mutually exclusive with
    /// `kraken_report`.
    pub kraken_db: Option<std::path::PathBuf>,
    /// When `true`, load all assignments into a HashMap before reading the
    /// input file (order-independent). When `false` (default), stream
    /// assignments in lock-step with input records (requires both to be at
    /// least weakly in QNAME order).
    pub unordered: bool,
    /// Optional reference FASTA for CRAM decompression (requires `.fai` index).
    pub cram_reference: Option<std::path::PathBuf>,
    /// Number of bgzf compression worker threads for BAM output. Default 1
    /// (one compressor + one writer thread pipelined with the annotation loop).
    /// Ignored for SAM (no compression) and CRAM (per-block codecs).
    pub threads: usize,
    /// bgzf compression level (0-9) for BAM output. Default 5. Ignored for
    /// SAM (no compression) and CRAM (per-block codecs).
    pub compression_level: u32,
}
48

49
/// Source of taxon-id assignments for the annotation loop.
50
enum Source<'a> {
51
    /// All assignments pre-loaded into a HashMap (order-independent lookup).
52
    Map(&'a AHashMap<String, u32>),
53
    /// Path to a Kraken assignments file streamed in lock-step with the input.
54
    Stream(&'a Path),
55
}
56

57
/// Run the `annotate` command.
58
///
59
/// By default, assignments are streamed record-by-record in lock-step with the
60
/// input, requiring both to be in the same QNAME order (the natural output of a
61
/// queryname-sorted SAM/BAM/CRAM -> `prep` -> `kraken2` pipeline). With
62
/// `args.unordered = true`, all assignments are loaded into a `HashMap` first
63
/// so order does not matter at all.
64
pub fn run_annotate(args: AnnotateArgs) -> Result<()> {
1✔
65
    let header_comments: Vec<String> = if let Some(db_path) = &args.kraken_db {
1✔
66
        let taxo_path = db_path.join("taxo.k2d");
×
67
        let entries = if taxo_path.exists() {
×
68
            info!("Reading taxonomy from: {}", taxo_path.display());
×
69
            let e = read_taxo_k2d(&taxo_path)?;
×
70
            info!("Loaded {} taxonomy nodes from taxo.k2d.", e.len());
×
71
            e
×
72
        } else {
73
            let nodes_path = db_path.join("taxonomy").join("nodes.dmp");
×
74
            info!("Reading taxonomy from: {}", nodes_path.display());
×
75
            let e = read_taxonomy_dmp(db_path)?;
×
76
            info!("Loaded {} taxonomy nodes from taxonomy/nodes.dmp.", e.len());
×
77
            e
×
78
        };
79
        entries_to_header_comment(&entries)?
×
80
    } else if let Some(report_path) = &args.kraken_report {
2✔
81
        info!("Loading Kraken report from: {}", report_path.display());
3✔
82
        let entries = KrakenReportEntry::read_file(report_path)?;
2✔
83
        entries_to_header_comment(&entries)?
2✔
84
    } else {
85
        Vec::new()
2✔
86
    };
87

88
    let map = if args.unordered {
3✔
89
        info!(
×
90
            "Loading Kraken assignments (unordered) from: {}",
91
            args.assignments.display()
92
        );
93
        let m = KrakenResult::load_as_map(&args.assignments)?;
2✔
94
        info!("Loaded {} Kraken assignments.", m.len());
3✔
95
        Some(m)
1✔
96
    } else {
UNCOV
97
        info!(
×
98
            "Streaming Kraken assignments from: {}",
99
            args.assignments.display()
100
        );
101
        None
1✔
102
    };
103
    let source = map
104
        .as_ref()
105
        .map_or(Source::Stream(&args.assignments), Source::Map);
1✔
106

107
    let fmt = AlignmentFormat::from_path(&args.input);
1✔
108
    // Pseudo-paths (/dev/stdin, /dev/fd/N) default to BAM by extension, but
109
    // the byte content may be SAM/CRAM. Sniff and thread the buffered reader
110
    // through; re-opening the pseudo-path after sniff would lose the bytes
111
    // already read off the shared file description.
112
    if matches!(fmt, AlignmentFormat::Bam) && crate::is_pseudo_path(&args.input) {
2✔
113
        let (sniffed, gzipped, peek_reader) = crate::sniff_input(&args.input)
3✔
114
            .with_context(|| format!("failed to open input: {}", args.input.display()))?;
1✔
115
        return match sniffed {
1✔
116
            crate::SniffedFormat::Sam => {
117
                let r = crate::into_text_bufread(peek_reader, gzipped);
2✔
118
                let mut reader = noodles::sam::io::Reader::new(r);
1✔
119
                annotate_sam_from_reader(&mut reader, &args.output, source, &header_comments)
2✔
120
            }
121
            crate::SniffedFormat::Cram => {
122
                use noodles::cram;
123
                let mut reader = cram::io::reader::Builder::default()
3✔
124
                    .set_reference_sequence_repository(crate::build_fasta_repo(
3✔
125
                        args.cram_reference.as_deref(),
1✔
126
                    )?)
127
                    .build_from_reader(peek_reader);
1✔
128
                annotate_cram_from_reader(
1✔
129
                    &mut reader,
130
                    &args.output,
1✔
131
                    source,
132
                    &header_comments,
1✔
133
                    args.cram_reference.as_deref(),
1✔
134
                )
135
            }
136
            crate::SniffedFormat::Bam => {
137
                use noodles::bam;
138
                use noodles::bgzf;
139
                let mut reader = bam::io::Reader::from(bgzf::io::Reader::new(peek_reader));
2✔
140
                annotate_bam_from_reader(
1✔
141
                    &mut reader,
142
                    &args.output,
1✔
143
                    source,
144
                    &header_comments,
1✔
145
                    args.threads,
1✔
146
                    args.compression_level,
1✔
147
                )
148
            }
149
            _ => annotate_bam(
×
150
                &args.input,
×
151
                &args.output,
×
152
                source,
153
                &header_comments,
×
154
                args.threads,
×
155
                args.compression_level,
×
156
            ),
157
        };
158
    }
159

160
    match fmt {
1✔
161
        AlignmentFormat::Bam => annotate_bam(
162
            &args.input,
1✔
163
            &args.output,
1✔
164
            source,
165
            &header_comments,
1✔
166
            args.threads,
1✔
167
            args.compression_level,
1✔
168
        ),
169
        AlignmentFormat::Cram => annotate_cram(
170
            &args.input,
1✔
171
            &args.output,
1✔
172
            source,
173
            &header_comments,
1✔
174
            args.cram_reference.as_deref(),
1✔
175
        ),
176
        AlignmentFormat::Sam => annotate_sam(&args.input, &args.output, source, &header_comments),
2✔
177
    }
178
}
179

180
fn annotate_bam(
1✔
181
    input: &Path,
182
    output: &Path,
183
    source: Source<'_>,
184
    header_comments: &[String],
185
    threads: usize,
186
    compression_level: u32,
187
) -> Result<()> {
188
    let mut reader = crate::open_bam_reader(input)?;
1✔
189
    annotate_bam_from_reader(
190
        &mut reader,
191
        output,
192
        source,
193
        header_comments,
194
        threads,
195
        compression_level,
196
    )
197
}
198

199
fn annotate_bam_from_reader<R: std::io::Read>(
2✔
200
    reader: &mut noodles::bam::io::Reader<R>,
201
    output: &Path,
202
    source: Source<'_>,
203
    header_comments: &[String],
204
    threads: usize,
205
    compression_level: u32,
206
) -> Result<()> {
207
    use noodles::bam;
208

209
    let mut header = reader.read_header().context("failed to read BAM header")?;
2✔
210
    for c in header_comments {
4✔
211
        header.add_comment(c.clone());
2✔
212
    }
213

214
    let file = std::fs::File::create(output)
4✔
215
        .with_context(|| format!("failed to create BAM file: {}", output.display()))?;
2✔
216
    let parz = gzp::par::compress::ParCompressBuilder::<gzp::deflate::Bgzf>::new()
6✔
217
        .num_threads(threads.max(1))
2✔
218
        .with_context(|| "invalid --threads value for BAM bgzf writer")?
2✔
219
        .compression_level(gzp::Compression::new(compression_level))
4✔
220
        .from_writer(file);
2✔
221
    let mut writer = bam::io::Writer::from(parz);
4✔
222
    writer
2✔
223
        .write_header(&header)
2✔
224
        .context("failed to write BAM header")?;
225

226
    run_annotate_pipeline(
227
        writer,
2✔
228
        &header,
×
229
        reader.record_bufs(&header),
2✔
230
        source,
×
231
        "BAM",
232
        output,
×
233
        Some(AlignmentFormat::Bam),
×
234
        |w, _| {
2✔
235
            use gzp::ZWriter as _;
×
236
            let mut parz = w.into_inner();
2✔
237
            parz.finish()
4✔
238
                .map_err(|e| anyhow::anyhow!("failed to finish BAM BGZF stream: {e}"))?;
4✔
239
            Ok(())
2✔
240
        },
241
    )
242
}
243

244
fn annotate_cram(
1✔
245
    input: &Path,
246
    output: &Path,
247
    source: Source<'_>,
248
    header_comments: &[String],
249
    cram_reference: Option<&Path>,
250
) -> Result<()> {
251
    let mut reader = crate::open_cram_reader(input, cram_reference)?;
1✔
252
    annotate_cram_from_reader(&mut reader, output, source, header_comments, cram_reference)
2✔
253
}
254

255
fn annotate_cram_from_reader<R: std::io::Read>(
2✔
256
    reader: &mut noodles::cram::io::Reader<R>,
257
    output: &Path,
258
    source: Source<'_>,
259
    header_comments: &[String],
260
    cram_reference: Option<&Path>,
261
) -> Result<()> {
262
    let mut header = reader.read_header().context("failed to read CRAM header")?;
2✔
263
    crate::require_cram_reference_if_mapped(&header, cram_reference)?;
5✔
264
    for c in header_comments {
2✔
265
        header.add_comment(c.clone());
×
266
    }
267

268
    let mut writer = crate::open_cram_writer(output, cram_reference)?;
2✔
269
    writer
2✔
270
        .write_header(&header)
2✔
271
        .context("failed to write CRAM header")?;
272

273
    run_annotate_pipeline(
274
        writer,
2✔
275
        &header,
×
276
        reader.records(&header),
2✔
277
        source,
×
278
        "CRAM",
279
        output,
×
280
        Some(AlignmentFormat::Cram),
×
281
        |mut w, header| w.try_finish(header).context("failed to finish CRAM writer"),
4✔
282
    )
283
}
284

285
fn annotate_sam(
1✔
286
    input: &Path,
287
    output: &Path,
288
    source: Source<'_>,
289
    header_comments: &[String],
290
) -> Result<()> {
291
    use noodles::sam;
292
    let file = std::fs::File::open(input)
3✔
293
        .with_context(|| format!("failed to open SAM file: {}", input.display()))?;
1✔
294
    let mut reader = sam::io::Reader::new(BufReader::new(file));
2✔
295
    annotate_sam_from_reader(&mut reader, output, source, header_comments)
2✔
296
}
297

298
fn annotate_sam_from_reader<R: std::io::BufRead>(
2✔
299
    reader: &mut noodles::sam::io::Reader<R>,
300
    output: &Path,
301
    source: Source<'_>,
302
    header_comments: &[String],
303
) -> Result<()> {
304
    use noodles::sam;
305

306
    let mut header = reader.read_header().context("failed to read SAM header")?;
2✔
307
    for c in header_comments {
4✔
308
        header.add_comment(c.clone());
×
309
    }
310

311
    let out_file = std::fs::File::create(output)
4✔
312
        .with_context(|| format!("failed to create SAM file: {}", output.display()))?;
2✔
313
    let mut writer = sam::io::Writer::new(std::io::BufWriter::new(out_file));
4✔
314
    writer
2✔
315
        .write_header(&header)
2✔
316
        .context("failed to write SAM header")?;
317

318
    run_annotate_pipeline(
319
        writer,
2✔
320
        &header,
×
321
        reader.record_bufs(&header),
2✔
322
        source,
×
323
        "SAM",
324
        output,
×
325
        None,
×
326
        |w, _| w.into_inner().flush().context("failed to flush SAM writer"),
4✔
327
    )
328
}
329

330
/// Dispatch to the appropriate lookup strategy for `source` and run the loop.
331
///
332
/// Returns `(annotated, total, missing)` where `missing` is the number of
333
/// records whose QNAME was `*` (no name) and were written unannotated.
334
fn annotate_records<I, W>(
6✔
335
    iter: I,
336
    writer: &mut W,
337
    header: &noodles::sam::Header,
338
    source: Source<'_>,
339
    fmt: &str,
340
) -> Result<(u64, u64, u64)>
341
where
342
    I: Iterator<Item = std::io::Result<noodles::sam::alignment::record_buf::RecordBuf>>,
343
    W: AlignmentWrite,
344
{
345
    match source {
6✔
346
        Source::Map(map) => {
6✔
347
            annotate_loop(iter, writer, header, fmt, |name| Ok(map.get(name).copied()))
24✔
348
        }
349
        Source::Stream(path) => {
2✔
350
            let kfile = std::fs::File::open(path)
4✔
351
                .with_context(|| format!("failed to open assignments: {}", path.display()))?;
2✔
352
            let mut state = StreamingLookup::new(BufReader::new(kfile));
4✔
353
            annotate_loop(iter, writer, header, fmt, |name| state.lookup(name))
6✔
354
        }
355
    }
356
}
357

358
/// Run the per-record annotation loop, finalize the writer, log the summary,
359
/// and (when `output_format` is `Some`) emit a sidecar index. Owns the writer
360
/// so format-specific finalize semantics (BGZF `finish`, CRAM `try_finish`,
361
/// SAM `flush`) can each consume it.
362
#[allow(clippy::too_many_arguments)]
363
fn run_annotate_pipeline<W, I, F>(
6✔
364
    mut writer: W,
365
    header: &noodles::sam::Header,
366
    records: I,
367
    source: Source<'_>,
368
    fmt: &str,
369
    output: &Path,
370
    output_format: Option<AlignmentFormat>,
371
    finalize: F,
372
) -> Result<()>
373
where
374
    W: AlignmentWrite,
375
    I: Iterator<Item = std::io::Result<noodles::sam::alignment::record_buf::RecordBuf>>,
376
    F: FnOnce(W, &noodles::sam::Header) -> Result<()>,
377
{
378
    let (annotated, total, missing) = annotate_records(records, &mut writer, header, source, fmt)?;
13✔
379
    finalize(writer, header)?;
6✔
380
    info!("Annotated {annotated} / {total} records ({missing} records had no name).");
12✔
381
    if let Some(fmt) = output_format {
10✔
382
        crate::maybe_index_alignment_output(output, header, fmt)?;
12✔
383
    }
384
    Ok(())
6✔
385
}
386

387
/// Per-record annotation loop. `lookup(name)` returns the taxon id for the
388
/// read, `Ok(None)` if the read is absent (treated as a fatal mismatch), or
389
/// `Err` for any unrecoverable lookup failure.
390
fn annotate_loop<I, W, F>(
8✔
391
    iter: I,
392
    writer: &mut W,
393
    header: &noodles::sam::Header,
394
    fmt: &str,
395
    mut lookup: F,
396
) -> Result<(u64, u64, u64)>
397
where
398
    I: Iterator<Item = std::io::Result<noodles::sam::alignment::record_buf::RecordBuf>>,
399
    W: AlignmentWrite,
400
    F: FnMut(&str) -> Result<Option<u32>>,
401
{
402
    use noodles::sam::alignment::record_buf::data::field::Value;
403

404
    let mut annotated = 0u64;
8✔
405
    let mut missing = 0u64;
8✔
406
    let mut total = 0u64;
8✔
407

408
    for result in iter {
24✔
409
        let mut record = result.with_context(|| format!("failed to read {fmt} record"))?;
16✔
410
        total += 1;
8✔
411

412
        if let Some(name_bytes) = record.name() {
25✔
413
            let name = std::str::from_utf8(name_bytes).context("non-UTF-8 read name")?;
16✔
414
            match lookup(name)? {
8✔
415
                Some(taxon_id) => {
8✔
416
                    record
8✔
417
                        .data_mut()
418
                        .insert(crate::TI_TAG.into(), Value::Int32(taxon_id as i32));
16✔
419
                    annotated += 1;
8✔
420
                }
421
                None => {
422
                    anyhow::bail!(
1✔
423
                        "read {name:?} (record {total}) is not present in the assignments file; \
×
424
                         ensure the Kraken assignments file contains every read in the input"
×
425
                    );
426
                }
427
            }
428
        } else {
429
            missing += 1;
2✔
430
        }
431

432
        writer
8✔
433
            .write_alignment_record(header, &record)
8✔
434
            .with_context(|| format!("failed to write {fmt} record"))?;
8✔
435
    }
436

437
    Ok((annotated, total, missing))
8✔
438
}
439

440
#[cfg(test)]
441
mod tests {
442
    use super::*;
443
    use crate::AlignmentFormat;
444
    use noodles::sam::alignment::record_buf::{data::field::Value, RecordBuf};
445

446
    fn make_record(name: &str) -> RecordBuf {
447
        let mut r = RecordBuf::default();
448
        *r.name_mut() = Some(name.as_bytes().into());
449
        r
450
    }
451

452
    #[test]
453
    fn test_ti_tag_inserted() {
454
        let mut record = make_record("read1");
455
        record
456
            .data_mut()
457
            .insert(crate::TI_TAG.into(), Value::Int32(9606));
458
        match record.data().get(&crate::TI_TAG) {
459
            Some(Value::Int32(n)) => assert_eq!(*n, 9606),
460
            _ => panic!("ti tag not found or wrong type"),
461
        }
462
    }
463

464
    #[test]
465
    fn test_ti_tag_overwrite() {
466
        let mut record = make_record("read2");
467
        record
468
            .data_mut()
469
            .insert(crate::TI_TAG.into(), Value::Int32(1));
470
        record
471
            .data_mut()
472
            .insert(crate::TI_TAG.into(), Value::Int32(9606));
473
        match record.data().get(&crate::TI_TAG) {
474
            Some(Value::Int32(n)) => assert_eq!(*n, 9606),
475
            _ => panic!("ti tag not updated"),
476
        }
477
    }
478

479
    /// Verify that after BAM round-trip the `ti` tag is preserved as the SAM
480
    /// `i` (signed 32-bit integer) aux type rather than `I` (unsigned).
481
    #[test]
482
    fn test_ti_tag_is_int32_after_bam_round_trip() {
483
        use noodles::bam;
484
        use noodles::sam;
485
        use noodles::sam::alignment::io::Write as _;
486
        use noodles::sam::alignment::record_buf::{QualityScores, RecordBuf, Sequence};
487
        use std::io::Write as _;
488

489
        let dir = tempfile::TempDir::new().unwrap();
490

491
        let in_bam = dir.path().join("input.bam");
492
        let header = sam::Header::default();
493
        {
494
            let mut w = bam::io::writer::Builder.build_from_path(&in_bam).unwrap();
495
            w.write_header(&header).unwrap();
496
            let mut r = RecordBuf::default();
497
            *r.name_mut() = Some("readZ".as_bytes().into());
498
            *r.sequence_mut() = Sequence::from(b"ACGT".to_vec());
499
            *r.quality_scores_mut() = QualityScores::from(vec![30u8; 4]);
500
            w.write_alignment_record(&header, &r).unwrap();
501
            w.into_inner().finish().unwrap();
502
        }
503

504
        let assignments_path = dir.path().join("assignments.txt");
505
        {
506
            let mut f = std::fs::File::create(&assignments_path).unwrap();
507
            writeln!(f, "C\treadZ\t9606\t4\t9606:1").unwrap();
508
        }
509

510
        let out_bam = dir.path().join("output.bam");
511
        super::run_annotate(super::AnnotateArgs {
512
            input: in_bam,
513
            assignments: assignments_path,
514
            output: out_bam.clone(),
515
            kraken_report: None,
516
            kraken_db: None,
517
            unordered: true,
518
            cram_reference: None,
519
            threads: 1,
520
            compression_level: 5,
521
        })
522
        .unwrap();
523

524
        let mut reader = crate::open_bam_reader(&out_bam).unwrap();
525
        let out_header = reader.read_header().unwrap();
526
        let records: Vec<RecordBuf> = reader
527
            .record_bufs(&out_header)
528
            .collect::<std::io::Result<Vec<_>>>()
529
            .unwrap();
530
        assert_eq!(records.len(), 1);
531
        match records[0].data().get(&crate::TI_TAG) {
532
            Some(Value::Int32(n)) => assert_eq!(*n, 9606),
533
            other => panic!("expected Int32 (SAM type 'i'); got: {:?}", other),
534
        }
535
    }
536

537
    /// Write a single-record BAM, then run `annotate` writing to `out_name`
538
    /// with the given `threads` and `compression_level`. Returns the output
539
    /// file size in bytes and the round-tripped `ti` tag value.
540
    fn run_annotate_bam(
541
        tmpdir: &std::path::Path,
542
        out_name: &str,
543
        threads: usize,
544
        compression_level: u32,
545
    ) -> (u64, i32) {
546
        use noodles::bam;
547
        use noodles::sam;
548
        use noodles::sam::alignment::io::Write as _;
549
        use noodles::sam::alignment::record_buf::{QualityScores, RecordBuf, Sequence};
550
        use std::io::Write as _;
551

552
        let in_bam = tmpdir.join("in.bam");
553
        let header = sam::Header::default();
554
        {
555
            let mut w = bam::io::writer::Builder.build_from_path(&in_bam).unwrap();
556
            w.write_header(&header).unwrap();
557
            // A few records with varied sequences give the compressor enough
558
            // material that level 1 vs 9 differ measurably while staying tiny.
559
            for i in 0..32 {
560
                let name = format!("read{i}");
561
                let mut r = RecordBuf::default();
562
                *r.name_mut() = Some(name.as_bytes().into());
563
                let bases: Vec<u8> = (0..96)
564
                    .map(|j| match (i + j) % 4 {
565
                        0 => b'A',
566
                        1 => b'C',
567
                        2 => b'G',
568
                        _ => b'T',
569
                    })
570
                    .collect();
571
                *r.sequence_mut() = Sequence::from(bases.clone());
572
                *r.quality_scores_mut() = QualityScores::from(vec![30u8; bases.len()]);
573
                w.write_alignment_record(&header, &r).unwrap();
574
            }
575
            w.into_inner().finish().unwrap();
576
        }
577

578
        let assignments = tmpdir.join("assignments.txt");
579
        {
580
            let mut f = std::fs::File::create(&assignments).unwrap();
581
            for i in 0..32 {
582
                writeln!(f, "C\tread{i}\t9606\t96\t9606:1").unwrap();
583
            }
584
        }
585

586
        let out_bam = tmpdir.join(out_name);
587
        super::run_annotate(super::AnnotateArgs {
588
            input: in_bam,
589
            assignments,
590
            output: out_bam.clone(),
591
            kraken_report: None,
592
            kraken_db: None,
593
            unordered: true,
594
            cram_reference: None,
595
            threads,
596
            compression_level,
597
        })
598
        .unwrap();
599

600
        let size = std::fs::metadata(&out_bam).unwrap().len();
601
        let mut reader = crate::open_bam_reader(&out_bam).unwrap();
602
        let h = reader.read_header().unwrap();
603
        let first: RecordBuf = reader.record_bufs(&h).next().unwrap().unwrap();
604
        let ti = match first.data().get(&crate::TI_TAG) {
605
            Some(Value::Int32(n)) => *n,
606
            other => panic!("expected Int32 ti tag; got {other:?}"),
607
        };
608
        (size, ti)
609
    }
610

611
    #[test]
612
    fn test_annotate_bam_higher_compression_level_yields_smaller_file() {
613
        let dir = tempfile::TempDir::new().unwrap();
614
        let (size_low, ti_low) = run_annotate_bam(dir.path(), "low.bam", 1, 1);
615
        let (size_high, ti_high) = run_annotate_bam(dir.path(), "high.bam", 1, 9);
616
        assert_eq!(ti_low, 9606);
617
        assert_eq!(ti_high, 9606);
618
        assert!(
619
            size_high < size_low,
620
            "expected level 9 ({size_high} bytes) < level 1 ({size_low} bytes)"
621
        );
622
    }
623

624
    #[test]
625
    fn test_annotate_bam_threads_one_and_many_round_trip_identically() {
626
        let dir = tempfile::TempDir::new().unwrap();
627
        let (_, ti_serial) = run_annotate_bam(dir.path(), "t1.bam", 1, 5);
628
        let (_, ti_parallel) = run_annotate_bam(dir.path(), "t4.bam", 4, 5);
629
        assert_eq!(ti_serial, 9606);
630
        assert_eq!(ti_parallel, 9606);
631
    }
632

633
    #[test]
634
    fn test_annotate_cram_unordered() {
635
        use noodles::sam;
636
        use noodles::sam::alignment::io::Write as _;
637
        use noodles::sam::alignment::record_buf::{
638
            data::field::Value, QualityScores, RecordBuf, Sequence,
639
        };
640
        use std::io::Write as _;
641

642
        let dir = tempfile::TempDir::new().unwrap();
643

644
        // Write input CRAM with two named records; sequences must be non-empty
645
        // to avoid empty external data blocks in the CRAM writer.
646
        let in_cram = dir.path().join("input.cram");
647
        let header = sam::Header::default();
648
        {
649
            let mut w = crate::open_cram_writer(&in_cram, None).unwrap();
650
            w.write_header(&header).unwrap();
651
            let mut r1 = RecordBuf::default();
652
            *r1.name_mut() = Some("read1".as_bytes().into());
653
            *r1.sequence_mut() = Sequence::from(b"ACGT".to_vec());
654
            *r1.quality_scores_mut() = QualityScores::from(vec![30u8; 4]);
655
            w.write_alignment_record(&header, &r1).unwrap();
656
            let mut r2 = RecordBuf::default();
657
            *r2.name_mut() = Some("read2".as_bytes().into());
658
            *r2.sequence_mut() = Sequence::from(b"ACGT".to_vec());
659
            *r2.quality_scores_mut() = QualityScores::from(vec![30u8; 4]);
660
            w.write_alignment_record(&header, &r2).unwrap();
661
            w.try_finish(&header).unwrap();
662
        }
663

664
        // Kraken assignments: read1 -> 9606, read2 -> 1234
665
        let assignments_path = dir.path().join("assignments.txt");
666
        {
667
            let mut f = std::fs::File::create(&assignments_path).unwrap();
668
            writeln!(f, "C\tread1\t9606\t4\t9606:1").unwrap();
669
            writeln!(f, "C\tread2\t1234\t4\t1234:1").unwrap();
670
        }
671

672
        let out_cram = dir.path().join("output.cram");
673

674
        super::run_annotate(super::AnnotateArgs {
675
            input: in_cram,
676
            assignments: assignments_path,
677
            output: out_cram.clone(),
678
            kraken_report: None,
679
            kraken_db: None,
680
            unordered: true,
681
            cram_reference: None,
682
            threads: 1,
683
            compression_level: 5,
684
        })
685
        .unwrap();
686

687
        // Verify ti tags in output CRAM
688
        let mut reader = crate::open_cram_reader(&out_cram, None).unwrap();
689
        let out_header = reader.read_header().unwrap();
690
        let records: Vec<RecordBuf> = reader
691
            .records(&out_header)
692
            .collect::<Result<Vec<_>, _>>()
693
            .unwrap();
694
        assert_eq!(records.len(), 2);
695
        match records[0].data().get(&crate::TI_TAG) {
696
            Some(Value::Int32(n)) => assert_eq!(*n, 9606),
697
            other => panic!("unexpected ti tag value: {:?}", other),
698
        }
699
        match records[1].data().get(&crate::TI_TAG) {
700
            Some(Value::Int32(n)) => assert_eq!(*n, 1234),
701
            other => panic!("unexpected ti tag value: {:?}", other),
702
        }
703
    }
704

705
    #[test]
706
    fn test_annotate_cram_streaming() {
707
        use noodles::sam;
708
        use noodles::sam::alignment::io::Write as _;
709
        use noodles::sam::alignment::record_buf::{
710
            data::field::Value, QualityScores, RecordBuf, Sequence,
711
        };
712
        use std::io::Write as _;
713

714
        let dir = tempfile::TempDir::new().unwrap();
715

716
        // Non-empty sequence avoids empty external data blocks in the CRAM writer.
717
        let in_cram = dir.path().join("input.cram");
718
        let header = sam::Header::default();
719
        {
720
            let mut w = crate::open_cram_writer(&in_cram, None).unwrap();
721
            w.write_header(&header).unwrap();
722
            let mut r = RecordBuf::default();
723
            *r.name_mut() = Some("readA".as_bytes().into());
724
            *r.sequence_mut() = Sequence::from(b"ACGT".to_vec());
725
            *r.quality_scores_mut() = QualityScores::from(vec![30u8; 4]);
726
            w.write_alignment_record(&header, &r).unwrap();
727
            w.try_finish(&header).unwrap();
728
        }
729

730
        let assignments_path = dir.path().join("assignments.txt");
731
        {
732
            let mut f = std::fs::File::create(&assignments_path).unwrap();
733
            writeln!(f, "C\treadA\t9606\t4\t9606:1").unwrap();
734
        }
735

736
        let out_cram = dir.path().join("output.cram");
737

738
        // unordered: false -> streaming mode
739
        super::run_annotate(super::AnnotateArgs {
740
            input: in_cram,
741
            assignments: assignments_path,
742
            output: out_cram.clone(),
743
            kraken_report: None,
744
            kraken_db: None,
745
            unordered: false,
746
            cram_reference: None,
747
            threads: 1,
748
            compression_level: 5,
749
        })
750
        .unwrap();
751

752
        let mut reader = crate::open_cram_reader(&out_cram, None).unwrap();
753
        let out_header = reader.read_header().unwrap();
754
        let records: Vec<RecordBuf> = reader
755
            .records(&out_header)
756
            .collect::<Result<Vec<_>, _>>()
757
            .unwrap();
758
        assert_eq!(records.len(), 1);
759
        match records[0].data().get(&crate::TI_TAG) {
760
            Some(Value::Int32(n)) => assert_eq!(*n, 9606),
761
            other => panic!("unexpected ti tag value: {:?}", other),
762
        }
763
    }
764

765
    #[test]
    fn test_run_annotate_unnamed_record_is_skipped() {
        // A record without a QNAME (`*`, i.e. no name set) must pass through
        // `run_annotate` untouched: it receives no `ti` tag, while a named
        // record in the same file is still annotated. The original note says
        // such records are also counted in the "missing" log message; that log
        // output is not asserted here.
        use noodles::bam;
        use noodles::sam;
        use noodles::sam::alignment::io::Write as _;
        use noodles::sam::alignment::record_buf::data::field::Value;
        use noodles::sam::alignment::record_buf::{QualityScores, Sequence};

        let tmp = tempfile::TempDir::new().unwrap();
        let input_path = tmp.path().join("in.bam");
        let assignments_path = tmp.path().join("assignments.txt");
        let output_path = tmp.path().join("out.bam");

        // Input BAM: one named record followed by one unnamed record.
        let header = sam::Header::default();
        let mut writer = bam::io::writer::Builder.build_from_path(&input_path).unwrap();
        writer.write_header(&header).unwrap();

        let mut with_name = RecordBuf::default();
        *with_name.name_mut() = Some(b"named".as_ref().into());
        *with_name.sequence_mut() = Sequence::from(b"ACGT".to_vec());
        *with_name.quality_scores_mut() = QualityScores::from(vec![30u8; 4]);
        writer.write_alignment_record(&header, &with_name).unwrap();

        // The name is deliberately never set, so `name()` is `None`.
        let mut without_name = RecordBuf::default();
        *without_name.sequence_mut() = Sequence::from(b"TTTT".to_vec());
        *without_name.quality_scores_mut() = QualityScores::from(vec![30u8; 4]);
        writer.write_alignment_record(&header, &without_name).unwrap();

        // Release the writer before `run_annotate` opens the file.
        drop(writer);

        // Only the named record has an assignment line.
        std::fs::write(&assignments_path, b"C\tnamed\t9606\t4\t9606:1\n").unwrap();

        // `unordered: true` loads the assignments up front, so lookups do not
        // depend on the order of records in the input.
        super::run_annotate(super::AnnotateArgs {
            input: input_path,
            assignments: assignments_path,
            output: output_path.clone(),
            kraken_report: None,
            kraken_db: None,
            unordered: true,
            cram_reference: None,
            threads: 1,
            compression_level: 5,
        })
        .unwrap();

        let mut reader = bam::io::reader::Builder.build_from_path(&output_path).unwrap();
        let out_header = reader.read_header().unwrap();
        let annotated: Vec<RecordBuf> = reader
            .record_bufs(&out_header)
            .collect::<std::io::Result<Vec<_>>>()
            .unwrap();
        assert_eq!(annotated.len(), 2);

        let (named_out, unnamed_out) = (&annotated[0], &annotated[1]);
        // The named record carries the taxon id from the assignments file.
        assert!(matches!(
            named_out.data().get(&crate::TI_TAG),
            Some(Value::Int32(9606))
        ));
        // The unnamed record is emitted without a `ti` tag.
        assert!(unnamed_out.data().get(&crate::TI_TAG).is_none());
    }
827

828
    #[test]
    fn test_run_annotate_bam_unambiguous_path() {
        // An input with a real `.bam` extension is dispatched by extension;
        // per the original note the route is run_annotate -> annotate_bam.
        // Streaming mode (`unordered: false`) is used for the lookup.
        use noodles::bam;
        use noodles::sam;
        use noodles::sam::alignment::io::Write as _;
        use noodles::sam::alignment::record_buf::data::field::Value;
        use noodles::sam::alignment::record_buf::{QualityScores, Sequence};

        let tmp = tempfile::TempDir::new().unwrap();
        let input_path = tmp.path().join("input.bam");
        let assignments_path = tmp.path().join("assignments.txt");
        let output_path = tmp.path().join("out.bam");

        // Single-record input BAM.
        let header = sam::Header::default();
        let mut writer = bam::io::writer::Builder.build_from_path(&input_path).unwrap();
        writer.write_header(&header).unwrap();
        let mut read_b = RecordBuf::default();
        *read_b.name_mut() = Some(b"readB".as_ref().into());
        *read_b.sequence_mut() = Sequence::from(b"ACGT".to_vec());
        *read_b.quality_scores_mut() = QualityScores::from(vec![30u8; 4]);
        writer.write_alignment_record(&header, &read_b).unwrap();
        // Release the writer before `run_annotate` opens the file.
        drop(writer);

        std::fs::write(&assignments_path, b"C\treadB\t9606\t4\t9606:1\n").unwrap();

        super::run_annotate(super::AnnotateArgs {
            input: input_path,
            assignments: assignments_path,
            output: output_path.clone(),
            kraken_report: None,
            kraken_db: None,
            unordered: false,
            cram_reference: None,
            threads: 1,
            compression_level: 5,
        })
        .unwrap();

        let mut reader = bam::io::reader::Builder.build_from_path(&output_path).unwrap();
        let out_header = reader.read_header().unwrap();
        let annotated: Vec<RecordBuf> = reader
            .record_bufs(&out_header)
            .collect::<std::io::Result<Vec<_>>>()
            .unwrap();
        assert_eq!(annotated.len(), 1);

        // The only record must carry ti:i:9606.
        let ti = annotated[0].data().get(&crate::TI_TAG);
        if let Some(Value::Int32(taxid)) = ti {
            assert_eq!(*taxid, 9606);
        } else {
            panic!("expected ti:i:9606, got {other:?}", other = ti);
        }
    }
878

879
    #[test]
    fn test_run_annotate_with_kraken_report_embeds_header() {
        // Supplying a Kraken report must make `run_annotate` serialize the
        // taxonomy into the output header as one or more
        // `@CO krak:report:*` comment lines.
        use noodles::bam;
        use noodles::sam;
        use noodles::sam::alignment::io::Write as _;
        use noodles::sam::alignment::record_buf::{QualityScores, Sequence};

        let tmp = tempfile::TempDir::new().unwrap();
        let input_path = tmp.path().join("input.bam");
        let assignments_path = tmp.path().join("assignments.txt");
        let report_path = tmp.path().join("report.k2report");
        let output_path = tmp.path().join("out.bam");

        // Single-record input BAM.
        let header = sam::Header::default();
        let mut writer = bam::io::writer::Builder.build_from_path(&input_path).unwrap();
        writer.write_header(&header).unwrap();
        let mut read_r = RecordBuf::default();
        *read_r.name_mut() = Some(b"readR".as_ref().into());
        *read_r.sequence_mut() = Sequence::from(b"ACGT".to_vec());
        *read_r.quality_scores_mut() = QualityScores::from(vec![30u8; 4]);
        writer.write_alignment_record(&header, &read_r).unwrap();
        // Release the writer before `run_annotate` opens the file.
        drop(writer);

        std::fs::write(&assignments_path, b"C\treadR\t9606\t4\t9606:1\n").unwrap();

        // Two-line report: the root plus one species entry.
        std::fs::write(
            &report_path,
            b"100.00\t1\t1\tR\t1\troot\n100.00\t1\t1\tS\t9606\t  Homo sapiens\n",
        )
        .unwrap();

        super::run_annotate(super::AnnotateArgs {
            input: input_path,
            assignments: assignments_path,
            output: output_path.clone(),
            kraken_report: Some(report_path),
            kraken_db: None,
            unordered: false,
            cram_reference: None,
            threads: 1,
            compression_level: 5,
        })
        .unwrap();

        let mut reader = bam::io::reader::Builder.build_from_path(&output_path).unwrap();
        let out_header = reader.read_header().unwrap();

        // A comment counts only if it is valid UTF-8 and starts with the
        // embed prefix.
        let has_embed = out_header.comments().iter().any(|comment| {
            matches!(
                std::str::from_utf8(comment),
                Ok(text) if text.starts_with("krak:report:")
            )
        });
        assert!(
            has_embed,
            "expected at least one krak:report: header comment"
        );
    }
935

936
    /// Routes a SAM file whose extension is not a recognized alignment
    /// extension through `run_annotate` and checks the SAM output.
    ///
    /// `AlignmentFormat::from_path` maps unknown extensions to `Sam` (see
    /// `test_format_detection`), so this covers path-based SAM dispatch for a
    /// regular file that is not named `*.sam`. The pseudo-path sniff fallback
    /// (B9) needs a `/dev/fd/N` input, which is awkward to build portably; it
    /// is exercised by the Unix-only
    /// `test_run_annotate_sniff_fallback_dev_fd_routes_*` tests instead.
    #[test]
    fn test_run_annotate_sniff_fallback_extensionless_sam() {
        let dir = tempfile::TempDir::new().unwrap();

        // The input deliberately uses `.txt` rather than `.sam`: the fixture
        // previously carried a recognized `.sam` extension, which meant the
        // "no recognized extension" case described here was never exercised.
        let sam_path = dir.path().join("stream.txt");
        std::fs::write(
            &sam_path,
            "@HD\tVN:1.6\nreadP\t4\t*\t0\t0\t*\t*\t0\t0\tACGT\tIIII\n",
        )
        .unwrap();

        // One unclassified assignment (taxon id 0) for the only read.
        let assignments_path = dir.path().join("assignments.txt");
        std::fs::write(&assignments_path, "U\treadP\t0\t4\t0:4\n").unwrap();

        let out_path = dir.path().join("out.sam");

        super::run_annotate(super::AnnotateArgs {
            input: sam_path,
            assignments: assignments_path,
            output: out_path.clone(),
            kraken_report: None,
            kraken_db: None,
            unordered: true,
            cram_reference: None,
            threads: 1,
            compression_level: 5,
        })
        .unwrap();

        // Output should be a valid SAM with the ti tag.
        let body = std::fs::read_to_string(&out_path).unwrap();
        assert!(
            body.contains("ti:i:0"),
            "expected ti aux tag in SAM output, got:\n{body}"
        );
    }
986

987
    /// Feeds a real BAM to `run_annotate` through a `/dev/fd/N` pseudo-path.
    ///
    /// Per the original note this exercises the sniff-fallback Bam arm of
    /// `run_annotate`: path-based detection defaults to Bam for pseudo-paths,
    /// the sniffer confirms it, and the buffered reader is handed on to
    /// `annotate_bam_from_reader`.
    #[cfg(unix)]
    #[test]
    fn test_run_annotate_sniff_fallback_dev_fd_routes_bam() {
        use noodles::bam;
        use noodles::sam;
        use noodles::sam::alignment::io::Write as _;
        use noodles::sam::alignment::record_buf::data::field::Value;
        use noodles::sam::alignment::record_buf::{QualityScores, Sequence};
        use std::os::fd::AsRawFd;

        let tmp = tempfile::TempDir::new().unwrap();
        let bam_path = tmp.path().join("in.bam");
        let assignments_path = tmp.path().join("assignments.txt");
        let output_path = tmp.path().join("out.bam");

        // Single-record BAM on disk.
        let header = sam::Header::default();
        let mut writer = bam::io::writer::Builder.build_from_path(&bam_path).unwrap();
        writer.write_header(&header).unwrap();
        let mut read_d = RecordBuf::default();
        *read_d.name_mut() = Some(b"readD".as_ref().into());
        *read_d.sequence_mut() = Sequence::from(b"ACGT".to_vec());
        *read_d.quality_scores_mut() = QualityScores::from(vec![30u8; 4]);
        writer.write_alignment_record(&header, &read_d).unwrap();
        // Release the writer before the file is reopened for reading.
        drop(writer);

        std::fs::write(&assignments_path, "C\treadD\t9606\t4\t9606:4\n").unwrap();

        // `handle` stays open until the end of the test so that `/dev/fd/N`
        // keeps referring to the BAM while `run_annotate` reads it.
        let handle = std::fs::File::open(&bam_path).unwrap();
        let pseudo = std::path::PathBuf::from(format!("/dev/fd/{fd}", fd = handle.as_raw_fd()));

        super::run_annotate(super::AnnotateArgs {
            input: pseudo,
            assignments: assignments_path,
            output: output_path.clone(),
            kraken_report: None,
            kraken_db: None,
            unordered: true,
            cram_reference: None,
            threads: 1,
            compression_level: 5,
        })
        .unwrap();

        let mut reader = bam::io::reader::Builder.build_from_path(&output_path).unwrap();
        let out_header = reader.read_header().unwrap();
        let annotated: Vec<RecordBuf> = reader
            .record_bufs(&out_header)
            .collect::<std::io::Result<Vec<_>>>()
            .unwrap();
        assert_eq!(annotated.len(), 1);

        let ti = annotated[0].data().get(&crate::TI_TAG);
        if let Some(Value::Int32(taxid)) = ti {
            assert_eq!(*taxid, 9606);
        } else {
            panic!("expected ti:i:9606, got {other:?}", other = ti);
        }
    }
1050

1051
    /// Feeds a real CRAM to `run_annotate` through a `/dev/fd/N` pseudo-path,
    /// which per the original note exercises the sniff-fallback Cram arm of
    /// `run_annotate`.
    #[cfg(unix)]
    #[test]
    fn test_run_annotate_sniff_fallback_dev_fd_routes_cram() {
        use noodles::sam;
        use noodles::sam::alignment::io::Write as _;
        use noodles::sam::alignment::record_buf::data::field::Value;
        use noodles::sam::alignment::record_buf::{QualityScores, Sequence};
        use std::os::fd::AsRawFd;

        let tmp = tempfile::TempDir::new().unwrap();
        let cram_path = tmp.path().join("in.cram");
        let assignments_path = tmp.path().join("assignments.txt");
        let output_path = tmp.path().join("out.cram");

        // Single-record CRAM written without a reference.
        let header = sam::Header::default();
        let mut writer = crate::open_cram_writer(&cram_path, None).unwrap();
        writer.write_header(&header).unwrap();
        let mut read_c = RecordBuf::default();
        *read_c.name_mut() = Some(b"readC".as_ref().into());
        *read_c.sequence_mut() = Sequence::from(b"ACGT".to_vec());
        *read_c.quality_scores_mut() = QualityScores::from(vec![30u8; 4]);
        writer.write_alignment_record(&header, &read_c).unwrap();
        writer.try_finish(&header).unwrap();
        // Release the writer before the file is reopened for reading.
        drop(writer);

        std::fs::write(&assignments_path, "C\treadC\t9606\t4\t9606:4\n").unwrap();

        // `handle` stays open until the end of the test so that `/dev/fd/N`
        // keeps referring to the CRAM while `run_annotate` reads it.
        let handle = std::fs::File::open(&cram_path).unwrap();
        let pseudo = std::path::PathBuf::from(format!("/dev/fd/{fd}", fd = handle.as_raw_fd()));

        super::run_annotate(super::AnnotateArgs {
            input: pseudo,
            assignments: assignments_path,
            output: output_path.clone(),
            kraken_report: None,
            kraken_db: None,
            unordered: true,
            cram_reference: None,
            threads: 1,
            compression_level: 5,
        })
        .unwrap();

        let mut reader = crate::open_cram_reader(&output_path, None).unwrap();
        let out_header = reader.read_header().unwrap();
        let annotated: Vec<RecordBuf> = reader
            .records(&out_header)
            .collect::<Result<Vec<_>, _>>()
            .unwrap();
        assert_eq!(annotated.len(), 1);

        let ti = annotated[0].data().get(&crate::TI_TAG);
        if let Some(Value::Int32(taxid)) = ti {
            assert_eq!(*taxid, 9606);
        } else {
            panic!("expected ti:i:9606, got {other:?}", other = ti);
        }
    }
1109

1110
    /// Regression test: annotating a mapped CRAM (one whose header declares
    /// `@SQ` reference sequences) without `--cram-reference` once panicked
    /// inside noodles' decoder ("invalid slice reference sequence name") as
    /// soon as records were iterated. `run_annotate` must instead return an
    /// error that names the missing option.
    #[test]
    fn test_run_annotate_mapped_cram_without_reference_errors_cleanly() {
        use noodles::sam;
        use noodles::sam::header::record::value::{map::ReferenceSequence, Map};
        use std::num::NonZeroUsize;

        let tmp = tempfile::TempDir::new().unwrap();

        // A 4-base reference plus its index, used only to write the input.
        let fa_path = tmp.path().join("ref.fa");
        std::fs::write(&fa_path, b">chr1\nACGT\n").unwrap();
        let fai_path = tmp.path().join("ref.fa.fai");
        std::fs::write(&fai_path, b"chr1\t4\t6\t4\t5\n").unwrap();

        // The header declares `chr1`, which is what makes the CRAM "mapped".
        let mut header = sam::Header::default();
        header.reference_sequences_mut().insert(
            b"chr1".as_ref().into(),
            Map::<ReferenceSequence>::new(NonZeroUsize::new(4).unwrap()),
        );

        // Header-only CRAM: no records are written.
        let input_path = tmp.path().join("in.cram");
        let mut writer = crate::open_cram_writer(&input_path, Some(&fa_path)).unwrap();
        writer.write_header(&header).unwrap();
        writer.try_finish(&header).unwrap();
        // Release the writer before `run_annotate` opens the file.
        drop(writer);

        let assignments_path = tmp.path().join("assignments.txt");
        std::fs::write(&assignments_path, b"").unwrap();

        // No `cram_reference` is passed, which is the condition under test.
        let err = super::run_annotate(super::AnnotateArgs {
            input: input_path,
            assignments: assignments_path,
            output: tmp.path().join("out.cram"),
            kraken_report: None,
            kraken_db: None,
            unordered: true,
            cram_reference: None,
            threads: 1,
            compression_level: 5,
        })
        .unwrap_err();

        // `{:#}` renders the whole error chain on one line.
        let msg = format!("{err:#}");
        for needle in ["--cram-reference", "reference sequences"] {
            assert!(msg.contains(needle), "got: {msg}");
        }
    }
1158

1159
    #[test]
    fn test_run_annotate_missing_read_in_assignments_errors() {
        // In unordered mode, a BAM record whose name has no entry in the
        // assignments file must produce the "not present in the assignments
        // file" error rather than being silently passed through.
        use noodles::bam;
        use noodles::sam;
        use noodles::sam::alignment::io::Write as _;
        use noodles::sam::alignment::record_buf::{QualityScores, Sequence};

        let tmp = tempfile::TempDir::new().unwrap();
        let input_path = tmp.path().join("in.bam");
        let assignments_path = tmp.path().join("assignments.txt");

        // Input BAM with a single read named `readMissing`.
        let header = sam::Header::default();
        let mut writer = bam::io::writer::Builder.build_from_path(&input_path).unwrap();
        writer.write_header(&header).unwrap();
        let mut orphan = RecordBuf::default();
        *orphan.name_mut() = Some(b"readMissing".as_ref().into());
        *orphan.sequence_mut() = Sequence::from(b"ACGT".to_vec());
        *orphan.quality_scores_mut() = QualityScores::from(vec![30u8; 4]);
        writer.write_alignment_record(&header, &orphan).unwrap();
        // Release the writer before `run_annotate` opens the file.
        drop(writer);

        // The assignments file only knows about a differently named read.
        std::fs::write(&assignments_path, b"C\tdifferent\t9606\t4\t9606:4\n").unwrap();

        let err = super::run_annotate(super::AnnotateArgs {
            input: input_path,
            assignments: assignments_path,
            output: tmp.path().join("out.bam"),
            kraken_report: None,
            kraken_db: None,
            unordered: true,
            cram_reference: None,
            threads: 1,
            compression_level: 5,
        })
        .unwrap_err();

        // `{:#}` renders the whole error chain on one line.
        let rendered = format!("{err:#}");
        assert!(rendered.contains("not present in the assignments"));
    }
1198

1199
    /// B9 (pseudo-path branch): hands `run_annotate` a `/dev/fd/N` path that
    /// refers to a real SAM file and checks that the input is handled as SAM
    /// instead of failing in the BAM reader. Needs a Unix `/dev/fd/`.
    #[cfg(unix)]
    #[test]
    fn test_run_annotate_sniff_fallback_dev_fd_routes_sam() {
        use std::os::fd::AsRawFd;

        let tmp = tempfile::TempDir::new().unwrap();
        let sam_path = tmp.path().join("stream.sam");
        let assignments_path = tmp.path().join("assignments.txt");
        let out_path = tmp.path().join("out.sam");

        // Minimal SAM: an `@HD` line and one unmapped read.
        std::fs::write(
            &sam_path,
            "@HD\tVN:1.6\nreadQ\t4\t*\t0\t0\t*\t*\t0\t0\tACGT\tIIII\n",
        )
        .unwrap();

        // One unclassified assignment (taxon id 0) for that read.
        std::fs::write(&assignments_path, "U\treadQ\t0\t4\t0:4\n").unwrap();

        // Per the original note, path-based detection yields Bam for
        // `/dev/fd/*`, and `sniff_input` is expected to report Sam and trigger
        // the fallback. `handle` stays open until the end of the test so the
        // descriptor remains valid while `run_annotate` reads through it.
        let handle = std::fs::File::open(&sam_path).unwrap();
        let pseudo = std::path::PathBuf::from(format!("/dev/fd/{fd}", fd = handle.as_raw_fd()));

        super::run_annotate(super::AnnotateArgs {
            input: pseudo,
            assignments: assignments_path,
            output: out_path.clone(),
            kraken_report: None,
            kraken_db: None,
            unordered: true,
            cram_reference: None,
            threads: 1,
            compression_level: 5,
        })
        .unwrap();

        let body = std::fs::read_to_string(&out_path).unwrap();
        assert!(body.contains("ti:i:0"), "expected ti aux tag, got:\n{body}");
    }
1245

1246
    #[test]
    fn test_format_detection() {
        // Extension -> format table. Matching is case-insensitive for `bam`
        // and `cram`; any other extension falls back to SAM.
        let cases = [
            ("foo.bam", AlignmentFormat::Bam),
            ("foo.BAM", AlignmentFormat::Bam),
            ("foo.cram", AlignmentFormat::Cram),
            ("foo.CRAM", AlignmentFormat::Cram),
            ("foo.sam", AlignmentFormat::Sam),
            ("foo.txt", AlignmentFormat::Sam),
        ];

        for (path, expected) in cases {
            assert_eq!(
                AlignmentFormat::from_path(std::path::Path::new(path)),
                expected,
                "path: {path}"
            );
        }
    }
1273
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc