• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

rust-bio / rust-htslib / 19161687300

07 Nov 2025 07:43AM UTC coverage: 81.912% (-0.02%) from 81.935%
19161687300

Pull #488

github

web-flow
Merge 907dabbe9 into 8f1cdd75c
Pull Request #488: fix: Reason about Send/Sync-ness of types and change Rcs to Arcs

34 of 36 new or added lines in 7 files covered. (94.44%)

20 existing lines in 6 files now uncovered.

2785 of 3400 relevant lines covered (81.91%)

27209.26 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

77.97
/src/bam/record.rs
1
// Copyright 2014 Christopher Schröder, Johannes Köster.
2
// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3
// This file may not be copied, modified, or distributed
4
// except according to those terms.
5

6
use std::convert::TryFrom;
7
use std::convert::TryInto;
8
use std::ffi;
9
use std::fmt;
10
use std::marker::PhantomData;
11
use std::mem::{size_of, MaybeUninit};
12
use std::ops;
13
use std::os::raw::c_char;
14
use std::slice;
15
use std::str;
16
use std::sync::Arc;
17

18
use byteorder::{LittleEndian, ReadBytesExt};
19

20
use crate::bam::Error;
21
use crate::bam::HeaderView;
22
use crate::errors::Result;
23
use crate::htslib;
24
use crate::utils;
25
#[cfg(feature = "serde_feature")]
26
use serde::{self, Deserialize, Serialize};
27

28
use bio_types::alignment::{Alignment, AlignmentMode, AlignmentOperation};
29
use bio_types::genome;
30
use bio_types::sequence::SequenceRead;
31
use bio_types::sequence::SequenceReadPairOrientation;
32
use bio_types::strand::ReqStrand;
33

34
/// A macro creating methods for flag access.
35
macro_rules! flag {
36
    ($get:ident, $set:ident, $unset:ident, $bit:expr) => {
37
        pub fn $get(&self) -> bool {
104✔
38
            self.inner().core.flag & $bit != 0
115✔
39
        }
40

41
        pub fn $set(&mut self) {
51,057✔
42
            self.inner_mut().core.flag |= $bit;
102,100✔
43
        }
44

45
        pub fn $unset(&mut self) {
6✔
46
            self.inner_mut().core.flag &= !$bit;
7✔
47
        }
48
    };
49
}
50

51
/// A BAM record.
52
pub struct Record {
53
    pub inner: htslib::bam1_t,
54
    own: bool,
55
    cigar: Option<CigarStringView>,
56
    header: Option<Arc<HeaderView>>,
57
}
58

59
unsafe impl Send for Record {}
60
unsafe impl Sync for Record {}
61

62
impl Clone for Record {
63
    fn clone(&self) -> Self {
9✔
64
        let mut copy = Record::new();
17✔
65
        unsafe { htslib::bam_copy1(copy.inner_ptr_mut(), self.inner_ptr()) };
42✔
66
        copy
9✔
67
    }
68
}
69

70
impl PartialEq for Record {
71
    fn eq(&self, other: &Record) -> bool {
20✔
72
        self.tid() == other.tid()
58✔
73
            && self.pos() == other.pos()
58✔
74
            && self.bin() == other.bin()
58✔
75
            && self.mapq() == other.mapq()
58✔
76
            && self.flags() == other.flags()
58✔
77
            && self.mtid() == other.mtid()
58✔
78
            && self.mpos() == other.mpos()
58✔
79
            && self.insert_size() == other.insert_size()
58✔
80
            && self.data() == other.data()
39✔
81
            && self.inner().core.l_extranul == other.inner().core.l_extranul
39✔
82
    }
83
}
84

85
impl Eq for Record {}
86

87
impl fmt::Debug for Record {
88
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
×
89
        fmt.write_fmt(format_args!(
×
90
            "Record(tid: {}, pos: {})",
91
            self.tid(),
×
92
            self.pos()
×
93
        ))
94
    }
95
}
96

97
impl Default for Record {
98
    fn default() -> Self {
×
99
        Self::new()
×
100
    }
101
}
102

103
#[inline]
104
fn extranul_from_qname(qname: &[u8]) -> usize {
71,108✔
105
    let qlen = qname.len() + 1;
142,207✔
106
    if !qlen.is_multiple_of(4) {
71,118✔
107
        4 - qlen % 4
64,440✔
108
    } else {
109
        0
6,678✔
110
    }
111
}
112

113
impl Record {
114
    /// Create an empty BAM record.
115
    pub fn new() -> Self {
51,050✔
116
        let mut record = Record {
117
            inner: unsafe { MaybeUninit::zeroed().assume_init() },
153,132✔
118
            own: true,
119
            cigar: None,
120
            header: None,
121
        };
122
        // The read/query name needs to be set as empty to properly initialize
123
        // the record
124
        record.set_qname(b"");
153,132✔
125
        // Developer note: these are needed so the returned record is properly
126
        // initialized as unmapped.
127
        record.set_unmapped();
102,091✔
128
        record.set_tid(-1);
102,091✔
129
        record.set_pos(-1);
102,091✔
130
        record.set_mpos(-1);
102,091✔
131
        record.set_mtid(-1);
102,091✔
132
        record
51,050✔
133
    }
134

135
    pub fn from_inner(from: *mut htslib::bam1_t) -> Self {
914✔
136
        Record {
137
            inner: {
912✔
138
                #[allow(clippy::uninit_assumed_init, invalid_value)]
139
                let mut inner = unsafe { MaybeUninit::uninit().assume_init() };
140
                unsafe {
141
                    ::libc::memcpy(
142
                        &mut inner as *mut htslib::bam1_t as *mut ::libc::c_void,
143
                        from as *const ::libc::c_void,
144
                        size_of::<htslib::bam1_t>(),
145
                    );
146
                }
147
                inner
148
            },
149
            own: false,
150
            cigar: None,
151
            header: None,
152
        }
153
    }
154

155
    // Create a BAM record from a line SAM text. SAM slice need not be 0-terminated.
156
    pub fn from_sam(header_view: &HeaderView, sam: &[u8]) -> Result<Record> {
15✔
157
        let mut record = Self::new();
27✔
158

159
        let mut sam_copy = Vec::with_capacity(sam.len() + 1);
39✔
160
        sam_copy.extend(sam);
39✔
161
        sam_copy.push(0);
27✔
162

163
        let mut sam_string = htslib::kstring_t {
164
            s: sam_copy.as_ptr() as *mut c_char,
27✔
165
            l: sam_copy.len(),
39✔
166
            m: sam_copy.len(),
15✔
167
        };
168

169
        let succ = unsafe {
170
            htslib::sam_parse1(
171
                &mut sam_string,
12✔
172
                header_view.inner_ptr() as *mut htslib::bam_hdr_t,
15✔
173
                record.inner_ptr_mut(),
27✔
174
            )
175
        };
176

177
        if succ == 0 {
18✔
178
            Ok(record)
15✔
179
        } else {
180
            Err(Error::BamParseSAM {
×
181
                rec: str::from_utf8(&sam_copy).unwrap().to_owned(),
×
182
            })
183
        }
184
    }
185

186
    pub fn set_header(&mut self, header: Arc<HeaderView>) {
30,940✔
187
        self.header = Some(header);
61,880✔
188
    }
189

190
    pub(super) fn data(&self) -> &[u8] {
122,181✔
191
        unsafe { slice::from_raw_parts(self.inner().data, self.inner().l_data as usize) }
366,535✔
192
    }
193

194
    #[inline]
195
    pub fn inner_mut(&mut self) -> &mut htslib::bam1_t {
721,907✔
196
        &mut self.inner
721,907✔
197
    }
198

199
    #[inline]
200
    pub(super) fn inner_ptr_mut(&mut self) -> *mut htslib::bam1_t {
50,883✔
201
        &mut self.inner as *mut htslib::bam1_t
50,883✔
202
    }
203

204
    #[inline]
205
    pub fn inner(&self) -> &htslib::bam1_t {
1,210,869✔
206
        &self.inner
1,210,869✔
207
    }
208

209
    #[inline]
210
    pub(super) fn inner_ptr(&self) -> *const htslib::bam1_t {
30,168✔
211
        &self.inner as *const htslib::bam1_t
30,168✔
212
    }
213

214
    /// Get target id.
215
    pub fn tid(&self) -> i32 {
58✔
216
        self.inner().core.tid
58✔
217
    }
218

219
    /// Set target id.
220
    pub fn set_tid(&mut self, tid: i32) {
51,050✔
221
        self.inner_mut().core.tid = tid;
102,091✔
222
    }
223

224
    /// Get position (0-based).
225
    pub fn pos(&self) -> i64 {
61,476✔
226
        self.inner().core.pos
61,476✔
227
    }
228

229
    /// Set position (0-based).
230
    pub fn set_pos(&mut self, pos: i64) {
71,051✔
231
        self.inner_mut().core.pos = pos;
142,093✔
232
    }
233

234
    pub fn bin(&self) -> u16 {
39✔
235
        self.inner().core.bin
39✔
236
    }
237

238
    pub fn set_bin(&mut self, bin: u16) {
×
239
        self.inner_mut().core.bin = bin;
×
240
    }
241

242
    /// Get MAPQ.
243
    pub fn mapq(&self) -> u8 {
39✔
244
        self.inner().core.qual
39✔
245
    }
246

247
    /// Set MAPQ.
248
    pub fn set_mapq(&mut self, mapq: u8) {
×
249
        self.inner_mut().core.qual = mapq;
×
250
    }
251

252
    /// Get strand information from record flags.
253
    pub fn strand(&self) -> ReqStrand {
×
254
        let reverse = self.flags() & 0x10 != 0;
×
255
        if reverse {
×
256
            ReqStrand::Reverse
×
257
        } else {
258
            ReqStrand::Forward
×
259
        }
260
    }
261

262
    /// Get raw flags.
263
    pub fn flags(&self) -> u16 {
82✔
264
        self.inner().core.flag
82✔
265
    }
266

267
    /// Set raw flags.
268
    pub fn set_flags(&mut self, flags: u16) {
×
269
        self.inner_mut().core.flag = flags;
×
270
    }
271

272
    /// Unset all flags.
273
    pub fn unset_flags(&mut self) {
×
274
        self.inner_mut().core.flag = 0;
×
275
    }
276

277
    /// Get target id of mate.
278
    pub fn mtid(&self) -> i32 {
51✔
279
        self.inner().core.mtid
51✔
280
    }
281

282
    /// Set target id of mate.
283
    pub fn set_mtid(&mut self, mtid: i32) {
51,050✔
284
        self.inner_mut().core.mtid = mtid;
102,091✔
285
    }
286

287
    /// Get mate position.
288
    pub fn mpos(&self) -> i64 {
62✔
289
        self.inner().core.mpos
62✔
290
    }
291

292
    /// Set mate position.
293
    pub fn set_mpos(&mut self, mpos: i64) {
51,050✔
294
        self.inner_mut().core.mpos = mpos;
102,091✔
295
    }
296

297
    /// Get insert size.
298
    pub fn insert_size(&self) -> i64 {
39✔
299
        self.inner().core.isize_
39✔
300
    }
301

302
    /// Set insert size.
303
    pub fn set_insert_size(&mut self, insert_size: i64) {
×
304
        self.inner_mut().core.isize_ = insert_size;
×
305
    }
306

307
    fn qname_capacity(&self) -> usize {
193,248✔
308
        self.inner().core.l_qname as usize
193,248✔
309
    }
310

311
    fn qname_len(&self) -> usize {
30,165✔
312
        // discount all trailing zeros (the default one and extra nulls)
313
        self.qname_capacity() - 1 - self.inner().core.l_extranul as usize
60,328✔
314
    }
315

316
    /// Get qname (read name). Complexity: O(1).
317
    pub fn qname(&self) -> &[u8] {
30,165✔
318
        &self.data()[..self.qname_len()]
60,328✔
319
    }
320

321
    /// Set the variable length data buffer
322
    pub fn set_data(&mut self, new_data: &[u8]) {
13✔
323
        self.cigar = None;
26✔
324

325
        self.inner_mut().l_data = new_data.len() as i32;
37✔
326
        if (self.inner().m_data as i32) < self.inner().l_data {
37✔
327
            // Verbosity due to lexical borrowing
328
            let l_data = self.inner().l_data;
37✔
329
            self.realloc_var_data(l_data as usize);
25✔
330
        }
331

332
        // Copy new data into buffer
333
        let data =
13✔
334
            unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
36✔
335
        utils::copy_memory(new_data, data);
37✔
336
    }
337

338
    /// Set variable length data (qname, cigar, seq, qual).
339
    /// The aux data is left unchanged.
340
    /// `qual` is Phred-scaled quality values, without any offset.
341
    /// NOTE: seq.len() must equal qual.len() or this method
342
    /// will panic. If you don't have quality values use
343
    /// `let quals = vec![ 255 as u8; seq.len()];` as a placeholder that will
344
    /// be recognized as missing QVs by `samtools`.
345
    pub fn set(&mut self, qname: &[u8], cigar: Option<&CigarString>, seq: &[u8], qual: &[u8]) {
20,033✔
346
        assert!(qname.len() < 255);
40,065✔
347
        assert_eq!(seq.len(), qual.len(), "seq.len() must equal qual.len()");
100,161✔
348

349
        self.cigar = None;
40,066✔
350

351
        let cigar_width = if let Some(cigar_string) = cigar {
60,098✔
352
            cigar_string.len()
20,033✔
353
        } else {
354
            0
×
355
        } * 4;
356
        let q_len = qname.len() + 1;
40,066✔
357
        let extranul = extranul_from_qname(qname);
60,097✔
358

359
        let orig_aux_offset = self.qname_capacity()
120,196✔
360
            + 4 * self.cigar_len()
60,097✔
361
            + self.seq_len().div_ceil(2)
60,097✔
362
            + self.seq_len();
20,033✔
363
        let new_aux_offset = q_len + extranul + cigar_width + seq.len().div_ceil(2) + qual.len();
100,162✔
364
        assert!(orig_aux_offset <= self.inner.l_data as usize);
40,065✔
365
        let aux_len = self.inner.l_data as usize - orig_aux_offset;
40,065✔
366
        self.inner_mut().l_data = (new_aux_offset + aux_len) as i32;
60,098✔
367
        if (self.inner().m_data as i32) < self.inner().l_data {
60,084✔
368
            // Verbosity due to lexical borrowing
369
            let l_data = self.inner().l_data;
60,058✔
370
            self.realloc_var_data(l_data as usize);
40,039✔
371
        }
372

373
        // Copy the aux data.
374
        if aux_len > 0 && orig_aux_offset != new_aux_offset {
20,038✔
375
            let data =
5✔
376
                unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().m_data as usize) };
8✔
377
            data.copy_within(orig_aux_offset..orig_aux_offset + aux_len, new_aux_offset);
9✔
378
        }
379

380
        let data =
20,033✔
381
            unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
60,096✔
382

383
        // qname
384
        utils::copy_memory(qname, data);
60,097✔
385
        for i in 0..=extranul {
100,203✔
386
            data[qname.len() + i] = b'\0';
80,170✔
387
        }
388
        let mut i = q_len + extranul;
40,065✔
389
        self.inner_mut().core.l_qname = i as u16;
40,065✔
390
        self.inner_mut().core.l_extranul = extranul as u8;
40,065✔
391

392
        // cigar
393
        if let Some(cigar_string) = cigar {
40,067✔
394
            let cigar_data = unsafe {
395
                //cigar is always aligned to 4 bytes (see extranul above) - so this is safe
396
                #[allow(clippy::cast_ptr_alignment)]
397
                slice::from_raw_parts_mut(data[i..].as_ptr() as *mut u32, cigar_string.len())
60,097✔
398
            };
399
            for (i, c) in cigar_string.iter().enumerate() {
240,333✔
400
                cigar_data[i] = c.encode();
120,158✔
401
            }
402
            self.inner_mut().core.n_cigar = cigar_string.len() as u32;
60,097✔
403
            i += cigar_string.len() * 4;
20,033✔
404
        } else {
405
            self.inner_mut().core.n_cigar = 0;
×
406
        };
407

408
        // seq
409
        {
410
            for j in (0..seq.len()).step_by(2) {
1,041,042✔
411
                data[i + j / 2] = (ENCODE_BASE[seq[j] as usize] << 4)
2,001,954✔
412
                    | (if j + 1 < seq.len() {
2,001,954✔
413
                        ENCODE_BASE[seq[j + 1] as usize]
1,000,976✔
414
                    } else {
415
                        0
2✔
416
                    });
417
            }
418
            self.inner_mut().core.l_qseq = seq.len() as i32;
60,097✔
419
            i += seq.len().div_ceil(2);
40,065✔
420
        }
421

422
        // qual
423
        utils::copy_memory(qual, &mut data[i..]);
60,097✔
424
    }
425

426
    /// Replace current qname with a new one.
427
    pub fn set_qname(&mut self, new_qname: &[u8]) {
51,076✔
428
        // 251 + 1NUL is the max 32-bit aligned value that fits in u8
429
        assert!(new_qname.len() < 252);
102,143✔
430

431
        let old_q_len = self.qname_capacity();
153,210✔
432
        // We're going to add a terminal NUL
433
        let extranul = extranul_from_qname(new_qname);
153,210✔
434
        let new_q_len = new_qname.len() + 1 + extranul;
102,143✔
435

436
        // Length of data after qname
437
        let other_len = self.inner_mut().l_data - old_q_len as i32;
102,152✔
438

439
        if new_q_len < old_q_len && self.inner().l_data > (old_q_len as i32) {
51,094✔
440
            self.inner_mut().l_data -= (old_q_len - new_q_len) as i32;
26✔
441
        } else if new_q_len > old_q_len {
51,076✔
442
            self.inner_mut().l_data += (new_q_len - old_q_len) as i32;
153,153✔
443

444
            // Reallocate if necessary
445
            if (self.inner().m_data as i32) < self.inner().l_data {
153,152✔
446
                // Verbosity due to lexical borrowing
447
                let l_data = self.inner().l_data;
153,150✔
448
                self.realloc_var_data(l_data as usize);
102,103✔
449
            }
450
        }
451

452
        if new_q_len != old_q_len {
51,076✔
453
            // Move other data to new location
454
            unsafe {
455
                let data = slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize);
255,289✔
456

457
                ::libc::memmove(
458
                    data.as_mut_ptr().add(new_q_len) as *mut ::libc::c_void,
204,233✔
459
                    data.as_mut_ptr().add(old_q_len) as *mut ::libc::c_void,
153,177✔
460
                    other_len as usize,
51,065✔
461
                );
462
            }
463
        }
464

465
        // Copy qname data
466
        let data =
51,076✔
467
            unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
153,201✔
468
        utils::copy_memory(new_qname, data);
153,210✔
469
        for i in 0..=extranul {
459,572✔
470
            data[new_q_len - i - 1] = b'\0';
204,257✔
471
        }
472
        self.inner_mut().core.l_qname = new_q_len as u16;
102,143✔
473
        self.inner_mut().core.l_extranul = extranul as u8;
102,143✔
474
    }
475

476
    /// Replace current cigar with a new one.
477
    pub fn set_cigar(&mut self, new_cigar: Option<&CigarString>) {
19✔
478
        self.cigar = None;
38✔
479

480
        let qname_data_len = self.qname_capacity();
55✔
481
        let old_cigar_data_len = self.cigar_len() * 4;
37✔
482

483
        // Length of data after cigar
484
        let other_data_len = self.inner_mut().l_data - (qname_data_len + old_cigar_data_len) as i32;
56✔
485

486
        let new_cigar_len = match new_cigar {
37✔
487
            Some(x) => x.len(),
25✔
488
            None => 0,
7✔
489
        };
490
        let new_cigar_data_len = new_cigar_len * 4;
37✔
491

492
        if new_cigar_data_len < old_cigar_data_len {
32✔
493
            self.inner_mut().l_data -= (old_cigar_data_len - new_cigar_data_len) as i32;
38✔
494
        } else if new_cigar_data_len > old_cigar_data_len {
19✔
495
            self.inner_mut().l_data += (new_cigar_data_len - old_cigar_data_len) as i32;
19✔
496

497
            // Reallocate if necessary
498
            if (self.inner().m_data as i32) < self.inner().l_data {
19✔
499
                // Verbosity due to lexical borrowing
500
                let l_data = self.inner().l_data;
19✔
501
                self.realloc_var_data(l_data as usize);
13✔
502
            }
503
        }
504

505
        if new_cigar_data_len != old_cigar_data_len {
19✔
506
            // Move other data to new location
507
            unsafe {
508
                ::libc::memmove(
509
                    self.inner.data.add(qname_data_len + new_cigar_data_len) as *mut ::libc::c_void,
73✔
510
                    self.inner.data.add(qname_data_len + old_cigar_data_len) as *mut ::libc::c_void,
56✔
511
                    other_data_len as usize,
19✔
512
                );
513
            }
514
        }
515

516
        // Copy cigar data
517
        if let Some(cigar_string) = new_cigar {
32✔
518
            let cigar_data = unsafe {
519
                #[allow(clippy::cast_ptr_alignment)]
520
                slice::from_raw_parts_mut(
521
                    self.inner.data.add(qname_data_len) as *mut u32,
37✔
522
                    cigar_string.len(),
13✔
523
                )
524
            };
525
            for (i, c) in cigar_string.iter().enumerate() {
1,857✔
526
                cigar_data[i] = c.encode();
1,213✔
527
            }
528
        }
529
        self.inner_mut().core.n_cigar = new_cigar_len as u32;
37✔
530
    }
531

532
    fn realloc_var_data(&mut self, new_len: usize) {
71,093✔
533
        // pad request
534
        let new_len = new_len as u32;
142,177✔
535
        let new_request = new_len + 32 - (new_len % 32);
142,177✔
536

537
        let ptr = unsafe {
538
            ::libc::realloc(
142,177✔
539
                self.inner().data as *mut ::libc::c_void,
71,093✔
540
                new_request as usize,
71,093✔
541
            ) as *mut u8
71,084✔
542
        };
543

544
        if ptr.is_null() {
142,177✔
545
            panic!("ran out of memory in rust_htslib trying to realloc");
×
546
        }
547

548
        // don't update m_data until we know we have
549
        // a successful allocation.
550
        self.inner_mut().m_data = new_request;
142,177✔
551
        self.inner_mut().data = ptr;
142,177✔
552

553
        // we now own inner.data
554
        self.own = true;
71,093✔
555
    }
556

557
    pub fn cigar_len(&self) -> usize {
112,011✔
558
        self.inner().core.n_cigar as usize
112,011✔
559
    }
560

561
    /// Get reference to raw cigar string representation (as stored in BAM file).
562
    /// Usually, the method `Record::cigar` should be used instead.
563
    pub fn raw_cigar(&self) -> &[u32] {
30,856✔
564
        //cigar is always aligned to 4 bytes - so this is safe
565
        #[allow(clippy::cast_ptr_alignment)]
566
        unsafe {
567
            slice::from_raw_parts(
568
                self.data()[self.qname_capacity()..].as_ptr() as *const u32,
92,566✔
569
                self.cigar_len(),
30,856✔
570
            )
571
        }
572
    }
573

574
    /// Return unpacked cigar string. This will create a fresh copy the Cigar data.
575
    pub fn cigar(&self) -> CigarStringView {
30,856✔
576
        match self.cigar {
30,856✔
577
            Some(ref c) => c.clone(),
×
578
            None => self.unpack_cigar(),
61,711✔
579
        }
580
    }
581

582
    // Return unpacked cigar string. This returns None unless you have first called `bam::Record::cache_cigar`.
583
    pub fn cigar_cached(&self) -> Option<&CigarStringView> {
×
584
        self.cigar.as_ref()
×
585
    }
586

587
    /// Decode the cigar string and cache it inside the `Record`
588
    pub fn cache_cigar(&mut self) {
×
589
        self.cigar = Some(self.unpack_cigar())
×
590
    }
591

592
    /// Unpack cigar string. Complexity: O(k) with k being the length of the cigar string.
593
    fn unpack_cigar(&self) -> CigarStringView {
30,856✔
594
        CigarString(
595
            self.raw_cigar()
30,856✔
596
                .iter()
30,856✔
597
                .map(|&c| {
124,595✔
598
                    let len = c >> 4;
187,475✔
599
                    match c & 0b1111 {
93,738✔
600
                        0 => Cigar::Match(len),
62,093✔
601
                        1 => Cigar::Ins(len),
643✔
602
                        2 => Cigar::Del(len),
30,164✔
603
                        3 => Cigar::RefSkip(len),
457✔
604
                        4 => Cigar::SoftClip(len),
377✔
605
                        5 => Cigar::HardClip(len),
5✔
606
                        6 => Cigar::Pad(len),
×
607
                        7 => Cigar::Equal(len),
3✔
608
                        8 => Cigar::Diff(len),
3✔
609
                        _ => panic!("Unexpected cigar operation"),
×
610
                    }
611
                })
612
                .collect(),
30,856✔
613
        )
614
        .into_view(self.pos())
92,567✔
615
    }
616

617
    pub fn seq_len(&self) -> usize {
162,285✔
618
        self.inner().core.l_qseq as usize
162,285✔
619
    }
620

621
    fn seq_data(&self) -> &[u8] {
30,854✔
622
        let offset = self.qname_capacity() + self.cigar_len() * 4;
123,407✔
623
        &self.data()[offset..][..self.seq_len().div_ceil(2)]
92,556✔
624
    }
625

626
    /// Get read sequence. Complexity: O(1).
627
    pub fn seq(&self) -> Seq<'_> {
30,854✔
628
        Seq {
629
            encoded: self.seq_data(),
61,705✔
630
            len: self.seq_len(),
30,854✔
631
        }
632
    }
633

634
    /// Get base qualities (PHRED-scaled probability that base is wrong).
635
    /// This does not entail any offsets, hence the qualities can be used directly without
636
    /// e.g. subtracting 33. Complexity: O(1).
637
    pub fn qual(&self) -> &[u8] {
30,251✔
638
        &self.data()[self.qname_capacity() + self.cigar_len() * 4 + self.seq_len().div_ceil(2)..]
211,751✔
639
            [..self.seq_len()]
30,251✔
640
    }
641

642
    /// Look up an auxiliary field by its tag.
643
    ///
644
    /// Only the first two bytes of a given tag are used for the look-up of a field.
645
    /// See [`Aux`] for more details.
646
    pub fn aux(&self, tag: &[u8]) -> Result<Aux<'_>> {
50,217✔
647
        let c_str = ffi::CString::new(tag).map_err(|_| Error::BamAuxStringError)?;
200,859✔
648
        let aux = unsafe {
649
            htslib::bam_aux_get(
650
                &self.inner as *const htslib::bam1_t,
50,217✔
651
                c_str.as_ptr() as *mut c_char,
50,220✔
652
            )
653
        };
654
        unsafe { Self::read_aux_field(aux).map(|(aux_field, _length)| aux_field) }
150,651✔
655
    }
656

657
    unsafe fn read_aux_field<'a>(aux: *const u8) -> Result<(Aux<'a>, usize)> {
50,240✔
658
        const TAG_LEN: isize = 2;
659
        // Used for skipping type identifier
660
        const TYPE_ID_LEN: isize = 1;
661

662
        if aux.is_null() {
100,477✔
663
            return Err(Error::BamAuxTagNotFound);
20,103✔
664
        }
665

666
        let (data, type_size) = match *aux {
90,417✔
667
            b'A' => {
×
668
                let type_size = size_of::<u8>();
×
669
                (Aux::Char(*aux.offset(TYPE_ID_LEN)), type_size)
×
670
            }
671
            b'c' => {
×
672
                let type_size = size_of::<i8>();
17✔
673
                (Aux::I8(*aux.offset(TYPE_ID_LEN).cast::<i8>()), type_size)
18✔
674
            }
675
            b'C' => {
×
676
                let type_size = size_of::<u8>();
5✔
677
                (Aux::U8(*aux.offset(TYPE_ID_LEN)), type_size)
4✔
678
            }
679
            b's' => {
×
680
                let type_size = size_of::<i16>();
5✔
681
                (
682
                    Aux::I16(
1✔
683
                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
8✔
684
                            .read_i16::<LittleEndian>()
3✔
685
                            .map_err(|_| Error::BamAuxParsingError)?,
3✔
686
                    ),
687
                    type_size,
2✔
688
                )
689
            }
690
            b'S' => {
×
691
                let type_size = size_of::<u16>();
5✔
692
                (
693
                    Aux::U16(
1✔
694
                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
8✔
695
                            .read_u16::<LittleEndian>()
3✔
696
                            .map_err(|_| Error::BamAuxParsingError)?,
3✔
697
                    ),
698
                    type_size,
2✔
699
                )
700
            }
701
            b'i' => {
×
702
                let type_size = size_of::<i32>();
60,124✔
703
                (
704
                    Aux::I32(
2✔
705
                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
90,187✔
706
                            .read_i32::<LittleEndian>()
30,063✔
707
                            .map_err(|_| Error::BamAuxParsingError)?,
30,063✔
708
                    ),
709
                    type_size,
30,061✔
710
                )
711
            }
712
            b'I' => {
×
713
                let type_size = size_of::<u32>();
5✔
714
                (
715
                    Aux::U32(
1✔
716
                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
8✔
717
                            .read_u32::<LittleEndian>()
3✔
718
                            .map_err(|_| Error::BamAuxParsingError)?,
3✔
719
                    ),
720
                    type_size,
2✔
721
                )
722
            }
723
            b'f' => {
×
724
                let type_size = size_of::<f32>();
3✔
725
                (
726
                    Aux::Float(
1✔
727
                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
5✔
728
                            .read_f32::<LittleEndian>()
2✔
729
                            .map_err(|_| Error::BamAuxParsingError)?,
2✔
730
                    ),
731
                    type_size,
1✔
732
                )
733
            }
734
            b'd' => {
×
735
                let type_size = size_of::<f64>();
3✔
736
                (
737
                    Aux::Double(
1✔
738
                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
5✔
739
                            .read_f64::<LittleEndian>()
2✔
740
                            .map_err(|_| Error::BamAuxParsingError)?,
2✔
741
                    ),
742
                    type_size,
1✔
743
                )
744
            }
745
            b'Z' | b'H' => {
×
746
                let c_str = ffi::CStr::from_ptr(aux.offset(TYPE_ID_LEN).cast::<c_char>());
31✔
747
                let rust_str = c_str.to_str().map_err(|_| Error::BamAuxParsingError)?;
25✔
748
                (Aux::String(rust_str), c_str.to_bytes_with_nul().len())
13✔
749
            }
750
            b'B' => {
×
751
                const ARRAY_INNER_TYPE_LEN: isize = 1;
752
                const ARRAY_COUNT_LEN: isize = 4;
753

754
                // Used for skipping metadata
755
                let array_data_offset = TYPE_ID_LEN + ARRAY_INNER_TYPE_LEN + ARRAY_COUNT_LEN;
110✔
756

757
                let length =
61✔
758
                    slice::from_raw_parts(aux.offset(TYPE_ID_LEN + ARRAY_INNER_TYPE_LEN), 4)
156✔
759
                        .read_u32::<LittleEndian>()
55✔
760
                        .map_err(|_| Error::BamAuxParsingError)? as usize;
55✔
761

762
                // Return tuples of an `Aux` enum and the length of data + metadata in bytes
763
                let (array_data, array_size) = match *aux.offset(TYPE_ID_LEN) {
162✔
764
                    b'c' => (
15✔
765
                        Aux::ArrayI8(AuxArray::<'a, i8>::from_bytes(slice::from_raw_parts(
43✔
766
                            aux.offset(array_data_offset),
43✔
767
                            length,
14✔
768
                        ))),
769
                        length,
14✔
770
                    ),
771
                    b'C' => (
7✔
772
                        Aux::ArrayU8(AuxArray::<'a, u8>::from_bytes(slice::from_raw_parts(
19✔
773
                            aux.offset(array_data_offset),
19✔
774
                            length,
6✔
775
                        ))),
776
                        length,
6✔
777
                    ),
778
                    b's' => (
7✔
779
                        Aux::ArrayI16(AuxArray::<'a, i16>::from_bytes(slice::from_raw_parts(
25✔
780
                            aux.offset(array_data_offset),
31✔
781
                            length * size_of::<i16>(),
13✔
782
                        ))),
783
                        length * std::mem::size_of::<i16>(),
7✔
784
                    ),
785
                    b'S' => (
7✔
786
                        Aux::ArrayU16(AuxArray::<'a, u16>::from_bytes(slice::from_raw_parts(
25✔
787
                            aux.offset(array_data_offset),
31✔
788
                            length * size_of::<u16>(),
13✔
789
                        ))),
790
                        length * std::mem::size_of::<u16>(),
7✔
791
                    ),
792
                    b'i' => (
7✔
793
                        Aux::ArrayI32(AuxArray::<'a, i32>::from_bytes(slice::from_raw_parts(
25✔
794
                            aux.offset(array_data_offset),
31✔
795
                            length * size_of::<i32>(),
13✔
796
                        ))),
797
                        length * std::mem::size_of::<i32>(),
7✔
798
                    ),
799
                    b'I' => (
7✔
800
                        Aux::ArrayU32(AuxArray::<'a, u32>::from_bytes(slice::from_raw_parts(
25✔
801
                            aux.offset(array_data_offset),
31✔
802
                            length * size_of::<u32>(),
13✔
803
                        ))),
804
                        length * std::mem::size_of::<u32>(),
7✔
805
                    ),
806
                    b'f' => (
11✔
807
                        Aux::ArrayFloat(AuxArray::<f32>::from_bytes(slice::from_raw_parts(
35✔
808
                            aux.offset(array_data_offset),
43✔
809
                            length * size_of::<f32>(),
19✔
810
                        ))),
811
                        length * std::mem::size_of::<f32>(),
11✔
812
                    ),
813
                    _ => {
×
814
                        return Err(Error::BamAuxUnknownType);
×
815
                    }
816
                };
817
                (
818
                    array_data,
104✔
819
                    // Offset: array-specific metadata + array size
820
                    ARRAY_INNER_TYPE_LEN as usize + ARRAY_COUNT_LEN as usize + array_size,
55✔
821
                )
822
            }
823
            _ => {
×
824
                return Err(Error::BamAuxUnknownType);
×
825
            }
826
        };
827

828
        // Offset: metadata + type size
829
        Ok((data, TAG_LEN as usize + TYPE_ID_LEN as usize + type_size))
60,280✔
830
    }
831

832
    /// Returns an iterator over the auxiliary fields of the record.
833
    ///
834
    /// When an error occurs, the `Err` variant will be returned
835
    /// and the iterator will not be able to advance anymore.
836
    pub fn aux_iter(&'_ self) -> AuxIter<'_> {
3✔
837
        AuxIter {
838
            // In order to get to the aux data section of a `bam::Record`
839
            // we need to skip fields in front of it
840
            aux: &self.data()[
6✔
841
                // NUL terminated read name:
842
                self.qname_capacity()
843
                // CIGAR (uint32_t):
844
                + self.cigar_len() * std::mem::size_of::<u32>()
845
                // Read sequence (4-bit encoded):
846
                + self.seq_len().div_ceil(2)
847
                // Base qualities (char):
848
                + self.seq_len()..],
849
        }
850
    }
851

852
    /// Add auxiliary data.
853
    pub fn push_aux(&mut self, tag: &[u8], value: Aux<'_>) -> Result<()> {
20,065✔
854
        // Don't allow pushing aux data when the given tag is already present in the record.
855
        // `htslib` seems to allow this (for non-array values), which can lead to problems
856
        // since retrieving aux fields consumes &[u8; 2] and yields one field only.
857
        if self.aux(tag).is_ok() {
60,190✔
858
            return Err(Error::BamAuxTagAlreadyPresent);
8✔
859
        }
860

861
        let ctag = tag.as_ptr() as *mut c_char;
40,113✔
862
        let ret = unsafe {
863
            match value {
20,058✔
864
                Aux::Char(v) => htslib::bam_aux_append(
UNCOV
865
                    self.inner_ptr_mut(),
×
UNCOV
866
                    ctag,
UNCOV
867
                    b'A' as c_char,
UNCOV
868
                    size_of::<u8>() as i32,
×
UNCOV
869
                    [v].as_mut_ptr(),
×
870
                ),
871
                Aux::I8(v) => htslib::bam_aux_append(
872
                    self.inner_ptr_mut(),
7✔
873
                    ctag,
3✔
874
                    b'c' as c_char,
3✔
875
                    size_of::<i8>() as i32,
4✔
876
                    [v].as_mut_ptr() as *mut u8,
4✔
877
                ),
878
                Aux::U8(v) => htslib::bam_aux_append(
879
                    self.inner_ptr_mut(),
5✔
880
                    ctag,
2✔
881
                    b'C' as c_char,
2✔
882
                    size_of::<u8>() as i32,
3✔
883
                    [v].as_mut_ptr(),
3✔
884
                ),
885
                Aux::I16(v) => htslib::bam_aux_append(
886
                    self.inner_ptr_mut(),
5✔
887
                    ctag,
2✔
888
                    b's' as c_char,
2✔
889
                    size_of::<i16>() as i32,
3✔
890
                    [v].as_mut_ptr() as *mut u8,
3✔
891
                ),
892
                Aux::U16(v) => htslib::bam_aux_append(
893
                    self.inner_ptr_mut(),
5✔
894
                    ctag,
2✔
895
                    b'S' as c_char,
2✔
896
                    size_of::<u16>() as i32,
3✔
897
                    [v].as_mut_ptr() as *mut u8,
3✔
898
                ),
899
                Aux::I32(v) => htslib::bam_aux_append(
900
                    self.inner_ptr_mut(),
40,048✔
901
                    ctag,
20,023✔
902
                    b'i' as c_char,
20,023✔
903
                    size_of::<i32>() as i32,
20,025✔
904
                    [v].as_mut_ptr() as *mut u8,
20,025✔
905
                ),
906
                Aux::U32(v) => htslib::bam_aux_append(
907
                    self.inner_ptr_mut(),
5✔
908
                    ctag,
2✔
909
                    b'I' as c_char,
2✔
910
                    size_of::<u32>() as i32,
3✔
911
                    [v].as_mut_ptr() as *mut u8,
3✔
912
                ),
913
                Aux::Float(v) => htslib::bam_aux_append(
914
                    self.inner_ptr_mut(),
3✔
915
                    ctag,
1✔
916
                    b'f' as c_char,
1✔
917
                    size_of::<f32>() as i32,
2✔
918
                    [v].as_mut_ptr() as *mut u8,
2✔
919
                ),
920
                // Not part of specs but implemented in `htslib`:
921
                Aux::Double(v) => htslib::bam_aux_append(
922
                    self.inner_ptr_mut(),
3✔
923
                    ctag,
1✔
924
                    b'd' as c_char,
1✔
925
                    size_of::<f64>() as i32,
2✔
926
                    [v].as_mut_ptr() as *mut u8,
2✔
927
                ),
928
                Aux::String(v) => {
2✔
929
                    let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
5✔
930
                    htslib::bam_aux_append(
931
                        self.inner_ptr_mut(),
3✔
932
                        ctag,
1✔
933
                        b'Z' as c_char,
1✔
934
                        (v.len() + 1) as i32,
2✔
935
                        c_str.as_ptr() as *mut u8,
3✔
936
                    )
937
                }
938
                Aux::HexByteArray(v) => {
×
939
                    let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
×
940
                    htslib::bam_aux_append(
UNCOV
941
                        self.inner_ptr_mut(),
×
UNCOV
942
                        ctag,
UNCOV
943
                        b'H' as c_char,
UNCOV
944
                        (v.len() + 1) as i32,
×
UNCOV
945
                        c_str.as_ptr() as *mut u8,
×
946
                    )
947
                }
948
                // Not sure it's safe to cast an immutable slice to a mutable pointer in the following branches
949
                Aux::ArrayI8(aux_array) => match aux_array {
9✔
950
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
951
                        self.inner_ptr_mut(),
7✔
952
                        ctag,
3✔
953
                        b'c',
954
                        inner.len() as u32,
4✔
955
                        inner.slice.as_ptr() as *mut ::libc::c_void,
4✔
956
                    ),
957
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
958
                        self.inner_ptr_mut(),
3✔
959
                        ctag,
1✔
960
                        b'c',
961
                        inner.len() as u32,
2✔
962
                        inner.slice.as_ptr() as *mut ::libc::c_void,
2✔
963
                    ),
964
                },
965
                Aux::ArrayU8(aux_array) => match aux_array {
5✔
966
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
967
                        self.inner_ptr_mut(),
3✔
968
                        ctag,
1✔
969
                        b'C',
970
                        inner.len() as u32,
2✔
971
                        inner.slice.as_ptr() as *mut ::libc::c_void,
2✔
972
                    ),
973
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
974
                        self.inner_ptr_mut(),
3✔
975
                        ctag,
1✔
976
                        b'C',
977
                        inner.len() as u32,
2✔
978
                        inner.slice.as_ptr() as *mut ::libc::c_void,
2✔
979
                    ),
980
                },
981
                Aux::ArrayI16(aux_array) => match aux_array {
5✔
982
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
983
                        self.inner_ptr_mut(),
3✔
984
                        ctag,
1✔
985
                        b's',
986
                        inner.len() as u32,
2✔
987
                        inner.slice.as_ptr() as *mut ::libc::c_void,
2✔
988
                    ),
989
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
990
                        self.inner_ptr_mut(),
3✔
991
                        ctag,
1✔
992
                        b's',
993
                        inner.len() as u32,
2✔
994
                        inner.slice.as_ptr() as *mut ::libc::c_void,
2✔
995
                    ),
996
                },
997
                Aux::ArrayU16(aux_array) => match aux_array {
5✔
998
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
999
                        self.inner_ptr_mut(),
3✔
1000
                        ctag,
1✔
1001
                        b'S',
1002
                        inner.len() as u32,
2✔
1003
                        inner.slice.as_ptr() as *mut ::libc::c_void,
2✔
1004
                    ),
1005
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1006
                        self.inner_ptr_mut(),
3✔
1007
                        ctag,
1✔
1008
                        b'S',
1009
                        inner.len() as u32,
2✔
1010
                        inner.slice.as_ptr() as *mut ::libc::c_void,
2✔
1011
                    ),
1012
                },
1013
                Aux::ArrayI32(aux_array) => match aux_array {
5✔
1014
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1015
                        self.inner_ptr_mut(),
3✔
1016
                        ctag,
1✔
1017
                        b'i',
1018
                        inner.len() as u32,
2✔
1019
                        inner.slice.as_ptr() as *mut ::libc::c_void,
2✔
1020
                    ),
1021
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1022
                        self.inner_ptr_mut(),
3✔
1023
                        ctag,
1✔
1024
                        b'i',
1025
                        inner.len() as u32,
2✔
1026
                        inner.slice.as_ptr() as *mut ::libc::c_void,
2✔
1027
                    ),
1028
                },
1029
                Aux::ArrayU32(aux_array) => match aux_array {
5✔
1030
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1031
                        self.inner_ptr_mut(),
3✔
1032
                        ctag,
1✔
1033
                        b'I',
1034
                        inner.len() as u32,
2✔
1035
                        inner.slice.as_ptr() as *mut ::libc::c_void,
2✔
1036
                    ),
1037
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1038
                        self.inner_ptr_mut(),
3✔
1039
                        ctag,
1✔
1040
                        b'I',
1041
                        inner.len() as u32,
2✔
1042
                        inner.slice.as_ptr() as *mut ::libc::c_void,
2✔
1043
                    ),
1044
                },
1045
                Aux::ArrayFloat(aux_array) => match aux_array {
11✔
1046
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1047
                        self.inner_ptr_mut(),
9✔
1048
                        ctag,
3✔
1049
                        b'f',
1050
                        inner.len() as u32,
6✔
1051
                        inner.slice.as_ptr() as *mut ::libc::c_void,
6✔
1052
                    ),
1053
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1054
                        self.inner_ptr_mut(),
3✔
1055
                        ctag,
1✔
1056
                        b'f',
1057
                        inner.len() as u32,
2✔
1058
                        inner.slice.as_ptr() as *mut ::libc::c_void,
2✔
1059
                    ),
1060
                },
1061
            }
1062
        };
1063

1064
        if ret < 0 {
20,061✔
1065
            Err(Error::BamAux)
×
1066
        } else {
1067
            Ok(())
20,058✔
1068
        }
1069
    }
1070

1071
    /// Update or add auxiliary data.
1072
    pub fn update_aux(&mut self, tag: &[u8], value: Aux<'_>) -> Result<()> {
×
1073
        // Update existing aux data for the given tag if already present in the record
1074
        // without changing the ordering of tags in the record or append aux data at
1075
        // the end of the existing aux records if it is a new tag.
1076

1077
        let ctag = tag.as_ptr() as *mut c_char;
×
1078
        let ret = unsafe {
1079
            match value {
×
1080
                Aux::Char(_v) => return Err(Error::BamAuxTagUpdatingNotSupported),
×
1081
                Aux::I8(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
×
1082
                Aux::U8(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
×
1083
                Aux::I16(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
×
1084
                Aux::U16(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
×
1085
                Aux::I32(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
×
1086
                Aux::U32(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
×
1087
                Aux::Float(v) => htslib::bam_aux_update_float(self.inner_ptr_mut(), ctag, v),
×
1088
                // Not part of specs but implemented in `htslib`:
1089
                Aux::Double(v) => {
×
1090
                    htslib::bam_aux_update_float(self.inner_ptr_mut(), ctag, v as f32)
×
1091
                }
1092
                Aux::String(v) => {
×
1093
                    let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
×
1094
                    htslib::bam_aux_update_str(
1095
                        self.inner_ptr_mut(),
×
1096
                        ctag,
1097
                        (v.len() + 1) as i32,
×
1098
                        c_str.as_ptr() as *const c_char,
×
1099
                    )
1100
                }
1101
                Aux::HexByteArray(_v) => return Err(Error::BamAuxTagUpdatingNotSupported),
×
1102
                // Not sure it's safe to cast an immutable slice to a mutable pointer in the following branches
1103
                Aux::ArrayI8(aux_array) => match aux_array {
×
1104
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1105
                        self.inner_ptr_mut(),
×
1106
                        ctag,
1107
                        b'c',
1108
                        inner.len() as u32,
×
1109
                        inner.slice.as_ptr() as *mut ::libc::c_void,
×
1110
                    ),
1111
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1112
                        self.inner_ptr_mut(),
×
1113
                        ctag,
1114
                        b'c',
1115
                        inner.len() as u32,
×
1116
                        inner.slice.as_ptr() as *mut ::libc::c_void,
×
1117
                    ),
1118
                },
1119
                Aux::ArrayU8(aux_array) => match aux_array {
×
1120
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1121
                        self.inner_ptr_mut(),
×
1122
                        ctag,
1123
                        b'C',
1124
                        inner.len() as u32,
×
1125
                        inner.slice.as_ptr() as *mut ::libc::c_void,
×
1126
                    ),
1127
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1128
                        self.inner_ptr_mut(),
×
1129
                        ctag,
1130
                        b'C',
1131
                        inner.len() as u32,
×
1132
                        inner.slice.as_ptr() as *mut ::libc::c_void,
×
1133
                    ),
1134
                },
1135
                Aux::ArrayI16(aux_array) => match aux_array {
×
1136
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1137
                        self.inner_ptr_mut(),
×
1138
                        ctag,
1139
                        b's',
1140
                        inner.len() as u32,
×
1141
                        inner.slice.as_ptr() as *mut ::libc::c_void,
×
1142
                    ),
1143
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1144
                        self.inner_ptr_mut(),
×
1145
                        ctag,
1146
                        b's',
1147
                        inner.len() as u32,
×
1148
                        inner.slice.as_ptr() as *mut ::libc::c_void,
×
1149
                    ),
1150
                },
1151
                Aux::ArrayU16(aux_array) => match aux_array {
×
1152
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1153
                        self.inner_ptr_mut(),
×
1154
                        ctag,
1155
                        b'S',
1156
                        inner.len() as u32,
×
1157
                        inner.slice.as_ptr() as *mut ::libc::c_void,
×
1158
                    ),
1159
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1160
                        self.inner_ptr_mut(),
×
1161
                        ctag,
1162
                        b'S',
1163
                        inner.len() as u32,
×
1164
                        inner.slice.as_ptr() as *mut ::libc::c_void,
×
1165
                    ),
1166
                },
1167
                Aux::ArrayI32(aux_array) => match aux_array {
×
1168
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1169
                        self.inner_ptr_mut(),
×
1170
                        ctag,
1171
                        b'i',
1172
                        inner.len() as u32,
×
1173
                        inner.slice.as_ptr() as *mut ::libc::c_void,
×
1174
                    ),
1175
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1176
                        self.inner_ptr_mut(),
×
1177
                        ctag,
1178
                        b'i',
1179
                        inner.len() as u32,
×
1180
                        inner.slice.as_ptr() as *mut ::libc::c_void,
×
1181
                    ),
1182
                },
1183
                Aux::ArrayU32(aux_array) => match aux_array {
×
1184
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1185
                        self.inner_ptr_mut(),
×
1186
                        ctag,
1187
                        b'I',
1188
                        inner.len() as u32,
×
1189
                        inner.slice.as_ptr() as *mut ::libc::c_void,
×
1190
                    ),
1191
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1192
                        self.inner_ptr_mut(),
×
1193
                        ctag,
1194
                        b'I',
1195
                        inner.len() as u32,
×
1196
                        inner.slice.as_ptr() as *mut ::libc::c_void,
×
1197
                    ),
1198
                },
1199
                Aux::ArrayFloat(aux_array) => match aux_array {
×
1200
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1201
                        self.inner_ptr_mut(),
×
1202
                        ctag,
1203
                        b'f',
1204
                        inner.len() as u32,
×
1205
                        inner.slice.as_ptr() as *mut ::libc::c_void,
×
1206
                    ),
1207
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1208
                        self.inner_ptr_mut(),
×
1209
                        ctag,
1210
                        b'f',
1211
                        inner.len() as u32,
×
1212
                        inner.slice.as_ptr() as *mut ::libc::c_void,
×
1213
                    ),
1214
                },
1215
            }
1216
        };
1217

1218
        if ret < 0 {
×
1219
            Err(Error::BamAux)
×
1220
        } else {
1221
            Ok(())
×
1222
        }
1223
    }
1224

1225
    // Delete auxiliary tag.
1226
    pub fn remove_aux(&mut self, tag: &[u8]) -> Result<()> {
24✔
1227
        let c_str = ffi::CString::new(tag).map_err(|_| Error::BamAuxStringError)?;
93✔
1228
        let aux = unsafe {
1229
            htslib::bam_aux_get(
1230
                &self.inner as *const htslib::bam1_t,
24✔
1231
                c_str.as_ptr() as *mut c_char,
25✔
1232
            )
1233
        };
1234
        unsafe {
1235
            if aux.is_null() {
48✔
1236
                Err(Error::BamAuxTagNotFound)
7✔
1237
            } else {
1238
                htslib::bam_aux_del(self.inner_ptr_mut(), aux);
70✔
1239
                Ok(())
18✔
1240
            }
1241
        }
1242
    }
1243

1244
    /// Access the base modifications associated with this Record through the MM tag.
1245
    /// Example:
1246
    /// ```
1247
    ///    use rust_htslib::bam::{Read, Reader, Record};
1248
    ///    let mut bam = Reader::from_path("test/base_mods/MM-orient.sam").unwrap();
1249
    ///    let mut mod_count = 0;
1250
    ///    for r in bam.records() {
1251
    ///        let record = r.unwrap();
1252
    ///        if let Ok(mods) = record.basemods_iter() {
1253
    ///            // print metadata for the modifications present in this record
1254
    ///            for mod_code in mods.recorded() {
1255
    ///                if let Ok(mod_metadata) = mods.query_type(*mod_code) {
1256
    ///                    println!("mod found with code {}/{} flags: [{} {} {}]",
1257
    ///                              mod_code, *mod_code as u8 as char,
1258
    ///                              mod_metadata.strand, mod_metadata.implicit, mod_metadata.canonical as u8 as char);
1259
    ///                }
1260
    ///            }
1261
    ///
1262
    ///            // iterate over the modifications in this record
1263
    ///            // the modifications are returned as a tuple with the
1264
    ///            // position within SEQ and an hts_base_mod struct
1265
    ///            for res in mods {
1266
    ///                if let Ok( (position, m) ) = res {
1267
    ///                    println!("{} {},{}", position, m.modified_base as u8 as char, m.qual);
1268
    ///                    mod_count += 1;
1269
    ///                }
1270
    ///            }
1271
    ///        };
1272
    ///    }
1273
    ///    assert_eq!(mod_count, 14);
1274
    /// ```
1275
    pub fn basemods_iter(&'_ self) -> Result<BaseModificationsIter<'_>> {
15✔
1276
        BaseModificationsIter::new(self)
22✔
1277
    }
1278

1279
    /// An iterator that returns all of the modifications for each position as a vector.
1280
    /// This is useful for the case where multiple possible modifications can be annotated
1281
    /// at a single position (for example a C could be 5-mC or 5-hmC)
1282
    pub fn basemods_position_iter(&'_ self) -> Result<BaseModificationsPositionIter<'_>> {
2✔
1283
        BaseModificationsPositionIter::new(self)
3✔
1284
    }
1285

1286
    /// Infer read pair orientation from record. Returns `SequenceReadPairOrientation::None` if record
1287
    /// is not paired, mates are not mapping to the same contig, or mates start at the
1288
    /// same position.
1289
    pub fn read_pair_orientation(&self) -> SequenceReadPairOrientation {
12✔
1290
        if self.is_paired()
23✔
1291
            && !self.is_unmapped()
12✔
1292
            && !self.is_mate_unmapped()
12✔
1293
            && self.tid() == self.mtid()
34✔
1294
        {
1295
            if self.pos() == self.mpos() {
34✔
1296
                // both reads start at the same position, we cannot decide on the orientation.
1297
                return SequenceReadPairOrientation::None;
×
1298
            }
1299

1300
            let (pos_1, pos_2, fwd_1, fwd_2) = if self.is_first_in_template() {
69✔
1301
                (
1302
                    self.pos(),
19✔
1303
                    self.mpos(),
19✔
1304
                    !self.is_reverse(),
13✔
1305
                    !self.is_mate_reverse(),
7✔
1306
                )
1307
            } else {
1308
                (
1309
                    self.mpos(),
16✔
1310
                    self.pos(),
16✔
1311
                    !self.is_mate_reverse(),
11✔
1312
                    !self.is_reverse(),
6✔
1313
                )
1314
            };
1315

1316
            if pos_1 < pos_2 {
12✔
1317
                match (fwd_1, fwd_2) {
7✔
1318
                    (true, true) => SequenceReadPairOrientation::F1F2,
×
1319
                    (true, false) => SequenceReadPairOrientation::F1R2,
7✔
1320
                    (false, true) => SequenceReadPairOrientation::R1F2,
×
1321
                    (false, false) => SequenceReadPairOrientation::R1R2,
×
1322
                }
1323
            } else {
1324
                match (fwd_2, fwd_1) {
6✔
1325
                    (true, true) => SequenceReadPairOrientation::F2F1,
×
1326
                    (true, false) => SequenceReadPairOrientation::F2R1,
6✔
1327
                    (false, true) => SequenceReadPairOrientation::R2F1,
×
1328
                    (false, false) => SequenceReadPairOrientation::R2R1,
×
1329
                }
1330
            }
1331
        } else {
1332
            SequenceReadPairOrientation::None
×
1333
        }
1334
    }
1335

1336
    flag!(is_paired, set_paired, unset_paired, 1u16);
1337
    flag!(is_proper_pair, set_proper_pair, unset_proper_pair, 2u16);
1338
    flag!(is_unmapped, set_unmapped, unset_unmapped, 4u16);
1339
    flag!(
1340
        is_mate_unmapped,
1341
        set_mate_unmapped,
1342
        unset_mate_unmapped,
1343
        8u16
1344
    );
1345
    flag!(is_reverse, set_reverse, unset_reverse, 16u16);
1346
    flag!(is_mate_reverse, set_mate_reverse, unset_mate_reverse, 32u16);
1347
    flag!(
1348
        is_first_in_template,
1349
        set_first_in_template,
1350
        unset_first_in_template,
1351
        64u16
1352
    );
1353
    flag!(
1354
        is_last_in_template,
1355
        set_last_in_template,
1356
        unset_last_in_template,
1357
        128u16
1358
    );
1359
    flag!(is_secondary, set_secondary, unset_secondary, 256u16);
1360
    flag!(
1361
        is_quality_check_failed,
1362
        set_quality_check_failed,
1363
        unset_quality_check_failed,
1364
        512u16
1365
    );
1366
    flag!(is_duplicate, set_duplicate, unset_duplicate, 1024u16);
1367
    flag!(
1368
        is_supplementary,
1369
        set_supplementary,
1370
        unset_supplementary,
1371
        2048u16
1372
    );
1373
}
1374

1375
impl Drop for Record {
1376
    fn drop(&mut self) {
51,963✔
1377
        if self.own {
51,963✔
1378
            unsafe { ::libc::free(self.inner.data as *mut ::libc::c_void) }
102,091✔
1379
        }
1380
    }
1381
}
1382

1383
impl SequenceRead for Record {
1384
    fn name(&self) -> &[u8] {
×
1385
        self.qname()
×
1386
    }
1387

1388
    fn base(&self, i: usize) -> u8 {
×
1389
        *decode_base_unchecked(encoded_base(self.seq_data(), i))
×
1390
    }
1391

1392
    fn base_qual(&self, i: usize) -> u8 {
×
1393
        self.qual()[i]
×
1394
    }
1395

1396
    fn len(&self) -> usize {
×
1397
        self.seq_len()
×
1398
    }
1399

1400
    fn is_empty(&self) -> bool {
×
1401
        self.len() == 0
×
1402
    }
1403
}
1404

1405
impl genome::AbstractInterval for Record {
1406
    /// Return contig name. Panics if record does not know its header (which happens if it has not been read from a file).
1407
    fn contig(&self) -> &str {
×
1408
        let tid = self.tid();
×
1409
        if tid < 0 {
×
1410
            panic!("invalid tid, must be at least zero");
×
1411
        }
1412
        str::from_utf8(
1413
            self.header
×
1414
                .as_ref()
×
1415
                .expect(
×
1416
                    "header must be set (this is the case if the record has been read from a file)",
1417
                )
1418
                .tid2name(tid as u32),
×
1419
        )
1420
        .expect("unable to interpret contig name as UTF-8")
1421
    }
1422

1423
    /// Return genomic range covered by alignment. Panics if `Record::cache_cigar()` has not been called first or `Record::pos()` is less than zero.
1424
    fn range(&self) -> ops::Range<genome::Position> {
×
1425
        let end_pos = self
×
1426
            .cigar_cached()
×
1427
            .expect("cigar has not been cached yet, call cache_cigar() first")
×
1428
            .end_pos() as u64;
×
1429

1430
        if self.pos() < 0 {
×
1431
            panic!("invalid position, must be positive")
×
1432
        }
1433

1434
        self.pos() as u64..end_pos
×
1435
    }
1436
}
1437

1438
/// Auxiliary record data
1439
///
1440
/// The specification allows a wide range of types to be stored as an auxiliary data field of a BAM record.
1441
///
1442
/// Please note that the [`Aux::Double`] variant is _not_ part of the specification, but it is supported by `htslib`.
1443
///
1444
/// # Examples
1445
///
1446
/// ```
1447
/// use rust_htslib::{
1448
///     bam,
1449
///     bam::record::{Aux, AuxArray},
1450
///     errors::Error,
1451
/// };
1452
///
1453
/// //Set up BAM record
1454
/// let bam_header = bam::Header::new();
1455
/// let mut record = bam::Record::from_sam(
1456
///     &mut bam::HeaderView::from_header(&bam_header),
1457
///     "ali1\t4\t*\t0\t0\t*\t*\t0\t0\tACGT\tFFFF".as_bytes(),
1458
/// )
1459
/// .unwrap();
1460
///
1461
/// // Add an integer field
1462
/// let aux_integer_field = Aux::I32(1234);
1463
/// record.push_aux(b"XI", aux_integer_field).unwrap();
1464
///
1465
/// match record.aux(b"XI") {
1466
///     Ok(value) => {
1467
///         // Typically, callers expect an aux field to be of a certain type.
1468
///         // If that's not the case, the value can be `match`ed exhaustively.
1469
///         if let Aux::I32(v) = value {
1470
///             assert_eq!(v, 1234);
1471
///         }
1472
///     }
1473
///     Err(e) => {
1474
///         panic!("Error reading aux field: {}", e);
1475
///     }
1476
/// }
1477
///
1478
/// // Add an array field
1479
/// let array_like_data = vec![0.4, 0.3, 0.2, 0.1];
1480
/// let slice_of_data = &array_like_data;
1481
/// let aux_array: AuxArray<f32> = slice_of_data.into();
1482
/// let aux_array_field = Aux::ArrayFloat(aux_array);
1483
/// record.push_aux(b"XA", aux_array_field).unwrap();
1484
///
1485
/// if let Ok(Aux::ArrayFloat(array)) = record.aux(b"XA") {
1486
///     let read_array = array.iter().collect::<Vec<_>>();
1487
///     assert_eq!(read_array, array_like_data);
1488
/// } else {
1489
///     panic!("Could not read array data");
1490
/// }
1491
/// ```
1492
#[derive(Debug, PartialEq)]
1493
pub enum Aux<'a> {
1494
    Char(u8),
1495
    I8(i8),
1496
    U8(u8),
1497
    I16(i16),
1498
    U16(u16),
1499
    I32(i32),
1500
    U32(u32),
1501
    Float(f32),
1502
    Double(f64), // Not part of specs but implemented in `htslib`
1503
    String(&'a str),
1504
    HexByteArray(&'a str),
1505
    ArrayI8(AuxArray<'a, i8>),
1506
    ArrayU8(AuxArray<'a, u8>),
1507
    ArrayI16(AuxArray<'a, i16>),
1508
    ArrayU16(AuxArray<'a, u16>),
1509
    ArrayI32(AuxArray<'a, i32>),
1510
    ArrayU32(AuxArray<'a, u32>),
1511
    ArrayFloat(AuxArray<'a, f32>),
1512
}
1513

1514
unsafe impl Send for Aux<'_> {}
1515
unsafe impl Sync for Aux<'_> {}
1516

1517
/// Types that can be used in aux arrays.
1518
pub trait AuxArrayElement: Copy {
1519
    fn from_le_bytes(bytes: &[u8]) -> Option<Self>;
1520
}
1521

1522
impl AuxArrayElement for i8 {
1523
    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
47✔
1524
        std::io::Cursor::new(bytes).read_i8().ok()
139✔
1525
    }
1526
}
1527
impl AuxArrayElement for u8 {
1528
    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
17✔
1529
        std::io::Cursor::new(bytes).read_u8().ok()
49✔
1530
    }
1531
}
1532
impl AuxArrayElement for i16 {
1533
    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
21✔
1534
        std::io::Cursor::new(bytes).read_i16::<LittleEndian>().ok()
61✔
1535
    }
1536
}
1537
impl AuxArrayElement for u16 {
1538
    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
17✔
1539
        std::io::Cursor::new(bytes).read_u16::<LittleEndian>().ok()
49✔
1540
    }
1541
}
1542
impl AuxArrayElement for i32 {
1543
    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
21✔
1544
        std::io::Cursor::new(bytes).read_i32::<LittleEndian>().ok()
61✔
1545
    }
1546
}
1547
impl AuxArrayElement for u32 {
1548
    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
17✔
1549
        std::io::Cursor::new(bytes).read_u32::<LittleEndian>().ok()
49✔
1550
    }
1551
}
1552
impl AuxArrayElement for f32 {
1553
    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
36✔
1554
        std::io::Cursor::new(bytes).read_f32::<LittleEndian>().ok()
102✔
1555
    }
1556
}
1557

1558
/// Provides access to aux arrays.
1559
///
1560
/// Provides methods to either retrieve single elements or an iterator over the
1561
/// array.
1562
///
1563
/// This type is used for wrapping both, array data that was read from a
1564
/// BAM record and slices of data that are going to be stored in one.
1565
///
1566
/// In order to be able to add an `AuxArray` field to a BAM record, `AuxArray`s
1567
/// can be constructed via the `From` trait which is implemented for all
1568
/// supported types (see [`AuxArrayElement`] for a list).
1569
///
1570
/// # Examples
1571
///
1572
/// ```
1573
/// use rust_htslib::{
1574
///     bam,
1575
///     bam::record::{Aux, AuxArray},
1576
/// };
1577
///
1578
/// //Set up BAM record
1579
/// let bam_header = bam::Header::new();
1580
/// let mut record = bam::Record::from_sam(
1581
///     &mut bam::HeaderView::from_header(&bam_header),
1582
///     "ali1\t4\t*\t0\t0\t*\t*\t0\t0\tACGT\tFFFF".as_bytes(),
1583
/// ).unwrap();
1584
///
1585
/// let data = vec![0.4, 0.3, 0.2, 0.1];
1586
/// let slice_of_data = &data;
1587
/// let aux_array: AuxArray<f32> = slice_of_data.into();
1588
/// let aux_field = Aux::ArrayFloat(aux_array);
1589
/// record.push_aux(b"XA", aux_field);
1590
///
1591
/// if let Ok(Aux::ArrayFloat(array)) = record.aux(b"XA") {
1592
///     // Retrieve the second element from the array
1593
///     assert_eq!(array.get(1).unwrap(), 0.3);
1594
///     // Iterate over the array and collect it into a `Vec`
1595
///     let read_array = array.iter().collect::<Vec<_>>();
1596
///     assert_eq!(read_array, data);
1597
/// } else {
1598
///     panic!("Could not read array data");
1599
/// }
1600
/// ```
1601
#[derive(Debug)]
1602
pub enum AuxArray<'a, T> {
1603
    TargetType(AuxArrayTargetType<'a, T>),
1604
    RawLeBytes(AuxArrayRawLeBytes<'a, T>),
1605
}
1606

1607
impl<T> PartialEq<AuxArray<'_, T>> for AuxArray<'_, T>
1608
where
1609
    T: AuxArrayElement + PartialEq,
1610
{
1611
    fn eq(&self, other: &AuxArray<'_, T>) -> bool {
22✔
1612
        use AuxArray::*;
1613
        match (self, other) {
37✔
1614
            (TargetType(v), TargetType(v_other)) => v == v_other,
7✔
1615
            (RawLeBytes(v), RawLeBytes(v_other)) => v == v_other,
7✔
1616
            (TargetType(_), RawLeBytes(_)) => self.iter().eq(other.iter()),
×
1617
            (RawLeBytes(_), TargetType(_)) => self.iter().eq(other.iter()),
62✔
1618
        }
1619
    }
1620
}
1621

1622
/// Create AuxArrays from slices of allowed target types.
1623
impl<'a, I, T> From<&'a T> for AuxArray<'a, I>
1624
where
1625
    I: AuxArrayElement,
1626
    T: AsRef<[I]> + ?Sized,
1627
{
1628
    fn from(src: &'a T) -> Self {
31✔
1629
        AuxArray::TargetType(AuxArrayTargetType {
31✔
1630
            slice: src.as_ref(),
31✔
1631
        })
1632
    }
1633
}
1634

1635
impl<'a, T> AuxArray<'a, T>
1636
where
1637
    T: AuxArrayElement,
1638
{
1639
    /// Returns the element at a position or None if out of bounds.
1640
    pub fn get(&self, index: usize) -> Option<T> {
278✔
1641
        match self {
278✔
1642
            AuxArray::TargetType(v) => v.get(index),
283✔
1643
            AuxArray::RawLeBytes(v) => v.get(index),
809✔
1644
        }
1645
    }
1646

1647
    /// Returns the number of elements in the array.
1648
    pub fn len(&self) -> usize {
32✔
1649
        match self {
32✔
1650
            AuxArray::TargetType(a) => a.len(),
×
1651
            AuxArray::RawLeBytes(a) => a.len(),
78✔
1652
        }
1653
    }
1654

1655
    /// Returns true if the array contains no elements.
1656
    pub fn is_empty(&self) -> bool {
×
1657
        self.len() == 0
×
1658
    }
1659

1660
    /// Returns an iterator over the array.
1661
    pub fn iter(&'_ self) -> AuxArrayIter<'_, T> {
54✔
1662
        AuxArrayIter {
1663
            index: 0,
1664
            array: self,
1665
        }
1666
    }
1667

1668
    /// Create AuxArrays from raw byte slices borrowed from `bam::Record`.
1669
    fn from_bytes(bytes: &'a [u8]) -> Self {
61✔
1670
        Self::RawLeBytes(AuxArrayRawLeBytes {
61✔
1671
            slice: bytes,
52✔
1672
            phantom_data: PhantomData,
52✔
1673
        })
1674
    }
1675
}
1676

1677
/// Encapsulates slice of target type.
1678
#[doc(hidden)]
1679
#[derive(Debug, PartialEq)]
1680
pub struct AuxArrayTargetType<'a, T> {
1681
    slice: &'a [T],
1682
}
1683

1684
impl<T> AuxArrayTargetType<'_, T>
1685
where
1686
    T: AuxArrayElement,
1687
{
1688
    fn get(&self, index: usize) -> Option<T> {
76✔
1689
        self.slice.get(index).copied()
283✔
1690
    }
1691

1692
    fn len(&self) -> usize {
20✔
1693
        self.slice.len()
31✔
1694
    }
1695
}
1696

1697
/// Encapsulates slice of raw bytes to prevent it from being accidentally accessed.
1698
#[doc(hidden)]
1699
#[derive(Debug, PartialEq)]
1700
pub struct AuxArrayRawLeBytes<'a, T> {
1701
    slice: &'a [u8],
1702
    phantom_data: PhantomData<T>,
1703
}
1704

1705
impl<T> AuxArrayRawLeBytes<'_, T>
1706
where
1707
    T: AuxArrayElement,
1708
{
1709
    fn get(&self, index: usize) -> Option<T> {
209✔
1710
        let type_size = std::mem::size_of::<T>();
409✔
1711
        if index * type_size + type_size > self.slice.len() {
409✔
1712
            return None;
42✔
1713
        }
1714
        T::from_le_bytes(&self.slice[index * type_size..][..type_size])
519✔
1715
    }
1716

1717
    fn len(&self) -> usize {
39✔
1718
        self.slice.len() / std::mem::size_of::<T>()
69✔
1719
    }
1720
}
1721

1722
/// Aux array iterator
1723
///
1724
/// This struct is created by the [`AuxArray::iter`] method.
1725
pub struct AuxArrayIter<'a, T> {
1726
    index: usize,
1727
    array: &'a AuxArray<'a, T>,
1728
}
1729

1730
impl<T> Iterator for AuxArrayIter<'_, T>
1731
where
1732
    T: AuxArrayElement,
1733
{
1734
    type Item = T;
1735
    fn next(&mut self) -> Option<Self::Item> {
277✔
1736
        let value = self.array.get(self.index);
1,081✔
1737
        self.index += 1;
277✔
1738
        value
268✔
1739
    }
1740

1741
    fn size_hint(&self) -> (usize, Option<usize>) {
32✔
1742
        let array_length = self.array.len() - self.index;
78✔
1743
        (array_length, Some(array_length))
32✔
1744
    }
1745
}
1746

1747
/// Auxiliary data iterator
1748
///
1749
/// This struct is created by the [`Record::aux_iter`] method.
1750
///
1751
/// This iterator returns `Result`s that wrap tuples containing
1752
/// a slice which represents the two-byte tag (`&[u8; 2]`) as
1753
/// well as an `Aux` enum that wraps the associated value.
1754
///
1755
/// When an error occurs, the `Err` variant will be returned
1756
/// and the iterator will not be able to advance anymore.
1757
pub struct AuxIter<'a> {
1758
    aux: &'a [u8],
1759
}
1760

1761
impl<'a> Iterator for AuxIter<'a> {
1762
    type Item = Result<(&'a [u8], Aux<'a>)>;
1763

1764
    fn next(&mut self) -> Option<Self::Item> {
26✔
1765
        // We're finished
1766
        if self.aux.is_empty() {
51✔
1767
            return None;
3✔
1768
        }
1769
        // Incomplete aux data
1770
        if (1..=3).contains(&self.aux.len()) {
70✔
1771
            // In the case of an error, we can not safely advance in the aux data, so we terminate the Iteration
1772
            self.aux = &[];
×
1773
            return Some(Err(Error::BamAuxParsingError));
×
1774
        }
1775
        let tag = &self.aux[..2];
47✔
1776
        Some(unsafe {
1✔
1777
            let data_ptr = self.aux[2..].as_ptr();
70✔
1778
            Record::read_aux_field(data_ptr)
47✔
1779
                .map(|(aux, offset)| {
49✔
1780
                    self.aux = &self.aux[offset..];
24✔
1781
                    (tag, aux)
24✔
1782
                })
1783
                .inspect_err(|_e| {
24✔
1784
                    // In the case of an error, we can not safely advance in the aux data, so we terminate the Iteration
1785
                    self.aux = &[];
×
1786
                })
1787
        })
1788
    }
1789
}
1790

1791
static DECODE_BASE: &[u8] = b"=ACMGRSVTWYHKDBN";
1792
static ENCODE_BASE: [u8; 256] = [
1793
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1794
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1795
    1, 2, 4, 8, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 15, 15, 15, 1, 14, 2, 13, 15, 15, 4, 11, 15,
1796
    15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15, 15, 15, 15, 15, 1, 14, 2,
1797
    13, 15, 15, 4, 11, 15, 15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15,
1798
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1799
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1800
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1801
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1802
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1803
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1804
];
1805

1806
#[inline]
1807
fn encoded_base(encoded_seq: &[u8], i: usize) -> u8 {
3,009,968✔
1808
    (encoded_seq[i / 2] >> ((!i & 1) << 2)) & 0b1111
3,009,968✔
1809
}
1810

1811
#[inline]
1812
unsafe fn encoded_base_unchecked(encoded_seq: &[u8], i: usize) -> u8 {
×
1813
    (encoded_seq.get_unchecked(i / 2) >> ((!i & 1) << 2)) & 0b1111
×
1814
}
1815

1816
#[inline]
1817
fn decode_base_unchecked(base: u8) -> &'static u8 {
3,009,968✔
1818
    unsafe { DECODE_BASE.get_unchecked(base as usize) }
6,019,933✔
1819
}
1820

1821
/// The sequence of a record.
1822
#[derive(Debug, Copy, Clone)]
1823
pub struct Seq<'a> {
1824
    pub encoded: &'a [u8],
1825
    len: usize,
1826
}
1827

1828
impl Seq<'_> {
1829
    /// Return encoded base. Complexity: O(1).
1830
    #[inline]
1831
    pub fn encoded_base(&self, i: usize) -> u8 {
3,009,968✔
1832
        encoded_base(self.encoded, i)
9,029,898✔
1833
    }
1834

1835
    /// Return encoded base. Complexity: O(1).
1836
    ///
1837
    /// # Safety
1838
    ///
1839
    /// TODO
1840
    #[inline]
1841
    pub unsafe fn encoded_base_unchecked(&self, i: usize) -> u8 {
×
1842
        encoded_base_unchecked(self.encoded, i)
×
1843
    }
1844

1845
    /// Obtain decoded base without performing bounds checking.
1846
    /// Use index based access seq()[i], for checked, safe access.
1847
    /// Complexity: O(1).
1848
    ///
1849
    /// # Safety
1850
    ///
1851
    /// TODO
1852
    #[inline]
1853
    pub unsafe fn decoded_base_unchecked(&self, i: usize) -> u8 {
×
1854
        *decode_base_unchecked(self.encoded_base_unchecked(i))
×
1855
    }
1856

1857
    /// Return decoded sequence. Complexity: O(m) with m being the read length.
1858
    pub fn as_bytes(&self) -> Vec<u8> {
30,097✔
1859
        (0..self.len()).map(|i| self[i]).collect()
3,099,500✔
1860
    }
1861

1862
    /// Return length (in bases) of the sequence.
1863
    pub fn len(&self) -> usize {
30,097✔
1864
        self.len
30,097✔
1865
    }
1866

1867
    pub fn is_empty(&self) -> bool {
×
1868
        self.len() == 0
×
1869
    }
1870
}
1871

1872
impl ops::Index<usize> for Seq<'_> {
1873
    type Output = u8;
1874

1875
    /// Return decoded base at given position within read. Complexity: O(1).
1876
    fn index(&self, index: usize) -> &u8 {
3,009,968✔
1877
        decode_base_unchecked(self.encoded_base(index))
9,029,898✔
1878
    }
1879
}
1880

1881
unsafe impl Send for Seq<'_> {}
1882
unsafe impl Sync for Seq<'_> {}
1883

1884
#[cfg_attr(feature = "serde_feature", derive(Serialize, Deserialize))]
1885
#[derive(PartialEq, PartialOrd, Eq, Debug, Clone, Copy, Hash)]
1886
pub enum Cigar {
1887
    Match(u32),    // M
1888
    Ins(u32),      // I
1889
    Del(u32),      // D
1890
    RefSkip(u32),  // N
1891
    SoftClip(u32), // S
1892
    HardClip(u32), // H
1893
    Pad(u32),      // P
1894
    Equal(u32),    // =
1895
    Diff(u32),     // X
1896
}
1897

1898
impl Cigar {
1899
    fn encode(self) -> u32 {
60,685✔
1900
        match self {
60,685✔
1901
            Cigar::Match(len) => len << 4, // | 0,
80,713✔
1902
            Cigar::Ins(len) => (len << 4) | 1,
603✔
1903
            Cigar::Del(len) => (len << 4) | 2,
40,041✔
1904
            Cigar::RefSkip(len) => (len << 4) | 3,
×
1905
            Cigar::SoftClip(len) => (len << 4) | 4,
7✔
1906
            Cigar::HardClip(len) => (len << 4) | 5,
5✔
1907
            Cigar::Pad(len) => (len << 4) | 6,
×
1908
            Cigar::Equal(len) => (len << 4) | 7,
3✔
1909
            Cigar::Diff(len) => (len << 4) | 8,
3✔
1910
        }
1911
    }
1912

1913
    /// Return the length of the CIGAR.
1914
    pub fn len(self) -> u32 {
17✔
1915
        match self {
17✔
1916
            Cigar::Match(len) => len,
12✔
1917
            Cigar::Ins(len) => len,
3✔
1918
            Cigar::Del(len) => len,
3✔
1919
            Cigar::RefSkip(len) => len,
×
1920
            Cigar::SoftClip(len) => len,
12✔
1921
            Cigar::HardClip(len) => len,
3✔
1922
            Cigar::Pad(len) => len,
×
1923
            Cigar::Equal(len) => len,
3✔
1924
            Cigar::Diff(len) => len,
3✔
1925
        }
1926
    }
1927

1928
    pub fn is_empty(self) -> bool {
×
1929
        self.len() == 0
×
1930
    }
1931

1932
    /// Return the character representing the CIGAR.
1933
    pub fn char(self) -> char {
17✔
1934
        match self {
17✔
1935
            Cigar::Match(_) => 'M',
7✔
1936
            Cigar::Ins(_) => 'I',
2✔
1937
            Cigar::Del(_) => 'D',
2✔
1938
            Cigar::RefSkip(_) => 'N',
×
1939
            Cigar::SoftClip(_) => 'S',
7✔
1940
            Cigar::HardClip(_) => 'H',
2✔
1941
            Cigar::Pad(_) => 'P',
×
1942
            Cigar::Equal(_) => '=',
2✔
1943
            Cigar::Diff(_) => 'X',
2✔
1944
        }
1945
    }
1946
}
1947

1948
impl fmt::Display for Cigar {
1949
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
6✔
1950
        fmt.write_fmt(format_args!("{}{}", self.len(), self.char()))
34✔
1951
    }
1952
}
1953

1954
unsafe impl Send for Cigar {}
1955
unsafe impl Sync for Cigar {}
1956

1957
custom_derive! {
1958
    /// A CIGAR string. This type wraps around a `Vec<Cigar>`.
1959
    ///
1960
    /// # Example
1961
    ///
1962
    /// ```
1963
    /// use rust_htslib::bam::record::{Cigar, CigarString};
1964
    ///
1965
    /// let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]);
1966
    ///
1967
    /// // access by index
1968
    /// assert_eq!(cigar[0], Cigar::Match(100));
1969
    /// // format into classical string representation
1970
    /// assert_eq!(format!("{}", cigar), "100M10S");
1971
    /// // iterate
1972
    /// for op in &cigar {
1973
    ///    println!("{}", op);
1974
    /// }
1975
    /// ```
1976
    #[cfg_attr(feature = "serde_feature", derive(Serialize, Deserialize))]
1977
    #[derive(NewtypeDeref,
1978
            NewtypeDerefMut,
1979
             NewtypeIndex(usize),
1980
             NewtypeIndexMut(usize),
1981
             NewtypeFrom,
1982
             PartialEq,
1983
             PartialOrd,
1984
             Eq,
1985
             NewtypeDebug,
1986
             Clone,
1987
             Hash
1988
    )]
1989
    pub struct CigarString(pub Vec<Cigar>);
3✔
1990
}
1991

1992
impl CigarString {
1993
    /// Create a `CigarStringView` from this CigarString at position `pos`
1994
    pub fn into_view(self, pos: i64) -> CigarStringView {
30,877✔
1995
        CigarStringView::new(self, pos)
92,629✔
1996
    }
1997

1998
    /// Calculate the bam cigar from the alignment struct. x is the target string
1999
    /// and y is the reference. `hard_clip` controls how unaligned read bases are encoded in the
2000
    /// cigar string. Set to true to use the hard clip (`H`) code, or false to use soft clip
2001
    /// (`S`) code. See the [SAM spec](https://samtools.github.io/hts-specs/SAMv1.pdf) for more details.
2002
    pub fn from_alignment(alignment: &Alignment, hard_clip: bool) -> Self {
6✔
2003
        match alignment.mode {
6✔
2004
            AlignmentMode::Global => {
2005
                panic!(" Bam cigar fn not supported for Global Alignment mode")
×
2006
            }
2007
            AlignmentMode::Local => panic!(" Bam cigar fn not supported for Local Alignment mode"),
×
2008
            _ => {}
5✔
2009
        }
2010

2011
        let mut cigar = Vec::new();
11✔
2012
        if alignment.operations.is_empty() {
12✔
2013
            return CigarString(cigar);
×
2014
        }
2015

2016
        let add_op = |op: AlignmentOperation, length: u32, cigar: &mut Vec<Cigar>| match op {
31✔
2017
            AlignmentOperation::Del => cigar.push(Cigar::Del(length)),
10✔
2018
            AlignmentOperation::Ins => cigar.push(Cigar::Ins(length)),
10✔
2019
            AlignmentOperation::Subst => cigar.push(Cigar::Diff(length)),
22✔
2020
            AlignmentOperation::Match => cigar.push(Cigar::Equal(length)),
16✔
2021
            _ => {}
6✔
2022
        };
2023

2024
        if alignment.xstart > 0 {
6✔
2025
            cigar.push(if hard_clip {
6✔
2026
                Cigar::HardClip(alignment.xstart as u32)
×
2027
            } else {
2028
                Cigar::SoftClip(alignment.xstart as u32)
2✔
2029
            });
2030
        }
2031

2032
        let mut last = alignment.operations[0];
12✔
2033
        let mut k = 1u32;
11✔
2034
        for &op in alignment.operations[1..].iter() {
39✔
2035
            if op == last {
38✔
2036
                k += 1;
10✔
2037
            } else {
2038
                add_op(last, k, &mut cigar);
58✔
2039
                k = 1;
20✔
2040
            }
2041
            last = op;
28✔
2042
        }
2043
        add_op(last, k, &mut cigar);
16✔
2044
        if alignment.xlen > alignment.xend {
6✔
2045
            cigar.push(if hard_clip {
14✔
2046
                Cigar::HardClip((alignment.xlen - alignment.xend) as u32)
3✔
2047
            } else {
2048
                Cigar::SoftClip((alignment.xlen - alignment.xend) as u32)
4✔
2049
            });
2050
        }
2051

2052
        CigarString(cigar)
6✔
2053
    }
2054
}
2055

2056
impl TryFrom<&[u8]> for CigarString {
2057
    type Error = Error;
2058

2059
    /// Create a CigarString from given &[u8].
2060
    /// # Example
2061
    /// ```
2062
    /// use rust_htslib::bam::record::*;
2063
    /// use rust_htslib::bam::record::CigarString;
2064
    /// use rust_htslib::bam::record::Cigar::*;
2065
    /// use std::convert::TryFrom;
2066
    ///
2067
    /// let cigar_str = "2H10M5X3=2H".as_bytes();
2068
    /// let cigar = CigarString::try_from(cigar_str)
2069
    ///     .expect("Unable to parse cigar string.");
2070
    /// let expected_cigar = CigarString(vec![
2071
    ///     HardClip(2),
2072
    ///     Match(10),
2073
    ///     Diff(5),
2074
    ///     Equal(3),
2075
    ///     HardClip(2),
2076
    /// ]);
2077
    /// assert_eq!(cigar, expected_cigar);
2078
    /// ```
2079
    fn try_from(bytes: &[u8]) -> Result<Self> {
27✔
2080
        let mut inner = Vec::new();
45✔
2081
        let mut i = 0;
45✔
2082
        let text_len = bytes.len();
66✔
2083
        while i < text_len {
84✔
2084
            let mut j = i;
117✔
2085
            while j < text_len && bytes[j].is_ascii_digit() {
550✔
2086
                j += 1;
97✔
2087
            }
2088
            // check that length is provided
2089
            if i == j {
60✔
2090
                return Err(Error::BamParseCigar {
×
2091
                    msg: "Expected length before cigar operation [0-9]+[MIDNSHP=X]".to_owned(),
×
2092
                });
2093
            }
2094
            // get the length of the operation
2095
            let s = str::from_utf8(&bytes[i..j]).map_err(|_| Error::BamParseCigar {
291✔
2096
                msg: format!("Invalid utf-8 bytes '{:?}'.", &bytes[i..j]),
×
2097
            })?;
2098
            let n = s.parse().map_err(|_| Error::BamParseCigar {
231✔
2099
                msg: format!("Unable to parse &str '{:?}' to u32.", s),
×
2100
            })?;
2101
            // get the operation
2102
            let op = &bytes[j];
117✔
2103
            inner.push(match op {
177✔
2104
                b'M' => Cigar::Match(n),
17✔
2105
                b'I' => Cigar::Ins(n),
4✔
2106
                b'D' => Cigar::Del(n),
4✔
2107
                b'N' => Cigar::RefSkip(n),
2✔
2108
                b'H' => {
2109
                    if i == 0 || j + 1 == text_len {
25✔
2110
                        Cigar::HardClip(n)
16✔
2111
                    } else {
2112
                        return Err(Error::BamParseCigar {
×
2113
                            msg: "Hard clipping ('H') is only valid at the start or end of a cigar."
2114
                                .to_owned(),
×
2115
                        });
2116
                    }
2117
                }
2118
                b'S' => {
2119
                    if i == 0
10✔
2120
                        || j + 1 == text_len
6✔
2121
                        || bytes[i-1] == b'H'
3✔
2122
                        || bytes[j+1..].iter().all(|c| c.is_ascii_digit() || *c == b'H') {
12✔
2123
                        Cigar::SoftClip(n)
10✔
2124
                    } else {
2125
                        return Err(Error::BamParseCigar {
×
2126
                        msg: "Soft clips ('S') can only have hard clips ('H') between them and the end of the CIGAR string."
2127
                            .to_owned(),
×
2128
                        });
2129
                    }
2130
                },
2131
                b'P' => Cigar::Pad(n),
2✔
2132
                b'=' => Cigar::Equal(n),
11✔
2133
                b'X' => Cigar::Diff(n),
8✔
2134
                op => {
×
2135
                    return Err(Error::BamParseCigar {
×
2136
                        msg: format!("Expected cigar operation [MIDNSHP=X] but got [{}]", op),
×
2137
                    })
2138
                }
2139
            });
2140
            i = j + 1;
60✔
2141
        }
2142
        Ok(CigarString(inner))
24✔
2143
    }
2144
}
2145

2146
impl TryFrom<&str> for CigarString {
2147
    type Error = Error;
2148

2149
    /// Create a CigarString from given &str.
2150
    /// # Example
2151
    /// ```
2152
    /// use rust_htslib::bam::record::*;
2153
    /// use rust_htslib::bam::record::CigarString;
2154
    /// use rust_htslib::bam::record::Cigar::*;
2155
    /// use std::convert::TryFrom;
2156
    ///
2157
    /// let cigar_str = "2H10M5X3=2H";
2158
    /// let cigar = CigarString::try_from(cigar_str)
2159
    ///     .expect("Unable to parse cigar string.");
2160
    /// let expected_cigar = CigarString(vec![
2161
    ///     HardClip(2),
2162
    ///     Match(10),
2163
    ///     Diff(5),
2164
    ///     Equal(3),
2165
    ///     HardClip(2),
2166
    /// ]);
2167
    /// assert_eq!(cigar, expected_cigar);
2168
    /// ```
2169
    fn try_from(text: &str) -> Result<Self> {
26✔
2170
        let bytes = text.as_bytes();
65✔
2171
        if text.chars().count() != bytes.len() {
86✔
2172
            return Err(Error::BamParseCigar {
2✔
2173
                msg: "CIGAR string contained non-ASCII characters, which are not valid. Valid are [0-9MIDNSHP=X].".to_owned(),
2✔
2174
            });
2175
        }
2176
        CigarString::try_from(bytes)
42✔
2177
    }
2178
}
2179

2180
impl<'a> CigarString {
2181
    pub fn iter(&'a self) -> ::std::slice::Iter<'a, Cigar> {
20,045✔
2182
        self.into_iter()
40,089✔
2183
    }
2184
}
2185

2186
impl<'a> IntoIterator for &'a CigarString {
2187
    type Item = &'a Cigar;
2188
    type IntoIter = ::std::slice::Iter<'a, Cigar>;
2189

2190
    fn into_iter(self) -> Self::IntoIter {
20,376✔
2191
        self.0.iter()
20,376✔
2192
    }
2193
}
2194

2195
impl fmt::Display for CigarString {
2196
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
5✔
2197
        for op in self {
29✔
2198
            fmt.write_fmt(format_args!("{}{}", op.len(), op.char()))?;
90✔
2199
        }
2200
        Ok(())
5✔
2201
    }
2202
}
2203

2204
// Get number of leading/trailing softclips if a CigarString taking hardclips into account
2205
fn calc_softclips<'a>(mut cigar: impl DoubleEndedIterator<Item = &'a Cigar>) -> i64 {
6✔
2206
    match (cigar.next(), cigar.next()) {
24✔
2207
        (Some(Cigar::HardClip(_)), Some(Cigar::SoftClip(s))) | (Some(Cigar::SoftClip(s)), _) => {
8✔
2208
            *s as i64
6✔
2209
        }
2210
        _ => 0,
×
2211
    }
2212
}
2213

2214
#[derive(Eq, PartialEq, Clone, Debug)]
2215
pub struct CigarStringView {
2216
    inner: CigarString,
2217
    pos: i64,
2218
}
2219

2220
impl CigarStringView {
2221
    /// Construct a new CigarStringView from a CigarString at a position
2222
    pub fn new(c: CigarString, pos: i64) -> CigarStringView {
30,877✔
2223
        CigarStringView { inner: c, pos }
2224
    }
2225

2226
    /// Get (exclusive) end position of alignment.
2227
    pub fn end_pos(&self) -> i64 {
13✔
2228
        let mut pos = self.pos;
25✔
2229
        for c in self {
86✔
2230
            match c {
39✔
2231
                Cigar::Match(l)
24✔
2232
                | Cigar::RefSkip(l)
2233
                | Cigar::Del(l)
12✔
2234
                | Cigar::Equal(l)
2235
                | Cigar::Diff(l) => pos += *l as i64,
36✔
2236
                // these don't add to end_pos on reference
2237
                Cigar::Ins(_) | Cigar::SoftClip(_) | Cigar::HardClip(_) | Cigar::Pad(_) => (),
2238
            }
2239
        }
2240
        pos
13✔
2241
    }
2242

2243
    /// Get the start position of the alignment (0-based).
2244
    pub fn pos(&self) -> i64 {
2✔
2245
        self.pos
2✔
2246
    }
2247

2248
    /// Get number of bases softclipped at the beginning of the alignment.
2249
    pub fn leading_softclips(&self) -> i64 {
3✔
2250
        calc_softclips(self.iter())
7✔
2251
    }
2252

2253
    /// Get number of bases softclipped at the end of the alignment.
2254
    pub fn trailing_softclips(&self) -> i64 {
3✔
2255
        calc_softclips(self.iter().rev())
9✔
2256
    }
2257

2258
    /// Get number of bases hardclipped at the beginning of the alignment.
2259
    pub fn leading_hardclips(&self) -> i64 {
×
2260
        self.first().map_or(0, |cigar| {
×
2261
            if let Cigar::HardClip(s) = cigar {
×
2262
                *s as i64
×
2263
            } else {
2264
                0
×
2265
            }
2266
        })
2267
    }
2268

2269
    /// Get number of bases hardclipped at the end of the alignment.
2270
    pub fn trailing_hardclips(&self) -> i64 {
×
2271
        self.last().map_or(0, |cigar| {
×
2272
            if let Cigar::HardClip(s) = cigar {
×
2273
                *s as i64
×
2274
            } else {
2275
                0
×
2276
            }
2277
        })
2278
    }
2279

2280
    /// For a given position in the reference, get corresponding position within read.
2281
    /// If reference position is outside of the read alignment, return None.
2282
    ///
2283
    /// # Arguments
2284
    ///
2285
    /// * `ref_pos` - the reference position
2286
    /// * `include_softclips` - if true, softclips will be considered as matches or mismatches
2287
    /// * `include_dels` - if true, positions within deletions will be considered (first reference matching read position after deletion will be returned)
2288
    ///
2289
    pub fn read_pos(
58✔
2290
        &self,
2291
        ref_pos: u32,
2292
        include_softclips: bool,
2293
        include_dels: bool,
2294
    ) -> Result<Option<u32>> {
2295
        let mut rpos = self.pos as u32; // reference position
115✔
2296
        let mut qpos = 0u32; // position within read
115✔
2297
        let mut j = 0; // index into cigar operation vector
115✔
2298

2299
        // find first cigar operation referring to qpos = 0 (and thus bases in record.seq()),
2300
        // because all augmentations of qpos and rpos before that are invalid
2301
        for (i, c) in self.iter().enumerate() {
299✔
2302
            match c {
6✔
2303
                Cigar::Match(_) |
2304
                Cigar::Diff(_)  |
2305
                Cigar::Equal(_) |
2306
                // this is unexpected, but bwa + GATK indel realignment can produce insertions
2307
                // before matching positions
2308
                Cigar::Ins(_) => {
2309
                    j = i;
51✔
2310
                    break;
51✔
2311
                },
2312
                Cigar::SoftClip(l) => {
6✔
2313
                    j = i;
6✔
2314
                    if include_softclips {
9✔
2315
                        // Alignment starts with softclip and we want to include it in the
2316
                        // projection of the reference position. However, the POS field does not
2317
                        // include the softclip. Hence we have to subtract its length.
2318
                        rpos = rpos.saturating_sub(*l);
5✔
2319
                    }
2320
                    break;
5✔
2321
                },
2322
                Cigar::Del(l) => {
×
2323
                    // METHOD: leading deletions can happen in case of trimmed reads where
2324
                    // a primer has been removed AFTER read mapping.
2325
                    // Example: 24M8I8D18M9S before trimming, 32H8D18M9S after trimming
2326
                    // with fgbio. While leading deletions should be impossible with
2327
                    // normal read mapping, they make perfect sense with primer trimming
2328
                    // because the mapper still had the evidence to decide in favor of
2329
                    // the deletion via the primer sequence.
UNCOV
2330
                    rpos += l;
×
2331
                },
2332
                Cigar::RefSkip(_) => {
2333
                    return Err(Error::BamUnexpectedCigarOperation {
×
2334
                        msg: "'reference skip' (N) found before any operation describing read sequence".to_owned()
×
2335
                    });
2336
                },
2337
                Cigar::HardClip(_) if i > 0 && i < self.len()-1 => {
13✔
2338
                    return Err(Error::BamUnexpectedCigarOperation{
2✔
2339
                        msg: "'hard clip' (H) found in between operations, contradicting SAMv1 spec that hard clips can only be at the ends of reads".to_owned()
2✔
2340
                    });
2341
                },
2342
                // if we have reached the end of the CigarString with only pads and hard clips, we have no read position matching the variant
2343
                Cigar::Pad(_) | Cigar::HardClip(_) if i == self.len()-1 => return Ok(None),
15✔
2344
                // skip leading HardClips and Pads, as they consume neither read sequence nor reference sequence
2345
                Cigar::Pad(_) | Cigar::HardClip(_) => ()
6✔
2346
            }
2347
        }
2348

2349
        let contains_ref_pos = |cigar_op_start: u32, cigar_op_length: u32| {
150✔
2350
            cigar_op_start <= ref_pos && cigar_op_start + cigar_op_length > ref_pos
188✔
2351
        };
2352

2353
        while rpos <= ref_pos && j < self.len() {
443✔
2354
            match self[j] {
142✔
2355
                // potential SNV evidence
2356
                Cigar::Match(l) | Cigar::Diff(l) | Cigar::Equal(l) if contains_ref_pos(rpos, l) => {
287✔
2357
                    // difference between desired position and first position of current cigar
2358
                    // operation
2359
                    qpos += ref_pos - rpos;
36✔
2360
                    return Ok(Some(qpos));
35✔
2361
                }
2362
                Cigar::SoftClip(l) if include_softclips && contains_ref_pos(rpos, l) => {
14✔
2363
                    qpos += ref_pos - rpos;
3✔
2364
                    return Ok(Some(qpos));
2✔
2365
                }
2366
                Cigar::Del(l) if include_dels && contains_ref_pos(rpos, l) => {
43✔
2367
                    // qpos shall resemble the start of the deletion
2368
                    return Ok(Some(qpos));
3✔
2369
                }
2370
                // for others, just increase pos and qpos as needed
2371
                Cigar::Match(l) | Cigar::Diff(l) | Cigar::Equal(l) => {
113✔
2372
                    rpos += l;
112✔
2373
                    qpos += l;
57✔
2374
                    j += 1;
57✔
2375
                }
2376
                Cigar::SoftClip(l) => {
4✔
2377
                    qpos += l;
5✔
2378
                    j += 1;
5✔
2379
                    if include_softclips {
6✔
2380
                        rpos += l;
2✔
2381
                    }
2382
                }
2383
                Cigar::Ins(l) => {
9✔
2384
                    qpos += l;
6✔
2385
                    j += 1;
6✔
2386
                }
2387
                Cigar::RefSkip(l) | Cigar::Del(l) => {
66✔
2388
                    rpos += l;
33✔
2389
                    j += 1;
34✔
2390
                }
2391
                Cigar::Pad(_) => {
2✔
2392
                    j += 1;
3✔
2393
                }
2394
                Cigar::HardClip(_) if j < self.len() - 1 => {
5✔
2395
                    return Err(Error::BamUnexpectedCigarOperation{
2✔
2396
                        msg: "'hard clip' (H) found in between operations, contradicting SAMv1 spec that hard clips can only be at the ends of reads".to_owned()
2✔
2397
                    });
2398
                }
2399
                Cigar::HardClip(_) => return Ok(None),
×
2400
            }
2401
        }
2402

2403
        Ok(None)
18✔
2404
    }
2405

2406
    /// transfer ownership of the Cigar out of the CigarView
2407
    pub fn take(self) -> CigarString {
465✔
2408
        self.inner
465✔
2409
    }
2410
}
2411

2412
impl ops::Deref for CigarStringView {
2413
    type Target = CigarString;
2414

2415
    fn deref(&self) -> &CigarString {
30,271✔
2416
        &self.inner
30,270✔
2417
    }
2418
}
2419

2420
impl ops::Index<usize> for CigarStringView {
2421
    type Output = Cigar;
2422

2423
    fn index(&self, index: usize) -> &Cigar {
134✔
2424
        self.inner.index(index)
267✔
2425
    }
2426
}
2427

2428
impl ops::IndexMut<usize> for CigarStringView {
2429
    fn index_mut(&mut self, index: usize) -> &mut Cigar {
×
2430
        self.inner.index_mut(index)
×
2431
    }
2432
}
2433

2434
impl<'a> CigarStringView {
2435
    pub fn iter(&'a self) -> ::std::slice::Iter<'a, Cigar> {
314✔
2436
        self.inner.into_iter()
627✔
2437
    }
2438
}
2439

2440
impl<'a> IntoIterator for &'a CigarStringView {
2441
    type Item = &'a Cigar;
2442
    type IntoIter = ::std::slice::Iter<'a, Cigar>;
2443

2444
    fn into_iter(self) -> Self::IntoIter {
13✔
2445
        self.inner.into_iter()
25✔
2446
    }
2447
}
2448

2449
impl fmt::Display for CigarStringView {
2450
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
×
2451
        self.inner.fmt(fmt)
×
2452
    }
2453
}
2454

2455
pub struct BaseModificationMetadata {
2456
    pub strand: i32,
2457
    pub implicit: i32,
2458
    pub canonical: u8,
2459
}
2460

2461
/// struct containing the internal state required to access
2462
/// the base modifications for a bam::Record
2463
pub struct BaseModificationState<'a> {
2464
    record: &'a Record,
2465
    state: *mut htslib::hts_base_mod_state,
2466
    buffer: Vec<htslib::hts_base_mod>,
2467
    buffer_pos: i32,
2468
}
2469

2470
impl BaseModificationState<'_> {
2471
    /// Initialize a new BaseModification struct from a bam::Record
2472
    /// This function allocates memory for the state structure
2473
    /// and initializes the iterator to the start of the modification
2474
    /// records.
2475
    fn new(r: &Record) -> Result<BaseModificationState<'_>> {
13✔
2476
        let mut bm = unsafe {
2477
            BaseModificationState {
2478
                record: r,
2479
                state: hts_sys::hts_base_mod_state_alloc(),
13✔
2480
                buffer: Vec::new(),
13✔
2481
                buffer_pos: -1,
11✔
2482
            }
2483
        };
2484

2485
        if bm.state.is_null() {
26✔
2486
            panic!("Unable to allocate memory for hts_base_mod_state");
×
2487
        }
2488

2489
        // parse the MM tag to initialize the state
2490
        unsafe {
2491
            let ret = hts_sys::bam_parse_basemod(bm.record.inner_ptr(), bm.state);
59✔
2492
            if ret != 0 {
13✔
2493
                return Err(Error::BamBaseModificationTagNotFound);
×
2494
            }
2495
        }
2496

2497
        let types = bm.recorded();
37✔
2498
        bm.buffer.reserve(types.len());
46✔
2499
        Ok(bm)
13✔
2500
    }
2501

2502
    pub fn buffer_next_mods(&mut self) -> Result<usize> {
36✔
2503
        unsafe {
2504
            let ret = hts_sys::bam_next_basemod(
2505
                self.record.inner_ptr(),
70✔
2506
                self.state,
36✔
2507
                self.buffer.as_mut_ptr(),
70✔
2508
                self.buffer.capacity() as i32,
36✔
2509
                &mut self.buffer_pos,
36✔
2510
            );
2511

2512
            if ret < 0 {
36✔
2513
                return Err(Error::BamBaseModificationIterationFailed);
×
2514
            }
2515

2516
            // the htslib API won't write more than buffer.capacity() mods to the output array but it will
2517
            // return the actual number of modifications found. We return an error to the caller
2518
            // in the case where there was insufficient storage to return all mods.
2519
            if ret as usize > self.buffer.capacity() {
70✔
2520
                return Err(Error::BamBaseModificationTooManyMods);
×
2521
            }
2522

2523
            // we read the modifications directly into the vector, which does
2524
            // not update the length so needs to be manually set
2525
            self.buffer.set_len(ret as usize);
104✔
2526

2527
            Ok(ret as usize)
36✔
2528
        }
2529
    }
2530

2531
    /// Return an array containing the modification codes listed for this record.
2532
    /// Positive values are ascii character codes (eg m), negative values are chEBI codes.
2533
    pub fn recorded<'a>(&self) -> &'a [i32] {
22✔
2534
        unsafe {
2535
            let mut n: i32 = 0;
62✔
2536
            let data_ptr: *const i32 = hts_sys::bam_mods_recorded(self.state, &mut n);
82✔
2537

2538
            // htslib should not return a null pointer, even when there are no base mods
2539
            if data_ptr.is_null() {
42✔
2540
                panic!("Unable to obtain pointer to base modifications");
×
2541
            }
2542
            assert!(n >= 0);
42✔
2543
            slice::from_raw_parts(data_ptr, n as usize)
42✔
2544
        }
2545
    }
2546

2547
    /// Return metadata for the specified character code indicating the strand
2548
    /// the base modification was called on, whether the tag uses implicit mode
2549
    /// and the ascii code for the canonical base.
2550
    /// If there are multiple modifications with the same code this will return the data
2551
    /// for the first mod.  See https://github.com/samtools/htslib/issues/1635
2552
    pub fn query_type(&self, code: i32) -> Result<BaseModificationMetadata> {
10✔
2553
        unsafe {
2554
            let mut strand: i32 = 0;
26✔
2555
            let mut implicit: i32 = 0;
26✔
2556
            // This may be i8 or u8 in hts_sys.
2557
            let mut canonical: c_char = 0;
26✔
2558

2559
            let ret = hts_sys::bam_mods_query_type(
2560
                self.state,
10✔
2561
                code,
8✔
2562
                &mut strand,
8✔
2563
                &mut implicit,
8✔
2564
                &mut canonical,
8✔
2565
            );
2566
            if ret == -1 {
10✔
2567
                Err(Error::BamBaseModificationTypeNotFound)
×
2568
            } else {
2569
                Ok(BaseModificationMetadata {
10✔
2570
                    strand,
18✔
2571
                    implicit,
18✔
2572
                    canonical: canonical.try_into().unwrap(),
18✔
2573
                })
2574
            }
2575
        }
2576
    }
2577
}
2578

2579
impl Drop for BaseModificationState<'_> {
2580
    fn drop<'a>(&mut self) {
13✔
2581
        unsafe {
2582
            hts_sys::hts_base_mod_state_free(self.state);
13✔
2583
        }
2584
    }
2585
}
2586

2587
/// Iterator over the base modifications that returns
2588
/// a vector for all of the mods at each position
2589
pub struct BaseModificationsPositionIter<'a> {
2590
    mod_state: BaseModificationState<'a>,
2591
}
2592

2593
impl BaseModificationsPositionIter<'_> {
2594
    fn new(r: &Record) -> Result<BaseModificationsPositionIter<'_>> {
2✔
2595
        let state = BaseModificationState::new(r)?;
4✔
2596
        Ok(BaseModificationsPositionIter { mod_state: state })
2✔
2597
    }
2598

2599
    pub fn recorded<'a>(&self) -> &'a [i32] {
×
2600
        self.mod_state.recorded()
×
2601
    }
2602

2603
    pub fn query_type(&self, code: i32) -> Result<BaseModificationMetadata> {
×
2604
        self.mod_state.query_type(code)
×
2605
    }
2606
}
2607

2608
impl Iterator for BaseModificationsPositionIter<'_> {
2609
    type Item = Result<(i32, Vec<hts_sys::hts_base_mod>)>;
2610

2611
    fn next(&mut self) -> Option<Self::Item> {
9✔
2612
        let ret = self.mod_state.buffer_next_mods();
25✔
2613

2614
        // Three possible things happened in buffer_next_mods:
2615
        // 1. the htslib API call was successful but there are no more mods
2616
        // 2. ths htslib API call was successful and we read some mods
2617
        // 3. the htslib API call failed, we propogate the error wrapped in an option
2618
        match ret {
9✔
2619
            Ok(num_mods) => {
9✔
2620
                if num_mods == 0 {
10✔
2621
                    None
2✔
2622
                } else {
2623
                    let data = (self.mod_state.buffer_pos, self.mod_state.buffer.clone());
22✔
2624
                    Some(Ok(data))
8✔
2625
                }
2626
            }
2627
            Err(e) => Some(Err(e)),
×
2628
        }
2629
    }
2630
}
2631

2632
/// Iterator over the base modifications that returns
2633
/// the next modification found, one by one
2634
pub struct BaseModificationsIter<'a> {
2635
    mod_state: BaseModificationState<'a>,
2636
    buffer_idx: usize,
2637
}
2638

2639
impl BaseModificationsIter<'_> {
2640
    fn new(r: &Record) -> Result<BaseModificationsIter<'_>> {
12✔
2641
        let state = BaseModificationState::new(r)?;
32✔
2642
        Ok(BaseModificationsIter {
12✔
2643
            mod_state: state,
10✔
2644
            buffer_idx: 0,
10✔
2645
        })
2646
    }
2647

2648
    pub fn recorded<'a>(&self) -> &'a [i32] {
11✔
2649
        self.mod_state.recorded()
11✔
2650
    }
2651

2652
    pub fn query_type(&self, code: i32) -> Result<BaseModificationMetadata> {
10✔
2653
        self.mod_state.query_type(code)
26✔
2654
    }
2655
}
2656

2657
impl Iterator for BaseModificationsIter<'_> {
2658
    type Item = Result<(i32, hts_sys::hts_base_mod)>;
2659

2660
    fn next(&mut self) -> Option<Self::Item> {
29✔
2661
        if self.buffer_idx == self.mod_state.buffer.len() {
58✔
2662
            // need to use the internal state to read the next
2663
            // set of modifications into the buffer
2664
            let ret = self.mod_state.buffer_next_mods();
80✔
2665

2666
            match ret {
28✔
2667
                Ok(num_mods) => {
28✔
2668
                    if num_mods == 0 {
28✔
2669
                        // done iterating
2670
                        return None;
7✔
2671
                    } else {
2672
                        // we read some mods, reset the position in the buffer then fall through
2673
                        self.buffer_idx = 0;
23✔
2674
                    }
2675
                }
2676
                Err(e) => return Some(Err(e)),
×
2677
            }
2678
        }
2679

2680
        // if we got here when there are mods buffered that we haven't emitted yet
2681
        assert!(self.buffer_idx < self.mod_state.buffer.len());
68✔
2682
        let data = (
46✔
2683
            self.mod_state.buffer_pos,
46✔
2684
            self.mod_state.buffer[self.buffer_idx],
24✔
2685
        );
2686
        self.buffer_idx += 1;
24✔
2687
        Some(Ok(data))
24✔
2688
    }
2689
}
2690

2691
#[cfg(test)]
2692
mod tests {
2693
    use super::*;
2694

2695
    #[test]
2696
    fn test_cigar_string() {
2697
        let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]);
2698

2699
        assert_eq!(cigar[0], Cigar::Match(100));
2700
        assert_eq!(format!("{}", cigar), "100M10S");
2701
        for op in &cigar {
2702
            println!("{}", op);
2703
        }
2704
    }
2705

2706
    #[test]
2707
    fn test_cigar_string_view_pos() {
2708
        let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]).into_view(5);
2709
        assert_eq!(cigar.pos(), 5);
2710
    }
2711

2712
    #[test]
2713
    fn test_cigar_string_leading_softclips() {
2714
        let cigar = CigarString(vec![Cigar::SoftClip(10), Cigar::Match(100)]).into_view(0);
2715
        assert_eq!(cigar.leading_softclips(), 10);
2716
        let cigar2 = CigarString(vec![
2717
            Cigar::HardClip(5),
2718
            Cigar::SoftClip(10),
2719
            Cigar::Match(100),
2720
        ])
2721
        .into_view(0);
2722
        assert_eq!(cigar2.leading_softclips(), 10);
2723
    }
2724

2725
    #[test]
2726
    fn test_cigar_string_trailing_softclips() {
2727
        let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]).into_view(0);
2728
        assert_eq!(cigar.trailing_softclips(), 10);
2729
        let cigar2 = CigarString(vec![
2730
            Cigar::Match(100),
2731
            Cigar::SoftClip(10),
2732
            Cigar::HardClip(5),
2733
        ])
2734
        .into_view(0);
2735
        assert_eq!(cigar2.trailing_softclips(), 10);
2736
    }
2737

2738
    #[test]
2739
    fn test_cigar_read_pos() {
2740
        let vpos = 5; // variant position
2741

2742
        // Ignore leading HardClip
2743
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2744
        // var:                       V
2745
        // c01: 7H                 M  M
2746
        // qpos:                  00 01
2747
        let c01 = CigarString(vec![Cigar::HardClip(7), Cigar::Match(2)]).into_view(4);
2748
        assert_eq!(c01.read_pos(vpos, false, false).unwrap(), Some(1));
2749

2750
        // Skip leading SoftClip or use as pre-POS matches
2751
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2752
        // var:                       V
2753
        // c02: 5H2S         M  M  M  M  M  M
2754
        // qpos:  00        02 03 04 05 06 07
2755
        // c02: 5H     S  S  M  M  M  M  M  M
2756
        // qpos:      00 01 02 03 04 05 06 07
2757
        let c02 = CigarString(vec![Cigar::SoftClip(2), Cigar::Match(6)]).into_view(2);
2758
        assert_eq!(c02.read_pos(vpos, false, false).unwrap(), Some(5));
2759
        assert_eq!(c02.read_pos(vpos, true, false).unwrap(), Some(5));
2760

2761
        // Skip leading SoftClip returning None for unmatched reference positiong or use as
2762
        // pre-POS matches
2763
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2764
        // var:                       V
2765
        // c03:  3S                      M  M
2766
        // qpos: 00                     03 04
2767
        // c03:                 S  S  S  M  M
2768
        // qpos:               00 01 02 03 04
2769
        let c03 = CigarString(vec![Cigar::SoftClip(3), Cigar::Match(6)]).into_view(6);
2770
        assert_eq!(c03.read_pos(vpos, false, false).unwrap(), None);
2771
        assert_eq!(c03.read_pos(vpos, true, false).unwrap(), Some(2));
2772

2773
        // Skip leading Insertion before variant position
2774
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2775
        // var:                       V
2776
        // c04:  3I                X  X  X
2777
        // qpos: 00               03 04 05
2778
        let c04 = CigarString(vec![Cigar::Ins(3), Cigar::Diff(3)]).into_view(4);
2779
        assert_eq!(c04.read_pos(vpos, true, false).unwrap(), Some(4));
2780

2781
        // Matches and deletion before variant position
2782
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2783
        // var:                       V
2784
        // c05:        =  =  D  D  X  =  =
2785
        // qpos:      00 01       02 03 04 05
2786
        let c05 = CigarString(vec![
2787
            Cigar::Equal(2),
2788
            Cigar::Del(2),
2789
            Cigar::Diff(1),
2790
            Cigar::Equal(2),
2791
        ])
2792
        .into_view(0);
2793
        assert_eq!(c05.read_pos(vpos, true, false).unwrap(), Some(3));
2794

2795
        // single nucleotide Deletion covering variant position
2796
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2797
        // var:                       V
2798
        // c06:                 =  =  D  X  X
2799
        // qpos:               00 01    02 03
2800
        let c06 = CigarString(vec![Cigar::Equal(2), Cigar::Del(1), Cigar::Diff(2)]).into_view(3);
2801
        assert_eq!(c06.read_pos(vpos, false, true).unwrap(), Some(2));
2802
        assert_eq!(c06.read_pos(vpos, false, false).unwrap(), None);
2803

2804
        // three nucleotide Deletion covering variant position
2805
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2806
        // var:                       V
2807
        // c07:              =  =  D  D  D  M  M
2808
        // qpos:            00 01          02 03
2809
        let c07 = CigarString(vec![Cigar::Equal(2), Cigar::Del(3), Cigar::Match(2)]).into_view(2);
2810
        assert_eq!(c07.read_pos(vpos, false, true).unwrap(), Some(2));
2811
        assert_eq!(c07.read_pos(vpos, false, false).unwrap(), None);
2812

2813
        // three nucleotide RefSkip covering variant position
2814
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2815
        // var:                       V
2816
        // c08:              =  X  N  N  N  M  M
2817
        // qpos:            00 01          02 03
2818
        let c08 = CigarString(vec![
2819
            Cigar::Equal(1),
2820
            Cigar::Diff(1),
2821
            Cigar::RefSkip(3),
2822
            Cigar::Match(2),
2823
        ])
2824
        .into_view(2);
2825
        assert_eq!(c08.read_pos(vpos, false, true).unwrap(), None);
2826
        assert_eq!(c08.read_pos(vpos, false, false).unwrap(), None);
2827

2828
        // internal hard clip before variant pos
2829
        // ref:       00 01 02 03    04 05 06 07 08 09 10 11 12 13 14 15
2830
        // var:                          V
2831
        // c09: 3H           =  = 3H  =  =
2832
        // qpos:            00 01    02 03
2833
        let c09 = CigarString(vec![
2834
            Cigar::HardClip(3),
2835
            Cigar::Equal(2),
2836
            Cigar::HardClip(3),
2837
            Cigar::Equal(2),
2838
        ])
2839
        .into_view(2);
2840
        assert_eq!(c09.read_pos(vpos, false, true).is_err(), true);
2841

2842
        // Deletion right before variant position
2843
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2844
        // var:                       V
2845
        // c10:           M  M  D  D  M  M
2846
        // qpos:         00 01       02 03
2847
        let c10 = CigarString(vec![Cigar::Match(2), Cigar::Del(2), Cigar::Match(2)]).into_view(1);
2848
        assert_eq!(c10.read_pos(vpos, false, false).unwrap(), Some(2));
2849

2850
        // Insertion right before variant position
2851
        // ref:       00 01 02 03 04    05 06 07 08 09 10 11 12 13 14 15
2852
        // var:                          V
2853
        // c11:                 M  M 3I  M
2854
        // qpos:               00 01 02 05 06
2855
        let c11 = CigarString(vec![Cigar::Match(2), Cigar::Ins(3), Cigar::Match(2)]).into_view(3);
2856
        assert_eq!(c11.read_pos(vpos, false, false).unwrap(), Some(5));
2857

2858
        // Insertion right after variant position
2859
        // ref:       00 01 02 03 04 05    06 07 08 09 10 11 12 13 14 15
2860
        // var:                       V
2861
        // c12:                 M  M  M 2I  =
2862
        // qpos:               00 01 02 03 05
2863
        let c12 = CigarString(vec![Cigar::Match(3), Cigar::Ins(2), Cigar::Equal(1)]).into_view(3);
2864
        assert_eq!(c12.read_pos(vpos, false, false).unwrap(), Some(2));
2865

2866
        // Deletion right after variant position
2867
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2868
        // var:                       V
2869
        // c13:                 M  M  M  D  =
2870
        // qpos:               00 01 02    03
2871
        let c13 = CigarString(vec![Cigar::Match(3), Cigar::Del(1), Cigar::Equal(1)]).into_view(3);
2872
        assert_eq!(c13.read_pos(vpos, false, false).unwrap(), Some(2));
2873

2874
        // A messy and complicated example, including a Pad operation
2875
        let vpos2 = 15;
2876
        // ref:       00    01 02    03 04 05    06 07 08 09 10 11 12 13 14 15
2877
        // var:                                                           V
2878
        // c14: 5H3S   = 2P  M  X 3I  M  M  D 2I  =  =  N  N  N  M  M  M  =  =  5S2H
2879
        // qpos:  00  03    04 05 06 09 10    11 13 14          15 16 17 18 19
2880
        let c14 = CigarString(vec![
2881
            Cigar::HardClip(5),
2882
            Cigar::SoftClip(3),
2883
            Cigar::Equal(1),
2884
            Cigar::Pad(2),
2885
            Cigar::Match(1),
2886
            Cigar::Diff(1),
2887
            Cigar::Ins(3),
2888
            Cigar::Match(2),
2889
            Cigar::Del(1),
2890
            Cigar::Ins(2),
2891
            Cigar::Equal(2),
2892
            Cigar::RefSkip(3),
2893
            Cigar::Match(3),
2894
            Cigar::Equal(2),
2895
            Cigar::SoftClip(5),
2896
            Cigar::HardClip(2),
2897
        ])
2898
        .into_view(0);
2899
        assert_eq!(c14.read_pos(vpos2, false, false).unwrap(), Some(19));
2900

2901
        // HardClip after Pad
2902
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2903
        // var:                       V
2904
        // c15: 5P1H            =  =  =
2905
        // qpos:               00 01 02
2906
        let c15 =
2907
            CigarString(vec![Cigar::Pad(5), Cigar::HardClip(1), Cigar::Equal(3)]).into_view(3);
2908
        assert_eq!(c15.read_pos(vpos, false, false).is_err(), true);
2909

2910
        // only HardClip and Pad operations
2911
        // c16: 7H5P2H
2912
        let c16 =
2913
            CigarString(vec![Cigar::HardClip(7), Cigar::Pad(5), Cigar::HardClip(2)]).into_view(3);
2914
        assert_eq!(c16.read_pos(vpos, false, false).unwrap(), None);
2915
    }
2916

2917
    #[test]
2918
    fn test_clone() {
2919
        let mut rec = Record::new();
2920
        rec.set_pos(300);
2921
        rec.set_qname(b"read1");
2922
        let clone = rec.clone();
2923
        assert_eq!(rec, clone);
2924
    }
2925

2926
    #[test]
2927
    fn test_flags() {
2928
        let mut rec = Record::new();
2929

2930
        rec.set_paired();
2931
        assert_eq!(rec.is_paired(), true);
2932

2933
        rec.set_supplementary();
2934
        assert_eq!(rec.is_supplementary(), true);
2935
        assert_eq!(rec.is_supplementary(), true);
2936

2937
        rec.unset_paired();
2938
        assert_eq!(rec.is_paired(), false);
2939
        assert_eq!(rec.is_supplementary(), true);
2940

2941
        rec.unset_supplementary();
2942
        assert_eq!(rec.is_paired(), false);
2943
        assert_eq!(rec.is_supplementary(), false);
2944
    }
2945

2946
    #[test]
2947
    fn test_cigar_parse() {
2948
        let cigar = "1S20M1D2I3X1=2H";
2949
        let parsed = CigarString::try_from(cigar).unwrap();
2950
        assert_eq!(parsed.to_string(), cigar);
2951
    }
2952
}
2953

2954
#[cfg(test)]
2955
mod alignment_cigar_tests {
2956
    use super::*;
2957
    use crate::bam::{Read, Reader};
2958
    use bio_types::alignment::AlignmentOperation::{Del, Ins, Match, Subst, Xclip, Yclip};
2959
    use bio_types::alignment::{Alignment, AlignmentMode};
2960

2961
    #[test]
2962
    fn test_cigar() {
2963
        let alignment = Alignment {
2964
            score: 5,
2965
            xstart: 3,
2966
            ystart: 0,
2967
            xend: 9,
2968
            yend: 10,
2969
            ylen: 10,
2970
            xlen: 10,
2971
            operations: vec![Match, Match, Match, Subst, Ins, Ins, Del, Del],
2972
            mode: AlignmentMode::Semiglobal,
2973
        };
2974
        assert_eq!(alignment.cigar(false), "3S3=1X2I2D1S");
2975
        assert_eq!(
2976
            CigarString::from_alignment(&alignment, false).0,
2977
            vec![
2978
                Cigar::SoftClip(3),
2979
                Cigar::Equal(3),
2980
                Cigar::Diff(1),
2981
                Cigar::Ins(2),
2982
                Cigar::Del(2),
2983
                Cigar::SoftClip(1),
2984
            ]
2985
        );
2986

2987
        let alignment = Alignment {
2988
            score: 5,
2989
            xstart: 0,
2990
            ystart: 5,
2991
            xend: 4,
2992
            yend: 10,
2993
            ylen: 10,
2994
            xlen: 5,
2995
            operations: vec![Yclip(5), Match, Subst, Subst, Ins, Del, Del, Xclip(1)],
2996
            mode: AlignmentMode::Custom,
2997
        };
2998
        assert_eq!(alignment.cigar(false), "1=2X1I2D1S");
2999
        assert_eq!(alignment.cigar(true), "1=2X1I2D1H");
3000
        assert_eq!(
3001
            CigarString::from_alignment(&alignment, false).0,
3002
            vec![
3003
                Cigar::Equal(1),
3004
                Cigar::Diff(2),
3005
                Cigar::Ins(1),
3006
                Cigar::Del(2),
3007
                Cigar::SoftClip(1),
3008
            ]
3009
        );
3010
        assert_eq!(
3011
            CigarString::from_alignment(&alignment, true).0,
3012
            vec![
3013
                Cigar::Equal(1),
3014
                Cigar::Diff(2),
3015
                Cigar::Ins(1),
3016
                Cigar::Del(2),
3017
                Cigar::HardClip(1),
3018
            ]
3019
        );
3020

3021
        let alignment = Alignment {
3022
            score: 5,
3023
            xstart: 0,
3024
            ystart: 5,
3025
            xend: 3,
3026
            yend: 8,
3027
            ylen: 10,
3028
            xlen: 3,
3029
            operations: vec![Yclip(5), Subst, Match, Subst, Yclip(2)],
3030
            mode: AlignmentMode::Custom,
3031
        };
3032
        assert_eq!(alignment.cigar(false), "1X1=1X");
3033
        assert_eq!(
3034
            CigarString::from_alignment(&alignment, false).0,
3035
            vec![Cigar::Diff(1), Cigar::Equal(1), Cigar::Diff(1)]
3036
        );
3037

3038
        let alignment = Alignment {
3039
            score: 5,
3040
            xstart: 0,
3041
            ystart: 5,
3042
            xend: 3,
3043
            yend: 8,
3044
            ylen: 10,
3045
            xlen: 3,
3046
            operations: vec![Subst, Match, Subst],
3047
            mode: AlignmentMode::Semiglobal,
3048
        };
3049
        assert_eq!(alignment.cigar(false), "1X1=1X");
3050
        assert_eq!(
3051
            CigarString::from_alignment(&alignment, false).0,
3052
            vec![Cigar::Diff(1), Cigar::Equal(1), Cigar::Diff(1)]
3053
        );
3054
    }
3055

3056
    #[test]
3057
    fn test_read_orientation_f1r2() {
3058
        let mut bam = Reader::from_path("test/test_paired.sam").unwrap();
3059

3060
        for res in bam.records() {
3061
            let record = res.unwrap();
3062
            assert_eq!(
3063
                record.read_pair_orientation(),
3064
                SequenceReadPairOrientation::F1R2
3065
            );
3066
        }
3067
    }
3068

3069
    #[test]
3070
    fn test_read_orientation_f2r1() {
3071
        let mut bam = Reader::from_path("test/test_nonstandard_orientation.sam").unwrap();
3072

3073
        for res in bam.records() {
3074
            let record = res.unwrap();
3075
            assert_eq!(
3076
                record.read_pair_orientation(),
3077
                SequenceReadPairOrientation::F2R1
3078
            );
3079
        }
3080
    }
3081

3082
    #[test]
3083
    fn test_read_orientation_supplementary() {
3084
        let mut bam = Reader::from_path("test/test_orientation_supplementary.sam").unwrap();
3085

3086
        for res in bam.records() {
3087
            let record = res.unwrap();
3088
            assert_eq!(
3089
                record.read_pair_orientation(),
3090
                SequenceReadPairOrientation::F2R1
3091
            );
3092
        }
3093
    }
3094

3095
    #[test]
3096
    pub fn test_cigar_parsing_non_ascii_error() {
3097
        let cigar_str = "43ጷ";
3098
        let expected_error = Err(Error::BamParseCigar {
3099
                msg: "CIGAR string contained non-ASCII characters, which are not valid. Valid are [0-9MIDNSHP=X].".to_owned(),
3100
            });
3101

3102
        let result = CigarString::try_from(cigar_str);
3103
        assert_eq!(expected_error, result);
3104
    }
3105

3106
    #[test]
3107
    pub fn test_cigar_parsing() {
3108
        // parsing test cases
3109
        let cigar_strs = [
3110
            "1H10M4D100I300N1102=10P25X11S", // test every cigar opt
3111
            "100M",                          // test a single op
3112
            "",                              // test empty input
3113
            "1H1=1H",                        // test simple hardclip
3114
            "1S1=1S",                        // test simple softclip
3115
            "11H11S11=11S11H",               // test complex softclip
3116
            "10H",
3117
            "10S",
3118
        ];
3119
        // expected results
3120
        let cigars = [
3121
            CigarString(vec![
3122
                Cigar::HardClip(1),
3123
                Cigar::Match(10),
3124
                Cigar::Del(4),
3125
                Cigar::Ins(100),
3126
                Cigar::RefSkip(300),
3127
                Cigar::Equal(1102),
3128
                Cigar::Pad(10),
3129
                Cigar::Diff(25),
3130
                Cigar::SoftClip(11),
3131
            ]),
3132
            CigarString(vec![Cigar::Match(100)]),
3133
            CigarString(vec![]),
3134
            CigarString(vec![
3135
                Cigar::HardClip(1),
3136
                Cigar::Equal(1),
3137
                Cigar::HardClip(1),
3138
            ]),
3139
            CigarString(vec![
3140
                Cigar::SoftClip(1),
3141
                Cigar::Equal(1),
3142
                Cigar::SoftClip(1),
3143
            ]),
3144
            CigarString(vec![
3145
                Cigar::HardClip(11),
3146
                Cigar::SoftClip(11),
3147
                Cigar::Equal(11),
3148
                Cigar::SoftClip(11),
3149
                Cigar::HardClip(11),
3150
            ]),
3151
            CigarString(vec![Cigar::HardClip(10)]),
3152
            CigarString(vec![Cigar::SoftClip(10)]),
3153
        ];
3154
        // compare
3155
        for (&cigar_str, truth) in cigar_strs.iter().zip(cigars.iter()) {
3156
            let cigar_parse = CigarString::try_from(cigar_str)
3157
                .unwrap_or_else(|_| panic!("Unable to parse cigar: {}", cigar_str));
3158
            assert_eq!(&cigar_parse, truth);
3159
        }
3160
    }
3161
}
3162

3163
#[cfg(test)]
3164
mod basemod_tests {
3165
    use crate::bam::{Read, Reader};
3166

3167
    #[test]
3168
    pub fn test_count_recorded() {
3169
        let mut bam = Reader::from_path("test/base_mods/MM-double.sam").unwrap();
3170

3171
        for r in bam.records() {
3172
            let record = r.unwrap();
3173
            if let Ok(mods) = record.basemods_iter() {
3174
                let n = mods.recorded().len();
3175
                assert_eq!(n, 3);
3176
            };
3177
        }
3178
    }
3179

3180
    #[test]
3181
    pub fn test_query_type() {
3182
        let mut bam = Reader::from_path("test/base_mods/MM-orient.sam").unwrap();
3183

3184
        let mut n_fwd = 0;
3185
        let mut n_rev = 0;
3186

3187
        for r in bam.records() {
3188
            let record = r.unwrap();
3189
            if let Ok(mods) = record.basemods_iter() {
3190
                for mod_code in mods.recorded() {
3191
                    if let Ok(mod_metadata) = mods.query_type(*mod_code) {
3192
                        if mod_metadata.strand == 0 {
3193
                            n_fwd += 1;
3194
                        }
3195
                        if mod_metadata.strand == 1 {
3196
                            n_rev += 1;
3197
                        }
3198
                    }
3199
                }
3200
            };
3201
        }
3202
        assert_eq!(n_fwd, 2);
3203
        assert_eq!(n_rev, 2);
3204
    }
3205

3206
    #[test]
3207
    pub fn test_mod_iter() {
3208
        let mut bam = Reader::from_path("test/base_mods/MM-double.sam").unwrap();
3209
        let expected_positions = [1, 7, 12, 13, 13, 22, 30, 31];
3210
        let mut i = 0;
3211

3212
        for r in bam.records() {
3213
            let record = r.unwrap();
3214
            for res in record.basemods_iter().unwrap().flatten() {
3215
                let (position, _m) = res;
3216
                assert_eq!(position, expected_positions[i]);
3217
                i += 1;
3218
            }
3219
        }
3220
    }
3221

3222
    #[test]
3223
    pub fn test_position_iter() {
3224
        let mut bam = Reader::from_path("test/base_mods/MM-double.sam").unwrap();
3225
        let expected_positions = [1, 7, 12, 13, 22, 30, 31];
3226
        let expected_counts = [1, 1, 1, 2, 1, 1, 1];
3227
        let mut i = 0;
3228

3229
        for r in bam.records() {
3230
            let record = r.unwrap();
3231
            for res in record.basemods_position_iter().unwrap().flatten() {
3232
                let (position, elements) = res;
3233
                assert_eq!(position, expected_positions[i]);
3234
                assert_eq!(elements.len(), expected_counts[i]);
3235
                i += 1;
3236
            }
3237
        }
3238
    }
3239
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc