• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

rust-bio / rust-htslib / 7822478621

07 Feb 2024 11:22PM UTC coverage: 79.683% (-0.1%) from 79.823%
7822478621

Pull #408

github

web-flow
Merge afba33ef8 into 4d146c6e5
Pull Request #408: feat: Add rust_htslib::bcf::index::build

23 of 33 new or added lines in 1 file covered. (69.7%)

1 existing line in 1 file now uncovered.

2463 of 3091 relevant lines covered (79.68%)

2.31 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

76.72
/src/bam/record.rs
1
// Copyright 2014 Christopher Schröder, Johannes Köster.
2
// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3
// This file may not be copied, modified, or distributed
4
// except according to those terms.
12✔
5

6
use std::convert::TryFrom;
7
use std::convert::TryInto;
8
use std::ffi;
9
use std::fmt;
10
use std::marker::PhantomData;
11
use std::mem::{size_of, MaybeUninit};
12
use std::ops;
13
use std::os::raw::c_char;
14
use std::rc::Rc;
15
use std::slice;
16
use std::str;
17
use std::u32;
18

19
use byteorder::{LittleEndian, ReadBytesExt};
20

21
use crate::bam::Error;
22
use crate::bam::HeaderView;
23
use crate::errors::Result;
24
use crate::htslib;
25
use crate::utils;
26
#[cfg(feature = "serde_feature")]
27
use serde::{self, Deserialize, Serialize};
28

29
use bio_types::alignment::{Alignment, AlignmentMode, AlignmentOperation};
30
use bio_types::genome;
31
use bio_types::sequence::SequenceRead;
32
use bio_types::sequence::SequenceReadPairOrientation;
33
use bio_types::strand::ReqStrand;
34

35
/// A macro creating methods for flag access.
36
macro_rules! flag {
37
    ($get:ident, $set:ident, $unset:ident, $bit:expr) => {
2✔
38
        pub fn $get(&self) -> bool {
11✔
39
            self.inner().core.flag & $bit != 0
11✔
40
        }
41

42
        pub fn $set(&mut self) {
3✔
43
            self.inner_mut().core.flag |= $bit;
5✔
44
        }
45

46
        pub fn $unset(&mut self) {
3✔
47
            self.inner_mut().core.flag &= !$bit;
2✔
48
        }
49
    };
50
}
51

52
/// A BAM record.
53
pub struct Record {
54
    pub inner: htslib::bam1_t,
55
    own: bool,
56
    cigar: Option<CigarStringView>,
57
    header: Option<Rc<HeaderView>>,
58
}
59

60
unsafe impl Send for Record {}
61
unsafe impl Sync for Record {}
62

63
impl Clone for Record {
64
    fn clone(&self) -> Self {
1✔
65
        let mut copy = Record::new();
1✔
66
        unsafe { htslib::bam_copy1(copy.inner_ptr_mut(), self.inner_ptr()) };
2✔
67
        copy
1✔
68
    }
69
}
70

71
impl PartialEq for Record {
72
    fn eq(&self, other: &Record) -> bool {
1✔
73
        self.tid() == other.tid()
1✔
74
            && self.pos() == other.pos()
1✔
75
            && self.bin() == other.bin()
1✔
76
            && self.mapq() == other.mapq()
1✔
77
            && self.flags() == other.flags()
1✔
78
            && self.mtid() == other.mtid()
1✔
79
            && self.mpos() == other.mpos()
1✔
80
            && self.insert_size() == other.insert_size()
1✔
81
            && self.data() == other.data()
1✔
82
            && self.inner().core.l_extranul == other.inner().core.l_extranul
1✔
83
    }
84
}
85

86
impl Eq for Record {}
87

88
impl fmt::Debug for Record {
89
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
×
90
        fmt.write_fmt(format_args!(
×
91
            "Record(tid: {}, pos: {})",
92
            self.tid(),
×
93
            self.pos()
×
94
        ))
95
    }
96
}
97

98
impl Default for Record {
99
    fn default() -> Self {
×
100
        Self::new()
×
101
    }
102
}
103

104
#[inline]
105
fn extranul_from_qname(qname: &[u8]) -> usize {
1✔
106
    let qlen = qname.len() + 1;
1✔
107
    if qlen % 4 != 0 {
3✔
108
        4 - qlen % 4
2✔
109
    } else {
110
        0
1✔
111
    }
112
}
113

114
impl Record {
115
    /// Create an empty BAM record.
116
    pub fn new() -> Self {
9✔
117
        Record {
118
            inner: unsafe { MaybeUninit::zeroed().assume_init() },
9✔
119
            own: true,
120
            cigar: None,
121
            header: None,
122
        }
123
    }
124

125
    pub fn from_inner(from: *mut htslib::bam1_t) -> Self {
2✔
126
        Record {
127
            inner: {
128
                #[allow(clippy::uninit_assumed_init)]
129
                let mut inner = unsafe { MaybeUninit::uninit().assume_init() };
130
                unsafe {
131
                    ::libc::memcpy(
132
                        &mut inner as *mut htslib::bam1_t as *mut ::libc::c_void,
133
                        from as *const ::libc::c_void,
134
                        size_of::<htslib::bam1_t>(),
135
                    );
136
                }
137
                inner
138
            },
139
            own: false,
140
            cigar: None,
141
            header: None,
142
        }
143
    }
144

145
    // Create a BAM record from a line SAM text. SAM slice need not be 0-terminated.
146
    pub fn from_sam(header_view: &HeaderView, sam: &[u8]) -> Result<Record> {
3✔
147
        let mut record = Self::new();
3✔
148

149
        let mut sam_copy = Vec::with_capacity(sam.len() + 1);
3✔
150
        sam_copy.extend(sam);
3✔
151
        sam_copy.push(0);
3✔
152

153
        let mut sam_string = htslib::kstring_t {
154
            s: sam_copy.as_ptr() as *mut c_char,
3✔
155
            l: sam_copy.len() as u64,
3✔
156
            m: sam_copy.len() as u64,
3✔
157
        };
158

159
        let succ = unsafe {
160
            htslib::sam_parse1(
161
                &mut sam_string,
162
                header_view.inner_ptr() as *mut htslib::bam_hdr_t,
3✔
163
                record.inner_ptr_mut(),
3✔
164
            )
165
        };
166

167
        if succ == 0 {
6✔
168
            Ok(record)
3✔
169
        } else {
170
            Err(Error::BamParseSAM {
×
171
                rec: str::from_utf8(&sam_copy).unwrap().to_owned(),
×
172
            })
173
        }
174
    }
175

176
    pub fn set_header(&mut self, header: Rc<HeaderView>) {
6✔
177
        self.header = Some(header);
12✔
178
    }
179

180
    pub(super) fn data(&self) -> &[u8] {
4✔
181
        unsafe { slice::from_raw_parts(self.inner().data, self.inner().l_data as usize) }
4✔
182
    }
183

184
    #[inline]
185
    pub fn inner_mut(&mut self) -> &mut htslib::bam1_t {
1✔
186
        &mut self.inner
1✔
187
    }
188

189
    #[inline]
190
    pub(super) fn inner_ptr_mut(&mut self) -> *mut htslib::bam1_t {
7✔
191
        &mut self.inner as *mut htslib::bam1_t
7✔
192
    }
193

194
    #[inline]
195
    pub fn inner(&self) -> &htslib::bam1_t {
6✔
196
        &self.inner
6✔
197
    }
198

199
    #[inline]
200
    pub(super) fn inner_ptr(&self) -> *const htslib::bam1_t {
3✔
201
        &self.inner as *const htslib::bam1_t
3✔
202
    }
203

204
    /// Get target id.
205
    pub fn tid(&self) -> i32 {
2✔
206
        self.inner().core.tid
2✔
207
    }
208

209
    /// Set target id.
210
    pub fn set_tid(&mut self, tid: i32) {
×
211
        self.inner_mut().core.tid = tid;
×
212
    }
213

214
    /// Get position (0-based).
215
    pub fn pos(&self) -> i64 {
1✔
216
        self.inner().core.pos
1✔
217
    }
218

219
    /// Set position (0-based).
220
    pub fn set_pos(&mut self, pos: i64) {
1✔
221
        self.inner_mut().core.pos = pos;
1✔
222
    }
223

224
    pub fn bin(&self) -> u16 {
1✔
225
        self.inner().core.bin
1✔
226
    }
227

228
    pub fn set_bin(&mut self, bin: u16) {
×
229
        self.inner_mut().core.bin = bin;
×
230
    }
231

232
    /// Get MAPQ.
233
    pub fn mapq(&self) -> u8 {
1✔
234
        self.inner().core.qual
1✔
235
    }
236

237
    /// Set MAPQ.
238
    pub fn set_mapq(&mut self, mapq: u8) {
×
239
        self.inner_mut().core.qual = mapq;
×
240
    }
241

242
    /// Get strand information from record flags.
243
    pub fn strand(&self) -> ReqStrand {
×
244
        let reverse = self.flags() & 0x10 != 0;
×
245
        if reverse {
×
246
            ReqStrand::Reverse
×
247
        } else {
248
            ReqStrand::Forward
×
249
        }
250
    }
251

252
    /// Get raw flags.
253
    pub fn flags(&self) -> u16 {
2✔
254
        self.inner().core.flag
2✔
255
    }
256

257
    /// Set raw flags.
258
    pub fn set_flags(&mut self, flags: u16) {
×
259
        self.inner_mut().core.flag = flags;
×
260
    }
261

262
    /// Unset all flags.
263
    pub fn unset_flags(&mut self) {
×
264
        self.inner_mut().core.flag = 0;
×
265
    }
266

267
    /// Get target id of mate.
268
    pub fn mtid(&self) -> i32 {
1✔
269
        self.inner().core.mtid
1✔
270
    }
271

272
    /// Set target id of mate.
273
    pub fn set_mtid(&mut self, mtid: i32) {
×
274
        self.inner_mut().core.mtid = mtid;
×
275
    }
276

277
    /// Get mate position.
278
    pub fn mpos(&self) -> i64 {
1✔
279
        self.inner().core.mpos
1✔
280
    }
281

282
    /// Set mate position.
283
    pub fn set_mpos(&mut self, mpos: i64) {
×
284
        self.inner_mut().core.mpos = mpos;
×
285
    }
286

287
    /// Get insert size.
288
    pub fn insert_size(&self) -> i64 {
1✔
289
        self.inner().core.isize
1✔
290
    }
291

292
    /// Set insert size.
293
    pub fn set_insert_size(&mut self, insert_size: i64) {
×
294
        self.inner_mut().core.isize = insert_size;
×
295
    }
296

297
    fn qname_capacity(&self) -> usize {
4✔
298
        self.inner().core.l_qname as usize
4✔
299
    }
300

301
    fn qname_len(&self) -> usize {
2✔
302
        // discount all trailing zeros (the default one and extra nulls)
303
        self.qname_capacity() - 1 - self.inner().core.l_extranul as usize
2✔
304
    }
305

306
    /// Get qname (read name). Complexity: O(1).
307
    pub fn qname(&self) -> &[u8] {
2✔
308
        &self.data()[..self.qname_len()]
2✔
309
    }
310

311
    /// Set the variable length data buffer
312
    pub fn set_data(&mut self, new_data: &[u8]) {
1✔
313
        self.cigar = None;
2✔
314

315
        self.inner_mut().l_data = new_data.len() as i32;
1✔
316
        if (self.inner().m_data as i32) < self.inner().l_data {
1✔
317
            // Verbosity due to lexical borrowing
318
            let l_data = self.inner().l_data;
1✔
319
            self.realloc_var_data(l_data as usize);
1✔
320
        }
321

322
        // Copy new data into buffer
323
        let data =
1✔
324
            unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
325
        utils::copy_memory(new_data, data);
1✔
326
    }
327

328
    /// Set variable length data (qname, cigar, seq, qual).
329
    /// The aux data is left unchanged.
330
    /// `qual` is Phred-scaled quality values, without any offset.
331
    /// NOTE: seq.len() must equal qual.len() or this method
332
    /// will panic. If you don't have quality values use
333
    /// `let quals = vec![ 255 as u8; seq.len()];` as a placeholder that will
334
    /// be recognized as missing QVs by `samtools`.
335
    pub fn set(&mut self, qname: &[u8], cigar: Option<&CigarString>, seq: &[u8], qual: &[u8]) {
1✔
336
        assert!(qname.len() < 255);
1✔
337
        assert_eq!(seq.len(), qual.len(), "seq.len() must equal qual.len()");
1✔
338

339
        self.cigar = None;
1✔
340

341
        let cigar_width = if let Some(cigar_string) = cigar {
2✔
342
            cigar_string.len()
1✔
343
        } else {
344
            0
×
345
        } * 4;
346
        let q_len = qname.len() + 1;
2✔
347
        let extranul = extranul_from_qname(qname);
1✔
348

349
        let orig_aux_offset = self.qname_capacity()
4✔
350
            + 4 * self.cigar_len()
1✔
351
            + (self.seq_len() + 1) / 2
2✔
352
            + self.seq_len();
1✔
353
        let new_aux_offset = q_len + extranul + cigar_width + (seq.len() + 1) / 2 + qual.len();
2✔
354
        assert!(orig_aux_offset <= self.inner.l_data as usize);
1✔
355
        let aux_len = self.inner.l_data as usize - orig_aux_offset;
1✔
356
        self.inner_mut().l_data = (new_aux_offset + aux_len) as i32;
2✔
357
        if (self.inner().m_data as i32) < self.inner().l_data {
1✔
358
            // Verbosity due to lexical borrowing
359
            let l_data = self.inner().l_data;
1✔
360
            self.realloc_var_data(l_data as usize);
1✔
361
        }
362

363
        // Copy the aux data.
364
        if aux_len > 0 && orig_aux_offset != new_aux_offset {
2✔
365
            let data =
1✔
366
                unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().m_data as usize) };
367
            data.copy_within(orig_aux_offset..orig_aux_offset + aux_len, new_aux_offset);
1✔
368
        }
369

370
        let data =
1✔
371
            unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
372

373
        // qname
374
        utils::copy_memory(qname, data);
1✔
375
        for i in 0..=extranul {
4✔
376
            data[qname.len() + i] = b'\0';
2✔
377
        }
378
        let mut i = q_len + extranul;
2✔
379
        self.inner_mut().core.l_qname = i as u16;
1✔
380
        self.inner_mut().core.l_extranul = extranul as u8;
1✔
381

382
        // cigar
383
        if let Some(cigar_string) = cigar {
3✔
384
            let cigar_data = unsafe {
385
                //cigar is always aligned to 4 bytes (see extranul above) - so this is safe
386
                #[allow(clippy::cast_ptr_alignment)]
387
                slice::from_raw_parts_mut(data[i..].as_ptr() as *mut u32, cigar_string.len())
2✔
388
            };
389
            for (i, c) in cigar_string.iter().enumerate() {
4✔
390
                cigar_data[i] = c.encode();
2✔
391
            }
392
            self.inner_mut().core.n_cigar = cigar_string.len() as u32;
1✔
393
            i += cigar_string.len() * 4;
2✔
394
        } else {
395
            self.inner_mut().core.n_cigar = 0;
×
396
        };
397

398
        // seq
399
        {
400
            for j in (0..seq.len()).step_by(2) {
4✔
401
                data[i + j / 2] = ENCODE_BASE[seq[j] as usize] << 4
4✔
402
                    | (if j + 1 < seq.len() {
4✔
403
                        ENCODE_BASE[seq[j + 1] as usize]
2✔
404
                    } else {
405
                        0
1✔
406
                    });
407
            }
408
            self.inner_mut().core.l_qseq = seq.len() as i32;
1✔
409
            i += (seq.len() + 1) / 2;
2✔
410
        }
411

412
        // qual
413
        utils::copy_memory(qual, &mut data[i..]);
2✔
414
    }
415

416
    /// Replace current qname with a new one.
417
    pub fn set_qname(&mut self, new_qname: &[u8]) {
1✔
418
        // 251 + 1NUL is the max 32-bit aligned value that fits in u8
419
        assert!(new_qname.len() < 252);
1✔
420

421
        let old_q_len = self.qname_capacity();
1✔
422
        // We're going to add a terminal NUL
423
        let extranul = extranul_from_qname(new_qname);
1✔
424
        let new_q_len = new_qname.len() + 1 + extranul;
1✔
425

426
        // Length of data after qname
427
        let other_len = self.inner_mut().l_data - old_q_len as i32;
2✔
428

429
        if new_q_len < old_q_len && self.inner().l_data > (old_q_len as i32) {
3✔
430
            self.inner_mut().l_data -= (old_q_len - new_q_len) as i32;
2✔
431
        } else if new_q_len > old_q_len {
1✔
432
            self.inner_mut().l_data += (new_q_len - old_q_len) as i32;
1✔
433

434
            // Reallocate if necessary
435
            if (self.inner().m_data as i32) < self.inner().l_data {
1✔
436
                // Verbosity due to lexical borrowing
437
                let l_data = self.inner().l_data;
1✔
438
                self.realloc_var_data(l_data as usize);
1✔
439
            }
440
        }
441

442
        if new_q_len != old_q_len {
1✔
443
            // Move other data to new location
444
            unsafe {
445
                let data = slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize);
1✔
446

447
                ::libc::memmove(
448
                    data.as_mut_ptr().add(new_q_len) as *mut ::libc::c_void,
449
                    data.as_mut_ptr().add(old_q_len) as *mut ::libc::c_void,
450
                    other_len as usize,
1✔
451
                );
452
            }
453
        }
454

455
        // Copy qname data
456
        let data =
1✔
457
            unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
458
        utils::copy_memory(new_qname, data);
1✔
459
        for i in 0..=extranul {
4✔
460
            data[new_q_len - i - 1] = b'\0';
1✔
461
        }
462
        self.inner_mut().core.l_qname = new_q_len as u16;
1✔
463
        self.inner_mut().core.l_extranul = extranul as u8;
1✔
464
    }
465

466
    fn realloc_var_data(&mut self, new_len: usize) {
1✔
467
        // pad request
468
        let new_len = new_len as u32;
1✔
469
        let new_request = new_len + 32 - (new_len % 32);
1✔
470

471
        let ptr = unsafe {
472
            ::libc::realloc(
1✔
473
                self.inner().data as *mut ::libc::c_void,
1✔
474
                new_request as usize,
1✔
475
            ) as *mut u8
476
        };
477

478
        if ptr.is_null() {
1✔
479
            panic!("ran out of memory in rust_htslib trying to realloc");
×
480
        }
481

482
        // don't update m_data until we know we have
483
        // a successful allocation.
484
        self.inner_mut().m_data = new_request;
1✔
485
        self.inner_mut().data = ptr;
1✔
486

487
        // we now own inner.data
488
        self.own = true;
1✔
489
    }
490

491
    pub fn cigar_len(&self) -> usize {
3✔
492
        self.inner().core.n_cigar as usize
3✔
493
    }
494

495
    /// Get reference to raw cigar string representation (as stored in BAM file).
496
    /// Usually, the method `Record::cigar` should be used instead.
497
    pub fn raw_cigar(&self) -> &[u32] {
1✔
498
        //cigar is always aligned to 4 bytes - so this is safe
499
        #[allow(clippy::cast_ptr_alignment)]
500
        unsafe {
501
            slice::from_raw_parts(
502
                self.data()[self.qname_capacity()..].as_ptr() as *const u32,
1✔
503
                self.cigar_len(),
1✔
504
            )
505
        }
506
    }
507

508
    /// Return unpacked cigar string. This will create a fresh copy the Cigar data.
509
    pub fn cigar(&self) -> CigarStringView {
1✔
510
        match self.cigar {
1✔
511
            Some(ref c) => c.clone(),
×
512
            None => self.unpack_cigar(),
1✔
513
        }
514
    }
515

516
    // Return unpacked cigar string. This returns None unless you have first called `bam::Record::cache_cigar`.
517
    pub fn cigar_cached(&self) -> Option<&CigarStringView> {
×
518
        self.cigar.as_ref()
×
519
    }
520

521
    /// Decode the cigar string and cache it inside the `Record`
522
    pub fn cache_cigar(&mut self) {
×
523
        self.cigar = Some(self.unpack_cigar())
×
524
    }
525

526
    /// Unpack cigar string. Complexity: O(k) with k being the length of the cigar string.
527
    fn unpack_cigar(&self) -> CigarStringView {
1✔
528
        CigarString(
529
            self.raw_cigar()
1✔
530
                .iter()
531
                .map(|&c| {
2✔
532
                    let len = c >> 4;
1✔
533
                    match c & 0b1111 {
1✔
534
                        0 => Cigar::Match(len),
1✔
535
                        1 => Cigar::Ins(len),
1✔
536
                        2 => Cigar::Del(len),
1✔
537
                        3 => Cigar::RefSkip(len),
1✔
538
                        4 => Cigar::SoftClip(len),
1✔
539
                        5 => Cigar::HardClip(len),
1✔
540
                        6 => Cigar::Pad(len),
×
541
                        7 => Cigar::Equal(len),
1✔
542
                        8 => Cigar::Diff(len),
1✔
543
                        _ => panic!("Unexpected cigar operation"),
×
544
                    }
545
                })
546
                .collect(),
547
        )
548
        .into_view(self.pos())
2✔
549
    }
550

551
    pub fn seq_len(&self) -> usize {
3✔
552
        self.inner().core.l_qseq as usize
3✔
553
    }
554

555
    fn seq_data(&self) -> &[u8] {
3✔
556
        let offset = self.qname_capacity() + self.cigar_len() * 4;
3✔
557
        &self.data()[offset..][..(self.seq_len() + 1) / 2]
9✔
558
    }
559

560
    /// Get read sequence. Complexity: O(1).
561
    pub fn seq(&self) -> Seq<'_> {
3✔
562
        Seq {
563
            encoded: self.seq_data(),
3✔
564
            len: self.seq_len(),
3✔
565
        }
566
    }
567

568
    /// Get base qualities (PHRED-scaled probability that base is wrong).
569
    /// This does not entail any offsets, hence the qualities can be used directly without
570
    /// e.g. subtracting 33. Complexity: O(1).
571
    pub fn qual(&self) -> &[u8] {
1✔
572
        &self.data()[self.qname_capacity() + self.cigar_len() * 4 + (self.seq_len() + 1) / 2..]
1✔
573
            [..self.seq_len()]
1✔
574
    }
575

576
    /// Look up an auxiliary field by its tag.
577
    ///
578
    /// Only the first two bytes of a given tag are used for the look-up of a field.
579
    /// See [`Aux`] for more details.
580
    pub fn aux(&self, tag: &[u8]) -> Result<Aux<'_>> {
3✔
581
        let c_str = ffi::CString::new(tag).map_err(|_| Error::BamAuxStringError)?;
3✔
582
        let aux = unsafe {
583
            htslib::bam_aux_get(
584
                &self.inner as *const htslib::bam1_t,
3✔
585
                c_str.as_ptr() as *mut c_char,
6✔
586
            )
587
        };
588
        unsafe { Self::read_aux_field(aux).map(|(aux_field, _length)| aux_field) }
9✔
589
    }
590

591
    unsafe fn read_aux_field<'a>(aux: *const u8) -> Result<(Aux<'a>, usize)> {
3✔
592
        const TAG_LEN: isize = 2;
593
        // Used for skipping type identifier
594
        const TYPE_ID_LEN: isize = 1;
595

596
        if aux.is_null() {
3✔
597
            return Err(Error::BamAuxTagNotFound);
3✔
598
        }
599

600
        let (data, type_size) = match *aux {
6✔
601
            b'A' => {
×
602
                let type_size = size_of::<u8>();
×
603
                (Aux::Char(*aux.offset(TYPE_ID_LEN)), type_size)
×
604
            }
605
            b'c' => {
×
606
                let type_size = size_of::<i8>();
1✔
607
                (Aux::I8(*aux.offset(TYPE_ID_LEN).cast::<i8>()), type_size)
1✔
608
            }
609
            b'C' => {
×
610
                let type_size = size_of::<u8>();
1✔
611
                (Aux::U8(*aux.offset(TYPE_ID_LEN)), type_size)
1✔
612
            }
613
            b's' => {
×
614
                let type_size = size_of::<i16>();
1✔
615
                (
616
                    Aux::I16(
1✔
617
                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
2✔
618
                            .read_i16::<LittleEndian>()
×
619
                            .map_err(|_| Error::BamAuxParsingError)?,
×
620
                    ),
621
                    type_size,
×
622
                )
623
            }
624
            b'S' => {
×
625
                let type_size = size_of::<u16>();
1✔
626
                (
627
                    Aux::U16(
1✔
628
                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
2✔
629
                            .read_u16::<LittleEndian>()
×
630
                            .map_err(|_| Error::BamAuxParsingError)?,
×
631
                    ),
632
                    type_size,
×
633
                )
634
            }
635
            b'i' => {
×
636
                let type_size = size_of::<i32>();
2✔
637
                (
638
                    Aux::I32(
2✔
639
                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
4✔
640
                            .read_i32::<LittleEndian>()
×
641
                            .map_err(|_| Error::BamAuxParsingError)?,
×
642
                    ),
643
                    type_size,
×
644
                )
645
            }
646
            b'I' => {
×
647
                let type_size = size_of::<u32>();
1✔
648
                (
649
                    Aux::U32(
1✔
650
                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
2✔
651
                            .read_u32::<LittleEndian>()
×
652
                            .map_err(|_| Error::BamAuxParsingError)?,
×
653
                    ),
654
                    type_size,
×
655
                )
656
            }
657
            b'f' => {
×
658
                let type_size = size_of::<f32>();
1✔
659
                (
660
                    Aux::Float(
1✔
661
                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
2✔
662
                            .read_f32::<LittleEndian>()
×
663
                            .map_err(|_| Error::BamAuxParsingError)?,
×
664
                    ),
665
                    type_size,
×
666
                )
667
            }
668
            b'd' => {
×
669
                let type_size = size_of::<f64>();
1✔
670
                (
671
                    Aux::Double(
1✔
672
                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
2✔
673
                            .read_f64::<LittleEndian>()
×
674
                            .map_err(|_| Error::BamAuxParsingError)?,
×
675
                    ),
676
                    type_size,
×
677
                )
678
            }
679
            b'Z' | b'H' => {
×
680
                let c_str = ffi::CStr::from_ptr(aux.offset(TYPE_ID_LEN).cast::<c_char>());
1✔
681
                let rust_str = c_str.to_str().map_err(|_| Error::BamAuxParsingError)?;
2✔
682
                (Aux::String(rust_str), c_str.to_bytes_with_nul().len())
1✔
683
            }
684
            b'B' => {
×
685
                const ARRAY_INNER_TYPE_LEN: isize = 1;
686
                const ARRAY_COUNT_LEN: isize = 4;
687

688
                // Used for skipping metadata
689
                let array_data_offset = TYPE_ID_LEN + ARRAY_INNER_TYPE_LEN + ARRAY_COUNT_LEN;
6✔
690

691
                let length =
6✔
692
                    slice::from_raw_parts(aux.offset(TYPE_ID_LEN + ARRAY_INNER_TYPE_LEN), 4)
×
693
                        .read_u32::<LittleEndian>()
×
694
                        .map_err(|_| Error::BamAuxParsingError)? as usize;
×
695

696
                // Return tuples of an `Aux` enum and the length of data + metadata in bytes
697
                let (array_data, array_size) = match *aux.offset(TYPE_ID_LEN) {
6✔
698
                    b'c' => (
1✔
699
                        Aux::ArrayI8(AuxArray::<'a, i8>::from_bytes(slice::from_raw_parts(
1✔
700
                            aux.offset(array_data_offset),
×
701
                            length,
×
702
                        ))),
703
                        length,
×
704
                    ),
705
                    b'C' => (
1✔
706
                        Aux::ArrayU8(AuxArray::<'a, u8>::from_bytes(slice::from_raw_parts(
1✔
707
                            aux.offset(array_data_offset),
×
708
                            length,
×
709
                        ))),
710
                        length,
×
711
                    ),
712
                    b's' => (
1✔
713
                        Aux::ArrayI16(AuxArray::<'a, i16>::from_bytes(slice::from_raw_parts(
1✔
714
                            aux.offset(array_data_offset),
×
715
                            length * size_of::<i16>(),
1✔
716
                        ))),
717
                        length * std::mem::size_of::<i16>(),
1✔
718
                    ),
719
                    b'S' => (
1✔
720
                        Aux::ArrayU16(AuxArray::<'a, u16>::from_bytes(slice::from_raw_parts(
1✔
721
                            aux.offset(array_data_offset),
×
722
                            length * size_of::<u16>(),
1✔
723
                        ))),
724
                        length * std::mem::size_of::<u16>(),
1✔
725
                    ),
726
                    b'i' => (
1✔
727
                        Aux::ArrayI32(AuxArray::<'a, i32>::from_bytes(slice::from_raw_parts(
1✔
728
                            aux.offset(array_data_offset),
×
729
                            length * size_of::<i32>(),
1✔
730
                        ))),
731
                        length * std::mem::size_of::<i32>(),
1✔
732
                    ),
733
                    b'I' => (
1✔
734
                        Aux::ArrayU32(AuxArray::<'a, u32>::from_bytes(slice::from_raw_parts(
1✔
735
                            aux.offset(array_data_offset),
×
736
                            length * size_of::<u32>(),
1✔
737
                        ))),
738
                        length * std::mem::size_of::<u32>(),
1✔
739
                    ),
740
                    b'f' => (
3✔
741
                        Aux::ArrayFloat(AuxArray::<f32>::from_bytes(slice::from_raw_parts(
3✔
742
                            aux.offset(array_data_offset),
×
743
                            length * size_of::<f32>(),
3✔
744
                        ))),
745
                        length * std::mem::size_of::<f32>(),
3✔
746
                    ),
747
                    _ => {
×
748
                        return Err(Error::BamAuxUnknownType);
×
749
                    }
750
                };
751
                (
752
                    array_data,
×
753
                    // Offset: array-specific metadata + array size
754
                    ARRAY_INNER_TYPE_LEN as usize + ARRAY_COUNT_LEN as usize + array_size,
6✔
755
                )
756
            }
757
            _ => {
×
758
                return Err(Error::BamAuxUnknownType);
×
759
            }
760
        };
761

762
        // Offset: metadata + type size
763
        Ok((data, TAG_LEN as usize + TYPE_ID_LEN as usize + type_size))
6✔
764
    }
765

766
    /// Returns an iterator over the auxiliary fields of the record.
767
    ///
768
    /// When an error occurs, the `Err` variant will be returned
769
    /// and the iterator will not be able to advance anymore.
770
    pub fn aux_iter(&self) -> AuxIter {
1✔
771
        AuxIter {
772
            // In order to get to the aux data section of a `bam::Record`
773
            // we need to skip fields in front of it
774
            aux: &self.data()[
2✔
775
                // NUL terminated read name:
776
                self.qname_capacity()
777
                // CIGAR (uint32_t):
778
                + self.cigar_len() * std::mem::size_of::<u32>()
779
                // Read sequence (4-bit encoded):
780
                + (self.seq_len() + 1) / 2
781
                // Base qualities (char):
782
                + self.seq_len()..],
783
        }
784
    }
785

786
    /// Add auxiliary data.
787
    pub fn push_aux(&mut self, tag: &[u8], value: Aux<'_>) -> Result<()> {
3✔
788
        // Don't allow pushing aux data when the given tag is already present in the record.
789
        // `htslib` seems to allow this (for non-array values), which can lead to problems
790
        // since retrieving aux fields consumes &[u8; 2] and yields one field only.
791
        if self.aux(tag).is_ok() {
4✔
792
            return Err(Error::BamAuxTagAlreadyPresent);
1✔
793
        }
794

795
        let ctag = tag.as_ptr() as *mut c_char;
3✔
796
        let ret = unsafe {
797
            match value {
3✔
798
                Aux::Char(v) => htslib::bam_aux_append(
799
                    self.inner_ptr_mut(),
×
800
                    ctag,
801
                    b'A' as c_char,
802
                    size_of::<u8>() as i32,
×
803
                    [v].as_mut_ptr() as *mut u8,
×
804
                ),
805
                Aux::I8(v) => htslib::bam_aux_append(
806
                    self.inner_ptr_mut(),
1✔
807
                    ctag,
808
                    b'c' as c_char,
809
                    size_of::<i8>() as i32,
1✔
810
                    [v].as_mut_ptr() as *mut u8,
1✔
811
                ),
812
                Aux::U8(v) => htslib::bam_aux_append(
813
                    self.inner_ptr_mut(),
1✔
814
                    ctag,
815
                    b'C' as c_char,
816
                    size_of::<u8>() as i32,
1✔
817
                    [v].as_mut_ptr() as *mut u8,
1✔
818
                ),
819
                Aux::I16(v) => htslib::bam_aux_append(
820
                    self.inner_ptr_mut(),
1✔
821
                    ctag,
822
                    b's' as c_char,
823
                    size_of::<i16>() as i32,
1✔
824
                    [v].as_mut_ptr() as *mut u8,
1✔
825
                ),
826
                Aux::U16(v) => htslib::bam_aux_append(
827
                    self.inner_ptr_mut(),
1✔
828
                    ctag,
829
                    b'S' as c_char,
830
                    size_of::<u16>() as i32,
1✔
831
                    [v].as_mut_ptr() as *mut u8,
1✔
832
                ),
833
                Aux::I32(v) => htslib::bam_aux_append(
834
                    self.inner_ptr_mut(),
2✔
835
                    ctag,
836
                    b'i' as c_char,
837
                    size_of::<i32>() as i32,
2✔
838
                    [v].as_mut_ptr() as *mut u8,
2✔
839
                ),
840
                Aux::U32(v) => htslib::bam_aux_append(
841
                    self.inner_ptr_mut(),
1✔
842
                    ctag,
843
                    b'I' as c_char,
844
                    size_of::<u32>() as i32,
1✔
845
                    [v].as_mut_ptr() as *mut u8,
1✔
846
                ),
847
                Aux::Float(v) => htslib::bam_aux_append(
848
                    self.inner_ptr_mut(),
1✔
849
                    ctag,
850
                    b'f' as c_char,
851
                    size_of::<f32>() as i32,
1✔
852
                    [v].as_mut_ptr() as *mut u8,
1✔
853
                ),
854
                // Not part of specs but implemented in `htslib`:
855
                Aux::Double(v) => htslib::bam_aux_append(
856
                    self.inner_ptr_mut(),
1✔
857
                    ctag,
858
                    b'd' as c_char,
859
                    size_of::<f64>() as i32,
1✔
860
                    [v].as_mut_ptr() as *mut u8,
1✔
861
                ),
862
                Aux::String(v) => {
1✔
863
                    let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
2✔
864
                    htslib::bam_aux_append(
865
                        self.inner_ptr_mut(),
1✔
866
                        ctag,
867
                        b'Z' as c_char,
868
                        (v.len() + 1) as i32,
1✔
869
                        c_str.as_ptr() as *mut u8,
2✔
870
                    )
871
                }
872
                Aux::HexByteArray(v) => {
×
873
                    let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
×
874
                    htslib::bam_aux_append(
875
                        self.inner_ptr_mut(),
×
876
                        ctag,
877
                        b'H' as c_char,
878
                        (v.len() + 1) as i32,
×
879
                        c_str.as_ptr() as *mut u8,
×
880
                    )
881
                }
882
                // Not sure it's safe to cast an immutable slice to a mutable pointer in the following branches
883
                Aux::ArrayI8(aux_array) => match aux_array {
1✔
884
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
885
                        self.inner_ptr_mut(),
1✔
886
                        ctag,
887
                        b'c',
888
                        inner.len() as u32,
1✔
889
                        inner.slice.as_ptr() as *mut ::libc::c_void,
1✔
890
                    ),
891
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
892
                        self.inner_ptr_mut(),
1✔
893
                        ctag,
894
                        b'c',
895
                        inner.len() as u32,
1✔
896
                        inner.slice.as_ptr() as *mut ::libc::c_void,
1✔
897
                    ),
898
                },
899
                Aux::ArrayU8(aux_array) => match aux_array {
1✔
900
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
901
                        self.inner_ptr_mut(),
1✔
902
                        ctag,
903
                        b'C',
904
                        inner.len() as u32,
1✔
905
                        inner.slice.as_ptr() as *mut ::libc::c_void,
1✔
906
                    ),
907
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
908
                        self.inner_ptr_mut(),
1✔
909
                        ctag,
910
                        b'C',
911
                        inner.len() as u32,
1✔
912
                        inner.slice.as_ptr() as *mut ::libc::c_void,
1✔
913
                    ),
914
                },
915
                Aux::ArrayI16(aux_array) => match aux_array {
1✔
916
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
917
                        self.inner_ptr_mut(),
1✔
918
                        ctag,
919
                        b's',
920
                        inner.len() as u32,
1✔
921
                        inner.slice.as_ptr() as *mut ::libc::c_void,
1✔
922
                    ),
923
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
924
                        self.inner_ptr_mut(),
1✔
925
                        ctag,
926
                        b's',
927
                        inner.len() as u32,
1✔
928
                        inner.slice.as_ptr() as *mut ::libc::c_void,
1✔
929
                    ),
930
                },
931
                Aux::ArrayU16(aux_array) => match aux_array {
1✔
932
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
933
                        self.inner_ptr_mut(),
1✔
934
                        ctag,
935
                        b'S',
936
                        inner.len() as u32,
1✔
937
                        inner.slice.as_ptr() as *mut ::libc::c_void,
1✔
938
                    ),
939
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
940
                        self.inner_ptr_mut(),
1✔
941
                        ctag,
942
                        b'S',
943
                        inner.len() as u32,
1✔
944
                        inner.slice.as_ptr() as *mut ::libc::c_void,
1✔
945
                    ),
946
                },
947
                Aux::ArrayI32(aux_array) => match aux_array {
1✔
948
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
949
                        self.inner_ptr_mut(),
1✔
950
                        ctag,
951
                        b'i',
952
                        inner.len() as u32,
1✔
953
                        inner.slice.as_ptr() as *mut ::libc::c_void,
1✔
954
                    ),
955
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
956
                        self.inner_ptr_mut(),
1✔
957
                        ctag,
958
                        b'i',
959
                        inner.len() as u32,
1✔
960
                        inner.slice.as_ptr() as *mut ::libc::c_void,
1✔
961
                    ),
962
                },
963
                Aux::ArrayU32(aux_array) => match aux_array {
1✔
964
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
965
                        self.inner_ptr_mut(),
1✔
966
                        ctag,
967
                        b'I',
968
                        inner.len() as u32,
1✔
969
                        inner.slice.as_ptr() as *mut ::libc::c_void,
1✔
970
                    ),
971
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
972
                        self.inner_ptr_mut(),
1✔
973
                        ctag,
974
                        b'I',
975
                        inner.len() as u32,
1✔
976
                        inner.slice.as_ptr() as *mut ::libc::c_void,
1✔
977
                    ),
978
                },
979
                Aux::ArrayFloat(aux_array) => match aux_array {
3✔
980
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
981
                        self.inner_ptr_mut(),
3✔
982
                        ctag,
983
                        b'f',
984
                        inner.len() as u32,
3✔
985
                        inner.slice.as_ptr() as *mut ::libc::c_void,
3✔
986
                    ),
987
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
988
                        self.inner_ptr_mut(),
1✔
989
                        ctag,
990
                        b'f',
991
                        inner.len() as u32,
1✔
992
                        inner.slice.as_ptr() as *mut ::libc::c_void,
1✔
993
                    ),
994
                },
995
            }
996
        };
997

998
        if ret < 0 {
6✔
999
            Err(Error::BamAux)
×
1000
        } else {
1001
            Ok(())
3✔
1002
        }
1003
    }
1004

1005
    // Delete auxiliary tag.
1006
    pub fn remove_aux(&mut self, tag: &[u8]) -> Result<()> {
1✔
1007
        let c_str = ffi::CString::new(tag).map_err(|_| Error::BamAuxStringError)?;
1✔
1008
        let aux = unsafe {
1009
            htslib::bam_aux_get(
1010
                &self.inner as *const htslib::bam1_t,
1✔
1011
                c_str.as_ptr() as *mut c_char,
2✔
1012
            )
1013
        };
1014
        unsafe {
1015
            if aux.is_null() {
2✔
1016
                Err(Error::BamAuxTagNotFound)
1✔
1017
            } else {
1018
                htslib::bam_aux_del(self.inner_ptr_mut(), aux);
2✔
1019
                Ok(())
1✔
1020
            }
1021
        }
1022
    }
1023

1024
    /// Access the base modifications associated with this Record through the MM tag.
1025
    /// Example:
1026
    /// ```
1027
    ///    use rust_htslib::bam::{Read, Reader, Record};
1028
    ///    let mut bam = Reader::from_path(&"test/base_mods/MM-orient.sam").unwrap();
1029
    ///    let mut mod_count = 0;
1030
    ///    for r in bam.records() {
1031
    ///        let record = r.unwrap();
1032
    ///        if let Ok(mods) = record.basemods_iter() {
1033
    ///            // print metadata for the modifications present in this record
1034
    ///            for mod_code in mods.recorded() {
1035
    ///                if let Ok(mod_metadata) = mods.query_type(*mod_code) {
1036
    ///                    println!("mod found with code {}/{} flags: [{} {} {}]",
1037
    ///                              mod_code, *mod_code as u8 as char,
1038
    ///                              mod_metadata.strand, mod_metadata.implicit, mod_metadata.canonical as u8 as char);
1039
    ///                }
1040
    ///            }
1041
    ///
1042
    ///            // iterate over the modifications in this record
1043
    ///            // the modifications are returned as a tuple with the
1044
    ///            // position within SEQ and an hts_base_mod struct
1045
    ///            for res in mods {
1046
    ///                if let Ok( (position, m) ) = res {
1047
    ///                    println!("{} {},{}", position, m.modified_base as u8 as char, m.qual);
1048
    ///                    mod_count += 1;
1049
    ///                }
1050
    ///            }
1051
    ///        };
1052
    ///    }
1053
    ///    assert_eq!(mod_count, 14);
1054
    /// ```
1055
    pub fn basemods_iter(&self) -> Result<BaseModificationsIter> {
2✔
1056
        BaseModificationsIter::new(self)
2✔
1057
    }
1058

1059
    /// An iterator that returns all of the modifications for each position as a vector.
1060
    /// This is useful for the case where multiple possible modifications can be annotated
1061
    /// at a single position (for example a C could be 5-mC or 5-hmC)
1062
    pub fn basemods_position_iter(&self) -> Result<BaseModificationsPositionIter> {
1✔
1063
        BaseModificationsPositionIter::new(self)
1✔
1064
    }
1065

1066
    /// Infer read pair orientation from record. Returns `SequenceReadPairOrientation::None` if record
1067
    /// is not paired, mates are not mapping to the same contig, or mates start at the
1068
    /// same position.
1069
    pub fn read_pair_orientation(&self) -> SequenceReadPairOrientation {
1✔
1070
        if self.is_paired()
1✔
1071
            && !self.is_unmapped()
1✔
1072
            && !self.is_mate_unmapped()
1✔
1073
            && self.tid() == self.mtid()
1✔
1074
        {
1075
            if self.pos() == self.mpos() {
1✔
1076
                // both reads start at the same position, we cannot decide on the orientation.
1077
                return SequenceReadPairOrientation::None;
×
1078
            }
1079

1080
            let (is_reverse, is_first_in_template, is_mate_reverse) = if self.pos() < self.mpos() {
3✔
1081
                // given record is the left one
1082
                (
1083
                    self.is_reverse(),
1✔
1084
                    self.is_first_in_template(),
1✔
1085
                    self.is_mate_reverse(),
1✔
1086
                )
1087
            } else {
1088
                // given record is the right one
1089
                (
1090
                    self.is_mate_reverse(),
1✔
1091
                    self.is_last_in_template(),
1✔
1092
                    self.is_reverse(),
1✔
1093
                )
1094
            };
1095
            match (is_reverse, is_first_in_template, is_mate_reverse) {
1✔
1096
                (false, false, false) => SequenceReadPairOrientation::F2F1,
×
1097
                (false, false, true) => SequenceReadPairOrientation::F2R1,
×
1098
                (false, true, false) => SequenceReadPairOrientation::F1F2,
×
1099
                (true, false, false) => SequenceReadPairOrientation::R2F1,
×
1100
                (false, true, true) => SequenceReadPairOrientation::F1R2,
1✔
1101
                (true, false, true) => SequenceReadPairOrientation::R2R1,
×
1102
                (true, true, false) => SequenceReadPairOrientation::R1F2,
×
1103
                (true, true, true) => SequenceReadPairOrientation::R1R2,
×
1104
            }
1105
        } else {
1106
            SequenceReadPairOrientation::None
×
1107
        }
1108
    }
1109

1110
    flag!(is_paired, set_paired, unset_paired, 1u16);
1111
    flag!(is_proper_pair, set_proper_pair, unset_proper_pair, 2u16);
1112
    flag!(is_unmapped, set_unmapped, unset_unmapped, 4u16);
1113
    flag!(
1114
        is_mate_unmapped,
1115
        set_mate_unmapped,
1116
        unset_mate_unmapped,
1117
        8u16
1118
    );
1119
    flag!(is_reverse, set_reverse, unset_reverse, 16u16);
1120
    flag!(is_mate_reverse, set_mate_reverse, unset_mate_reverse, 32u16);
1121
    flag!(
1122
        is_first_in_template,
1123
        set_first_in_template,
1124
        unset_first_in_template,
1125
        64u16
1126
    );
1127
    flag!(
1128
        is_last_in_template,
1129
        set_last_in_template,
1130
        unset_last_in_template,
1131
        128u16
1132
    );
1133
    flag!(is_secondary, set_secondary, unset_secondary, 256u16);
1134
    flag!(
1135
        is_quality_check_failed,
1136
        set_quality_check_failed,
1137
        unset_quality_check_failed,
1138
        512u16
1139
    );
1140
    flag!(is_duplicate, set_duplicate, unset_duplicate, 1024u16);
1141
    flag!(
1142
        is_supplementary,
1143
        set_supplementary,
1144
        unset_supplementary,
1145
        2048u16
1146
    );
1147
}
1148

1149
impl Drop for Record {
1150
    fn drop(&mut self) {
10✔
1151
        if self.own {
10✔
1152
            unsafe { ::libc::free(self.inner.data as *mut ::libc::c_void) }
9✔
1153
        }
1154
    }
1155
}
1156

1157
impl SequenceRead for Record {
1158
    fn name(&self) -> &[u8] {
×
1159
        self.qname()
×
1160
    }
1161

1162
    fn base(&self, i: usize) -> u8 {
×
1163
        *decode_base_unchecked(encoded_base(self.seq_data(), i))
×
1164
    }
1165

1166
    fn base_qual(&self, i: usize) -> u8 {
×
1167
        self.qual()[i]
×
1168
    }
1169

1170
    fn len(&self) -> usize {
×
1171
        self.seq_len()
×
1172
    }
1173

1174
    fn is_empty(&self) -> bool {
×
1175
        self.len() == 0
×
1176
    }
1177
}
1178

1179
impl genome::AbstractInterval for Record {
1180
    /// Return contig name. Panics if record does not know its header (which happens if it has not been read from a file).
1181
    fn contig(&self) -> &str {
×
1182
        let tid = self.tid();
×
1183
        if tid < 0 {
×
1184
            panic!("invalid tid, must be at least zero");
×
1185
        }
1186
        str::from_utf8(
1187
            self.header
×
1188
                .as_ref()
1189
                .expect(
1190
                    "header must be set (this is the case if the record has been read from a file)",
1191
                )
1192
                .tid2name(tid as u32),
1193
        )
1194
        .expect("unable to interpret contig name as UTF-8")
1195
    }
1196

1197
    /// Return genomic range covered by alignment. Panics if `Record::cache_cigar()` has not been called first or `Record::pos()` is less than zero.
1198
    fn range(&self) -> ops::Range<genome::Position> {
×
1199
        let end_pos = self
×
1200
            .cigar_cached()
1201
            .expect("cigar has not been cached yet, call cache_cigar() first")
1202
            .end_pos() as u64;
1203

1204
        if self.pos() < 0 {
×
1205
            panic!("invalid position, must be positive")
×
1206
        }
1207

1208
        self.pos() as u64..end_pos
×
1209
    }
1210
}
1211

1212
/// Auxiliary record data
1213
///
1214
/// The specification allows a wide range of types to be stored as an auxiliary data field of a BAM record.
1215
///
1216
/// Please note that the [`Aux::Double`] variant is _not_ part of the specification, but it is supported by `htslib`.
1217
///
1218
/// # Examples
1219
///
1220
/// ```
1221
/// use rust_htslib::{
1222
///     bam,
1223
///     bam::record::{Aux, AuxArray},
1224
///     errors::Error,
1225
/// };
1226
///
1227
/// //Set up BAM record
1228
/// let bam_header = bam::Header::new();
1229
/// let mut record = bam::Record::from_sam(
1230
///     &mut bam::HeaderView::from_header(&bam_header),
1231
///     "ali1\t4\t*\t0\t0\t*\t*\t0\t0\tACGT\tFFFF".as_bytes(),
1232
/// )
1233
/// .unwrap();
1234
///
1235
/// // Add an integer field
1236
/// let aux_integer_field = Aux::I32(1234);
1237
/// record.push_aux(b"XI", aux_integer_field).unwrap();
1238
///
1239
/// match record.aux(b"XI") {
1240
///     Ok(value) => {
1241
///         // Typically, callers expect an aux field to be of a certain type.
1242
///         // If that's not the case, the value can be `match`ed exhaustively.
1243
///         if let Aux::I32(v) = value {
1244
///             assert_eq!(v, 1234);
1245
///         }
1246
///     }
1247
///     Err(e) => {
1248
///         panic!("Error reading aux field: {}", e);
1249
///     }
1250
/// }
1251
///
1252
/// // Add an array field
1253
/// let array_like_data = vec![0.4, 0.3, 0.2, 0.1];
1254
/// let slice_of_data = &array_like_data;
1255
/// let aux_array: AuxArray<f32> = slice_of_data.into();
1256
/// let aux_array_field = Aux::ArrayFloat(aux_array);
1257
/// record.push_aux(b"XA", aux_array_field).unwrap();
1258
///
1259
/// if let Ok(Aux::ArrayFloat(array)) = record.aux(b"XA") {
1260
///     let read_array = array.iter().collect::<Vec<_>>();
1261
///     assert_eq!(read_array, array_like_data);
1262
/// } else {
1263
///     panic!("Could not read array data");
1264
/// }
1265
/// ```
1266
#[derive(Debug, PartialEq)]
1267
pub enum Aux<'a> {
1268
    Char(u8),
1269
    I8(i8),
1270
    U8(u8),
1271
    I16(i16),
1272
    U16(u16),
1273
    I32(i32),
1274
    U32(u32),
1275
    Float(f32),
1276
    Double(f64), // Not part of specs but implemented in `htslib`
1277
    String(&'a str),
1278
    HexByteArray(&'a str),
1279
    ArrayI8(AuxArray<'a, i8>),
1280
    ArrayU8(AuxArray<'a, u8>),
1281
    ArrayI16(AuxArray<'a, i16>),
1282
    ArrayU16(AuxArray<'a, u16>),
1283
    ArrayI32(AuxArray<'a, i32>),
1284
    ArrayU32(AuxArray<'a, u32>),
1285
    ArrayFloat(AuxArray<'a, f32>),
1286
}
1287

1288
unsafe impl<'a> Send for Aux<'a> {}
1289
unsafe impl<'a> Sync for Aux<'a> {}
1290

1291
/// Types that can be used in aux arrays.
1292
pub trait AuxArrayElement: Copy {
1293
    fn from_le_bytes(bytes: &[u8]) -> Option<Self>;
1294
}
1295

1296
impl AuxArrayElement for i8 {
1297
    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1✔
1298
        std::io::Cursor::new(bytes).read_i8().ok()
1✔
1299
    }
1300
}
1301
impl AuxArrayElement for u8 {
1302
    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1✔
1303
        std::io::Cursor::new(bytes).read_u8().ok()
1✔
1304
    }
1305
}
1306
impl AuxArrayElement for i16 {
1307
    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1✔
1308
        std::io::Cursor::new(bytes).read_i16::<LittleEndian>().ok()
1✔
1309
    }
1310
}
1311
impl AuxArrayElement for u16 {
1312
    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1✔
1313
        std::io::Cursor::new(bytes).read_u16::<LittleEndian>().ok()
1✔
1314
    }
1315
}
1316
impl AuxArrayElement for i32 {
1317
    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1✔
1318
        std::io::Cursor::new(bytes).read_i32::<LittleEndian>().ok()
1✔
1319
    }
1320
}
1321
impl AuxArrayElement for u32 {
1322
    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1✔
1323
        std::io::Cursor::new(bytes).read_u32::<LittleEndian>().ok()
1✔
1324
    }
1325
}
1326
impl AuxArrayElement for f32 {
1327
    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
3✔
1328
        std::io::Cursor::new(bytes).read_f32::<LittleEndian>().ok()
3✔
1329
    }
1330
}
1331

1332
/// Provides access to aux arrays.
1333
///
1334
/// Provides methods to either retrieve single elements or an iterator over the
1335
/// array.
1336
///
1337
/// This type is used for wrapping both, array data that was read from a
1338
/// BAM record and slices of data that are going to be stored in one.
1339
///
1340
/// In order to be able to add an `AuxArray` field to a BAM record, `AuxArray`s
1341
/// can be constructed via the `From` trait which is implemented for all
1342
/// supported types (see [`AuxArrayElement`] for a list).
1343
///
1344
/// # Examples
1345
///
1346
/// ```
1347
/// use rust_htslib::{
1348
///     bam,
1349
///     bam::record::{Aux, AuxArray},
1350
/// };
1351
///
1352
/// //Set up BAM record
1353
/// let bam_header = bam::Header::new();
1354
/// let mut record = bam::Record::from_sam(
1355
///     &mut bam::HeaderView::from_header(&bam_header),
1356
///     "ali1\t4\t*\t0\t0\t*\t*\t0\t0\tACGT\tFFFF".as_bytes(),
1357
/// ).unwrap();
1358
///
1359
/// let data = vec![0.4, 0.3, 0.2, 0.1];
1360
/// let slice_of_data = &data;
1361
/// let aux_array: AuxArray<f32> = slice_of_data.into();
1362
/// let aux_field = Aux::ArrayFloat(aux_array);
1363
/// record.push_aux(b"XA", aux_field);
1364
///
1365
/// if let Ok(Aux::ArrayFloat(array)) = record.aux(b"XA") {
1366
///     // Retrieve the second element from the array
1367
///     assert_eq!(array.get(1).unwrap(), 0.3);
1368
///     // Iterate over the array and collect it into a `Vec`
1369
///     let read_array = array.iter().collect::<Vec<_>>();
1370
///     assert_eq!(read_array, data);
1371
/// } else {
1372
///     panic!("Could not read array data");
1373
/// }
1374
/// ```
1375
#[derive(Debug)]
1376
pub enum AuxArray<'a, T> {
1377
    TargetType(AuxArrayTargetType<'a, T>),
1378
    RawLeBytes(AuxArrayRawLeBytes<'a, T>),
1379
}
1380

1381
impl<T> PartialEq<AuxArray<'_, T>> for AuxArray<'_, T>
1382
where
1383
    T: AuxArrayElement + PartialEq,
1384
{
1385
    fn eq(&self, other: &AuxArray<'_, T>) -> bool {
7✔
1386
        use AuxArray::*;
1387
        match (self, other) {
7✔
1388
            (TargetType(v), TargetType(v_other)) => v == v_other,
1✔
1389
            (RawLeBytes(v), RawLeBytes(v_other)) => v == v_other,
1✔
1390
            (TargetType(_), RawLeBytes(_)) => self.iter().eq(other.iter()),
×
1391
            (RawLeBytes(_), TargetType(_)) => self.iter().eq(other.iter()),
7✔
1392
        }
1393
    }
1394
}
1395

1396
/// Create AuxArrays from slices of allowed target types.
1397
impl<'a, I, T> From<&'a T> for AuxArray<'a, I>
1398
where
1399
    I: AuxArrayElement,
1400
    T: AsRef<[I]> + ?Sized,
1401
{
1402
    fn from(src: &'a T) -> Self {
9✔
1403
        AuxArray::TargetType(AuxArrayTargetType {
9✔
1404
            slice: src.as_ref(),
9✔
1405
        })
1406
    }
1407
}
1408

1409
impl<'a, T> AuxArray<'a, T>
1410
where
1411
    T: AuxArrayElement,
1412
{
1413
    /// Returns the element at a position or None if out of bounds.
1414
    pub fn get(&self, index: usize) -> Option<T> {
9✔
1415
        match self {
9✔
1416
            AuxArray::TargetType(v) => v.get(index),
7✔
1417
            AuxArray::RawLeBytes(v) => v.get(index),
9✔
1418
        }
1419
    }
1420

1421
    /// Returns the number of elements in the array.
1422
    pub fn len(&self) -> usize {
9✔
1423
        match self {
9✔
1424
            AuxArray::TargetType(a) => a.len(),
×
1425
            AuxArray::RawLeBytes(a) => a.len(),
9✔
1426
        }
1427
    }
1428

1429
    /// Returns true if the array contains no elements.
1430
    pub fn is_empty(&self) -> bool {
×
1431
        self.len() == 0
×
1432
    }
1433

1434
    /// Returns an iterator over the array.
1435
    pub fn iter(&self) -> AuxArrayIter<T> {
9✔
1436
        AuxArrayIter {
1437
            index: 0,
1438
            array: self,
1439
        }
1440
    }
1441

1442
    /// Create AuxArrays from raw byte slices borrowed from `bam::Record`.
1443
    fn from_bytes(bytes: &'a [u8]) -> Self {
9✔
1444
        Self::RawLeBytes(AuxArrayRawLeBytes {
9✔
1445
            slice: bytes,
×
1446
            phantom_data: PhantomData,
×
1447
        })
1448
    }
1449
}
1450

1451
/// Encapsulates slice of target type.
1452
#[doc(hidden)]
1453
#[derive(Debug, PartialEq)]
1454
pub struct AuxArrayTargetType<'a, T> {
1455
    slice: &'a [T],
1456
}
1457

1458
impl<'a, T> AuxArrayTargetType<'a, T>
1459
where
1460
    T: AuxArrayElement,
1461
{
1462
    fn get(&self, index: usize) -> Option<T> {
7✔
1463
        self.slice.get(index).copied()
7✔
1464
    }
1465

1466
    fn len(&self) -> usize {
9✔
1467
        self.slice.len()
9✔
1468
    }
1469
}
1470

1471
/// Encapsulates slice of raw bytes to prevent it from being accidentally accessed.
1472
#[doc(hidden)]
1473
#[derive(Debug, PartialEq)]
1474
pub struct AuxArrayRawLeBytes<'a, T> {
1475
    slice: &'a [u8],
1476
    phantom_data: PhantomData<T>,
1477
}
1478

1479
impl<'a, T> AuxArrayRawLeBytes<'a, T>
1480
where
1481
    T: AuxArrayElement,
1482
{
1483
    fn get(&self, index: usize) -> Option<T> {
9✔
1484
        let type_size = std::mem::size_of::<T>();
9✔
1485
        if index * type_size + type_size > self.slice.len() {
9✔
1486
            return None;
9✔
1487
        }
1488
        T::from_le_bytes(&self.slice[index * type_size..][..type_size])
36✔
1489
    }
1490

1491
    fn len(&self) -> usize {
9✔
1492
        self.slice.len() / std::mem::size_of::<T>()
9✔
1493
    }
1494
}
1495

1496
/// Aux array iterator
1497
///
1498
/// This struct is created by the [`AuxArray::iter`] method.
1499
pub struct AuxArrayIter<'a, T> {
1500
    index: usize,
1501
    array: &'a AuxArray<'a, T>,
1502
}
1503

1504
impl<T> Iterator for AuxArrayIter<'_, T>
1505
where
1506
    T: AuxArrayElement,
1507
{
1508
    type Item = T;
1509
    fn next(&mut self) -> Option<Self::Item> {
9✔
1510
        let value = self.array.get(self.index);
9✔
1511
        self.index += 1;
9✔
1512
        value
×
1513
    }
1514

1515
    fn size_hint(&self) -> (usize, Option<usize>) {
9✔
1516
        let array_length = self.array.len() - self.index;
9✔
1517
        (array_length, Some(array_length))
9✔
1518
    }
1519
}
1520

1521
/// Auxiliary data iterator
1522
///
1523
/// This struct is created by the [`Record::aux_iter`] method.
1524
///
1525
/// This iterator returns `Result`s that wrap tuples containing
1526
/// a slice which represents the two-byte tag (`&[u8; 2]`) as
1527
/// well as an `Aux` enum that wraps the associated value.
1528
///
1529
/// When an error occurs, the `Err` variant will be returned
1530
/// and the iterator will not be able to advance anymore.
1531
pub struct AuxIter<'a> {
1532
    aux: &'a [u8],
1533
}
1534

1535
impl<'a> Iterator for AuxIter<'a> {
1536
    type Item = Result<(&'a [u8], Aux<'a>)>;
1537

1538
    fn next(&mut self) -> Option<Self::Item> {
1✔
1539
        // We're finished
1540
        if self.aux.is_empty() {
1✔
1541
            return None;
1✔
1542
        }
1543
        // Incomplete aux data
1544
        if (1..=3).contains(&self.aux.len()) {
1✔
1545
            // In the case of an error, we can not safely advance in the aux data, so we terminate the Iteration
1546
            self.aux = &[];
×
1547
            return Some(Err(Error::BamAuxParsingError));
×
1548
        }
1549
        let tag = &self.aux[..2];
2✔
1550
        Some(unsafe {
1✔
1551
            let data_ptr = self.aux[2..].as_ptr();
2✔
1552
            Record::read_aux_field(data_ptr)
3✔
1553
                .map(|(aux, offset)| {
3✔
1554
                    self.aux = &self.aux[offset..];
2✔
1555
                    (tag, aux)
1✔
1556
                })
1557
                .map_err(|e| {
1✔
1558
                    // In the case of an error, we can not safely advance in the aux data, so we terminate the Iteration
1559
                    self.aux = &[];
×
1560
                    e
×
1561
                })
1562
        })
1563
    }
1564
}
1565

1566
static DECODE_BASE: &[u8] = b"=ACMGRSVTWYHKDBN";
1567
static ENCODE_BASE: [u8; 256] = [
1568
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1569
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1570
    1, 2, 4, 8, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 15, 15, 15, 1, 14, 2, 13, 15, 15, 4, 11, 15,
1571
    15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15, 15, 15, 15, 15, 1, 14, 2,
1572
    13, 15, 15, 4, 11, 15, 15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15,
1573
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1574
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1575
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1576
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1577
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1578
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1579
];
1580

1581
#[inline]
1582
fn encoded_base(encoded_seq: &[u8], i: usize) -> u8 {
3✔
1583
    (encoded_seq[i / 2] >> ((!i & 1) << 2)) & 0b1111
3✔
1584
}
1585

1586
#[inline]
1587
unsafe fn encoded_base_unchecked(encoded_seq: &[u8], i: usize) -> u8 {
×
1588
    (encoded_seq.get_unchecked(i / 2) >> ((!i & 1) << 2)) & 0b1111
×
1589
}
1590

1591
#[inline]
1592
fn decode_base_unchecked(base: u8) -> &'static u8 {
3✔
1593
    unsafe { DECODE_BASE.get_unchecked(base as usize) }
3✔
1594
}
1595

1596
/// The sequence of a record.
1597
#[derive(Debug, Copy, Clone)]
1598
pub struct Seq<'a> {
1599
    pub encoded: &'a [u8],
1600
    len: usize,
1601
}
1602

1603
impl<'a> Seq<'a> {
1604
    /// Return encoded base. Complexity: O(1).
1605
    #[inline]
1606
    pub fn encoded_base(&self, i: usize) -> u8 {
3✔
1607
        encoded_base(self.encoded, i)
3✔
1608
    }
1609

1610
    /// Return encoded base. Complexity: O(1).
1611
    #[inline]
1612
    pub unsafe fn encoded_base_unchecked(&self, i: usize) -> u8 {
×
1613
        encoded_base_unchecked(self.encoded, i)
×
1614
    }
1615

1616
    /// Obtain decoded base without performing bounds checking.
1617
    /// Use index based access seq()[i], for checked, safe access.
1618
    /// Complexity: O(1).
1619
    #[inline]
1620
    pub unsafe fn decoded_base_unchecked(&self, i: usize) -> u8 {
×
1621
        *decode_base_unchecked(self.encoded_base_unchecked(i))
×
1622
    }
1623

1624
    /// Return decoded sequence. Complexity: O(m) with m being the read length.
1625
    pub fn as_bytes(&self) -> Vec<u8> {
2✔
1626
        (0..self.len()).map(|i| self[i]).collect()
6✔
1627
    }
1628

1629
    /// Return length (in bases) of the sequence.
1630
    pub fn len(&self) -> usize {
2✔
1631
        self.len
2✔
1632
    }
1633

1634
    pub fn is_empty(&self) -> bool {
×
1635
        self.len() == 0
×
1636
    }
1637
}
1638

1639
impl<'a> ops::Index<usize> for Seq<'a> {
1640
    type Output = u8;
1641

1642
    /// Return decoded base at given position within read. Complexity: O(1).
1643
    fn index(&self, index: usize) -> &u8 {
3✔
1644
        decode_base_unchecked(self.encoded_base(index))
3✔
1645
    }
1646
}
1647

1648
unsafe impl<'a> Send for Seq<'a> {}
1649
unsafe impl<'a> Sync for Seq<'a> {}
1650

1651
#[cfg_attr(feature = "serde_feature", derive(Serialize, Deserialize))]
1652
#[derive(PartialEq, PartialOrd, Eq, Debug, Clone, Copy, Hash)]
1653
pub enum Cigar {
1654
    Match(u32),    // M
1655
    Ins(u32),      // I
1656
    Del(u32),      // D
1657
    RefSkip(u32),  // N
1658
    SoftClip(u32), // S
1659
    HardClip(u32), // H
1660
    Pad(u32),      // P
1661
    Equal(u32),    // =
1662
    Diff(u32),     // X
1663
}
1664

1665
impl Cigar {
1666
    fn encode(self) -> u32 {
1✔
1667
        match self {
1✔
1668
            Cigar::Match(len) => len << 4, // | 0,
1✔
1669
            Cigar::Ins(len) => len << 4 | 1,
1✔
1670
            Cigar::Del(len) => len << 4 | 2,
1✔
1671
            Cigar::RefSkip(len) => len << 4 | 3,
×
1672
            Cigar::SoftClip(len) => len << 4 | 4,
1✔
1673
            Cigar::HardClip(len) => len << 4 | 5,
1✔
1674
            Cigar::Pad(len) => len << 4 | 6,
×
1675
            Cigar::Equal(len) => len << 4 | 7,
1✔
1676
            Cigar::Diff(len) => len << 4 | 8,
1✔
1677
        }
1678
    }
1679

1680
    /// Return the length of the CIGAR.
1681
    pub fn len(self) -> u32 {
2✔
1682
        match self {
2✔
1683
            Cigar::Match(len) => len,
2✔
1684
            Cigar::Ins(len) => len,
1✔
1685
            Cigar::Del(len) => len,
1✔
1686
            Cigar::RefSkip(len) => len,
×
1687
            Cigar::SoftClip(len) => len,
2✔
1688
            Cigar::HardClip(len) => len,
1✔
1689
            Cigar::Pad(len) => len,
×
1690
            Cigar::Equal(len) => len,
1✔
1691
            Cigar::Diff(len) => len,
1✔
1692
        }
1693
    }
1694

1695
    pub fn is_empty(self) -> bool {
×
1696
        self.len() == 0
×
1697
    }
1698

1699
    /// Return the character representing the CIGAR.
1700
    pub fn char(self) -> char {
2✔
1701
        match self {
2✔
1702
            Cigar::Match(_) => 'M',
2✔
1703
            Cigar::Ins(_) => 'I',
1✔
1704
            Cigar::Del(_) => 'D',
1✔
1705
            Cigar::RefSkip(_) => 'N',
×
1706
            Cigar::SoftClip(_) => 'S',
2✔
1707
            Cigar::HardClip(_) => 'H',
1✔
1708
            Cigar::Pad(_) => 'P',
×
1709
            Cigar::Equal(_) => '=',
1✔
1710
            Cigar::Diff(_) => 'X',
1✔
1711
        }
1712
    }
1713
}
1714

1715
impl fmt::Display for Cigar {
1716
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
2✔
1717
        fmt.write_fmt(format_args!("{}{}", self.len(), self.char()))
6✔
1718
    }
1719
}
1720

1721
unsafe impl Send for Cigar {}
1722
unsafe impl Sync for Cigar {}
1723

1724
custom_derive! {
1725
    /// A CIGAR string. This type wraps around a `Vec<Cigar>`.
1726
    ///
1727
    /// # Example
1728
    ///
1729
    /// ```
1730
    /// use rust_htslib::bam::record::{Cigar, CigarString};
1731
    ///
1732
    /// let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]);
1733
    ///
1734
    /// // access by index
1735
    /// assert_eq!(cigar[0], Cigar::Match(100));
1736
    /// // format into classical string representation
1737
    /// assert_eq!(format!("{}", cigar), "100M10S");
1738
    /// // iterate
1739
    /// for op in &cigar {
1740
    ///    println!("{}", op);
1741
    /// }
1742
    /// ```
1743
    #[cfg_attr(feature = "serde_feature", derive(Serialize, Deserialize))]
×
1744
    #[derive(NewtypeDeref,
1745
            NewtypeDerefMut,
1746
             NewtypeIndex(usize),
1747
             NewtypeIndexMut(usize),
1748
             NewtypeFrom,
1749
             PartialEq,
1750
             PartialOrd,
1751
             Eq,
1752
             NewtypeDebug,
1753
             Clone,
1754
             Hash
1755
    )]
1756
    pub struct CigarString(pub Vec<Cigar>);
3✔
1757
}
1758

1759
impl CigarString {
1760
    /// Create a `CigarStringView` from this CigarString at position `pos`
1761
    pub fn into_view(self, pos: i64) -> CigarStringView {
1✔
1762
        CigarStringView::new(self, pos)
1✔
1763
    }
1764

1765
    /// Calculate the bam cigar from the alignment struct. x is the target string
1766
    /// and y is the reference. `hard_clip` controls how unaligned read bases are encoded in the
1767
    /// cigar string. Set to true to use the hard clip (`H`) code, or false to use soft clip
1768
    /// (`S`) code. See the [SAM spec](https://samtools.github.io/hts-specs/SAMv1.pdf) for more details.
1769
    pub fn from_alignment(alignment: &Alignment, hard_clip: bool) -> Self {
1✔
1770
        match alignment.mode {
1✔
1771
            AlignmentMode::Global => {
1772
                panic!(" Bam cigar fn not supported for Global Alignment mode")
×
1773
            }
1774
            AlignmentMode::Local => panic!(" Bam cigar fn not supported for Local Alignment mode"),
×
1775
            _ => {}
1776
        }
1777

1778
        let mut cigar = Vec::new();
1✔
1779
        if alignment.operations.is_empty() {
2✔
1780
            return CigarString(cigar);
×
1781
        }
1782

1783
        let add_op = |op: AlignmentOperation, length: u32, cigar: &mut Vec<Cigar>| match op {
3✔
1784
            AlignmentOperation::Del => cigar.push(Cigar::Del(length)),
1✔
1785
            AlignmentOperation::Ins => cigar.push(Cigar::Ins(length)),
1✔
1786
            AlignmentOperation::Subst => cigar.push(Cigar::Diff(length)),
1✔
1787
            AlignmentOperation::Match => cigar.push(Cigar::Equal(length)),
1✔
1788
            _ => {}
1789
        };
1790

1791
        if alignment.xstart > 0 {
1✔
1792
            cigar.push(if hard_clip {
3✔
1793
                Cigar::HardClip(alignment.xstart as u32)
×
1794
            } else {
1795
                Cigar::SoftClip(alignment.xstart as u32)
1✔
1796
            });
1797
        }
1798

1799
        let mut last = alignment.operations[0];
2✔
1800
        let mut k = 1u32;
1✔
1801
        for &op in alignment.operations[1..].iter() {
3✔
1802
            if op == last {
3✔
1803
                k += 1;
2✔
1804
            } else {
1805
                add_op(last, k, &mut cigar);
1✔
1806
                k = 1;
1✔
1807
            }
1808
            last = op;
1✔
1809
        }
1810
        add_op(last, k, &mut cigar);
1✔
1811
        if alignment.xlen > alignment.xend {
1✔
1812
            cigar.push(if hard_clip {
5✔
1813
                Cigar::HardClip((alignment.xlen - alignment.xend) as u32)
2✔
1814
            } else {
1815
                Cigar::SoftClip((alignment.xlen - alignment.xend) as u32)
2✔
1816
            });
1817
        }
1818

1819
        CigarString(cigar)
1✔
1820
    }
1821
}
1822

1823
impl TryFrom<&[u8]> for CigarString {
1824
    type Error = Error;
1825

1826
    /// Create a CigarString from given &[u8].
1827
    /// # Example
1828
    /// ```
1829
    /// use rust_htslib::bam::record::*;
1830
    /// use rust_htslib::bam::record::CigarString;
1831
    /// use rust_htslib::bam::record::Cigar::*;
1832
    /// use std::convert::TryFrom;
1833
    ///
1834
    /// let cigar_str = "2H10M5X3=2H".as_bytes();
1835
    /// let cigar = CigarString::try_from(cigar_str)
1836
    ///     .expect("Unable to parse cigar string.");
1837
    /// let expected_cigar = CigarString(vec![
1838
    ///     HardClip(2),
1839
    ///     Match(10),
1840
    ///     Diff(5),
1841
    ///     Equal(3),
1842
    ///     HardClip(2),
1843
    /// ]);
1844
    /// assert_eq!(cigar, expected_cigar);
1845
    /// ```
1846
    fn try_from(bytes: &[u8]) -> Result<Self> {
3✔
1847
        let mut inner = Vec::new();
3✔
1848
        let mut i = 0;
3✔
1849
        let text_len = bytes.len();
3✔
1850
        while i < text_len {
6✔
1851
            let mut j = i;
3✔
1852
            while j < text_len && bytes[j].is_ascii_digit() {
15✔
1853
                j += 1;
6✔
1854
            }
1855
            // check that length is provided
1856
            if i == j {
3✔
1857
                return Err(Error::BamParseCigar {
×
1858
                    msg: "Expected length before cigar operation [0-9]+[MIDNSHP=X]".to_owned(),
×
1859
                });
1860
            }
1861
            // get the length of the operation
1862
            let s = str::from_utf8(&bytes[i..j]).map_err(|_| Error::BamParseCigar {
6✔
1863
                msg: format!("Invalid utf-8 bytes '{:?}'.", &bytes[i..j]),
×
1864
            })?;
1865
            let n = s.parse().map_err(|_| Error::BamParseCigar {
6✔
1866
                msg: format!("Unable to parse &str '{:?}' to u32.", s),
×
1867
            })?;
1868
            // get the operation
1869
            let op = &bytes[j];
6✔
1870
            inner.push(match op {
6✔
1871
                b'M' => Cigar::Match(n),
3✔
1872
                b'I' => Cigar::Ins(n),
1✔
1873
                b'D' => Cigar::Del(n),
1✔
1874
                b'N' => Cigar::RefSkip(n),
1✔
1875
                b'H' => {
1876
                    if i == 0 || j + 1 == text_len {
6✔
1877
                        Cigar::HardClip(n)
3✔
1878
                    } else {
1879
                        return Err(Error::BamParseCigar {
×
1880
                            msg: "Hard clipping ('H') is only valid at the start or end of a cigar."
×
1881
                                .to_owned(),
1882
                        });
1883
                    }
1884
                }
1885
                b'S' => {
1886
                    if i == 0
1✔
1887
                        || j + 1 == text_len
1✔
1888
                        || bytes[i-1] == b'H'
1✔
1889
                        || bytes[j+1..].iter().all(|c| c.is_ascii_digit() || *c == b'H') {
3✔
1890
                        Cigar::SoftClip(n)
1✔
1891
                    } else {
1892
                        return Err(Error::BamParseCigar {
×
1893
                        msg: "Soft clips ('S') can only have hard clips ('H') between them and the end of the CIGAR string."
×
1894
                            .to_owned(),
1895
                        });
1896
                    }
1897
                },
1898
                b'P' => Cigar::Pad(n),
1✔
1899
                b'=' => Cigar::Equal(n),
3✔
1900
                b'X' => Cigar::Diff(n),
3✔
1901
                op => {
×
1902
                    return Err(Error::BamParseCigar {
×
1903
                        msg: format!("Expected cigar operation [MIDNSHP=X] but got [{}]", op),
×
1904
                    })
1905
                }
1906
            });
1907
            i = j + 1;
3✔
1908
        }
1909
        Ok(CigarString(inner))
3✔
1910
    }
1911
}
1912

1913
impl TryFrom<&str> for CigarString {
1914
    type Error = Error;
1915

1916
    /// Create a CigarString from given &str.
1917
    /// # Example
1918
    /// ```
1919
    /// use rust_htslib::bam::record::*;
1920
    /// use rust_htslib::bam::record::CigarString;
1921
    /// use rust_htslib::bam::record::Cigar::*;
1922
    /// use std::convert::TryFrom;
1923
    ///
1924
    /// let cigar_str = "2H10M5X3=2H";
1925
    /// let cigar = CigarString::try_from(cigar_str)
1926
    ///     .expect("Unable to parse cigar string.");
1927
    /// let expected_cigar = CigarString(vec![
1928
    ///     HardClip(2),
1929
    ///     Match(10),
1930
    ///     Diff(5),
1931
    ///     Equal(3),
1932
    ///     HardClip(2),
1933
    /// ]);
1934
    /// assert_eq!(cigar, expected_cigar);
1935
    /// ```
1936
    fn try_from(text: &str) -> Result<Self> {
2✔
1937
        let bytes = text.as_bytes();
2✔
1938
        if text.chars().count() != bytes.len() {
2✔
1939
            return Err(Error::BamParseCigar {
1✔
1940
                msg: "CIGAR string contained non-ASCII characters, which are not valid. Valid are [0-9MIDNSHP=X].".to_owned(),
1✔
1941
            });
1942
        }
1943
        CigarString::try_from(bytes)
2✔
1944
    }
1945
}
1946

1947
impl<'a> CigarString {
1948
    pub fn iter(&'a self) -> ::std::slice::Iter<'a, Cigar> {
1✔
1949
        self.into_iter()
1✔
1950
    }
1951
}
1952

1953
impl<'a> IntoIterator for &'a CigarString {
1954
    type Item = &'a Cigar;
1955
    type IntoIter = ::std::slice::Iter<'a, Cigar>;
1956

1957
    fn into_iter(self) -> Self::IntoIter {
2✔
1958
        self.0.iter()
2✔
1959
    }
1960
}
1961

1962
impl fmt::Display for CigarString {
1963
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
2✔
1964
        for op in self {
4✔
1965
            fmt.write_fmt(format_args!("{}{}", op.len(), op.char()))?;
8✔
1966
        }
1967
        Ok(())
2✔
1968
    }
1969
}
1970

1971
// Get number of leading/trailing softclips if a CigarString taking hardclips into account
1972
fn calc_softclips<'a>(mut cigar: impl DoubleEndedIterator<Item = &'a Cigar>) -> i64 {
2✔
1973
    match (cigar.next(), cigar.next()) {
8✔
1974
        (Some(Cigar::HardClip(_)), Some(Cigar::SoftClip(s))) | (Some(Cigar::SoftClip(s)), _) => {
4✔
1975
            *s as i64
2✔
1976
        }
1977
        _ => 0,
×
1978
    }
1979
}
1980

1981
#[derive(Eq, PartialEq, Clone, Debug)]
1982
pub struct CigarStringView {
1983
    inner: CigarString,
1984
    pos: i64,
1985
}
1986

1987
impl CigarStringView {
1988
    /// Construct a new CigarStringView from a CigarString at a position
1989
    pub fn new(c: CigarString, pos: i64) -> CigarStringView {
1✔
1990
        CigarStringView { inner: c, pos }
1991
    }
1992

1993
    /// Get (exclusive) end position of alignment.
1994
    pub fn end_pos(&self) -> i64 {
1✔
1995
        let mut pos = self.pos;
1✔
1996
        for c in self {
2✔
1997
            match c {
3✔
UNCOV
1998
                Cigar::Match(l)
×
1999
                | Cigar::RefSkip(l)
2000
                | Cigar::Del(l)
2001
                | Cigar::Equal(l)
2002
                | Cigar::Diff(l) => pos += *l as i64,
2003
                // these don't add to end_pos on reference
2004
                Cigar::Ins(_) | Cigar::SoftClip(_) | Cigar::HardClip(_) | Cigar::Pad(_) => (),
2005
            }
2006
        }
2007
        pos
1✔
2008
    }
2009

2010
    /// Get the start position of the alignment (0-based).
2011
    pub fn pos(&self) -> i64 {
1✔
2012
        self.pos
1✔
2013
    }
2014

2015
    /// Get number of bases softclipped at the beginning of the alignment.
2016
    pub fn leading_softclips(&self) -> i64 {
1✔
2017
        calc_softclips(self.iter())
1✔
2018
    }
2019

2020
    /// Get number of bases softclipped at the end of the alignment.
2021
    pub fn trailing_softclips(&self) -> i64 {
1✔
2022
        calc_softclips(self.iter().rev())
1✔
2023
    }
2024

2025
    /// Get number of bases hardclipped at the beginning of the alignment.
2026
    pub fn leading_hardclips(&self) -> i64 {
×
2027
        self.first().map_or(0, |cigar| {
×
2028
            if let Cigar::HardClip(s) = cigar {
×
2029
                *s as i64
×
2030
            } else {
2031
                0
×
2032
            }
2033
        })
2034
    }
2035

2036
    /// Get number of bases hardclipped at the end of the alignment.
2037
    pub fn trailing_hardclips(&self) -> i64 {
×
2038
        self.last().map_or(0, |cigar| {
×
2039
            if let Cigar::HardClip(s) = cigar {
×
2040
                *s as i64
×
2041
            } else {
2042
                0
×
2043
            }
2044
        })
2045
    }
2046

2047
    /// For a given position in the reference, get corresponding position within read.
2048
    /// If reference position is outside of the read alignment, return None.
2049
    ///
2050
    /// # Arguments
2051
    ///
2052
    /// * `ref_pos` - the reference position
2053
    /// * `include_softclips` - if true, softclips will be considered as matches or mismatches
2054
    /// * `include_dels` - if true, positions within deletions will be considered (first reference matching read position after deletion will be returned)
2055
    ///
2056
    pub fn read_pos(
1✔
2057
        &self,
2058
        ref_pos: u32,
2059
        include_softclips: bool,
2060
        include_dels: bool,
2061
    ) -> Result<Option<u32>> {
2062
        let mut rpos = self.pos as u32; // reference position
1✔
2063
        let mut qpos = 0u32; // position within read
1✔
2064
        let mut j = 0; // index into cigar operation vector
1✔
2065

2066
        // find first cigar operation referring to qpos = 0 (and thus bases in record.seq()),
2067
        // because all augmentations of qpos and rpos before that are invalid
2068
        for (i, c) in self.iter().enumerate() {
2✔
2069
            match c {
1✔
2070
                Cigar::Match(_) |
2071
                Cigar::Diff(_)  |
2072
                Cigar::Equal(_) |
2073
                // this is unexpected, but bwa + GATK indel realignment can produce insertions
2074
                // before matching positions
2075
                Cigar::Ins(_) => {
2076
                    j = i;
1✔
2077
                    break;
1✔
2078
                },
2079
                Cigar::SoftClip(l) => {
1✔
2080
                    j = i;
1✔
2081
                    if include_softclips {
2✔
2082
                        // Alignment starts with softclip and we want to include it in the
2083
                        // projection of the reference position. However, the POS field does not
2084
                        // include the softclip. Hence we have to subtract its length.
2085
                        rpos = rpos.saturating_sub(*l);
2✔
2086
                    }
2087
                    break;
2088
                },
2089
                Cigar::Del(_) => {
2090
                    return Err(Error::BamUnexpectedCigarOperation {
×
2091
                        msg: "'deletion' (D) found before any operation describing read sequence".to_owned()
×
2092
                    });
2093
                },
2094
                Cigar::RefSkip(_) => {
2095
                    return Err(Error::BamUnexpectedCigarOperation {
×
2096
                        msg: "'reference skip' (N) found before any operation describing read sequence".to_owned()
×
2097
                    });
2098
                },
2099
                Cigar::HardClip(_) if i > 0 && i < self.len()-1 => {
2✔
2100
                    return Err(Error::BamUnexpectedCigarOperation{
1✔
2101
                        msg: "'hard clip' (H) found in between operations, contradicting SAMv1 spec that hard clips can only be at the ends of reads".to_owned()
1✔
2102
                    });
2103
                },
2104
                // if we have reached the end of the CigarString with only pads and hard clips, we have no read position matching the variant
2105
                Cigar::Pad(_) | Cigar::HardClip(_) if i == self.len()-1 => return Ok(None),
3✔
2106
                // skip leading HardClips and Pads, as they consume neither read sequence nor reference sequence
2107
                Cigar::Pad(_) | Cigar::HardClip(_) => ()
2108
            }
2109
        }
2110

2111
        let contains_ref_pos = |cigar_op_start: u32, cigar_op_length: u32| {
2✔
2112
            cigar_op_start <= ref_pos && cigar_op_start + cigar_op_length > ref_pos
2✔
2113
        };
2114

2115
        while rpos <= ref_pos && j < self.len() {
3✔
2116
            match self[j] {
9✔
2117
                // potential SNV evidence
2118
                Cigar::Match(l) | Cigar::Diff(l) | Cigar::Equal(l) if contains_ref_pos(rpos, l) => {
5✔
2119
                    // difference between desired position and first position of current cigar
2120
                    // operation
2121
                    qpos += ref_pos - rpos;
2✔
2122
                    return Ok(Some(qpos));
1✔
2123
                }
2124
                Cigar::SoftClip(l) if include_softclips && contains_ref_pos(rpos, l) => {
2✔
2125
                    qpos += ref_pos - rpos;
2✔
2126
                    return Ok(Some(qpos));
1✔
2127
                }
2128
                Cigar::Del(l) if include_dels && contains_ref_pos(rpos, l) => {
2✔
2129
                    // qpos shall resemble the start of the deletion
2130
                    return Ok(Some(qpos));
1✔
2131
                }
2132
                // for others, just increase pos and qpos as needed
2133
                Cigar::Match(l) | Cigar::Diff(l) | Cigar::Equal(l) => {
3✔
2134
                    rpos += l;
2✔
2135
                    qpos += l;
2✔
2136
                    j += 1;
2✔
2137
                }
2138
                Cigar::SoftClip(l) => {
1✔
2139
                    qpos += l;
2✔
2140
                    j += 1;
2✔
2141
                    if include_softclips {
2✔
2142
                        rpos += l;
1✔
2143
                    }
2144
                }
2145
                Cigar::Ins(l) => {
1✔
2146
                    qpos += l;
2✔
2147
                    j += 1;
2✔
2148
                }
2149
                Cigar::RefSkip(l) | Cigar::Del(l) => {
2✔
2150
                    rpos += l;
1✔
2151
                    j += 1;
2✔
2152
                }
2153
                Cigar::Pad(_) => {
1✔
2154
                    j += 1;
2✔
2155
                }
2156
                Cigar::HardClip(_) if j < self.len() - 1 => {
2✔
2157
                    return Err(Error::BamUnexpectedCigarOperation{
1✔
2158
                        msg: "'hard clip' (H) found in between operations, contradicting SAMv1 spec that hard clips can only be at the ends of reads".to_owned()
1✔
2159
                    });
2160
                }
2161
                Cigar::HardClip(_) => return Ok(None),
×
2162
            }
2163
        }
2164

2165
        Ok(None)
1✔
2166
    }
2167

2168
    /// transfer ownership of the Cigar out of the CigarView
2169
    pub fn take(self) -> CigarString {
1✔
2170
        self.inner
1✔
2171
    }
2172
}
2173

2174
impl ops::Deref for CigarStringView {
2175
    type Target = CigarString;
2176

2177
    fn deref(&self) -> &CigarString {
1✔
2178
        &self.inner
2179
    }
2180
}
2181

2182
impl ops::Index<usize> for CigarStringView {
2183
    type Output = Cigar;
2184

2185
    fn index(&self, index: usize) -> &Cigar {
1✔
2186
        self.inner.index(index)
1✔
2187
    }
2188
}
2189

2190
impl ops::IndexMut<usize> for CigarStringView {
2191
    fn index_mut(&mut self, index: usize) -> &mut Cigar {
×
2192
        self.inner.index_mut(index)
×
2193
    }
2194
}
2195

2196
impl<'a> CigarStringView {
2197
    pub fn iter(&'a self) -> ::std::slice::Iter<'a, Cigar> {
1✔
2198
        self.inner.into_iter()
1✔
2199
    }
2200
}
2201

2202
impl<'a> IntoIterator for &'a CigarStringView {
2203
    type Item = &'a Cigar;
2204
    type IntoIter = ::std::slice::Iter<'a, Cigar>;
2205

2206
    fn into_iter(self) -> Self::IntoIter {
1✔
2207
        self.inner.into_iter()
1✔
2208
    }
2209
}
2210

2211
impl fmt::Display for CigarStringView {
2212
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
×
2213
        self.inner.fmt(fmt)
×
2214
    }
2215
}
2216

2217
pub struct BaseModificationMetadata {
2218
    pub strand: i32,
2219
    pub implicit: i32,
2220
    pub canonical: u8,
2221
}
2222

2223
/// struct containing the internal state required to access
2224
/// the base modifications for a bam::Record
2225
pub struct BaseModificationState<'a> {
2226
    record: &'a Record,
2227
    state: *mut htslib::hts_base_mod_state,
2228
    buffer: Vec<htslib::hts_base_mod>,
2229
    buffer_pos: i32,
2230
}
2231

2232
impl BaseModificationState<'_> {
2233
    /// Initialize a new BaseModification struct from a bam::Record
2234
    /// This function allocates memory for the state structure
2235
    /// and initializes the iterator to the start of the modification
2236
    /// records.
2237
    fn new<'a>(r: &'a Record) -> Result<BaseModificationState<'a>> {
2✔
2238
        let mut bm = unsafe {
2239
            BaseModificationState {
2240
                record: r,
2241
                state: hts_sys::hts_base_mod_state_alloc(),
2✔
2242
                buffer: Vec::new(),
2✔
2243
                buffer_pos: -1,
×
2244
            }
2245
        };
2246

2247
        if bm.state.is_null() {
4✔
2248
            panic!("Unable to allocate memory for hts_base_mod_state");
×
2249
        }
2250

2251
        // parse the MM tag to initialize the state
2252
        unsafe {
2253
            let ret = hts_sys::bam_parse_basemod(bm.record.inner_ptr(), bm.state);
4✔
2254
            if ret != 0 {
2✔
2255
                return Err(Error::BamBaseModificationTagNotFound);
×
2256
            }
2257
        }
2258

2259
        let types = bm.recorded();
4✔
2260
        bm.buffer.reserve(types.len());
2✔
2261
        return Ok(bm);
2✔
2262
    }
2263

2264
    pub fn buffer_next_mods(&mut self) -> Result<usize> {
2✔
2265
        unsafe {
2266
            let ret = hts_sys::bam_next_basemod(
2267
                self.record.inner_ptr(),
2✔
2268
                self.state,
2✔
2269
                self.buffer.as_mut_ptr(),
2✔
2270
                self.buffer.capacity() as i32,
2✔
2271
                &mut self.buffer_pos,
2✔
2272
            );
2273

2274
            if ret < 0 {
2✔
2275
                return Err(Error::BamBaseModificationIterationFailed);
×
2276
            }
2277

2278
            // the htslib API won't write more than buffer.capacity() mods to the output array but it will
2279
            // return the actual number of modifications found. We return an error to the caller
2280
            // in the case where there was insufficient storage to return all mods.
2281
            if ret as usize > self.buffer.capacity() {
2✔
2282
                return Err(Error::BamBaseModificationTooManyMods);
×
2283
            }
2284

2285
            // we read the modifications directly into the vector, which does
2286
            // not update the length so needs to be manually set
2287
            self.buffer.set_len(ret as usize);
2✔
2288

2289
            return Ok(ret as usize);
2✔
2290
        }
2291
    }
2292

2293
    /// Return an array containing the modification codes listed for this record.
2294
    /// Positive values are ascii character codes (eg m), negative values are chEBI codes.
2295
    pub fn recorded<'a>(&self) -> &'a [i32] {
2✔
2296
        unsafe {
2297
            let mut n: i32 = 0;
2✔
2298
            let data_ptr: *const i32 = hts_sys::bam_mods_recorded(self.state, &mut n);
2✔
2299

2300
            // htslib should not return a null pointer, even when there are no base mods
2301
            if data_ptr.is_null() {
2✔
2302
                panic!("Unable to obtain pointer to base modifications");
×
2303
            }
2304
            assert!(n >= 0);
2✔
2305
            return slice::from_raw_parts(data_ptr, n as usize);
2✔
2306
        }
2307
    }
2308

2309
    /// Return metadata for the specified character code indicating the strand
2310
    /// the base modification was called on, whether the tag uses implicit mode
2311
    /// and the ascii code for the canonical base.
2312
    /// If there are multiple modifications with the same code this will return the data
2313
    /// for the first mod.  See https://github.com/samtools/htslib/issues/1635
2314
    pub fn query_type<'a>(&self, code: i32) -> Result<BaseModificationMetadata> {
2✔
2315
        unsafe {
2316
            let mut strand: i32 = 0;
2✔
2317
            let mut implicit: i32 = 0;
2✔
2318
            // This may be i8 or u8 in hts_sys.
2319
            let mut canonical: c_char = 0;
2✔
2320

2321
            let ret = hts_sys::bam_mods_query_type(
2322
                self.state,
2✔
2323
                code,
×
2324
                &mut strand,
×
2325
                &mut implicit,
×
2326
                &mut canonical,
×
2327
            );
2328
            if ret == -1 {
2✔
2329
                return Err(Error::BamBaseModificationTypeNotFound);
×
2330
            } else {
2331
                return Ok(BaseModificationMetadata {
2✔
2332
                    strand,
2✔
2333
                    implicit,
2✔
2334
                    canonical: canonical.try_into().unwrap(),
2✔
2335
                });
2336
            }
2337
        }
2338
    }
2339
}
2340

2341
impl Drop for BaseModificationState<'_> {
2342
    fn drop<'a>(&mut self) {
2✔
2343
        unsafe {
2344
            hts_sys::hts_base_mod_state_free(self.state);
2✔
2345
        }
2346
    }
2347
}
2348

2349
/// Iterator over the base modifications that returns
2350
/// a vector for all of the mods at each position
2351
pub struct BaseModificationsPositionIter<'a> {
2352
    mod_state: BaseModificationState<'a>,
2353
}
2354

2355
impl BaseModificationsPositionIter<'_> {
2356
    fn new<'a>(r: &'a Record) -> Result<BaseModificationsPositionIter<'a>> {
1✔
2357
        let state = BaseModificationState::new(r)?;
1✔
2358
        Ok(BaseModificationsPositionIter { mod_state: state })
1✔
2359
    }
2360

2361
    pub fn recorded<'a>(&self) -> &'a [i32] {
×
2362
        return self.mod_state.recorded();
×
2363
    }
2364

2365
    pub fn query_type<'a>(&self, code: i32) -> Result<BaseModificationMetadata> {
×
2366
        return self.mod_state.query_type(code);
×
2367
    }
2368
}
2369

2370
impl<'a> Iterator for BaseModificationsPositionIter<'a> {
2371
    type Item = Result<(i32, Vec<hts_sys::hts_base_mod>)>;
2372

2373
    fn next(&mut self) -> Option<Self::Item> {
1✔
2374
        let ret = self.mod_state.buffer_next_mods();
1✔
2375

2376
        // Three possible things happened in buffer_next_mods:
2377
        // 1. the htslib API call was successful but there are no more mods
2378
        // 2. ths htslib API call was successful and we read some mods
2379
        // 3. the htslib API call failed, we propogate the error wrapped in an option
2380
        match ret {
1✔
2381
            Ok(num_mods) => {
1✔
2382
                if num_mods == 0 {
1✔
2383
                    return None;
1✔
2384
                } else {
2385
                    let data = (self.mod_state.buffer_pos, self.mod_state.buffer.clone());
1✔
2386
                    return Some(Ok(data));
1✔
2387
                }
2388
            }
2389
            Err(e) => return Some(Err(e)),
×
2390
        }
2391
    }
2392
}
2393

2394
/// Iterator over the base modifications that returns
2395
/// the next modification found, one by one
2396
pub struct BaseModificationsIter<'a> {
2397
    mod_state: BaseModificationState<'a>,
2398
    buffer_idx: usize,
2399
}
2400

2401
impl BaseModificationsIter<'_> {
2402
    fn new<'a>(r: &'a Record) -> Result<BaseModificationsIter<'a>> {
2✔
2403
        let state = BaseModificationState::new(r)?;
2✔
2404
        Ok(BaseModificationsIter {
2✔
2405
            mod_state: state,
×
2406
            buffer_idx: 0,
×
2407
        })
2408
    }
2409

2410
    pub fn recorded<'a>(&self) -> &'a [i32] {
2✔
2411
        return self.mod_state.recorded();
2✔
2412
    }
2413

2414
    pub fn query_type<'a>(&self, code: i32) -> Result<BaseModificationMetadata> {
2✔
2415
        return self.mod_state.query_type(code);
2✔
2416
    }
2417
}
2418

2419
impl<'a> Iterator for BaseModificationsIter<'a> {
2420
    type Item = Result<(i32, hts_sys::hts_base_mod)>;
2421

2422
    fn next(&mut self) -> Option<Self::Item> {
2✔
2423
        if self.buffer_idx == self.mod_state.buffer.len() {
4✔
2424
            // need to use the internal state to read the next
2425
            // set of modifications into the buffer
2426
            let ret = self.mod_state.buffer_next_mods();
2✔
2427

2428
            match ret {
2✔
2429
                Ok(num_mods) => {
2✔
2430
                    if num_mods == 0 {
2✔
2431
                        // done iterating
2432
                        return None;
2✔
2433
                    } else {
2434
                        // we read some mods, reset the position in the buffer then fall through
2435
                        self.buffer_idx = 0;
2✔
2436
                    }
2437
                }
2438
                Err(e) => return Some(Err(e)),
×
2439
            }
2440
        }
2441

2442
        // if we got here when there are mods buffered that we haven't emitted yet
2443
        assert!(self.buffer_idx < self.mod_state.buffer.len());
2✔
2444
        let data = (
2✔
2445
            self.mod_state.buffer_pos,
2✔
2446
            self.mod_state.buffer[self.buffer_idx],
2✔
2447
        );
2448
        self.buffer_idx += 1;
2✔
2449
        return Some(Ok(data));
2✔
2450
    }
2451
}
2452

2453
#[cfg(test)]
2454
mod tests {
2455
    use super::*;
2456

2457
    #[test]
2458
    fn test_cigar_string() {
2459
        let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]);
2460

2461
        assert_eq!(cigar[0], Cigar::Match(100));
2462
        assert_eq!(format!("{}", cigar), "100M10S");
2463
        for op in &cigar {
2464
            println!("{}", op);
2465
        }
2466
    }
2467

2468
    #[test]
2469
    fn test_cigar_string_view_pos() {
2470
        let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]).into_view(5);
2471
        assert_eq!(cigar.pos(), 5);
2472
    }
2473

2474
    #[test]
2475
    fn test_cigar_string_leading_softclips() {
2476
        let cigar = CigarString(vec![Cigar::SoftClip(10), Cigar::Match(100)]).into_view(0);
2477
        assert_eq!(cigar.leading_softclips(), 10);
2478
        let cigar2 = CigarString(vec![
2479
            Cigar::HardClip(5),
2480
            Cigar::SoftClip(10),
2481
            Cigar::Match(100),
2482
        ])
2483
        .into_view(0);
2484
        assert_eq!(cigar2.leading_softclips(), 10);
2485
    }
2486

2487
    #[test]
2488
    fn test_cigar_string_trailing_softclips() {
2489
        let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]).into_view(0);
2490
        assert_eq!(cigar.trailing_softclips(), 10);
2491
        let cigar2 = CigarString(vec![
2492
            Cigar::Match(100),
2493
            Cigar::SoftClip(10),
2494
            Cigar::HardClip(5),
2495
        ])
2496
        .into_view(0);
2497
        assert_eq!(cigar2.trailing_softclips(), 10);
2498
    }
2499

2500
    #[test]
2501
    fn test_cigar_read_pos() {
2502
        let vpos = 5; // variant position
2503

2504
        // Ignore leading HardClip
2505
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2506
        // var:                       V
2507
        // c01: 7H                 M  M
2508
        // qpos:                  00 01
2509
        let c01 = CigarString(vec![Cigar::HardClip(7), Cigar::Match(2)]).into_view(4);
2510
        assert_eq!(c01.read_pos(vpos, false, false).unwrap(), Some(1));
2511

2512
        // Skip leading SoftClip or use as pre-POS matches
2513
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2514
        // var:                       V
2515
        // c02: 5H2S         M  M  M  M  M  M
2516
        // qpos:  00        02 03 04 05 06 07
2517
        // c02: 5H     S  S  M  M  M  M  M  M
2518
        // qpos:      00 01 02 03 04 05 06 07
2519
        let c02 = CigarString(vec![Cigar::SoftClip(2), Cigar::Match(6)]).into_view(2);
2520
        assert_eq!(c02.read_pos(vpos, false, false).unwrap(), Some(5));
2521
        assert_eq!(c02.read_pos(vpos, true, false).unwrap(), Some(5));
2522

2523
        // Skip leading SoftClip returning None for unmatched reference positiong or use as
2524
        // pre-POS matches
2525
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2526
        // var:                       V
2527
        // c03:  3S                      M  M
2528
        // qpos: 00                     03 04
2529
        // c03:                 S  S  S  M  M
2530
        // qpos:               00 01 02 03 04
2531
        let c03 = CigarString(vec![Cigar::SoftClip(3), Cigar::Match(6)]).into_view(6);
2532
        assert_eq!(c03.read_pos(vpos, false, false).unwrap(), None);
2533
        assert_eq!(c03.read_pos(vpos, true, false).unwrap(), Some(2));
2534

2535
        // Skip leading Insertion before variant position
2536
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2537
        // var:                       V
2538
        // c04:  3I                X  X  X
2539
        // qpos: 00               03 04 05
2540
        let c04 = CigarString(vec![Cigar::Ins(3), Cigar::Diff(3)]).into_view(4);
2541
        assert_eq!(c04.read_pos(vpos, true, false).unwrap(), Some(4));
2542

2543
        // Matches and deletion before variant position
2544
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2545
        // var:                       V
2546
        // c05:        =  =  D  D  X  =  =
2547
        // qpos:      00 01       02 03 04 05
2548
        let c05 = CigarString(vec![
2549
            Cigar::Equal(2),
2550
            Cigar::Del(2),
2551
            Cigar::Diff(1),
2552
            Cigar::Equal(2),
2553
        ])
2554
        .into_view(0);
2555
        assert_eq!(c05.read_pos(vpos, true, false).unwrap(), Some(3));
2556

2557
        // single nucleotide Deletion covering variant position
2558
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2559
        // var:                       V
2560
        // c06:                 =  =  D  X  X
2561
        // qpos:               00 01    02 03
2562
        let c06 = CigarString(vec![Cigar::Equal(2), Cigar::Del(1), Cigar::Diff(2)]).into_view(3);
2563
        assert_eq!(c06.read_pos(vpos, false, true).unwrap(), Some(2));
2564
        assert_eq!(c06.read_pos(vpos, false, false).unwrap(), None);
2565

2566
        // three nucleotide Deletion covering variant position
2567
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2568
        // var:                       V
2569
        // c07:              =  =  D  D  D  M  M
2570
        // qpos:            00 01          02 03
2571
        let c07 = CigarString(vec![Cigar::Equal(2), Cigar::Del(3), Cigar::Match(2)]).into_view(2);
2572
        assert_eq!(c07.read_pos(vpos, false, true).unwrap(), Some(2));
2573
        assert_eq!(c07.read_pos(vpos, false, false).unwrap(), None);
2574

2575
        // three nucleotide RefSkip covering variant position
2576
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2577
        // var:                       V
2578
        // c08:              =  X  N  N  N  M  M
2579
        // qpos:            00 01          02 03
2580
        let c08 = CigarString(vec![
2581
            Cigar::Equal(1),
2582
            Cigar::Diff(1),
2583
            Cigar::RefSkip(3),
2584
            Cigar::Match(2),
2585
        ])
2586
        .into_view(2);
2587
        assert_eq!(c08.read_pos(vpos, false, true).unwrap(), None);
2588
        assert_eq!(c08.read_pos(vpos, false, false).unwrap(), None);
2589

2590
        // internal hard clip before variant pos
2591
        // ref:       00 01 02 03    04 05 06 07 08 09 10 11 12 13 14 15
2592
        // var:                          V
2593
        // c09: 3H           =  = 3H  =  =
2594
        // qpos:            00 01    02 03
2595
        let c09 = CigarString(vec![
2596
            Cigar::HardClip(3),
2597
            Cigar::Equal(2),
2598
            Cigar::HardClip(3),
2599
            Cigar::Equal(2),
2600
        ])
2601
        .into_view(2);
2602
        assert_eq!(c09.read_pos(vpos, false, true).is_err(), true);
2603

2604
        // Deletion right before variant position
2605
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2606
        // var:                       V
2607
        // c10:           M  M  D  D  M  M
2608
        // qpos:         00 01       02 03
2609
        let c10 = CigarString(vec![Cigar::Match(2), Cigar::Del(2), Cigar::Match(2)]).into_view(1);
2610
        assert_eq!(c10.read_pos(vpos, false, false).unwrap(), Some(2));
2611

2612
        // Insertion right before variant position
2613
        // ref:       00 01 02 03 04    05 06 07 08 09 10 11 12 13 14 15
2614
        // var:                          V
2615
        // c11:                 M  M 3I  M
2616
        // qpos:               00 01 02 05 06
2617
        let c11 = CigarString(vec![Cigar::Match(2), Cigar::Ins(3), Cigar::Match(2)]).into_view(3);
2618
        assert_eq!(c11.read_pos(vpos, false, false).unwrap(), Some(5));
2619

2620
        // Insertion right after variant position
2621
        // ref:       00 01 02 03 04 05    06 07 08 09 10 11 12 13 14 15
2622
        // var:                       V
2623
        // c12:                 M  M  M 2I  =
2624
        // qpos:               00 01 02 03 05
2625
        let c12 = CigarString(vec![Cigar::Match(3), Cigar::Ins(2), Cigar::Equal(1)]).into_view(3);
2626
        assert_eq!(c12.read_pos(vpos, false, false).unwrap(), Some(2));
2627

2628
        // Deletion right after variant position
2629
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2630
        // var:                       V
2631
        // c13:                 M  M  M  D  =
2632
        // qpos:               00 01 02    03
2633
        let c13 = CigarString(vec![Cigar::Match(3), Cigar::Del(1), Cigar::Equal(1)]).into_view(3);
2634
        assert_eq!(c13.read_pos(vpos, false, false).unwrap(), Some(2));
2635

2636
        // A messy and complicated example, including a Pad operation
2637
        let vpos2 = 15;
2638
        // ref:       00    01 02    03 04 05    06 07 08 09 10 11 12 13 14 15
2639
        // var:                                                           V
2640
        // c14: 5H3S   = 2P  M  X 3I  M  M  D 2I  =  =  N  N  N  M  M  M  =  =  5S2H
2641
        // qpos:  00  03    04 05 06 09 10    11 13 14          15 16 17 18 19
2642
        let c14 = CigarString(vec![
2643
            Cigar::HardClip(5),
2644
            Cigar::SoftClip(3),
2645
            Cigar::Equal(1),
2646
            Cigar::Pad(2),
2647
            Cigar::Match(1),
2648
            Cigar::Diff(1),
2649
            Cigar::Ins(3),
2650
            Cigar::Match(2),
2651
            Cigar::Del(1),
2652
            Cigar::Ins(2),
2653
            Cigar::Equal(2),
2654
            Cigar::RefSkip(3),
2655
            Cigar::Match(3),
2656
            Cigar::Equal(2),
2657
            Cigar::SoftClip(5),
2658
            Cigar::HardClip(2),
2659
        ])
2660
        .into_view(0);
2661
        assert_eq!(c14.read_pos(vpos2, false, false).unwrap(), Some(19));
2662

2663
        // HardClip after Pad
2664
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2665
        // var:                       V
2666
        // c15: 5P1H            =  =  =
2667
        // qpos:               00 01 02
2668
        let c15 =
2669
            CigarString(vec![Cigar::Pad(5), Cigar::HardClip(1), Cigar::Equal(3)]).into_view(3);
2670
        assert_eq!(c15.read_pos(vpos, false, false).is_err(), true);
2671

2672
        // only HardClip and Pad operations
2673
        // c16: 7H5P2H
2674
        let c16 =
2675
            CigarString(vec![Cigar::HardClip(7), Cigar::Pad(5), Cigar::HardClip(2)]).into_view(3);
2676
        assert_eq!(c16.read_pos(vpos, false, false).unwrap(), None);
2677
    }
2678

2679
    #[test]
2680
    fn test_clone() {
2681
        let mut rec = Record::new();
2682
        rec.set_pos(300);
2683
        rec.set_qname(b"read1");
2684
        let clone = rec.clone();
2685
        assert_eq!(rec, clone);
2686
    }
2687

2688
    #[test]
2689
    fn test_flags() {
2690
        let mut rec = Record::new();
2691

2692
        rec.set_paired();
2693
        assert_eq!(rec.is_paired(), true);
2694

2695
        rec.set_supplementary();
2696
        assert_eq!(rec.is_supplementary(), true);
2697
        assert_eq!(rec.is_supplementary(), true);
2698

2699
        rec.unset_paired();
2700
        assert_eq!(rec.is_paired(), false);
2701
        assert_eq!(rec.is_supplementary(), true);
2702

2703
        rec.unset_supplementary();
2704
        assert_eq!(rec.is_paired(), false);
2705
        assert_eq!(rec.is_supplementary(), false);
2706
    }
2707

2708
    #[test]
2709
    fn test_cigar_parse() {
2710
        let cigar = "1S20M1D2I3X1=2H";
2711
        let parsed = CigarString::try_from(cigar).unwrap();
2712
        assert_eq!(parsed.to_string(), cigar);
2713
    }
2714
}
2715

2716
#[cfg(test)]
2717
mod alignment_cigar_tests {
2718
    use super::*;
2719
    use crate::bam::{Read, Reader};
2720
    use bio_types::alignment::AlignmentOperation::{Del, Ins, Match, Subst, Xclip, Yclip};
2721
    use bio_types::alignment::{Alignment, AlignmentMode};
2722

2723
    #[test]
2724
    fn test_cigar() {
2725
        let alignment = Alignment {
2726
            score: 5,
2727
            xstart: 3,
2728
            ystart: 0,
2729
            xend: 9,
2730
            yend: 10,
2731
            ylen: 10,
2732
            xlen: 10,
2733
            operations: vec![Match, Match, Match, Subst, Ins, Ins, Del, Del],
2734
            mode: AlignmentMode::Semiglobal,
2735
        };
2736
        assert_eq!(alignment.cigar(false), "3S3=1X2I2D1S");
2737
        assert_eq!(
2738
            CigarString::from_alignment(&alignment, false).0,
2739
            vec![
2740
                Cigar::SoftClip(3),
2741
                Cigar::Equal(3),
2742
                Cigar::Diff(1),
2743
                Cigar::Ins(2),
2744
                Cigar::Del(2),
2745
                Cigar::SoftClip(1),
2746
            ]
2747
        );
2748

2749
        let alignment = Alignment {
2750
            score: 5,
2751
            xstart: 0,
2752
            ystart: 5,
2753
            xend: 4,
2754
            yend: 10,
2755
            ylen: 10,
2756
            xlen: 5,
2757
            operations: vec![Yclip(5), Match, Subst, Subst, Ins, Del, Del, Xclip(1)],
2758
            mode: AlignmentMode::Custom,
2759
        };
2760
        assert_eq!(alignment.cigar(false), "1=2X1I2D1S");
2761
        assert_eq!(alignment.cigar(true), "1=2X1I2D1H");
2762
        assert_eq!(
2763
            CigarString::from_alignment(&alignment, false).0,
2764
            vec![
2765
                Cigar::Equal(1),
2766
                Cigar::Diff(2),
2767
                Cigar::Ins(1),
2768
                Cigar::Del(2),
2769
                Cigar::SoftClip(1),
2770
            ]
2771
        );
2772
        assert_eq!(
2773
            CigarString::from_alignment(&alignment, true).0,
2774
            vec![
2775
                Cigar::Equal(1),
2776
                Cigar::Diff(2),
2777
                Cigar::Ins(1),
2778
                Cigar::Del(2),
2779
                Cigar::HardClip(1),
2780
            ]
2781
        );
2782

2783
        let alignment = Alignment {
2784
            score: 5,
2785
            xstart: 0,
2786
            ystart: 5,
2787
            xend: 3,
2788
            yend: 8,
2789
            ylen: 10,
2790
            xlen: 3,
2791
            operations: vec![Yclip(5), Subst, Match, Subst, Yclip(2)],
2792
            mode: AlignmentMode::Custom,
2793
        };
2794
        assert_eq!(alignment.cigar(false), "1X1=1X");
2795
        assert_eq!(
2796
            CigarString::from_alignment(&alignment, false).0,
2797
            vec![Cigar::Diff(1), Cigar::Equal(1), Cigar::Diff(1)]
2798
        );
2799

2800
        let alignment = Alignment {
2801
            score: 5,
2802
            xstart: 0,
2803
            ystart: 5,
2804
            xend: 3,
2805
            yend: 8,
2806
            ylen: 10,
2807
            xlen: 3,
2808
            operations: vec![Subst, Match, Subst],
2809
            mode: AlignmentMode::Semiglobal,
2810
        };
2811
        assert_eq!(alignment.cigar(false), "1X1=1X");
2812
        assert_eq!(
2813
            CigarString::from_alignment(&alignment, false).0,
2814
            vec![Cigar::Diff(1), Cigar::Equal(1), Cigar::Diff(1)]
2815
        );
2816
    }
2817

2818
    #[test]
2819
    fn test_read_orientation_f1r2() {
2820
        let mut bam = Reader::from_path(&"test/test_paired.sam").unwrap();
2821

2822
        for res in bam.records() {
2823
            let record = res.unwrap();
2824
            assert_eq!(
2825
                record.read_pair_orientation(),
2826
                SequenceReadPairOrientation::F1R2
2827
            );
2828
        }
2829
    }
2830

2831
    #[test]
2832
    pub fn test_cigar_parsing_non_ascii_error() {
2833
        let cigar_str = "43ጷ";
2834
        let expected_error = Err(Error::BamParseCigar {
2835
                msg: "CIGAR string contained non-ASCII characters, which are not valid. Valid are [0-9MIDNSHP=X].".to_owned(),
2836
            });
2837

2838
        let result = CigarString::try_from(cigar_str);
2839
        assert_eq!(expected_error, result);
2840
    }
2841

2842
    #[test]
2843
    pub fn test_cigar_parsing() {
2844
        // parsing test cases
2845
        let cigar_strs = vec![
2846
            "1H10M4D100I300N1102=10P25X11S", // test every cigar opt
2847
            "100M",                          // test a single op
2848
            "",                              // test empty input
2849
            "1H1=1H",                        // test simple hardclip
2850
            "1S1=1S",                        // test simple softclip
2851
            "11H11S11=11S11H",               // test complex softclip
2852
            "10H",
2853
            "10S",
2854
        ];
2855
        // expected results
2856
        let cigars = vec![
2857
            CigarString(vec![
2858
                Cigar::HardClip(1),
2859
                Cigar::Match(10),
2860
                Cigar::Del(4),
2861
                Cigar::Ins(100),
2862
                Cigar::RefSkip(300),
2863
                Cigar::Equal(1102),
2864
                Cigar::Pad(10),
2865
                Cigar::Diff(25),
2866
                Cigar::SoftClip(11),
2867
            ]),
2868
            CigarString(vec![Cigar::Match(100)]),
2869
            CigarString(vec![]),
2870
            CigarString(vec![
2871
                Cigar::HardClip(1),
2872
                Cigar::Equal(1),
2873
                Cigar::HardClip(1),
2874
            ]),
2875
            CigarString(vec![
2876
                Cigar::SoftClip(1),
2877
                Cigar::Equal(1),
2878
                Cigar::SoftClip(1),
2879
            ]),
2880
            CigarString(vec![
2881
                Cigar::HardClip(11),
2882
                Cigar::SoftClip(11),
2883
                Cigar::Equal(11),
2884
                Cigar::SoftClip(11),
2885
                Cigar::HardClip(11),
2886
            ]),
2887
            CigarString(vec![Cigar::HardClip(10)]),
2888
            CigarString(vec![Cigar::SoftClip(10)]),
2889
        ];
2890
        // compare
2891
        for (&cigar_str, truth) in cigar_strs.iter().zip(cigars.iter()) {
2892
            let cigar_parse = CigarString::try_from(cigar_str)
2893
                .expect(&format!("Unable to parse cigar: {}", cigar_str));
2894
            assert_eq!(&cigar_parse, truth);
2895
        }
2896
    }
2897
}
2898

2899
#[cfg(test)]
2900
mod basemod_tests {
2901
    use crate::bam::{Read, Reader};
2902

2903
    #[test]
2904
    pub fn test_count_recorded() {
2905
        let mut bam = Reader::from_path(&"test/base_mods/MM-double.sam").unwrap();
2906

2907
        for r in bam.records() {
2908
            let record = r.unwrap();
2909
            if let Ok(mods) = record.basemods_iter() {
2910
                let n = mods.recorded().len();
2911
                assert_eq!(n, 3);
2912
            };
2913
        }
2914
    }
2915

2916
    #[test]
2917
    pub fn test_query_type() {
2918
        let mut bam = Reader::from_path(&"test/base_mods/MM-orient.sam").unwrap();
2919

2920
        let mut n_fwd = 0;
2921
        let mut n_rev = 0;
2922

2923
        for r in bam.records() {
2924
            let record = r.unwrap();
2925
            if let Ok(mods) = record.basemods_iter() {
2926
                for mod_code in mods.recorded() {
2927
                    if let Ok(mod_metadata) = mods.query_type(*mod_code) {
2928
                        if mod_metadata.strand == 0 {
2929
                            n_fwd += 1;
2930
                        }
2931
                        if mod_metadata.strand == 1 {
2932
                            n_rev += 1;
2933
                        }
2934
                    }
2935
                }
2936
            };
2937
        }
2938
        assert_eq!(n_fwd, 2);
2939
        assert_eq!(n_rev, 2);
2940
    }
2941

2942
    #[test]
2943
    pub fn test_mod_iter() {
2944
        let mut bam = Reader::from_path(&"test/base_mods/MM-double.sam").unwrap();
2945
        let expected_positions = [1, 7, 12, 13, 13, 22, 30, 31];
2946
        let mut i = 0;
2947

2948
        for r in bam.records() {
2949
            let record = r.unwrap();
2950
            for res in record.basemods_iter().unwrap() {
2951
                if let Ok((position, _m)) = res {
2952
                    assert_eq!(position, expected_positions[i]);
2953
                    i += 1;
2954
                }
2955
            }
2956
        }
2957
    }
2958

2959
    #[test]
2960
    pub fn test_position_iter() {
2961
        let mut bam = Reader::from_path(&"test/base_mods/MM-double.sam").unwrap();
2962
        let expected_positions = [1, 7, 12, 13, 22, 30, 31];
2963
        let expected_counts = [1, 1, 1, 2, 1, 1, 1];
2964
        let mut i = 0;
2965

2966
        for r in bam.records() {
2967
            let record = r.unwrap();
2968
            for res in record.basemods_position_iter().unwrap() {
2969
                if let Ok((position, elements)) = res {
2970
                    assert_eq!(position, expected_positions[i]);
2971
                    assert_eq!(elements.len(), expected_counts[i]);
2972
                    i += 1;
2973
                }
2974
            }
2975
        }
2976
    }
2977
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc