• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

rust-bio / rust-htslib / 11799067967

12 Nov 2024 02:14PM UTC coverage: 79.66% (-0.03%) from 79.692%
11799067967

Pull #447

github

web-flow
Merge 0528a3adb into 4b3cbcbcd
Pull Request #447: fix: allow leading deletions in read_pos method of CigarStringView.

0 of 2 new or added lines in 1 file covered. (0.0%)

1 existing line in 1 file now uncovered.

2483 of 3117 relevant lines covered (79.66%)

2.46 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

77.45
/src/bam/record.rs
1
// Copyright 2014 Christopher Schröder, Johannes Köster.
2
// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3
// This file may not be copied, modified, or distributed
4
// except according to those terms.
12✔
5

6
use std::convert::TryFrom;
7
use std::convert::TryInto;
8
use std::ffi;
9
use std::fmt;
10
use std::marker::PhantomData;
11
use std::mem::{size_of, MaybeUninit};
12
use std::ops;
13
use std::os::raw::c_char;
14
use std::rc::Rc;
15
use std::slice;
16
use std::str;
17
use std::u32;
18

19
use byteorder::{LittleEndian, ReadBytesExt};
20

21
use crate::bam::Error;
22
use crate::bam::HeaderView;
23
use crate::errors::Result;
24
use crate::htslib;
25
use crate::utils;
26
#[cfg(feature = "serde_feature")]
27
use serde::{self, Deserialize, Serialize};
28

29
use bio_types::alignment::{Alignment, AlignmentMode, AlignmentOperation};
30
use bio_types::genome;
31
use bio_types::sequence::SequenceRead;
32
use bio_types::sequence::SequenceReadPairOrientation;
33
use bio_types::strand::ReqStrand;
34

35
/// A macro creating methods for flag access.
36
macro_rules! flag {
37
    ($get:ident, $set:ident, $unset:ident, $bit:expr) => {
2✔
38
        pub fn $get(&self) -> bool {
11✔
39
            self.inner().core.flag & $bit != 0
11✔
40
        }
41

42
        pub fn $set(&mut self) {
12✔
43
            self.inner_mut().core.flag |= $bit;
14✔
44
        }
45

46
        pub fn $unset(&mut self) {
3✔
47
            self.inner_mut().core.flag &= !$bit;
2✔
48
        }
49
    };
50
}
51

52
/// A BAM record.
53
pub struct Record {
54
    pub inner: htslib::bam1_t,
55
    own: bool,
56
    cigar: Option<CigarStringView>,
57
    header: Option<Rc<HeaderView>>,
58
}
59

60
unsafe impl Send for Record {}
61
unsafe impl Sync for Record {}
62

63
impl Clone for Record {
64
    fn clone(&self) -> Self {
1✔
65
        let mut copy = Record::new();
1✔
66
        unsafe { htslib::bam_copy1(copy.inner_ptr_mut(), self.inner_ptr()) };
2✔
67
        copy
1✔
68
    }
69
}
70

71
impl PartialEq for Record {
72
    fn eq(&self, other: &Record) -> bool {
1✔
73
        self.tid() == other.tid()
1✔
74
            && self.pos() == other.pos()
1✔
75
            && self.bin() == other.bin()
1✔
76
            && self.mapq() == other.mapq()
1✔
77
            && self.flags() == other.flags()
1✔
78
            && self.mtid() == other.mtid()
1✔
79
            && self.mpos() == other.mpos()
1✔
80
            && self.insert_size() == other.insert_size()
1✔
81
            && self.data() == other.data()
1✔
82
            && self.inner().core.l_extranul == other.inner().core.l_extranul
1✔
83
    }
84
}
85

86
impl Eq for Record {}
87

88
impl fmt::Debug for Record {
89
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
×
90
        fmt.write_fmt(format_args!(
×
91
            "Record(tid: {}, pos: {})",
92
            self.tid(),
×
93
            self.pos()
×
94
        ))
95
    }
96
}
97

98
impl Default for Record {
99
    fn default() -> Self {
×
100
        Self::new()
×
101
    }
102
}
103

104
#[inline]
105
fn extranul_from_qname(qname: &[u8]) -> usize {
9✔
106
    let qlen = qname.len() + 1;
9✔
107
    if qlen % 4 != 0 {
19✔
108
        4 - qlen % 4
18✔
109
    } else {
110
        0
1✔
111
    }
112
}
113

114
impl Record {
115
    /// Create an empty BAM record.
116
    pub fn new() -> Self {
9✔
117
        let mut record = Record {
118
            inner: unsafe { MaybeUninit::zeroed().assume_init() },
9✔
119
            own: true,
120
            cigar: None,
121
            header: None,
122
        };
123
        // The read/query name needs to be set as empty to properly initialize
124
        // the record
125
        record.set_qname(b"");
9✔
126
        // Developer note: these are needed so the returned record is properly
127
        // initialized as unmapped.
128
        record.set_unmapped();
9✔
129
        record.set_tid(-1);
9✔
130
        record.set_pos(-1);
9✔
131
        record.set_mpos(-1);
9✔
132
        record.set_mtid(-1);
9✔
133
        record
9✔
134
    }
135

136
    pub fn from_inner(from: *mut htslib::bam1_t) -> Self {
2✔
137
        Record {
138
            inner: {
139
                #[allow(clippy::uninit_assumed_init)]
140
                let mut inner = unsafe { MaybeUninit::uninit().assume_init() };
141
                unsafe {
142
                    ::libc::memcpy(
143
                        &mut inner as *mut htslib::bam1_t as *mut ::libc::c_void,
144
                        from as *const ::libc::c_void,
145
                        size_of::<htslib::bam1_t>(),
146
                    );
147
                }
148
                inner
149
            },
150
            own: false,
151
            cigar: None,
152
            header: None,
153
        }
154
    }
155

156
    // Create a BAM record from a line SAM text. SAM slice need not be 0-terminated.
157
    pub fn from_sam(header_view: &HeaderView, sam: &[u8]) -> Result<Record> {
3✔
158
        let mut record = Self::new();
3✔
159

160
        let mut sam_copy = Vec::with_capacity(sam.len() + 1);
3✔
161
        sam_copy.extend(sam);
3✔
162
        sam_copy.push(0);
3✔
163

164
        let mut sam_string = htslib::kstring_t {
165
            s: sam_copy.as_ptr() as *mut c_char,
3✔
166
            l: sam_copy.len(),
3✔
167
            m: sam_copy.len(),
3✔
168
        };
169

170
        let succ = unsafe {
171
            htslib::sam_parse1(
172
                &mut sam_string,
173
                header_view.inner_ptr() as *mut htslib::bam_hdr_t,
3✔
174
                record.inner_ptr_mut(),
3✔
175
            )
176
        };
177

178
        if succ == 0 {
6✔
179
            Ok(record)
3✔
180
        } else {
181
            Err(Error::BamParseSAM {
×
182
                rec: str::from_utf8(&sam_copy).unwrap().to_owned(),
×
183
            })
184
        }
185
    }
186

187
    pub fn set_header(&mut self, header: Rc<HeaderView>) {
6✔
188
        self.header = Some(header);
12✔
189
    }
190

191
    pub(super) fn data(&self) -> &[u8] {
4✔
192
        unsafe { slice::from_raw_parts(self.inner().data, self.inner().l_data as usize) }
4✔
193
    }
194

195
    #[inline]
196
    pub fn inner_mut(&mut self) -> &mut htslib::bam1_t {
9✔
197
        &mut self.inner
9✔
198
    }
199

200
    #[inline]
201
    pub(super) fn inner_ptr_mut(&mut self) -> *mut htslib::bam1_t {
7✔
202
        &mut self.inner as *mut htslib::bam1_t
7✔
203
    }
204

205
    #[inline]
206
    pub fn inner(&self) -> &htslib::bam1_t {
10✔
207
        &self.inner
10✔
208
    }
209

210
    #[inline]
211
    pub(super) fn inner_ptr(&self) -> *const htslib::bam1_t {
3✔
212
        &self.inner as *const htslib::bam1_t
3✔
213
    }
214

215
    /// Get target id.
216
    pub fn tid(&self) -> i32 {
2✔
217
        self.inner().core.tid
2✔
218
    }
219

220
    /// Set target id.
221
    pub fn set_tid(&mut self, tid: i32) {
9✔
222
        self.inner_mut().core.tid = tid;
9✔
223
    }
224

225
    /// Get position (0-based).
226
    pub fn pos(&self) -> i64 {
1✔
227
        self.inner().core.pos
1✔
228
    }
229

230
    /// Set position (0-based).
231
    pub fn set_pos(&mut self, pos: i64) {
9✔
232
        self.inner_mut().core.pos = pos;
9✔
233
    }
234

235
    pub fn bin(&self) -> u16 {
1✔
236
        self.inner().core.bin
1✔
237
    }
238

239
    pub fn set_bin(&mut self, bin: u16) {
×
240
        self.inner_mut().core.bin = bin;
×
241
    }
242

243
    /// Get MAPQ.
244
    pub fn mapq(&self) -> u8 {
1✔
245
        self.inner().core.qual
1✔
246
    }
247

248
    /// Set MAPQ.
249
    pub fn set_mapq(&mut self, mapq: u8) {
×
250
        self.inner_mut().core.qual = mapq;
×
251
    }
252

253
    /// Get strand information from record flags.
254
    pub fn strand(&self) -> ReqStrand {
×
255
        let reverse = self.flags() & 0x10 != 0;
×
256
        if reverse {
×
257
            ReqStrand::Reverse
×
258
        } else {
259
            ReqStrand::Forward
×
260
        }
261
    }
262

263
    /// Get raw flags.
264
    pub fn flags(&self) -> u16 {
2✔
265
        self.inner().core.flag
2✔
266
    }
267

268
    /// Set raw flags.
269
    pub fn set_flags(&mut self, flags: u16) {
×
270
        self.inner_mut().core.flag = flags;
×
271
    }
272

273
    /// Unset all flags.
274
    pub fn unset_flags(&mut self) {
×
275
        self.inner_mut().core.flag = 0;
×
276
    }
277

278
    /// Get target id of mate.
279
    pub fn mtid(&self) -> i32 {
1✔
280
        self.inner().core.mtid
1✔
281
    }
282

283
    /// Set target id of mate.
284
    pub fn set_mtid(&mut self, mtid: i32) {
9✔
285
        self.inner_mut().core.mtid = mtid;
9✔
286
    }
287

288
    /// Get mate position.
289
    pub fn mpos(&self) -> i64 {
1✔
290
        self.inner().core.mpos
1✔
291
    }
292

293
    /// Set mate position.
294
    pub fn set_mpos(&mut self, mpos: i64) {
9✔
295
        self.inner_mut().core.mpos = mpos;
9✔
296
    }
297

298
    /// Get insert size.
299
    pub fn insert_size(&self) -> i64 {
1✔
300
        self.inner().core.isize_
1✔
301
    }
302

303
    /// Set insert size.
304
    pub fn set_insert_size(&mut self, insert_size: i64) {
×
305
        self.inner_mut().core.isize_ = insert_size;
×
306
    }
307

308
    fn qname_capacity(&self) -> usize {
10✔
309
        self.inner().core.l_qname as usize
10✔
310
    }
311

312
    fn qname_len(&self) -> usize {
2✔
313
        // discount all trailing zeros (the default one and extra nulls)
314
        self.qname_capacity() - 1 - self.inner().core.l_extranul as usize
2✔
315
    }
316

317
    /// Get qname (read name). Complexity: O(1).
318
    pub fn qname(&self) -> &[u8] {
2✔
319
        &self.data()[..self.qname_len()]
2✔
320
    }
321

322
    /// Set the variable length data buffer
323
    pub fn set_data(&mut self, new_data: &[u8]) {
1✔
324
        self.cigar = None;
2✔
325

326
        self.inner_mut().l_data = new_data.len() as i32;
1✔
327
        if (self.inner().m_data as i32) < self.inner().l_data {
1✔
328
            // Verbosity due to lexical borrowing
329
            let l_data = self.inner().l_data;
1✔
330
            self.realloc_var_data(l_data as usize);
1✔
331
        }
332

333
        // Copy new data into buffer
334
        let data =
1✔
335
            unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
336
        utils::copy_memory(new_data, data);
1✔
337
    }
338

339
    /// Set variable length data (qname, cigar, seq, qual).
340
    /// The aux data is left unchanged.
341
    /// `qual` is Phred-scaled quality values, without any offset.
342
    /// NOTE: seq.len() must equal qual.len() or this method
343
    /// will panic. If you don't have quality values use
344
    /// `let quals = vec![ 255 as u8; seq.len()];` as a placeholder that will
345
    /// be recognized as missing QVs by `samtools`.
346
    pub fn set(&mut self, qname: &[u8], cigar: Option<&CigarString>, seq: &[u8], qual: &[u8]) {
1✔
347
        assert!(qname.len() < 255);
1✔
348
        assert_eq!(seq.len(), qual.len(), "seq.len() must equal qual.len()");
1✔
349

350
        self.cigar = None;
1✔
351

352
        let cigar_width = if let Some(cigar_string) = cigar {
2✔
353
            cigar_string.len()
1✔
354
        } else {
355
            0
×
356
        } * 4;
357
        let q_len = qname.len() + 1;
2✔
358
        let extranul = extranul_from_qname(qname);
1✔
359

360
        let orig_aux_offset = self.qname_capacity()
4✔
361
            + 4 * self.cigar_len()
1✔
362
            + (self.seq_len() + 1) / 2
2✔
363
            + self.seq_len();
1✔
364
        let new_aux_offset = q_len + extranul + cigar_width + (seq.len() + 1) / 2 + qual.len();
2✔
365
        assert!(orig_aux_offset <= self.inner.l_data as usize);
1✔
366
        let aux_len = self.inner.l_data as usize - orig_aux_offset;
1✔
367
        self.inner_mut().l_data = (new_aux_offset + aux_len) as i32;
2✔
368
        if (self.inner().m_data as i32) < self.inner().l_data {
1✔
369
            // Verbosity due to lexical borrowing
370
            let l_data = self.inner().l_data;
1✔
371
            self.realloc_var_data(l_data as usize);
1✔
372
        }
373

374
        // Copy the aux data.
375
        if aux_len > 0 && orig_aux_offset != new_aux_offset {
2✔
376
            let data =
1✔
377
                unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().m_data as usize) };
378
            data.copy_within(orig_aux_offset..orig_aux_offset + aux_len, new_aux_offset);
1✔
379
        }
380

381
        let data =
1✔
382
            unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
383

384
        // qname
385
        utils::copy_memory(qname, data);
1✔
386
        for i in 0..=extranul {
4✔
387
            data[qname.len() + i] = b'\0';
2✔
388
        }
389
        let mut i = q_len + extranul;
2✔
390
        self.inner_mut().core.l_qname = i as u16;
1✔
391
        self.inner_mut().core.l_extranul = extranul as u8;
1✔
392

393
        // cigar
394
        if let Some(cigar_string) = cigar {
3✔
395
            let cigar_data = unsafe {
396
                //cigar is always aligned to 4 bytes (see extranul above) - so this is safe
397
                #[allow(clippy::cast_ptr_alignment)]
398
                slice::from_raw_parts_mut(data[i..].as_ptr() as *mut u32, cigar_string.len())
2✔
399
            };
400
            for (i, c) in cigar_string.iter().enumerate() {
4✔
401
                cigar_data[i] = c.encode();
2✔
402
            }
403
            self.inner_mut().core.n_cigar = cigar_string.len() as u32;
1✔
404
            i += cigar_string.len() * 4;
2✔
405
        } else {
406
            self.inner_mut().core.n_cigar = 0;
×
407
        };
408

409
        // seq
410
        {
411
            for j in (0..seq.len()).step_by(2) {
4✔
412
                data[i + j / 2] = ENCODE_BASE[seq[j] as usize] << 4
4✔
413
                    | (if j + 1 < seq.len() {
4✔
414
                        ENCODE_BASE[seq[j + 1] as usize]
2✔
415
                    } else {
416
                        0
1✔
417
                    });
418
            }
419
            self.inner_mut().core.l_qseq = seq.len() as i32;
1✔
420
            i += (seq.len() + 1) / 2;
2✔
421
        }
422

423
        // qual
424
        utils::copy_memory(qual, &mut data[i..]);
2✔
425
    }
426

427
    /// Replace current qname with a new one.
428
    pub fn set_qname(&mut self, new_qname: &[u8]) {
9✔
429
        // 251 + 1NUL is the max 32-bit aligned value that fits in u8
430
        assert!(new_qname.len() < 252);
9✔
431

432
        let old_q_len = self.qname_capacity();
9✔
433
        // We're going to add a terminal NUL
434
        let extranul = extranul_from_qname(new_qname);
9✔
435
        let new_q_len = new_qname.len() + 1 + extranul;
9✔
436

437
        // Length of data after qname
438
        let other_len = self.inner_mut().l_data - old_q_len as i32;
18✔
439

440
        if new_q_len < old_q_len && self.inner().l_data > (old_q_len as i32) {
11✔
441
            self.inner_mut().l_data -= (old_q_len - new_q_len) as i32;
2✔
442
        } else if new_q_len > old_q_len {
9✔
443
            self.inner_mut().l_data += (new_q_len - old_q_len) as i32;
9✔
444

445
            // Reallocate if necessary
446
            if (self.inner().m_data as i32) < self.inner().l_data {
9✔
447
                // Verbosity due to lexical borrowing
448
                let l_data = self.inner().l_data;
9✔
449
                self.realloc_var_data(l_data as usize);
9✔
450
            }
451
        }
452

453
        if new_q_len != old_q_len {
9✔
454
            // Move other data to new location
455
            unsafe {
456
                let data = slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize);
9✔
457

458
                ::libc::memmove(
459
                    data.as_mut_ptr().add(new_q_len) as *mut ::libc::c_void,
460
                    data.as_mut_ptr().add(old_q_len) as *mut ::libc::c_void,
461
                    other_len as usize,
9✔
462
                );
463
            }
464
        }
465

466
        // Copy qname data
467
        let data =
9✔
468
            unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
469
        utils::copy_memory(new_qname, data);
9✔
470
        for i in 0..=extranul {
36✔
471
            data[new_q_len - i - 1] = b'\0';
9✔
472
        }
473
        self.inner_mut().core.l_qname = new_q_len as u16;
9✔
474
        self.inner_mut().core.l_extranul = extranul as u8;
9✔
475
    }
476

477
    fn realloc_var_data(&mut self, new_len: usize) {
9✔
478
        // pad request
479
        let new_len = new_len as u32;
9✔
480
        let new_request = new_len + 32 - (new_len % 32);
9✔
481

482
        let ptr = unsafe {
483
            ::libc::realloc(
9✔
484
                self.inner().data as *mut ::libc::c_void,
9✔
485
                new_request as usize,
9✔
486
            ) as *mut u8
487
        };
488

489
        if ptr.is_null() {
9✔
490
            panic!("ran out of memory in rust_htslib trying to realloc");
×
491
        }
492

493
        // don't update m_data until we know we have
494
        // a successful allocation.
495
        self.inner_mut().m_data = new_request;
9✔
496
        self.inner_mut().data = ptr;
9✔
497

498
        // we now own inner.data
499
        self.own = true;
9✔
500
    }
501

502
    pub fn cigar_len(&self) -> usize {
3✔
503
        self.inner().core.n_cigar as usize
3✔
504
    }
505

506
    /// Get reference to raw cigar string representation (as stored in BAM file).
507
    /// Usually, the method `Record::cigar` should be used instead.
508
    pub fn raw_cigar(&self) -> &[u32] {
1✔
509
        //cigar is always aligned to 4 bytes - so this is safe
510
        #[allow(clippy::cast_ptr_alignment)]
511
        unsafe {
512
            slice::from_raw_parts(
513
                self.data()[self.qname_capacity()..].as_ptr() as *const u32,
1✔
514
                self.cigar_len(),
1✔
515
            )
516
        }
517
    }
518

519
    /// Return unpacked cigar string. This will create a fresh copy the Cigar data.
520
    pub fn cigar(&self) -> CigarStringView {
1✔
521
        match self.cigar {
1✔
522
            Some(ref c) => c.clone(),
×
523
            None => self.unpack_cigar(),
1✔
524
        }
525
    }
526

527
    // Return unpacked cigar string. This returns None unless you have first called `bam::Record::cache_cigar`.
528
    pub fn cigar_cached(&self) -> Option<&CigarStringView> {
×
529
        self.cigar.as_ref()
×
530
    }
531

532
    /// Decode the cigar string and cache it inside the `Record`
533
    pub fn cache_cigar(&mut self) {
×
534
        self.cigar = Some(self.unpack_cigar())
×
535
    }
536

537
    /// Unpack cigar string. Complexity: O(k) with k being the length of the cigar string.
538
    fn unpack_cigar(&self) -> CigarStringView {
1✔
539
        CigarString(
540
            self.raw_cigar()
1✔
541
                .iter()
542
                .map(|&c| {
2✔
543
                    let len = c >> 4;
1✔
544
                    match c & 0b1111 {
1✔
545
                        0 => Cigar::Match(len),
1✔
546
                        1 => Cigar::Ins(len),
1✔
547
                        2 => Cigar::Del(len),
1✔
548
                        3 => Cigar::RefSkip(len),
1✔
549
                        4 => Cigar::SoftClip(len),
1✔
550
                        5 => Cigar::HardClip(len),
1✔
551
                        6 => Cigar::Pad(len),
×
552
                        7 => Cigar::Equal(len),
1✔
553
                        8 => Cigar::Diff(len),
1✔
554
                        _ => panic!("Unexpected cigar operation"),
×
555
                    }
556
                })
557
                .collect(),
558
        )
559
        .into_view(self.pos())
2✔
560
    }
561

562
    pub fn seq_len(&self) -> usize {
3✔
563
        self.inner().core.l_qseq as usize
3✔
564
    }
565

566
    fn seq_data(&self) -> &[u8] {
3✔
567
        let offset = self.qname_capacity() + self.cigar_len() * 4;
3✔
568
        &self.data()[offset..][..(self.seq_len() + 1) / 2]
9✔
569
    }
570

571
    /// Get read sequence. Complexity: O(1).
572
    pub fn seq(&self) -> Seq<'_> {
3✔
573
        Seq {
574
            encoded: self.seq_data(),
3✔
575
            len: self.seq_len(),
3✔
576
        }
577
    }
578

579
    /// Get base qualities (PHRED-scaled probability that base is wrong).
580
    /// This does not entail any offsets, hence the qualities can be used directly without
581
    /// e.g. subtracting 33. Complexity: O(1).
582
    pub fn qual(&self) -> &[u8] {
1✔
583
        &self.data()[self.qname_capacity() + self.cigar_len() * 4 + (self.seq_len() + 1) / 2..]
1✔
584
            [..self.seq_len()]
1✔
585
    }
586

587
    /// Look up an auxiliary field by its tag.
588
    ///
589
    /// Only the first two bytes of a given tag are used for the look-up of a field.
590
    /// See [`Aux`] for more details.
591
    pub fn aux(&self, tag: &[u8]) -> Result<Aux<'_>> {
3✔
592
        let c_str = ffi::CString::new(tag).map_err(|_| Error::BamAuxStringError)?;
3✔
593
        let aux = unsafe {
594
            htslib::bam_aux_get(
595
                &self.inner as *const htslib::bam1_t,
3✔
596
                c_str.as_ptr() as *mut c_char,
6✔
597
            )
598
        };
599
        unsafe { Self::read_aux_field(aux).map(|(aux_field, _length)| aux_field) }
9✔
600
    }
601

602
    unsafe fn read_aux_field<'a>(aux: *const u8) -> Result<(Aux<'a>, usize)> {
3✔
603
        const TAG_LEN: isize = 2;
604
        // Used for skipping type identifier
605
        const TYPE_ID_LEN: isize = 1;
606

607
        if aux.is_null() {
3✔
608
            return Err(Error::BamAuxTagNotFound);
3✔
609
        }
610

611
        let (data, type_size) = match *aux {
6✔
612
            b'A' => {
×
613
                let type_size = size_of::<u8>();
×
614
                (Aux::Char(*aux.offset(TYPE_ID_LEN)), type_size)
×
615
            }
616
            b'c' => {
×
617
                let type_size = size_of::<i8>();
1✔
618
                (Aux::I8(*aux.offset(TYPE_ID_LEN).cast::<i8>()), type_size)
1✔
619
            }
620
            b'C' => {
×
621
                let type_size = size_of::<u8>();
1✔
622
                (Aux::U8(*aux.offset(TYPE_ID_LEN)), type_size)
1✔
623
            }
624
            b's' => {
×
625
                let type_size = size_of::<i16>();
1✔
626
                (
627
                    Aux::I16(
1✔
628
                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
2✔
629
                            .read_i16::<LittleEndian>()
×
630
                            .map_err(|_| Error::BamAuxParsingError)?,
×
631
                    ),
632
                    type_size,
×
633
                )
634
            }
635
            b'S' => {
×
636
                let type_size = size_of::<u16>();
1✔
637
                (
638
                    Aux::U16(
1✔
639
                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
2✔
640
                            .read_u16::<LittleEndian>()
×
641
                            .map_err(|_| Error::BamAuxParsingError)?,
×
642
                    ),
643
                    type_size,
×
644
                )
645
            }
646
            b'i' => {
×
647
                let type_size = size_of::<i32>();
2✔
648
                (
649
                    Aux::I32(
2✔
650
                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
4✔
651
                            .read_i32::<LittleEndian>()
×
652
                            .map_err(|_| Error::BamAuxParsingError)?,
×
653
                    ),
654
                    type_size,
×
655
                )
656
            }
657
            b'I' => {
×
658
                let type_size = size_of::<u32>();
1✔
659
                (
660
                    Aux::U32(
1✔
661
                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
2✔
662
                            .read_u32::<LittleEndian>()
×
663
                            .map_err(|_| Error::BamAuxParsingError)?,
×
664
                    ),
665
                    type_size,
×
666
                )
667
            }
668
            b'f' => {
×
669
                let type_size = size_of::<f32>();
1✔
670
                (
671
                    Aux::Float(
1✔
672
                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
2✔
673
                            .read_f32::<LittleEndian>()
×
674
                            .map_err(|_| Error::BamAuxParsingError)?,
×
675
                    ),
676
                    type_size,
×
677
                )
678
            }
679
            b'd' => {
×
680
                let type_size = size_of::<f64>();
1✔
681
                (
682
                    Aux::Double(
1✔
683
                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
2✔
684
                            .read_f64::<LittleEndian>()
×
685
                            .map_err(|_| Error::BamAuxParsingError)?,
×
686
                    ),
687
                    type_size,
×
688
                )
689
            }
690
            b'Z' | b'H' => {
×
691
                let c_str = ffi::CStr::from_ptr(aux.offset(TYPE_ID_LEN).cast::<c_char>());
1✔
692
                let rust_str = c_str.to_str().map_err(|_| Error::BamAuxParsingError)?;
2✔
693
                (Aux::String(rust_str), c_str.to_bytes_with_nul().len())
1✔
694
            }
695
            b'B' => {
×
696
                const ARRAY_INNER_TYPE_LEN: isize = 1;
697
                const ARRAY_COUNT_LEN: isize = 4;
698

699
                // Used for skipping metadata
700
                let array_data_offset = TYPE_ID_LEN + ARRAY_INNER_TYPE_LEN + ARRAY_COUNT_LEN;
6✔
701

702
                let length =
6✔
703
                    slice::from_raw_parts(aux.offset(TYPE_ID_LEN + ARRAY_INNER_TYPE_LEN), 4)
×
704
                        .read_u32::<LittleEndian>()
×
705
                        .map_err(|_| Error::BamAuxParsingError)? as usize;
×
706

707
                // Return tuples of an `Aux` enum and the length of data + metadata in bytes
708
                let (array_data, array_size) = match *aux.offset(TYPE_ID_LEN) {
6✔
709
                    b'c' => (
1✔
710
                        Aux::ArrayI8(AuxArray::<'a, i8>::from_bytes(slice::from_raw_parts(
1✔
711
                            aux.offset(array_data_offset),
×
712
                            length,
×
713
                        ))),
714
                        length,
×
715
                    ),
716
                    b'C' => (
1✔
717
                        Aux::ArrayU8(AuxArray::<'a, u8>::from_bytes(slice::from_raw_parts(
1✔
718
                            aux.offset(array_data_offset),
×
719
                            length,
×
720
                        ))),
721
                        length,
×
722
                    ),
723
                    b's' => (
1✔
724
                        Aux::ArrayI16(AuxArray::<'a, i16>::from_bytes(slice::from_raw_parts(
1✔
725
                            aux.offset(array_data_offset),
×
726
                            length * size_of::<i16>(),
1✔
727
                        ))),
728
                        length * std::mem::size_of::<i16>(),
1✔
729
                    ),
730
                    b'S' => (
1✔
731
                        Aux::ArrayU16(AuxArray::<'a, u16>::from_bytes(slice::from_raw_parts(
1✔
732
                            aux.offset(array_data_offset),
×
733
                            length * size_of::<u16>(),
1✔
734
                        ))),
735
                        length * std::mem::size_of::<u16>(),
1✔
736
                    ),
737
                    b'i' => (
1✔
738
                        Aux::ArrayI32(AuxArray::<'a, i32>::from_bytes(slice::from_raw_parts(
1✔
739
                            aux.offset(array_data_offset),
×
740
                            length * size_of::<i32>(),
1✔
741
                        ))),
742
                        length * std::mem::size_of::<i32>(),
1✔
743
                    ),
744
                    b'I' => (
1✔
745
                        Aux::ArrayU32(AuxArray::<'a, u32>::from_bytes(slice::from_raw_parts(
1✔
746
                            aux.offset(array_data_offset),
×
747
                            length * size_of::<u32>(),
1✔
748
                        ))),
749
                        length * std::mem::size_of::<u32>(),
1✔
750
                    ),
751
                    b'f' => (
3✔
752
                        Aux::ArrayFloat(AuxArray::<f32>::from_bytes(slice::from_raw_parts(
3✔
753
                            aux.offset(array_data_offset),
×
754
                            length * size_of::<f32>(),
3✔
755
                        ))),
756
                        length * std::mem::size_of::<f32>(),
3✔
757
                    ),
758
                    _ => {
×
759
                        return Err(Error::BamAuxUnknownType);
×
760
                    }
761
                };
762
                (
763
                    array_data,
×
764
                    // Offset: array-specific metadata + array size
765
                    ARRAY_INNER_TYPE_LEN as usize + ARRAY_COUNT_LEN as usize + array_size,
6✔
766
                )
767
            }
768
            _ => {
×
769
                return Err(Error::BamAuxUnknownType);
×
770
            }
771
        };
772

773
        // Offset: metadata + type size
774
        Ok((data, TAG_LEN as usize + TYPE_ID_LEN as usize + type_size))
6✔
775
    }
776

777
    /// Returns an iterator over the auxiliary fields of the record.
778
    ///
779
    /// When an error occurs, the `Err` variant will be returned
780
    /// and the iterator will not be able to advance anymore.
781
    pub fn aux_iter(&self) -> AuxIter {
1✔
782
        AuxIter {
783
            // In order to get to the aux data section of a `bam::Record`
784
            // we need to skip fields in front of it
785
            aux: &self.data()[
1✔
786
                // NUL terminated read name:
787
                self.qname_capacity()
788
                // CIGAR (uint32_t):
789
                + self.cigar_len() * std::mem::size_of::<u32>()
790
                // Read sequence (4-bit encoded):
791
                + (self.seq_len() + 1) / 2
792
                // Base qualities (char):
793
                + self.seq_len()..],
794
        }
795
    }
796

797
    /// Add auxiliary data.
798
    pub fn push_aux(&mut self, tag: &[u8], value: Aux<'_>) -> Result<()> {
3✔
799
        // Don't allow pushing aux data when the given tag is already present in the record.
800
        // `htslib` seems to allow this (for non-array values), which can lead to problems
801
        // since retrieving aux fields consumes &[u8; 2] and yields one field only.
802
        if self.aux(tag).is_ok() {
4✔
803
            return Err(Error::BamAuxTagAlreadyPresent);
1✔
804
        }
805

806
        let ctag = tag.as_ptr() as *mut c_char;
3✔
807
        let ret = unsafe {
808
            match value {
3✔
809
                Aux::Char(v) => htslib::bam_aux_append(
810
                    self.inner_ptr_mut(),
×
811
                    ctag,
812
                    b'A' as c_char,
813
                    size_of::<u8>() as i32,
×
814
                    [v].as_mut_ptr() as *mut u8,
×
815
                ),
816
                Aux::I8(v) => htslib::bam_aux_append(
817
                    self.inner_ptr_mut(),
1✔
818
                    ctag,
819
                    b'c' as c_char,
820
                    size_of::<i8>() as i32,
1✔
821
                    [v].as_mut_ptr() as *mut u8,
1✔
822
                ),
823
                Aux::U8(v) => htslib::bam_aux_append(
824
                    self.inner_ptr_mut(),
1✔
825
                    ctag,
826
                    b'C' as c_char,
827
                    size_of::<u8>() as i32,
1✔
828
                    [v].as_mut_ptr() as *mut u8,
1✔
829
                ),
830
                Aux::I16(v) => htslib::bam_aux_append(
831
                    self.inner_ptr_mut(),
1✔
832
                    ctag,
833
                    b's' as c_char,
834
                    size_of::<i16>() as i32,
1✔
835
                    [v].as_mut_ptr() as *mut u8,
1✔
836
                ),
837
                Aux::U16(v) => htslib::bam_aux_append(
838
                    self.inner_ptr_mut(),
1✔
839
                    ctag,
840
                    b'S' as c_char,
841
                    size_of::<u16>() as i32,
1✔
842
                    [v].as_mut_ptr() as *mut u8,
1✔
843
                ),
844
                Aux::I32(v) => htslib::bam_aux_append(
845
                    self.inner_ptr_mut(),
2✔
846
                    ctag,
847
                    b'i' as c_char,
848
                    size_of::<i32>() as i32,
2✔
849
                    [v].as_mut_ptr() as *mut u8,
2✔
850
                ),
851
                Aux::U32(v) => htslib::bam_aux_append(
852
                    self.inner_ptr_mut(),
1✔
853
                    ctag,
854
                    b'I' as c_char,
855
                    size_of::<u32>() as i32,
1✔
856
                    [v].as_mut_ptr() as *mut u8,
1✔
857
                ),
858
                Aux::Float(v) => htslib::bam_aux_append(
859
                    self.inner_ptr_mut(),
1✔
860
                    ctag,
861
                    b'f' as c_char,
862
                    size_of::<f32>() as i32,
1✔
863
                    [v].as_mut_ptr() as *mut u8,
1✔
864
                ),
865
                // Not part of specs but implemented in `htslib`:
866
                Aux::Double(v) => htslib::bam_aux_append(
867
                    self.inner_ptr_mut(),
1✔
868
                    ctag,
869
                    b'd' as c_char,
870
                    size_of::<f64>() as i32,
1✔
871
                    [v].as_mut_ptr() as *mut u8,
1✔
872
                ),
873
                Aux::String(v) => {
1✔
874
                    let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
2✔
875
                    htslib::bam_aux_append(
876
                        self.inner_ptr_mut(),
1✔
877
                        ctag,
878
                        b'Z' as c_char,
879
                        (v.len() + 1) as i32,
1✔
880
                        c_str.as_ptr() as *mut u8,
2✔
881
                    )
882
                }
883
                Aux::HexByteArray(v) => {
×
884
                    let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
×
885
                    htslib::bam_aux_append(
886
                        self.inner_ptr_mut(),
×
887
                        ctag,
888
                        b'H' as c_char,
889
                        (v.len() + 1) as i32,
×
890
                        c_str.as_ptr() as *mut u8,
×
891
                    )
892
                }
893
                // Not sure it's safe to cast an immutable slice to a mutable pointer in the following branches
894
                Aux::ArrayI8(aux_array) => match aux_array {
1✔
895
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
896
                        self.inner_ptr_mut(),
1✔
897
                        ctag,
898
                        b'c',
899
                        inner.len() as u32,
1✔
900
                        inner.slice.as_ptr() as *mut ::libc::c_void,
1✔
901
                    ),
902
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
903
                        self.inner_ptr_mut(),
1✔
904
                        ctag,
905
                        b'c',
906
                        inner.len() as u32,
1✔
907
                        inner.slice.as_ptr() as *mut ::libc::c_void,
1✔
908
                    ),
909
                },
910
                Aux::ArrayU8(aux_array) => match aux_array {
1✔
911
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
912
                        self.inner_ptr_mut(),
1✔
913
                        ctag,
914
                        b'C',
915
                        inner.len() as u32,
1✔
916
                        inner.slice.as_ptr() as *mut ::libc::c_void,
1✔
917
                    ),
918
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
919
                        self.inner_ptr_mut(),
1✔
920
                        ctag,
921
                        b'C',
922
                        inner.len() as u32,
1✔
923
                        inner.slice.as_ptr() as *mut ::libc::c_void,
1✔
924
                    ),
925
                },
926
                Aux::ArrayI16(aux_array) => match aux_array {
1✔
927
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
928
                        self.inner_ptr_mut(),
1✔
929
                        ctag,
930
                        b's',
931
                        inner.len() as u32,
1✔
932
                        inner.slice.as_ptr() as *mut ::libc::c_void,
1✔
933
                    ),
934
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
935
                        self.inner_ptr_mut(),
1✔
936
                        ctag,
937
                        b's',
938
                        inner.len() as u32,
1✔
939
                        inner.slice.as_ptr() as *mut ::libc::c_void,
1✔
940
                    ),
941
                },
942
                Aux::ArrayU16(aux_array) => match aux_array {
1✔
943
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
944
                        self.inner_ptr_mut(),
1✔
945
                        ctag,
946
                        b'S',
947
                        inner.len() as u32,
1✔
948
                        inner.slice.as_ptr() as *mut ::libc::c_void,
1✔
949
                    ),
950
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
951
                        self.inner_ptr_mut(),
1✔
952
                        ctag,
953
                        b'S',
954
                        inner.len() as u32,
1✔
955
                        inner.slice.as_ptr() as *mut ::libc::c_void,
1✔
956
                    ),
957
                },
958
                Aux::ArrayI32(aux_array) => match aux_array {
1✔
959
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
960
                        self.inner_ptr_mut(),
1✔
961
                        ctag,
962
                        b'i',
963
                        inner.len() as u32,
1✔
964
                        inner.slice.as_ptr() as *mut ::libc::c_void,
1✔
965
                    ),
966
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
967
                        self.inner_ptr_mut(),
1✔
968
                        ctag,
969
                        b'i',
970
                        inner.len() as u32,
1✔
971
                        inner.slice.as_ptr() as *mut ::libc::c_void,
1✔
972
                    ),
973
                },
974
                Aux::ArrayU32(aux_array) => match aux_array {
1✔
975
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
976
                        self.inner_ptr_mut(),
1✔
977
                        ctag,
978
                        b'I',
979
                        inner.len() as u32,
1✔
980
                        inner.slice.as_ptr() as *mut ::libc::c_void,
1✔
981
                    ),
982
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
983
                        self.inner_ptr_mut(),
1✔
984
                        ctag,
985
                        b'I',
986
                        inner.len() as u32,
1✔
987
                        inner.slice.as_ptr() as *mut ::libc::c_void,
1✔
988
                    ),
989
                },
990
                Aux::ArrayFloat(aux_array) => match aux_array {
3✔
991
                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
992
                        self.inner_ptr_mut(),
3✔
993
                        ctag,
994
                        b'f',
995
                        inner.len() as u32,
3✔
996
                        inner.slice.as_ptr() as *mut ::libc::c_void,
3✔
997
                    ),
998
                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
999
                        self.inner_ptr_mut(),
1✔
1000
                        ctag,
1001
                        b'f',
1002
                        inner.len() as u32,
1✔
1003
                        inner.slice.as_ptr() as *mut ::libc::c_void,
1✔
1004
                    ),
1005
                },
1006
            }
1007
        };
1008

1009
        if ret < 0 {
6✔
1010
            Err(Error::BamAux)
×
1011
        } else {
1012
            Ok(())
3✔
1013
        }
1014
    }
1015

1016
    // Delete auxiliary tag.
1017
    pub fn remove_aux(&mut self, tag: &[u8]) -> Result<()> {
1✔
1018
        let c_str = ffi::CString::new(tag).map_err(|_| Error::BamAuxStringError)?;
1✔
1019
        let aux = unsafe {
1020
            htslib::bam_aux_get(
1021
                &self.inner as *const htslib::bam1_t,
1✔
1022
                c_str.as_ptr() as *mut c_char,
2✔
1023
            )
1024
        };
1025
        unsafe {
1026
            if aux.is_null() {
2✔
1027
                Err(Error::BamAuxTagNotFound)
1✔
1028
            } else {
1029
                htslib::bam_aux_del(self.inner_ptr_mut(), aux);
2✔
1030
                Ok(())
1✔
1031
            }
1032
        }
1033
    }
1034

1035
    /// Access the base modifications associated with this Record through the MM tag.
1036
    /// Example:
1037
    /// ```
1038
    ///    use rust_htslib::bam::{Read, Reader, Record};
1039
    ///    let mut bam = Reader::from_path(&"test/base_mods/MM-orient.sam").unwrap();
1040
    ///    let mut mod_count = 0;
1041
    ///    for r in bam.records() {
1042
    ///        let record = r.unwrap();
1043
    ///        if let Ok(mods) = record.basemods_iter() {
1044
    ///            // print metadata for the modifications present in this record
1045
    ///            for mod_code in mods.recorded() {
1046
    ///                if let Ok(mod_metadata) = mods.query_type(*mod_code) {
1047
    ///                    println!("mod found with code {}/{} flags: [{} {} {}]",
1048
    ///                              mod_code, *mod_code as u8 as char,
1049
    ///                              mod_metadata.strand, mod_metadata.implicit, mod_metadata.canonical as u8 as char);
1050
    ///                }
1051
    ///            }
1052
    ///
1053
    ///            // iterate over the modifications in this record
1054
    ///            // the modifications are returned as a tuple with the
1055
    ///            // position within SEQ and an hts_base_mod struct
1056
    ///            for res in mods {
1057
    ///                if let Ok( (position, m) ) = res {
1058
    ///                    println!("{} {},{}", position, m.modified_base as u8 as char, m.qual);
1059
    ///                    mod_count += 1;
1060
    ///                }
1061
    ///            }
1062
    ///        };
1063
    ///    }
1064
    ///    assert_eq!(mod_count, 14);
1065
    /// ```
1066
    pub fn basemods_iter(&self) -> Result<BaseModificationsIter> {
2✔
1067
        BaseModificationsIter::new(self)
2✔
1068
    }
1069

1070
    /// An iterator that returns all of the modifications for each position as a vector.
1071
    /// This is useful for the case where multiple possible modifications can be annotated
1072
    /// at a single position (for example a C could be 5-mC or 5-hmC)
1073
    pub fn basemods_position_iter(&self) -> Result<BaseModificationsPositionIter> {
1✔
1074
        BaseModificationsPositionIter::new(self)
1✔
1075
    }
1076

1077
    /// Infer read pair orientation from record. Returns `SequenceReadPairOrientation::None` if record
1078
    /// is not paired, mates are not mapping to the same contig, or mates start at the
1079
    /// same position.
1080
    pub fn read_pair_orientation(&self) -> SequenceReadPairOrientation {
1✔
1081
        if self.is_paired()
1✔
1082
            && !self.is_unmapped()
1✔
1083
            && !self.is_mate_unmapped()
1✔
1084
            && self.tid() == self.mtid()
1✔
1085
        {
1086
            if self.pos() == self.mpos() {
1✔
1087
                // both reads start at the same position, we cannot decide on the orientation.
1088
                return SequenceReadPairOrientation::None;
×
1089
            }
1090

1091
            let (is_reverse, is_first_in_template, is_mate_reverse) = if self.pos() < self.mpos() {
3✔
1092
                // given record is the left one
1093
                (
1094
                    self.is_reverse(),
1✔
1095
                    self.is_first_in_template(),
1✔
1096
                    self.is_mate_reverse(),
1✔
1097
                )
1098
            } else {
1099
                // given record is the right one
1100
                (
1101
                    self.is_mate_reverse(),
1✔
1102
                    self.is_last_in_template(),
1✔
1103
                    self.is_reverse(),
1✔
1104
                )
1105
            };
1106
            match (is_reverse, is_first_in_template, is_mate_reverse) {
1✔
1107
                (false, false, false) => SequenceReadPairOrientation::F2F1,
×
1108
                (false, false, true) => SequenceReadPairOrientation::F2R1,
×
1109
                (false, true, false) => SequenceReadPairOrientation::F1F2,
×
1110
                (true, false, false) => SequenceReadPairOrientation::R2F1,
×
1111
                (false, true, true) => SequenceReadPairOrientation::F1R2,
1✔
1112
                (true, false, true) => SequenceReadPairOrientation::R2R1,
×
1113
                (true, true, false) => SequenceReadPairOrientation::R1F2,
×
1114
                (true, true, true) => SequenceReadPairOrientation::R1R2,
×
1115
            }
1116
        } else {
1117
            SequenceReadPairOrientation::None
×
1118
        }
1119
    }
1120

1121
    flag!(is_paired, set_paired, unset_paired, 1u16);
1122
    flag!(is_proper_pair, set_proper_pair, unset_proper_pair, 2u16);
1123
    flag!(is_unmapped, set_unmapped, unset_unmapped, 4u16);
1124
    flag!(
1125
        is_mate_unmapped,
1126
        set_mate_unmapped,
1127
        unset_mate_unmapped,
1128
        8u16
1129
    );
1130
    flag!(is_reverse, set_reverse, unset_reverse, 16u16);
1131
    flag!(is_mate_reverse, set_mate_reverse, unset_mate_reverse, 32u16);
1132
    flag!(
1133
        is_first_in_template,
1134
        set_first_in_template,
1135
        unset_first_in_template,
1136
        64u16
1137
    );
1138
    flag!(
1139
        is_last_in_template,
1140
        set_last_in_template,
1141
        unset_last_in_template,
1142
        128u16
1143
    );
1144
    flag!(is_secondary, set_secondary, unset_secondary, 256u16);
1145
    flag!(
1146
        is_quality_check_failed,
1147
        set_quality_check_failed,
1148
        unset_quality_check_failed,
1149
        512u16
1150
    );
1151
    flag!(is_duplicate, set_duplicate, unset_duplicate, 1024u16);
1152
    flag!(
1153
        is_supplementary,
1154
        set_supplementary,
1155
        unset_supplementary,
1156
        2048u16
1157
    );
1158
}
1159

1160
impl Drop for Record {
1161
    fn drop(&mut self) {
10✔
1162
        if self.own {
10✔
1163
            unsafe { ::libc::free(self.inner.data as *mut ::libc::c_void) }
9✔
1164
        }
1165
    }
1166
}
1167

1168
impl SequenceRead for Record {
1169
    fn name(&self) -> &[u8] {
×
1170
        self.qname()
×
1171
    }
1172

1173
    fn base(&self, i: usize) -> u8 {
×
1174
        *decode_base_unchecked(encoded_base(self.seq_data(), i))
×
1175
    }
1176

1177
    fn base_qual(&self, i: usize) -> u8 {
×
1178
        self.qual()[i]
×
1179
    }
1180

1181
    fn len(&self) -> usize {
×
1182
        self.seq_len()
×
1183
    }
1184

1185
    fn is_empty(&self) -> bool {
×
1186
        self.len() == 0
×
1187
    }
1188
}
1189

1190
impl genome::AbstractInterval for Record {
1191
    /// Return contig name. Panics if record does not know its header (which happens if it has not been read from a file).
1192
    fn contig(&self) -> &str {
×
1193
        let tid = self.tid();
×
1194
        if tid < 0 {
×
1195
            panic!("invalid tid, must be at least zero");
×
1196
        }
1197
        str::from_utf8(
1198
            self.header
×
1199
                .as_ref()
1200
                .expect(
1201
                    "header must be set (this is the case if the record has been read from a file)",
1202
                )
1203
                .tid2name(tid as u32),
1204
        )
1205
        .expect("unable to interpret contig name as UTF-8")
1206
    }
1207

1208
    /// Return genomic range covered by alignment. Panics if `Record::cache_cigar()` has not been called first or `Record::pos()` is less than zero.
1209
    fn range(&self) -> ops::Range<genome::Position> {
×
1210
        let end_pos = self
×
1211
            .cigar_cached()
1212
            .expect("cigar has not been cached yet, call cache_cigar() first")
1213
            .end_pos() as u64;
1214

1215
        if self.pos() < 0 {
×
1216
            panic!("invalid position, must be positive")
×
1217
        }
1218

1219
        self.pos() as u64..end_pos
×
1220
    }
1221
}
1222

1223
/// Auxiliary record data
1224
///
1225
/// The specification allows a wide range of types to be stored as an auxiliary data field of a BAM record.
1226
///
1227
/// Please note that the [`Aux::Double`] variant is _not_ part of the specification, but it is supported by `htslib`.
1228
///
1229
/// # Examples
1230
///
1231
/// ```
1232
/// use rust_htslib::{
1233
///     bam,
1234
///     bam::record::{Aux, AuxArray},
1235
///     errors::Error,
1236
/// };
1237
///
1238
/// //Set up BAM record
1239
/// let bam_header = bam::Header::new();
1240
/// let mut record = bam::Record::from_sam(
1241
///     &mut bam::HeaderView::from_header(&bam_header),
1242
///     "ali1\t4\t*\t0\t0\t*\t*\t0\t0\tACGT\tFFFF".as_bytes(),
1243
/// )
1244
/// .unwrap();
1245
///
1246
/// // Add an integer field
1247
/// let aux_integer_field = Aux::I32(1234);
1248
/// record.push_aux(b"XI", aux_integer_field).unwrap();
1249
///
1250
/// match record.aux(b"XI") {
1251
///     Ok(value) => {
1252
///         // Typically, callers expect an aux field to be of a certain type.
1253
///         // If that's not the case, the value can be `match`ed exhaustively.
1254
///         if let Aux::I32(v) = value {
1255
///             assert_eq!(v, 1234);
1256
///         }
1257
///     }
1258
///     Err(e) => {
1259
///         panic!("Error reading aux field: {}", e);
1260
///     }
1261
/// }
1262
///
1263
/// // Add an array field
1264
/// let array_like_data = vec![0.4, 0.3, 0.2, 0.1];
1265
/// let slice_of_data = &array_like_data;
1266
/// let aux_array: AuxArray<f32> = slice_of_data.into();
1267
/// let aux_array_field = Aux::ArrayFloat(aux_array);
1268
/// record.push_aux(b"XA", aux_array_field).unwrap();
1269
///
1270
/// if let Ok(Aux::ArrayFloat(array)) = record.aux(b"XA") {
1271
///     let read_array = array.iter().collect::<Vec<_>>();
1272
///     assert_eq!(read_array, array_like_data);
1273
/// } else {
1274
///     panic!("Could not read array data");
1275
/// }
1276
/// ```
1277
#[derive(Debug, PartialEq)]
1278
pub enum Aux<'a> {
1279
    Char(u8),
1280
    I8(i8),
1281
    U8(u8),
1282
    I16(i16),
1283
    U16(u16),
1284
    I32(i32),
1285
    U32(u32),
1286
    Float(f32),
1287
    Double(f64), // Not part of specs but implemented in `htslib`
1288
    String(&'a str),
1289
    HexByteArray(&'a str),
1290
    ArrayI8(AuxArray<'a, i8>),
1291
    ArrayU8(AuxArray<'a, u8>),
1292
    ArrayI16(AuxArray<'a, i16>),
1293
    ArrayU16(AuxArray<'a, u16>),
1294
    ArrayI32(AuxArray<'a, i32>),
1295
    ArrayU32(AuxArray<'a, u32>),
1296
    ArrayFloat(AuxArray<'a, f32>),
1297
}
1298

1299
unsafe impl<'a> Send for Aux<'a> {}
1300
unsafe impl<'a> Sync for Aux<'a> {}
1301

1302
/// Types that can be used in aux arrays.
1303
pub trait AuxArrayElement: Copy {
1304
    fn from_le_bytes(bytes: &[u8]) -> Option<Self>;
1305
}
1306

1307
impl AuxArrayElement for i8 {
1308
    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1✔
1309
        std::io::Cursor::new(bytes).read_i8().ok()
1✔
1310
    }
1311
}
1312
impl AuxArrayElement for u8 {
1313
    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1✔
1314
        std::io::Cursor::new(bytes).read_u8().ok()
1✔
1315
    }
1316
}
1317
impl AuxArrayElement for i16 {
1318
    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1✔
1319
        std::io::Cursor::new(bytes).read_i16::<LittleEndian>().ok()
1✔
1320
    }
1321
}
1322
impl AuxArrayElement for u16 {
1323
    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1✔
1324
        std::io::Cursor::new(bytes).read_u16::<LittleEndian>().ok()
1✔
1325
    }
1326
}
1327
impl AuxArrayElement for i32 {
1328
    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1✔
1329
        std::io::Cursor::new(bytes).read_i32::<LittleEndian>().ok()
1✔
1330
    }
1331
}
1332
impl AuxArrayElement for u32 {
1333
    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1✔
1334
        std::io::Cursor::new(bytes).read_u32::<LittleEndian>().ok()
1✔
1335
    }
1336
}
1337
impl AuxArrayElement for f32 {
1338
    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
3✔
1339
        std::io::Cursor::new(bytes).read_f32::<LittleEndian>().ok()
3✔
1340
    }
1341
}
1342

1343
/// Provides access to aux arrays.
1344
///
1345
/// Provides methods to either retrieve single elements or an iterator over the
1346
/// array.
1347
///
1348
/// This type is used for wrapping both, array data that was read from a
1349
/// BAM record and slices of data that are going to be stored in one.
1350
///
1351
/// In order to be able to add an `AuxArray` field to a BAM record, `AuxArray`s
1352
/// can be constructed via the `From` trait which is implemented for all
1353
/// supported types (see [`AuxArrayElement`] for a list).
1354
///
1355
/// # Examples
1356
///
1357
/// ```
1358
/// use rust_htslib::{
1359
///     bam,
1360
///     bam::record::{Aux, AuxArray},
1361
/// };
1362
///
1363
/// //Set up BAM record
1364
/// let bam_header = bam::Header::new();
1365
/// let mut record = bam::Record::from_sam(
1366
///     &mut bam::HeaderView::from_header(&bam_header),
1367
///     "ali1\t4\t*\t0\t0\t*\t*\t0\t0\tACGT\tFFFF".as_bytes(),
1368
/// ).unwrap();
1369
///
1370
/// let data = vec![0.4, 0.3, 0.2, 0.1];
1371
/// let slice_of_data = &data;
1372
/// let aux_array: AuxArray<f32> = slice_of_data.into();
1373
/// let aux_field = Aux::ArrayFloat(aux_array);
1374
/// record.push_aux(b"XA", aux_field);
1375
///
1376
/// if let Ok(Aux::ArrayFloat(array)) = record.aux(b"XA") {
1377
///     // Retrieve the second element from the array
1378
///     assert_eq!(array.get(1).unwrap(), 0.3);
1379
///     // Iterate over the array and collect it into a `Vec`
1380
///     let read_array = array.iter().collect::<Vec<_>>();
1381
///     assert_eq!(read_array, data);
1382
/// } else {
1383
///     panic!("Could not read array data");
1384
/// }
1385
/// ```
1386
#[derive(Debug)]
1387
pub enum AuxArray<'a, T> {
1388
    TargetType(AuxArrayTargetType<'a, T>),
1389
    RawLeBytes(AuxArrayRawLeBytes<'a, T>),
1390
}
1391

1392
impl<T> PartialEq<AuxArray<'_, T>> for AuxArray<'_, T>
1393
where
1394
    T: AuxArrayElement + PartialEq,
1395
{
1396
    fn eq(&self, other: &AuxArray<'_, T>) -> bool {
7✔
1397
        use AuxArray::*;
1398
        match (self, other) {
7✔
1399
            (TargetType(v), TargetType(v_other)) => v == v_other,
1✔
1400
            (RawLeBytes(v), RawLeBytes(v_other)) => v == v_other,
1✔
1401
            (TargetType(_), RawLeBytes(_)) => self.iter().eq(other.iter()),
×
1402
            (RawLeBytes(_), TargetType(_)) => self.iter().eq(other.iter()),
7✔
1403
        }
1404
    }
1405
}
1406

1407
/// Create AuxArrays from slices of allowed target types.
1408
impl<'a, I, T> From<&'a T> for AuxArray<'a, I>
1409
where
1410
    I: AuxArrayElement,
1411
    T: AsRef<[I]> + ?Sized,
1412
{
1413
    fn from(src: &'a T) -> Self {
9✔
1414
        AuxArray::TargetType(AuxArrayTargetType {
9✔
1415
            slice: src.as_ref(),
9✔
1416
        })
1417
    }
1418
}
1419

1420
impl<'a, T> AuxArray<'a, T>
1421
where
1422
    T: AuxArrayElement,
1423
{
1424
    /// Returns the element at a position or None if out of bounds.
1425
    pub fn get(&self, index: usize) -> Option<T> {
9✔
1426
        match self {
9✔
1427
            AuxArray::TargetType(v) => v.get(index),
7✔
1428
            AuxArray::RawLeBytes(v) => v.get(index),
9✔
1429
        }
1430
    }
1431

1432
    /// Returns the number of elements in the array.
1433
    pub fn len(&self) -> usize {
9✔
1434
        match self {
9✔
1435
            AuxArray::TargetType(a) => a.len(),
×
1436
            AuxArray::RawLeBytes(a) => a.len(),
9✔
1437
        }
1438
    }
1439

1440
    /// Returns true if the array contains no elements.
1441
    pub fn is_empty(&self) -> bool {
×
1442
        self.len() == 0
×
1443
    }
1444

1445
    /// Returns an iterator over the array.
1446
    pub fn iter(&self) -> AuxArrayIter<T> {
9✔
1447
        AuxArrayIter {
1448
            index: 0,
1449
            array: self,
1450
        }
1451
    }
1452

1453
    /// Create AuxArrays from raw byte slices borrowed from `bam::Record`.
1454
    fn from_bytes(bytes: &'a [u8]) -> Self {
9✔
1455
        Self::RawLeBytes(AuxArrayRawLeBytes {
9✔
1456
            slice: bytes,
×
1457
            phantom_data: PhantomData,
×
1458
        })
1459
    }
1460
}
1461

1462
/// Encapsulates slice of target type.
1463
#[doc(hidden)]
1464
#[derive(Debug, PartialEq)]
1465
pub struct AuxArrayTargetType<'a, T> {
1466
    slice: &'a [T],
1467
}
1468

1469
impl<'a, T> AuxArrayTargetType<'a, T>
1470
where
1471
    T: AuxArrayElement,
1472
{
1473
    fn get(&self, index: usize) -> Option<T> {
7✔
1474
        self.slice.get(index).copied()
7✔
1475
    }
1476

1477
    fn len(&self) -> usize {
9✔
1478
        self.slice.len()
9✔
1479
    }
1480
}
1481

1482
/// Encapsulates slice of raw bytes to prevent it from being accidentally accessed.
1483
#[doc(hidden)]
1484
#[derive(Debug, PartialEq)]
1485
pub struct AuxArrayRawLeBytes<'a, T> {
1486
    slice: &'a [u8],
1487
    phantom_data: PhantomData<T>,
1488
}
1489

1490
impl<'a, T> AuxArrayRawLeBytes<'a, T>
1491
where
1492
    T: AuxArrayElement,
1493
{
1494
    fn get(&self, index: usize) -> Option<T> {
9✔
1495
        let type_size = std::mem::size_of::<T>();
9✔
1496
        if index * type_size + type_size > self.slice.len() {
9✔
1497
            return None;
9✔
1498
        }
1499
        T::from_le_bytes(&self.slice[index * type_size..][..type_size])
18✔
1500
    }
1501

1502
    fn len(&self) -> usize {
9✔
1503
        self.slice.len() / std::mem::size_of::<T>()
9✔
1504
    }
1505
}
1506

1507
/// Aux array iterator
1508
///
1509
/// This struct is created by the [`AuxArray::iter`] method.
1510
pub struct AuxArrayIter<'a, T> {
1511
    index: usize,
1512
    array: &'a AuxArray<'a, T>,
1513
}
1514

1515
impl<T> Iterator for AuxArrayIter<'_, T>
1516
where
1517
    T: AuxArrayElement,
1518
{
1519
    type Item = T;
1520
    fn next(&mut self) -> Option<Self::Item> {
9✔
1521
        let value = self.array.get(self.index);
9✔
1522
        self.index += 1;
9✔
1523
        value
×
1524
    }
1525

1526
    fn size_hint(&self) -> (usize, Option<usize>) {
9✔
1527
        let array_length = self.array.len() - self.index;
9✔
1528
        (array_length, Some(array_length))
9✔
1529
    }
1530
}
1531

1532
/// Auxiliary data iterator
1533
///
1534
/// This struct is created by the [`Record::aux_iter`] method.
1535
///
1536
/// This iterator returns `Result`s that wrap tuples containing
1537
/// a slice which represents the two-byte tag (`&[u8; 2]`) as
1538
/// well as an `Aux` enum that wraps the associated value.
1539
///
1540
/// When an error occurs, the `Err` variant will be returned
1541
/// and the iterator will not be able to advance anymore.
1542
pub struct AuxIter<'a> {
1543
    aux: &'a [u8],
1544
}
1545

1546
impl<'a> Iterator for AuxIter<'a> {
1547
    type Item = Result<(&'a [u8], Aux<'a>)>;
1548

1549
    fn next(&mut self) -> Option<Self::Item> {
1✔
1550
        // We're finished
1551
        if self.aux.is_empty() {
1✔
1552
            return None;
1✔
1553
        }
1554
        // Incomplete aux data
1555
        if (1..=3).contains(&self.aux.len()) {
1✔
1556
            // In the case of an error, we can not safely advance in the aux data, so we terminate the Iteration
1557
            self.aux = &[];
×
1558
            return Some(Err(Error::BamAuxParsingError));
×
1559
        }
1560
        let tag = &self.aux[..2];
2✔
1561
        Some(unsafe {
1✔
1562
            let data_ptr = self.aux[2..].as_ptr();
2✔
1563
            Record::read_aux_field(data_ptr)
1✔
1564
                .map(|(aux, offset)| {
2✔
1565
                    self.aux = &self.aux[offset..];
2✔
1566
                    (tag, aux)
1✔
1567
                })
1568
                .map_err(|e| {
×
1569
                    // In the case of an error, we can not safely advance in the aux data, so we terminate the Iteration
1570
                    self.aux = &[];
×
1571
                    e
×
1572
                })
1573
        })
1574
    }
1575
}
1576

1577
static DECODE_BASE: &[u8] = b"=ACMGRSVTWYHKDBN";
1578
static ENCODE_BASE: [u8; 256] = [
1579
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1580
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1581
    1, 2, 4, 8, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 15, 15, 15, 1, 14, 2, 13, 15, 15, 4, 11, 15,
1582
    15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15, 15, 15, 15, 15, 1, 14, 2,
1583
    13, 15, 15, 4, 11, 15, 15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15,
1584
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1585
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1586
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1587
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1588
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1589
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1590
];
1591

1592
#[inline]
1593
fn encoded_base(encoded_seq: &[u8], i: usize) -> u8 {
3✔
1594
    (encoded_seq[i / 2] >> ((!i & 1) << 2)) & 0b1111
3✔
1595
}
1596

1597
#[inline]
1598
unsafe fn encoded_base_unchecked(encoded_seq: &[u8], i: usize) -> u8 {
×
1599
    (encoded_seq.get_unchecked(i / 2) >> ((!i & 1) << 2)) & 0b1111
×
1600
}
1601

1602
#[inline]
1603
fn decode_base_unchecked(base: u8) -> &'static u8 {
3✔
1604
    unsafe { DECODE_BASE.get_unchecked(base as usize) }
3✔
1605
}
1606

1607
/// The sequence of a record.
1608
#[derive(Debug, Copy, Clone)]
1609
pub struct Seq<'a> {
1610
    pub encoded: &'a [u8],
1611
    len: usize,
1612
}
1613

1614
impl<'a> Seq<'a> {
1615
    /// Return encoded base. Complexity: O(1).
1616
    #[inline]
1617
    pub fn encoded_base(&self, i: usize) -> u8 {
3✔
1618
        encoded_base(self.encoded, i)
3✔
1619
    }
1620

1621
    /// Return encoded base. Complexity: O(1).
1622
    #[inline]
1623
    pub unsafe fn encoded_base_unchecked(&self, i: usize) -> u8 {
×
1624
        encoded_base_unchecked(self.encoded, i)
×
1625
    }
1626

1627
    /// Obtain decoded base without performing bounds checking.
1628
    /// Use index based access seq()[i], for checked, safe access.
1629
    /// Complexity: O(1).
1630
    #[inline]
1631
    pub unsafe fn decoded_base_unchecked(&self, i: usize) -> u8 {
×
1632
        *decode_base_unchecked(self.encoded_base_unchecked(i))
×
1633
    }
1634

1635
    /// Return decoded sequence. Complexity: O(m) with m being the read length.
1636
    pub fn as_bytes(&self) -> Vec<u8> {
2✔
1637
        (0..self.len()).map(|i| self[i]).collect()
6✔
1638
    }
1639

1640
    /// Return length (in bases) of the sequence.
1641
    pub fn len(&self) -> usize {
2✔
1642
        self.len
2✔
1643
    }
1644

1645
    pub fn is_empty(&self) -> bool {
×
1646
        self.len() == 0
×
1647
    }
1648
}
1649

1650
impl<'a> ops::Index<usize> for Seq<'a> {
1651
    type Output = u8;
1652

1653
    /// Return decoded base at given position within read. Complexity: O(1).
1654
    fn index(&self, index: usize) -> &u8 {
3✔
1655
        decode_base_unchecked(self.encoded_base(index))
3✔
1656
    }
1657
}
1658

1659
unsafe impl<'a> Send for Seq<'a> {}
1660
unsafe impl<'a> Sync for Seq<'a> {}
1661

1662
#[cfg_attr(feature = "serde_feature", derive(Serialize, Deserialize))]
1663
#[derive(PartialEq, PartialOrd, Eq, Debug, Clone, Copy, Hash)]
1664
pub enum Cigar {
1665
    Match(u32),    // M
1666
    Ins(u32),      // I
1667
    Del(u32),      // D
1668
    RefSkip(u32),  // N
1669
    SoftClip(u32), // S
1670
    HardClip(u32), // H
1671
    Pad(u32),      // P
1672
    Equal(u32),    // =
1673
    Diff(u32),     // X
1674
}
1675

1676
impl Cigar {
1677
    fn encode(self) -> u32 {
1✔
1678
        match self {
1✔
1679
            Cigar::Match(len) => len << 4, // | 0,
1✔
1680
            Cigar::Ins(len) => len << 4 | 1,
1✔
1681
            Cigar::Del(len) => len << 4 | 2,
1✔
1682
            Cigar::RefSkip(len) => len << 4 | 3,
×
1683
            Cigar::SoftClip(len) => len << 4 | 4,
1✔
1684
            Cigar::HardClip(len) => len << 4 | 5,
1✔
1685
            Cigar::Pad(len) => len << 4 | 6,
×
1686
            Cigar::Equal(len) => len << 4 | 7,
1✔
1687
            Cigar::Diff(len) => len << 4 | 8,
1✔
1688
        }
1689
    }
1690

1691
    /// Return the length of the CIGAR.
1692
    pub fn len(self) -> u32 {
2✔
1693
        match self {
2✔
1694
            Cigar::Match(len) => len,
2✔
1695
            Cigar::Ins(len) => len,
1✔
1696
            Cigar::Del(len) => len,
1✔
1697
            Cigar::RefSkip(len) => len,
×
1698
            Cigar::SoftClip(len) => len,
2✔
1699
            Cigar::HardClip(len) => len,
1✔
1700
            Cigar::Pad(len) => len,
×
1701
            Cigar::Equal(len) => len,
1✔
1702
            Cigar::Diff(len) => len,
1✔
1703
        }
1704
    }
1705

1706
    pub fn is_empty(self) -> bool {
×
1707
        self.len() == 0
×
1708
    }
1709

1710
    /// Return the character representing the CIGAR.
1711
    pub fn char(self) -> char {
2✔
1712
        match self {
2✔
1713
            Cigar::Match(_) => 'M',
2✔
1714
            Cigar::Ins(_) => 'I',
1✔
1715
            Cigar::Del(_) => 'D',
1✔
1716
            Cigar::RefSkip(_) => 'N',
×
1717
            Cigar::SoftClip(_) => 'S',
2✔
1718
            Cigar::HardClip(_) => 'H',
1✔
1719
            Cigar::Pad(_) => 'P',
×
1720
            Cigar::Equal(_) => '=',
1✔
1721
            Cigar::Diff(_) => 'X',
1✔
1722
        }
1723
    }
1724
}
1725

1726
impl fmt::Display for Cigar {
1727
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
2✔
1728
        fmt.write_fmt(format_args!("{}{}", self.len(), self.char()))
6✔
1729
    }
1730
}
1731

1732
unsafe impl Send for Cigar {}
1733
unsafe impl Sync for Cigar {}
1734

1735
custom_derive! {
1736
    /// A CIGAR string. This type wraps around a `Vec<Cigar>`.
1737
    ///
1738
    /// # Example
1739
    ///
1740
    /// ```
1741
    /// use rust_htslib::bam::record::{Cigar, CigarString};
1742
    ///
1743
    /// let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]);
1744
    ///
1745
    /// // access by index
1746
    /// assert_eq!(cigar[0], Cigar::Match(100));
1747
    /// // format into classical string representation
1748
    /// assert_eq!(format!("{}", cigar), "100M10S");
1749
    /// // iterate
1750
    /// for op in &cigar {
1751
    ///    println!("{}", op);
1752
    /// }
1753
    /// ```
1754
    #[cfg_attr(feature = "serde_feature", derive(Serialize, Deserialize))]
×
1755
    #[derive(NewtypeDeref,
1756
            NewtypeDerefMut,
1757
             NewtypeIndex(usize),
1758
             NewtypeIndexMut(usize),
1759
             NewtypeFrom,
1760
             PartialEq,
1761
             PartialOrd,
1762
             Eq,
1763
             NewtypeDebug,
1764
             Clone,
1765
             Hash
1766
    )]
1767
    pub struct CigarString(pub Vec<Cigar>);
3✔
1768
}
1769

1770
impl CigarString {
1771
    /// Create a `CigarStringView` from this CigarString at position `pos`
1772
    pub fn into_view(self, pos: i64) -> CigarStringView {
1✔
1773
        CigarStringView::new(self, pos)
1✔
1774
    }
1775

1776
    /// Calculate the bam cigar from the alignment struct. x is the target string
1777
    /// and y is the reference. `hard_clip` controls how unaligned read bases are encoded in the
1778
    /// cigar string. Set to true to use the hard clip (`H`) code, or false to use soft clip
1779
    /// (`S`) code. See the [SAM spec](https://samtools.github.io/hts-specs/SAMv1.pdf) for more details.
1780
    pub fn from_alignment(alignment: &Alignment, hard_clip: bool) -> Self {
1✔
1781
        match alignment.mode {
1✔
1782
            AlignmentMode::Global => {
1783
                panic!(" Bam cigar fn not supported for Global Alignment mode")
×
1784
            }
1785
            AlignmentMode::Local => panic!(" Bam cigar fn not supported for Local Alignment mode"),
×
1786
            _ => {}
1787
        }
1788

1789
        let mut cigar = Vec::new();
1✔
1790
        if alignment.operations.is_empty() {
2✔
1791
            return CigarString(cigar);
×
1792
        }
1793

1794
        let add_op = |op: AlignmentOperation, length: u32, cigar: &mut Vec<Cigar>| match op {
2✔
1795
            AlignmentOperation::Del => cigar.push(Cigar::Del(length)),
1✔
1796
            AlignmentOperation::Ins => cigar.push(Cigar::Ins(length)),
1✔
1797
            AlignmentOperation::Subst => cigar.push(Cigar::Diff(length)),
1✔
1798
            AlignmentOperation::Match => cigar.push(Cigar::Equal(length)),
1✔
1799
            _ => {}
1800
        };
1801

1802
        if alignment.xstart > 0 {
1✔
1803
            cigar.push(if hard_clip {
3✔
1804
                Cigar::HardClip(alignment.xstart as u32)
×
1805
            } else {
1806
                Cigar::SoftClip(alignment.xstart as u32)
1✔
1807
            });
1808
        }
1809

1810
        let mut last = alignment.operations[0];
2✔
1811
        let mut k = 1u32;
1✔
1812
        for &op in alignment.operations[1..].iter() {
3✔
1813
            if op == last {
3✔
1814
                k += 1;
2✔
1815
            } else {
1816
                add_op(last, k, &mut cigar);
1✔
1817
                k = 1;
1✔
1818
            }
1819
            last = op;
1✔
1820
        }
1821
        add_op(last, k, &mut cigar);
1✔
1822
        if alignment.xlen > alignment.xend {
1✔
1823
            cigar.push(if hard_clip {
5✔
1824
                Cigar::HardClip((alignment.xlen - alignment.xend) as u32)
2✔
1825
            } else {
1826
                Cigar::SoftClip((alignment.xlen - alignment.xend) as u32)
2✔
1827
            });
1828
        }
1829

1830
        CigarString(cigar)
1✔
1831
    }
1832
}
1833

1834
impl TryFrom<&[u8]> for CigarString {
1835
    type Error = Error;
1836

1837
    /// Create a CigarString from given &[u8].
1838
    /// # Example
1839
    /// ```
1840
    /// use rust_htslib::bam::record::*;
1841
    /// use rust_htslib::bam::record::CigarString;
1842
    /// use rust_htslib::bam::record::Cigar::*;
1843
    /// use std::convert::TryFrom;
1844
    ///
1845
    /// let cigar_str = "2H10M5X3=2H".as_bytes();
1846
    /// let cigar = CigarString::try_from(cigar_str)
1847
    ///     .expect("Unable to parse cigar string.");
1848
    /// let expected_cigar = CigarString(vec![
1849
    ///     HardClip(2),
1850
    ///     Match(10),
1851
    ///     Diff(5),
1852
    ///     Equal(3),
1853
    ///     HardClip(2),
1854
    /// ]);
1855
    /// assert_eq!(cigar, expected_cigar);
1856
    /// ```
1857
    fn try_from(bytes: &[u8]) -> Result<Self> {
3✔
1858
        let mut inner = Vec::new();
3✔
1859
        let mut i = 0;
3✔
1860
        let text_len = bytes.len();
3✔
1861
        while i < text_len {
6✔
1862
            let mut j = i;
3✔
1863
            while j < text_len && bytes[j].is_ascii_digit() {
15✔
1864
                j += 1;
6✔
1865
            }
1866
            // check that length is provided
1867
            if i == j {
3✔
1868
                return Err(Error::BamParseCigar {
×
1869
                    msg: "Expected length before cigar operation [0-9]+[MIDNSHP=X]".to_owned(),
×
1870
                });
1871
            }
1872
            // get the length of the operation
1873
            let s = str::from_utf8(&bytes[i..j]).map_err(|_| Error::BamParseCigar {
6✔
1874
                msg: format!("Invalid utf-8 bytes '{:?}'.", &bytes[i..j]),
×
1875
            })?;
1876
            let n = s.parse().map_err(|_| Error::BamParseCigar {
6✔
1877
                msg: format!("Unable to parse &str '{:?}' to u32.", s),
×
1878
            })?;
1879
            // get the operation
1880
            let op = &bytes[j];
6✔
1881
            inner.push(match op {
6✔
1882
                b'M' => Cigar::Match(n),
3✔
1883
                b'I' => Cigar::Ins(n),
1✔
1884
                b'D' => Cigar::Del(n),
1✔
1885
                b'N' => Cigar::RefSkip(n),
1✔
1886
                b'H' => {
1887
                    if i == 0 || j + 1 == text_len {
6✔
1888
                        Cigar::HardClip(n)
3✔
1889
                    } else {
1890
                        return Err(Error::BamParseCigar {
×
1891
                            msg: "Hard clipping ('H') is only valid at the start or end of a cigar."
×
1892
                                .to_owned(),
1893
                        });
1894
                    }
1895
                }
1896
                b'S' => {
1897
                    if i == 0
1✔
1898
                        || j + 1 == text_len
1✔
1899
                        || bytes[i-1] == b'H'
1✔
1900
                        || bytes[j+1..].iter().all(|c| c.is_ascii_digit() || *c == b'H') {
3✔
1901
                        Cigar::SoftClip(n)
1✔
1902
                    } else {
1903
                        return Err(Error::BamParseCigar {
×
1904
                        msg: "Soft clips ('S') can only have hard clips ('H') between them and the end of the CIGAR string."
×
1905
                            .to_owned(),
1906
                        });
1907
                    }
1908
                },
1909
                b'P' => Cigar::Pad(n),
1✔
1910
                b'=' => Cigar::Equal(n),
3✔
1911
                b'X' => Cigar::Diff(n),
3✔
1912
                op => {
×
1913
                    return Err(Error::BamParseCigar {
×
1914
                        msg: format!("Expected cigar operation [MIDNSHP=X] but got [{}]", op),
×
1915
                    })
1916
                }
1917
            });
1918
            i = j + 1;
3✔
1919
        }
1920
        Ok(CigarString(inner))
3✔
1921
    }
1922
}
1923

1924
impl TryFrom<&str> for CigarString {
1925
    type Error = Error;
1926

1927
    /// Create a CigarString from given &str.
1928
    /// # Example
1929
    /// ```
1930
    /// use rust_htslib::bam::record::*;
1931
    /// use rust_htslib::bam::record::CigarString;
1932
    /// use rust_htslib::bam::record::Cigar::*;
1933
    /// use std::convert::TryFrom;
1934
    ///
1935
    /// let cigar_str = "2H10M5X3=2H";
1936
    /// let cigar = CigarString::try_from(cigar_str)
1937
    ///     .expect("Unable to parse cigar string.");
1938
    /// let expected_cigar = CigarString(vec![
1939
    ///     HardClip(2),
1940
    ///     Match(10),
1941
    ///     Diff(5),
1942
    ///     Equal(3),
1943
    ///     HardClip(2),
1944
    /// ]);
1945
    /// assert_eq!(cigar, expected_cigar);
1946
    /// ```
1947
    fn try_from(text: &str) -> Result<Self> {
2✔
1948
        let bytes = text.as_bytes();
2✔
1949
        if text.chars().count() != bytes.len() {
2✔
1950
            return Err(Error::BamParseCigar {
1✔
1951
                msg: "CIGAR string contained non-ASCII characters, which are not valid. Valid are [0-9MIDNSHP=X].".to_owned(),
1✔
1952
            });
1953
        }
1954
        CigarString::try_from(bytes)
2✔
1955
    }
1956
}
1957

1958
impl<'a> CigarString {
1959
    pub fn iter(&'a self) -> ::std::slice::Iter<'a, Cigar> {
1✔
1960
        self.into_iter()
1✔
1961
    }
1962
}
1963

1964
impl<'a> IntoIterator for &'a CigarString {
1965
    type Item = &'a Cigar;
1966
    type IntoIter = ::std::slice::Iter<'a, Cigar>;
1967

1968
    fn into_iter(self) -> Self::IntoIter {
2✔
1969
        self.0.iter()
2✔
1970
    }
1971
}
1972

1973
impl fmt::Display for CigarString {
1974
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
2✔
1975
        for op in self {
4✔
1976
            fmt.write_fmt(format_args!("{}{}", op.len(), op.char()))?;
6✔
1977
        }
1978
        Ok(())
2✔
1979
    }
1980
}
1981

1982
// Get number of leading/trailing softclips if a CigarString taking hardclips into account
1983
fn calc_softclips<'a>(mut cigar: impl DoubleEndedIterator<Item = &'a Cigar>) -> i64 {
2✔
1984
    match (cigar.next(), cigar.next()) {
8✔
1985
        (Some(Cigar::HardClip(_)), Some(Cigar::SoftClip(s))) | (Some(Cigar::SoftClip(s)), _) => {
4✔
1986
            *s as i64
2✔
1987
        }
1988
        _ => 0,
×
1989
    }
1990
}
1991

1992
#[derive(Eq, PartialEq, Clone, Debug)]
1993
pub struct CigarStringView {
1994
    inner: CigarString,
1995
    pos: i64,
1996
}
1997

1998
impl CigarStringView {
1999
    /// Construct a new CigarStringView from a CigarString at a position
2000
    pub fn new(c: CigarString, pos: i64) -> CigarStringView {
1✔
2001
        CigarStringView { inner: c, pos }
2002
    }
2003

2004
    /// Get (exclusive) end position of alignment.
2005
    pub fn end_pos(&self) -> i64 {
1✔
2006
        let mut pos = self.pos;
1✔
2007
        for c in self {
2✔
2008
            match c {
3✔
UNCOV
2009
                Cigar::Match(l)
×
2010
                | Cigar::RefSkip(l)
2011
                | Cigar::Del(l)
2012
                | Cigar::Equal(l)
2013
                | Cigar::Diff(l) => pos += *l as i64,
2014
                // these don't add to end_pos on reference
2015
                Cigar::Ins(_) | Cigar::SoftClip(_) | Cigar::HardClip(_) | Cigar::Pad(_) => (),
2016
            }
2017
        }
2018
        pos
1✔
2019
    }
2020

2021
    /// Get the start position of the alignment (0-based).
2022
    pub fn pos(&self) -> i64 {
1✔
2023
        self.pos
1✔
2024
    }
2025

2026
    /// Get number of bases softclipped at the beginning of the alignment.
2027
    pub fn leading_softclips(&self) -> i64 {
1✔
2028
        calc_softclips(self.iter())
1✔
2029
    }
2030

2031
    /// Get number of bases softclipped at the end of the alignment.
2032
    pub fn trailing_softclips(&self) -> i64 {
1✔
2033
        calc_softclips(self.iter().rev())
1✔
2034
    }
2035

2036
    /// Get number of bases hardclipped at the beginning of the alignment.
2037
    pub fn leading_hardclips(&self) -> i64 {
×
2038
        self.first().map_or(0, |cigar| {
×
2039
            if let Cigar::HardClip(s) = cigar {
×
2040
                *s as i64
×
2041
            } else {
2042
                0
×
2043
            }
2044
        })
2045
    }
2046

2047
    /// Get number of bases hardclipped at the end of the alignment.
2048
    pub fn trailing_hardclips(&self) -> i64 {
×
2049
        self.last().map_or(0, |cigar| {
×
2050
            if let Cigar::HardClip(s) = cigar {
×
2051
                *s as i64
×
2052
            } else {
2053
                0
×
2054
            }
2055
        })
2056
    }
2057

2058
    /// For a given position in the reference, get corresponding position within read.
2059
    /// If reference position is outside of the read alignment, return None.
2060
    ///
2061
    /// # Arguments
2062
    ///
2063
    /// * `ref_pos` - the reference position
2064
    /// * `include_softclips` - if true, softclips will be considered as matches or mismatches
2065
    /// * `include_dels` - if true, positions within deletions will be considered (first reference matching read position after deletion will be returned)
2066
    ///
2067
    pub fn read_pos(
1✔
2068
        &self,
2069
        ref_pos: u32,
2070
        include_softclips: bool,
2071
        include_dels: bool,
2072
    ) -> Result<Option<u32>> {
2073
        let mut rpos = self.pos as u32; // reference position
1✔
2074
        let mut qpos = 0u32; // position within read
1✔
2075
        let mut j = 0; // index into cigar operation vector
1✔
2076

2077
        // find first cigar operation referring to qpos = 0 (and thus bases in record.seq()),
2078
        // because all augmentations of qpos and rpos before that are invalid
2079
        for (i, c) in self.iter().enumerate() {
2✔
2080
            match c {
1✔
2081
                Cigar::Match(_) |
2082
                Cigar::Diff(_)  |
2083
                Cigar::Equal(_) |
2084
                // this is unexpected, but bwa + GATK indel realignment can produce insertions
2085
                // before matching positions
2086
                Cigar::Ins(_) => {
2087
                    j = i;
1✔
2088
                    break;
1✔
2089
                },
2090
                Cigar::SoftClip(l) => {
1✔
2091
                    j = i;
1✔
2092
                    if include_softclips {
2✔
2093
                        // Alignment starts with softclip and we want to include it in the
2094
                        // projection of the reference position. However, the POS field does not
2095
                        // include the softclip. Hence we have to subtract its length.
2096
                        rpos = rpos.saturating_sub(*l);
2✔
2097
                    }
2098
                    break;
2099
                },
NEW
2100
                Cigar::Del(l) => {
×
2101
                    // METHOD: leading deletions can happen in case of trimmed reads where
2102
                    // a primer has been removed AFTER read mapping.
2103
                    // Example: 24M8I8D18M9S before trimming, 32H8D18M9S after trimming
2104
                    // with fgbio. While leading deletions should be impossible with
2105
                    // normal read mapping, they make perfect sense with primer trimming
2106
                    // because the mapper still had the evidence to decide in favor of
2107
                    // the deletion via the primer sequence.
NEW
2108
                    rpos += l;
×
2109
                },
2110
                Cigar::RefSkip(_) => {
2111
                    return Err(Error::BamUnexpectedCigarOperation {
×
2112
                        msg: "'reference skip' (N) found before any operation describing read sequence".to_owned()
×
2113
                    });
2114
                },
2115
                Cigar::HardClip(_) if i > 0 && i < self.len()-1 => {
2✔
2116
                    return Err(Error::BamUnexpectedCigarOperation{
1✔
2117
                        msg: "'hard clip' (H) found in between operations, contradicting SAMv1 spec that hard clips can only be at the ends of reads".to_owned()
1✔
2118
                    });
2119
                },
2120
                // if we have reached the end of the CigarString with only pads and hard clips, we have no read position matching the variant
2121
                Cigar::Pad(_) | Cigar::HardClip(_) if i == self.len()-1 => return Ok(None),
3✔
2122
                // skip leading HardClips and Pads, as they consume neither read sequence nor reference sequence
2123
                Cigar::Pad(_) | Cigar::HardClip(_) => ()
2124
            }
2125
        }
2126

2127
        let contains_ref_pos = |cigar_op_start: u32, cigar_op_length: u32| {
2✔
2128
            cigar_op_start <= ref_pos && cigar_op_start + cigar_op_length > ref_pos
2✔
2129
        };
2130

2131
        while rpos <= ref_pos && j < self.len() {
3✔
2132
            match self[j] {
9✔
2133
                // potential SNV evidence
2134
                Cigar::Match(l) | Cigar::Diff(l) | Cigar::Equal(l) if contains_ref_pos(rpos, l) => {
5✔
2135
                    // difference between desired position and first position of current cigar
2136
                    // operation
2137
                    qpos += ref_pos - rpos;
2✔
2138
                    return Ok(Some(qpos));
1✔
2139
                }
2140
                Cigar::SoftClip(l) if include_softclips && contains_ref_pos(rpos, l) => {
2✔
2141
                    qpos += ref_pos - rpos;
2✔
2142
                    return Ok(Some(qpos));
1✔
2143
                }
2144
                Cigar::Del(l) if include_dels && contains_ref_pos(rpos, l) => {
2✔
2145
                    // qpos shall resemble the start of the deletion
2146
                    return Ok(Some(qpos));
1✔
2147
                }
2148
                // for others, just increase pos and qpos as needed
2149
                Cigar::Match(l) | Cigar::Diff(l) | Cigar::Equal(l) => {
3✔
2150
                    rpos += l;
2✔
2151
                    qpos += l;
2✔
2152
                    j += 1;
2✔
2153
                }
2154
                Cigar::SoftClip(l) => {
1✔
2155
                    qpos += l;
2✔
2156
                    j += 1;
2✔
2157
                    if include_softclips {
2✔
2158
                        rpos += l;
1✔
2159
                    }
2160
                }
2161
                Cigar::Ins(l) => {
1✔
2162
                    qpos += l;
2✔
2163
                    j += 1;
2✔
2164
                }
2165
                Cigar::RefSkip(l) | Cigar::Del(l) => {
2✔
2166
                    rpos += l;
1✔
2167
                    j += 1;
2✔
2168
                }
2169
                Cigar::Pad(_) => {
1✔
2170
                    j += 1;
2✔
2171
                }
2172
                Cigar::HardClip(_) if j < self.len() - 1 => {
2✔
2173
                    return Err(Error::BamUnexpectedCigarOperation{
1✔
2174
                        msg: "'hard clip' (H) found in between operations, contradicting SAMv1 spec that hard clips can only be at the ends of reads".to_owned()
1✔
2175
                    });
2176
                }
2177
                Cigar::HardClip(_) => return Ok(None),
×
2178
            }
2179
        }
2180

2181
        Ok(None)
1✔
2182
    }
2183

2184
    /// transfer ownership of the Cigar out of the CigarView
2185
    pub fn take(self) -> CigarString {
1✔
2186
        self.inner
1✔
2187
    }
2188
}
2189

2190
impl ops::Deref for CigarStringView {
2191
    type Target = CigarString;
2192

2193
    fn deref(&self) -> &CigarString {
1✔
2194
        &self.inner
2195
    }
2196
}
2197

2198
impl ops::Index<usize> for CigarStringView {
2199
    type Output = Cigar;
2200

2201
    fn index(&self, index: usize) -> &Cigar {
1✔
2202
        self.inner.index(index)
1✔
2203
    }
2204
}
2205

2206
impl ops::IndexMut<usize> for CigarStringView {
2207
    fn index_mut(&mut self, index: usize) -> &mut Cigar {
×
2208
        self.inner.index_mut(index)
×
2209
    }
2210
}
2211

2212
impl<'a> CigarStringView {
2213
    pub fn iter(&'a self) -> ::std::slice::Iter<'a, Cigar> {
1✔
2214
        self.inner.into_iter()
1✔
2215
    }
2216
}
2217

2218
impl<'a> IntoIterator for &'a CigarStringView {
2219
    type Item = &'a Cigar;
2220
    type IntoIter = ::std::slice::Iter<'a, Cigar>;
2221

2222
    fn into_iter(self) -> Self::IntoIter {
1✔
2223
        self.inner.into_iter()
1✔
2224
    }
2225
}
2226

2227
impl fmt::Display for CigarStringView {
2228
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
×
2229
        self.inner.fmt(fmt)
×
2230
    }
2231
}
2232

2233
pub struct BaseModificationMetadata {
2234
    pub strand: i32,
2235
    pub implicit: i32,
2236
    pub canonical: u8,
2237
}
2238

2239
/// struct containing the internal state required to access
2240
/// the base modifications for a bam::Record
2241
pub struct BaseModificationState<'a> {
2242
    record: &'a Record,
2243
    state: *mut htslib::hts_base_mod_state,
2244
    buffer: Vec<htslib::hts_base_mod>,
2245
    buffer_pos: i32,
2246
}
2247

2248
impl BaseModificationState<'_> {
2249
    /// Initialize a new BaseModification struct from a bam::Record
2250
    /// This function allocates memory for the state structure
2251
    /// and initializes the iterator to the start of the modification
2252
    /// records.
2253
    fn new<'a>(r: &'a Record) -> Result<BaseModificationState<'a>> {
2✔
2254
        let mut bm = unsafe {
2255
            BaseModificationState {
2256
                record: r,
2257
                state: hts_sys::hts_base_mod_state_alloc(),
2✔
2258
                buffer: Vec::new(),
2✔
2259
                buffer_pos: -1,
×
2260
            }
2261
        };
2262

2263
        if bm.state.is_null() {
4✔
2264
            panic!("Unable to allocate memory for hts_base_mod_state");
×
2265
        }
2266

2267
        // parse the MM tag to initialize the state
2268
        unsafe {
2269
            let ret = hts_sys::bam_parse_basemod(bm.record.inner_ptr(), bm.state);
4✔
2270
            if ret != 0 {
2✔
2271
                return Err(Error::BamBaseModificationTagNotFound);
×
2272
            }
2273
        }
2274

2275
        let types = bm.recorded();
4✔
2276
        bm.buffer.reserve(types.len());
2✔
2277
        return Ok(bm);
2✔
2278
    }
2279

2280
    pub fn buffer_next_mods(&mut self) -> Result<usize> {
2✔
2281
        unsafe {
2282
            let ret = hts_sys::bam_next_basemod(
2283
                self.record.inner_ptr(),
2✔
2284
                self.state,
2✔
2285
                self.buffer.as_mut_ptr(),
2✔
2286
                self.buffer.capacity() as i32,
2✔
2287
                &mut self.buffer_pos,
2✔
2288
            );
2289

2290
            if ret < 0 {
2✔
2291
                return Err(Error::BamBaseModificationIterationFailed);
×
2292
            }
2293

2294
            // the htslib API won't write more than buffer.capacity() mods to the output array but it will
2295
            // return the actual number of modifications found. We return an error to the caller
2296
            // in the case where there was insufficient storage to return all mods.
2297
            if ret as usize > self.buffer.capacity() {
2✔
2298
                return Err(Error::BamBaseModificationTooManyMods);
×
2299
            }
2300

2301
            // we read the modifications directly into the vector, which does
2302
            // not update the length so needs to be manually set
2303
            self.buffer.set_len(ret as usize);
2✔
2304

2305
            return Ok(ret as usize);
2✔
2306
        }
2307
    }
2308

2309
    /// Return an array containing the modification codes listed for this record.
2310
    /// Positive values are ascii character codes (eg m), negative values are chEBI codes.
2311
    pub fn recorded<'a>(&self) -> &'a [i32] {
2✔
2312
        unsafe {
2313
            let mut n: i32 = 0;
2✔
2314
            let data_ptr: *const i32 = hts_sys::bam_mods_recorded(self.state, &mut n);
2✔
2315

2316
            // htslib should not return a null pointer, even when there are no base mods
2317
            if data_ptr.is_null() {
2✔
2318
                panic!("Unable to obtain pointer to base modifications");
×
2319
            }
2320
            assert!(n >= 0);
2✔
2321
            return slice::from_raw_parts(data_ptr, n as usize);
2✔
2322
        }
2323
    }
2324

2325
    /// Return metadata for the specified character code indicating the strand
2326
    /// the base modification was called on, whether the tag uses implicit mode
2327
    /// and the ascii code for the canonical base.
2328
    /// If there are multiple modifications with the same code this will return the data
2329
    /// for the first mod.  See https://github.com/samtools/htslib/issues/1635
2330
    pub fn query_type<'a>(&self, code: i32) -> Result<BaseModificationMetadata> {
2✔
2331
        unsafe {
2332
            let mut strand: i32 = 0;
2✔
2333
            let mut implicit: i32 = 0;
2✔
2334
            // This may be i8 or u8 in hts_sys.
2335
            let mut canonical: c_char = 0;
2✔
2336

2337
            let ret = hts_sys::bam_mods_query_type(
2338
                self.state,
2✔
2339
                code,
×
2340
                &mut strand,
×
2341
                &mut implicit,
×
2342
                &mut canonical,
×
2343
            );
2344
            if ret == -1 {
2✔
2345
                return Err(Error::BamBaseModificationTypeNotFound);
×
2346
            } else {
2347
                return Ok(BaseModificationMetadata {
2✔
2348
                    strand,
2✔
2349
                    implicit,
2✔
2350
                    canonical: canonical.try_into().unwrap(),
2✔
2351
                });
2352
            }
2353
        }
2354
    }
2355
}
2356

2357
impl Drop for BaseModificationState<'_> {
2358
    fn drop<'a>(&mut self) {
2✔
2359
        unsafe {
2360
            hts_sys::hts_base_mod_state_free(self.state);
2✔
2361
        }
2362
    }
2363
}
2364

2365
/// Iterator over the base modifications that returns
2366
/// a vector for all of the mods at each position
2367
pub struct BaseModificationsPositionIter<'a> {
2368
    mod_state: BaseModificationState<'a>,
2369
}
2370

2371
impl BaseModificationsPositionIter<'_> {
2372
    fn new<'a>(r: &'a Record) -> Result<BaseModificationsPositionIter<'a>> {
1✔
2373
        let state = BaseModificationState::new(r)?;
1✔
2374
        Ok(BaseModificationsPositionIter { mod_state: state })
1✔
2375
    }
2376

2377
    pub fn recorded<'a>(&self) -> &'a [i32] {
×
2378
        return self.mod_state.recorded();
×
2379
    }
2380

2381
    pub fn query_type<'a>(&self, code: i32) -> Result<BaseModificationMetadata> {
×
2382
        return self.mod_state.query_type(code);
×
2383
    }
2384
}
2385

2386
impl<'a> Iterator for BaseModificationsPositionIter<'a> {
2387
    type Item = Result<(i32, Vec<hts_sys::hts_base_mod>)>;
2388

2389
    fn next(&mut self) -> Option<Self::Item> {
1✔
2390
        let ret = self.mod_state.buffer_next_mods();
1✔
2391

2392
        // Three possible things happened in buffer_next_mods:
2393
        // 1. the htslib API call was successful but there are no more mods
2394
        // 2. ths htslib API call was successful and we read some mods
2395
        // 3. the htslib API call failed, we propogate the error wrapped in an option
2396
        match ret {
1✔
2397
            Ok(num_mods) => {
1✔
2398
                if num_mods == 0 {
1✔
2399
                    return None;
1✔
2400
                } else {
2401
                    let data = (self.mod_state.buffer_pos, self.mod_state.buffer.clone());
1✔
2402
                    return Some(Ok(data));
1✔
2403
                }
2404
            }
2405
            Err(e) => return Some(Err(e)),
×
2406
        }
2407
    }
2408
}
2409

2410
/// Iterator over the base modifications that returns
2411
/// the next modification found, one by one
2412
pub struct BaseModificationsIter<'a> {
2413
    mod_state: BaseModificationState<'a>,
2414
    buffer_idx: usize,
2415
}
2416

2417
impl BaseModificationsIter<'_> {
2418
    fn new<'a>(r: &'a Record) -> Result<BaseModificationsIter<'a>> {
2✔
2419
        let state = BaseModificationState::new(r)?;
2✔
2420
        Ok(BaseModificationsIter {
2✔
2421
            mod_state: state,
×
2422
            buffer_idx: 0,
×
2423
        })
2424
    }
2425

2426
    pub fn recorded<'a>(&self) -> &'a [i32] {
2✔
2427
        return self.mod_state.recorded();
2✔
2428
    }
2429

2430
    pub fn query_type<'a>(&self, code: i32) -> Result<BaseModificationMetadata> {
2✔
2431
        return self.mod_state.query_type(code);
2✔
2432
    }
2433
}
2434

2435
impl<'a> Iterator for BaseModificationsIter<'a> {
2436
    type Item = Result<(i32, hts_sys::hts_base_mod)>;
2437

2438
    fn next(&mut self) -> Option<Self::Item> {
2✔
2439
        if self.buffer_idx == self.mod_state.buffer.len() {
4✔
2440
            // need to use the internal state to read the next
2441
            // set of modifications into the buffer
2442
            let ret = self.mod_state.buffer_next_mods();
2✔
2443

2444
            match ret {
2✔
2445
                Ok(num_mods) => {
2✔
2446
                    if num_mods == 0 {
2✔
2447
                        // done iterating
2448
                        return None;
2✔
2449
                    } else {
2450
                        // we read some mods, reset the position in the buffer then fall through
2451
                        self.buffer_idx = 0;
2✔
2452
                    }
2453
                }
2454
                Err(e) => return Some(Err(e)),
×
2455
            }
2456
        }
2457

2458
        // if we got here when there are mods buffered that we haven't emitted yet
2459
        assert!(self.buffer_idx < self.mod_state.buffer.len());
2✔
2460
        let data = (
2✔
2461
            self.mod_state.buffer_pos,
2✔
2462
            self.mod_state.buffer[self.buffer_idx],
2✔
2463
        );
2464
        self.buffer_idx += 1;
2✔
2465
        return Some(Ok(data));
2✔
2466
    }
2467
}
2468

2469
#[cfg(test)]
2470
mod tests {
2471
    use super::*;
2472

2473
    #[test]
2474
    fn test_cigar_string() {
2475
        let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]);
2476

2477
        assert_eq!(cigar[0], Cigar::Match(100));
2478
        assert_eq!(format!("{}", cigar), "100M10S");
2479
        for op in &cigar {
2480
            println!("{}", op);
2481
        }
2482
    }
2483

2484
    #[test]
2485
    fn test_cigar_string_view_pos() {
2486
        let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]).into_view(5);
2487
        assert_eq!(cigar.pos(), 5);
2488
    }
2489

2490
    #[test]
2491
    fn test_cigar_string_leading_softclips() {
2492
        let cigar = CigarString(vec![Cigar::SoftClip(10), Cigar::Match(100)]).into_view(0);
2493
        assert_eq!(cigar.leading_softclips(), 10);
2494
        let cigar2 = CigarString(vec![
2495
            Cigar::HardClip(5),
2496
            Cigar::SoftClip(10),
2497
            Cigar::Match(100),
2498
        ])
2499
        .into_view(0);
2500
        assert_eq!(cigar2.leading_softclips(), 10);
2501
    }
2502

2503
    #[test]
2504
    fn test_cigar_string_trailing_softclips() {
2505
        let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]).into_view(0);
2506
        assert_eq!(cigar.trailing_softclips(), 10);
2507
        let cigar2 = CigarString(vec![
2508
            Cigar::Match(100),
2509
            Cigar::SoftClip(10),
2510
            Cigar::HardClip(5),
2511
        ])
2512
        .into_view(0);
2513
        assert_eq!(cigar2.trailing_softclips(), 10);
2514
    }
2515

2516
    #[test]
2517
    fn test_cigar_read_pos() {
2518
        let vpos = 5; // variant position
2519

2520
        // Ignore leading HardClip
2521
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2522
        // var:                       V
2523
        // c01: 7H                 M  M
2524
        // qpos:                  00 01
2525
        let c01 = CigarString(vec![Cigar::HardClip(7), Cigar::Match(2)]).into_view(4);
2526
        assert_eq!(c01.read_pos(vpos, false, false).unwrap(), Some(1));
2527

2528
        // Skip leading SoftClip or use as pre-POS matches
2529
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2530
        // var:                       V
2531
        // c02: 5H2S         M  M  M  M  M  M
2532
        // qpos:  00        02 03 04 05 06 07
2533
        // c02: 5H     S  S  M  M  M  M  M  M
2534
        // qpos:      00 01 02 03 04 05 06 07
2535
        let c02 = CigarString(vec![Cigar::SoftClip(2), Cigar::Match(6)]).into_view(2);
2536
        assert_eq!(c02.read_pos(vpos, false, false).unwrap(), Some(5));
2537
        assert_eq!(c02.read_pos(vpos, true, false).unwrap(), Some(5));
2538

2539
        // Skip leading SoftClip returning None for unmatched reference positiong or use as
2540
        // pre-POS matches
2541
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2542
        // var:                       V
2543
        // c03:  3S                      M  M
2544
        // qpos: 00                     03 04
2545
        // c03:                 S  S  S  M  M
2546
        // qpos:               00 01 02 03 04
2547
        let c03 = CigarString(vec![Cigar::SoftClip(3), Cigar::Match(6)]).into_view(6);
2548
        assert_eq!(c03.read_pos(vpos, false, false).unwrap(), None);
2549
        assert_eq!(c03.read_pos(vpos, true, false).unwrap(), Some(2));
2550

2551
        // Skip leading Insertion before variant position
2552
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2553
        // var:                       V
2554
        // c04:  3I                X  X  X
2555
        // qpos: 00               03 04 05
2556
        let c04 = CigarString(vec![Cigar::Ins(3), Cigar::Diff(3)]).into_view(4);
2557
        assert_eq!(c04.read_pos(vpos, true, false).unwrap(), Some(4));
2558

2559
        // Matches and deletion before variant position
2560
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2561
        // var:                       V
2562
        // c05:        =  =  D  D  X  =  =
2563
        // qpos:      00 01       02 03 04 05
2564
        let c05 = CigarString(vec![
2565
            Cigar::Equal(2),
2566
            Cigar::Del(2),
2567
            Cigar::Diff(1),
2568
            Cigar::Equal(2),
2569
        ])
2570
        .into_view(0);
2571
        assert_eq!(c05.read_pos(vpos, true, false).unwrap(), Some(3));
2572

2573
        // single nucleotide Deletion covering variant position
2574
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2575
        // var:                       V
2576
        // c06:                 =  =  D  X  X
2577
        // qpos:               00 01    02 03
2578
        let c06 = CigarString(vec![Cigar::Equal(2), Cigar::Del(1), Cigar::Diff(2)]).into_view(3);
2579
        assert_eq!(c06.read_pos(vpos, false, true).unwrap(), Some(2));
2580
        assert_eq!(c06.read_pos(vpos, false, false).unwrap(), None);
2581

2582
        // three nucleotide Deletion covering variant position
2583
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2584
        // var:                       V
2585
        // c07:              =  =  D  D  D  M  M
2586
        // qpos:            00 01          02 03
2587
        let c07 = CigarString(vec![Cigar::Equal(2), Cigar::Del(3), Cigar::Match(2)]).into_view(2);
2588
        assert_eq!(c07.read_pos(vpos, false, true).unwrap(), Some(2));
2589
        assert_eq!(c07.read_pos(vpos, false, false).unwrap(), None);
2590

2591
        // three nucleotide RefSkip covering variant position
2592
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2593
        // var:                       V
2594
        // c08:              =  X  N  N  N  M  M
2595
        // qpos:            00 01          02 03
2596
        let c08 = CigarString(vec![
2597
            Cigar::Equal(1),
2598
            Cigar::Diff(1),
2599
            Cigar::RefSkip(3),
2600
            Cigar::Match(2),
2601
        ])
2602
        .into_view(2);
2603
        assert_eq!(c08.read_pos(vpos, false, true).unwrap(), None);
2604
        assert_eq!(c08.read_pos(vpos, false, false).unwrap(), None);
2605

2606
        // internal hard clip before variant pos
2607
        // ref:       00 01 02 03    04 05 06 07 08 09 10 11 12 13 14 15
2608
        // var:                          V
2609
        // c09: 3H           =  = 3H  =  =
2610
        // qpos:            00 01    02 03
2611
        let c09 = CigarString(vec![
2612
            Cigar::HardClip(3),
2613
            Cigar::Equal(2),
2614
            Cigar::HardClip(3),
2615
            Cigar::Equal(2),
2616
        ])
2617
        .into_view(2);
2618
        assert_eq!(c09.read_pos(vpos, false, true).is_err(), true);
2619

2620
        // Deletion right before variant position
2621
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2622
        // var:                       V
2623
        // c10:           M  M  D  D  M  M
2624
        // qpos:         00 01       02 03
2625
        let c10 = CigarString(vec![Cigar::Match(2), Cigar::Del(2), Cigar::Match(2)]).into_view(1);
2626
        assert_eq!(c10.read_pos(vpos, false, false).unwrap(), Some(2));
2627

2628
        // Insertion right before variant position
2629
        // ref:       00 01 02 03 04    05 06 07 08 09 10 11 12 13 14 15
2630
        // var:                          V
2631
        // c11:                 M  M 3I  M
2632
        // qpos:               00 01 02 05 06
2633
        let c11 = CigarString(vec![Cigar::Match(2), Cigar::Ins(3), Cigar::Match(2)]).into_view(3);
2634
        assert_eq!(c11.read_pos(vpos, false, false).unwrap(), Some(5));
2635

2636
        // Insertion right after variant position
2637
        // ref:       00 01 02 03 04 05    06 07 08 09 10 11 12 13 14 15
2638
        // var:                       V
2639
        // c12:                 M  M  M 2I  =
2640
        // qpos:               00 01 02 03 05
2641
        let c12 = CigarString(vec![Cigar::Match(3), Cigar::Ins(2), Cigar::Equal(1)]).into_view(3);
2642
        assert_eq!(c12.read_pos(vpos, false, false).unwrap(), Some(2));
2643

2644
        // Deletion right after variant position
2645
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2646
        // var:                       V
2647
        // c13:                 M  M  M  D  =
2648
        // qpos:               00 01 02    03
2649
        let c13 = CigarString(vec![Cigar::Match(3), Cigar::Del(1), Cigar::Equal(1)]).into_view(3);
2650
        assert_eq!(c13.read_pos(vpos, false, false).unwrap(), Some(2));
2651

2652
        // A messy and complicated example, including a Pad operation
2653
        let vpos2 = 15;
2654
        // ref:       00    01 02    03 04 05    06 07 08 09 10 11 12 13 14 15
2655
        // var:                                                           V
2656
        // c14: 5H3S   = 2P  M  X 3I  M  M  D 2I  =  =  N  N  N  M  M  M  =  =  5S2H
2657
        // qpos:  00  03    04 05 06 09 10    11 13 14          15 16 17 18 19
2658
        let c14 = CigarString(vec![
2659
            Cigar::HardClip(5),
2660
            Cigar::SoftClip(3),
2661
            Cigar::Equal(1),
2662
            Cigar::Pad(2),
2663
            Cigar::Match(1),
2664
            Cigar::Diff(1),
2665
            Cigar::Ins(3),
2666
            Cigar::Match(2),
2667
            Cigar::Del(1),
2668
            Cigar::Ins(2),
2669
            Cigar::Equal(2),
2670
            Cigar::RefSkip(3),
2671
            Cigar::Match(3),
2672
            Cigar::Equal(2),
2673
            Cigar::SoftClip(5),
2674
            Cigar::HardClip(2),
2675
        ])
2676
        .into_view(0);
2677
        assert_eq!(c14.read_pos(vpos2, false, false).unwrap(), Some(19));
2678

2679
        // HardClip after Pad
2680
        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2681
        // var:                       V
2682
        // c15: 5P1H            =  =  =
2683
        // qpos:               00 01 02
2684
        let c15 =
2685
            CigarString(vec![Cigar::Pad(5), Cigar::HardClip(1), Cigar::Equal(3)]).into_view(3);
2686
        assert_eq!(c15.read_pos(vpos, false, false).is_err(), true);
2687

2688
        // only HardClip and Pad operations
2689
        // c16: 7H5P2H
2690
        let c16 =
2691
            CigarString(vec![Cigar::HardClip(7), Cigar::Pad(5), Cigar::HardClip(2)]).into_view(3);
2692
        assert_eq!(c16.read_pos(vpos, false, false).unwrap(), None);
2693
    }
2694

2695
    #[test]
2696
    fn test_clone() {
2697
        let mut rec = Record::new();
2698
        rec.set_pos(300);
2699
        rec.set_qname(b"read1");
2700
        let clone = rec.clone();
2701
        assert_eq!(rec, clone);
2702
    }
2703

2704
    #[test]
2705
    fn test_flags() {
2706
        let mut rec = Record::new();
2707

2708
        rec.set_paired();
2709
        assert_eq!(rec.is_paired(), true);
2710

2711
        rec.set_supplementary();
2712
        assert_eq!(rec.is_supplementary(), true);
2713
        assert_eq!(rec.is_supplementary(), true);
2714

2715
        rec.unset_paired();
2716
        assert_eq!(rec.is_paired(), false);
2717
        assert_eq!(rec.is_supplementary(), true);
2718

2719
        rec.unset_supplementary();
2720
        assert_eq!(rec.is_paired(), false);
2721
        assert_eq!(rec.is_supplementary(), false);
2722
    }
2723

2724
    #[test]
2725
    fn test_cigar_parse() {
2726
        let cigar = "1S20M1D2I3X1=2H";
2727
        let parsed = CigarString::try_from(cigar).unwrap();
2728
        assert_eq!(parsed.to_string(), cigar);
2729
    }
2730
}
2731

2732
#[cfg(test)]
2733
mod alignment_cigar_tests {
2734
    use super::*;
2735
    use crate::bam::{Read, Reader};
2736
    use bio_types::alignment::AlignmentOperation::{Del, Ins, Match, Subst, Xclip, Yclip};
2737
    use bio_types::alignment::{Alignment, AlignmentMode};
2738

2739
    #[test]
2740
    fn test_cigar() {
2741
        let alignment = Alignment {
2742
            score: 5,
2743
            xstart: 3,
2744
            ystart: 0,
2745
            xend: 9,
2746
            yend: 10,
2747
            ylen: 10,
2748
            xlen: 10,
2749
            operations: vec![Match, Match, Match, Subst, Ins, Ins, Del, Del],
2750
            mode: AlignmentMode::Semiglobal,
2751
        };
2752
        assert_eq!(alignment.cigar(false), "3S3=1X2I2D1S");
2753
        assert_eq!(
2754
            CigarString::from_alignment(&alignment, false).0,
2755
            vec![
2756
                Cigar::SoftClip(3),
2757
                Cigar::Equal(3),
2758
                Cigar::Diff(1),
2759
                Cigar::Ins(2),
2760
                Cigar::Del(2),
2761
                Cigar::SoftClip(1),
2762
            ]
2763
        );
2764

2765
        let alignment = Alignment {
2766
            score: 5,
2767
            xstart: 0,
2768
            ystart: 5,
2769
            xend: 4,
2770
            yend: 10,
2771
            ylen: 10,
2772
            xlen: 5,
2773
            operations: vec![Yclip(5), Match, Subst, Subst, Ins, Del, Del, Xclip(1)],
2774
            mode: AlignmentMode::Custom,
2775
        };
2776
        assert_eq!(alignment.cigar(false), "1=2X1I2D1S");
2777
        assert_eq!(alignment.cigar(true), "1=2X1I2D1H");
2778
        assert_eq!(
2779
            CigarString::from_alignment(&alignment, false).0,
2780
            vec![
2781
                Cigar::Equal(1),
2782
                Cigar::Diff(2),
2783
                Cigar::Ins(1),
2784
                Cigar::Del(2),
2785
                Cigar::SoftClip(1),
2786
            ]
2787
        );
2788
        assert_eq!(
2789
            CigarString::from_alignment(&alignment, true).0,
2790
            vec![
2791
                Cigar::Equal(1),
2792
                Cigar::Diff(2),
2793
                Cigar::Ins(1),
2794
                Cigar::Del(2),
2795
                Cigar::HardClip(1),
2796
            ]
2797
        );
2798

2799
        let alignment = Alignment {
2800
            score: 5,
2801
            xstart: 0,
2802
            ystart: 5,
2803
            xend: 3,
2804
            yend: 8,
2805
            ylen: 10,
2806
            xlen: 3,
2807
            operations: vec![Yclip(5), Subst, Match, Subst, Yclip(2)],
2808
            mode: AlignmentMode::Custom,
2809
        };
2810
        assert_eq!(alignment.cigar(false), "1X1=1X");
2811
        assert_eq!(
2812
            CigarString::from_alignment(&alignment, false).0,
2813
            vec![Cigar::Diff(1), Cigar::Equal(1), Cigar::Diff(1)]
2814
        );
2815

2816
        let alignment = Alignment {
2817
            score: 5,
2818
            xstart: 0,
2819
            ystart: 5,
2820
            xend: 3,
2821
            yend: 8,
2822
            ylen: 10,
2823
            xlen: 3,
2824
            operations: vec![Subst, Match, Subst],
2825
            mode: AlignmentMode::Semiglobal,
2826
        };
2827
        assert_eq!(alignment.cigar(false), "1X1=1X");
2828
        assert_eq!(
2829
            CigarString::from_alignment(&alignment, false).0,
2830
            vec![Cigar::Diff(1), Cigar::Equal(1), Cigar::Diff(1)]
2831
        );
2832
    }
2833

2834
    #[test]
2835
    fn test_read_orientation_f1r2() {
2836
        let mut bam = Reader::from_path(&"test/test_paired.sam").unwrap();
2837

2838
        for res in bam.records() {
2839
            let record = res.unwrap();
2840
            assert_eq!(
2841
                record.read_pair_orientation(),
2842
                SequenceReadPairOrientation::F1R2
2843
            );
2844
        }
2845
    }
2846

2847
    #[test]
2848
    pub fn test_cigar_parsing_non_ascii_error() {
2849
        let cigar_str = "43ጷ";
2850
        let expected_error = Err(Error::BamParseCigar {
2851
                msg: "CIGAR string contained non-ASCII characters, which are not valid. Valid are [0-9MIDNSHP=X].".to_owned(),
2852
            });
2853

2854
        let result = CigarString::try_from(cigar_str);
2855
        assert_eq!(expected_error, result);
2856
    }
2857

2858
    #[test]
2859
    pub fn test_cigar_parsing() {
2860
        // parsing test cases
2861
        let cigar_strs = vec![
2862
            "1H10M4D100I300N1102=10P25X11S", // test every cigar opt
2863
            "100M",                          // test a single op
2864
            "",                              // test empty input
2865
            "1H1=1H",                        // test simple hardclip
2866
            "1S1=1S",                        // test simple softclip
2867
            "11H11S11=11S11H",               // test complex softclip
2868
            "10H",
2869
            "10S",
2870
        ];
2871
        // expected results
2872
        let cigars = vec![
2873
            CigarString(vec![
2874
                Cigar::HardClip(1),
2875
                Cigar::Match(10),
2876
                Cigar::Del(4),
2877
                Cigar::Ins(100),
2878
                Cigar::RefSkip(300),
2879
                Cigar::Equal(1102),
2880
                Cigar::Pad(10),
2881
                Cigar::Diff(25),
2882
                Cigar::SoftClip(11),
2883
            ]),
2884
            CigarString(vec![Cigar::Match(100)]),
2885
            CigarString(vec![]),
2886
            CigarString(vec![
2887
                Cigar::HardClip(1),
2888
                Cigar::Equal(1),
2889
                Cigar::HardClip(1),
2890
            ]),
2891
            CigarString(vec![
2892
                Cigar::SoftClip(1),
2893
                Cigar::Equal(1),
2894
                Cigar::SoftClip(1),
2895
            ]),
2896
            CigarString(vec![
2897
                Cigar::HardClip(11),
2898
                Cigar::SoftClip(11),
2899
                Cigar::Equal(11),
2900
                Cigar::SoftClip(11),
2901
                Cigar::HardClip(11),
2902
            ]),
2903
            CigarString(vec![Cigar::HardClip(10)]),
2904
            CigarString(vec![Cigar::SoftClip(10)]),
2905
        ];
2906
        // compare
2907
        for (&cigar_str, truth) in cigar_strs.iter().zip(cigars.iter()) {
2908
            let cigar_parse = CigarString::try_from(cigar_str)
2909
                .expect(&format!("Unable to parse cigar: {}", cigar_str));
2910
            assert_eq!(&cigar_parse, truth);
2911
        }
2912
    }
2913
}
2914

2915
#[cfg(test)]
2916
mod basemod_tests {
2917
    use crate::bam::{Read, Reader};
2918

2919
    #[test]
2920
    pub fn test_count_recorded() {
2921
        let mut bam = Reader::from_path(&"test/base_mods/MM-double.sam").unwrap();
2922

2923
        for r in bam.records() {
2924
            let record = r.unwrap();
2925
            if let Ok(mods) = record.basemods_iter() {
2926
                let n = mods.recorded().len();
2927
                assert_eq!(n, 3);
2928
            };
2929
        }
2930
    }
2931

2932
    #[test]
2933
    pub fn test_query_type() {
2934
        let mut bam = Reader::from_path(&"test/base_mods/MM-orient.sam").unwrap();
2935

2936
        let mut n_fwd = 0;
2937
        let mut n_rev = 0;
2938

2939
        for r in bam.records() {
2940
            let record = r.unwrap();
2941
            if let Ok(mods) = record.basemods_iter() {
2942
                for mod_code in mods.recorded() {
2943
                    if let Ok(mod_metadata) = mods.query_type(*mod_code) {
2944
                        if mod_metadata.strand == 0 {
2945
                            n_fwd += 1;
2946
                        }
2947
                        if mod_metadata.strand == 1 {
2948
                            n_rev += 1;
2949
                        }
2950
                    }
2951
                }
2952
            };
2953
        }
2954
        assert_eq!(n_fwd, 2);
2955
        assert_eq!(n_rev, 2);
2956
    }
2957

2958
    #[test]
2959
    pub fn test_mod_iter() {
2960
        let mut bam = Reader::from_path(&"test/base_mods/MM-double.sam").unwrap();
2961
        let expected_positions = [1, 7, 12, 13, 13, 22, 30, 31];
2962
        let mut i = 0;
2963

2964
        for r in bam.records() {
2965
            let record = r.unwrap();
2966
            for res in record.basemods_iter().unwrap() {
2967
                if let Ok((position, _m)) = res {
2968
                    assert_eq!(position, expected_positions[i]);
2969
                    i += 1;
2970
                }
2971
            }
2972
        }
2973
    }
2974

2975
    #[test]
2976
    pub fn test_position_iter() {
2977
        let mut bam = Reader::from_path(&"test/base_mods/MM-double.sam").unwrap();
2978
        let expected_positions = [1, 7, 12, 13, 22, 30, 31];
2979
        let expected_counts = [1, 1, 1, 2, 1, 1, 1];
2980
        let mut i = 0;
2981

2982
        for r in bam.records() {
2983
            let record = r.unwrap();
2984
            for res in record.basemods_position_iter().unwrap() {
2985
                if let Ok((position, elements)) = res {
2986
                    assert_eq!(position, expected_positions[i]);
2987
                    assert_eq!(elements.len(), expected_counts[i]);
2988
                    i += 1;
2989
                }
2990
            }
2991
        }
2992
    }
2993
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc