• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

rust-bio / rust-bio-types / 5550351717

pending completion
5550351717

Pull #63

github

web-flow
Merge be9eac796 into 907b3d9da
Pull Request #63: fix: pretty print with suffix xclip or yclip

2 of 2 new or added lines in 1 file covered. (100.0%)

474 of 746 relevant lines covered (63.54%)

0.91 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

68.6
/src/annot/spliced.rs
1
// Copyright 2017 Nicholas Ingolia
2
// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3
// This file may not be copied, modified, or distributed
4
// except according to those terms.
5

6
//! Spliced region on a named sequence, e.g., the reverse strand of
7
//! chromosome V, exon #1 at 166,885 through 166,875 and exon #2 at
8
//! 166,771 through 166,237.
9

10
use std::cmp::{max, min};
11
use std::convert::Into;
12
use std::fmt::{self, Display, Formatter};
13
use std::ops::Neg;
14
use std::str::FromStr;
15

16
use regex::Regex;
17

18
use crate::annot::contig::Contig;
19
use crate::annot::loc::Loc;
20
use crate::annot::pos::Pos;
21
use crate::annot::*;
22
use crate::strand::*;
23

24
// The spliced location representation inherently cannot represent
25
// "bad" splicing structures. Locations comprise a first exon length,
26
// with a vector of intron-length / exon-length pairs, all type usize
27
// and hence non-negative.
28
//
29
// The InEx data type used to represent intron-exon pairs after the
30
// first exon enforces an additional strict positivity constraint
31
// (i.e., >0) on the lengths. This further eliminates degeneracy in
32
// representations: only one unique set of positive-length exons with
33
// interleaved positive-length introns can represent a splicing
34
// structure.
35
mod inex {
36
    use std::slice::Iter;
37

38
    use super::SplicingError;
39

40
    #[derive(Debug, Clone, Hash, PartialEq, Eq)]
41
    pub struct InEx {
42
        intron_length: usize,
43
        exon_length: usize,
44
    }
45

46
    impl InEx {
47
        pub fn new(intron_length: usize, exon_length: usize) -> Result<Self, SplicingError> {
1✔
48
            if intron_length < 1 {
1✔
49
                Err(SplicingError::IntronLength)
×
50
            } else if exon_length < 1 {
2✔
51
                Err(SplicingError::ExonLength)
×
52
            } else {
53
                Ok(InEx {
1✔
54
                    intron_length,
55
                    exon_length,
56
                })
57
            }
58
        }
59
        pub fn intron_length(&self) -> usize {
1✔
60
            self.intron_length
1✔
61
        }
62
        pub fn exon_length(&self) -> usize {
1✔
63
            self.exon_length
1✔
64
        }
65
        pub fn length(&self) -> usize {
1✔
66
            self.intron_length + self.exon_length
1✔
67
        }
68
    }
69

70
    // Represent just the start (relative to the start of the location) and length of exons
71
    // Useful for internal coordinate math
72
    #[derive(Debug, Clone, Hash, PartialEq, Eq)]
73
    pub struct Ex {
74
        start: usize,
75
        length: usize,
76
    }
77

78
    impl Ex {
79
        pub fn start(&self) -> usize {
1✔
80
            self.start
1✔
81
        }
82
        pub fn length(&self) -> usize {
1✔
83
            self.length
1✔
84
        }
85
        pub fn end(&self) -> usize {
1✔
86
            self.start + self.length
1✔
87
        }
88
    }
89

90
    // Iterator over the Ex exons from lowest to highest coordinate
91
    pub struct Exes<'a> {
92
        state: ExesState,
93
        curr_start: usize,
94
        rest: Iter<'a, InEx>,
95
    }
96

97
    enum ExesState {
98
        FirstExon(usize),
99
        LaterExon,
100
    }
101

102
    impl<'a> Exes<'a> {
103
        pub fn new(exon_0_length: usize, inexes: &'a [InEx]) -> Self {
1✔
104
            Exes {
105
                state: ExesState::FirstExon(exon_0_length),
1✔
106
                curr_start: 0,
107
                rest: inexes.iter(),
1✔
108
            }
109
        }
110
    }
111

112
    impl<'a> Iterator for Exes<'a> {
113
        type Item = Ex;
114

115
        fn next(&mut self) -> Option<Ex> {
1✔
116
            match self.state {
1✔
117
                ExesState::FirstExon(len) => {
1✔
118
                    let ex = Ex {
119
                        start: self.curr_start,
1✔
120
                        length: len,
121
                    };
122
                    self.curr_start += len;
2✔
123
                    self.state = ExesState::LaterExon;
1✔
124
                    Some(ex)
1✔
125
                }
126
                ExesState::LaterExon => match self.rest.next() {
1✔
127
                    Some(inex) => {
1✔
128
                        let ex = Ex {
129
                            start: self.curr_start + inex.intron_length(),
1✔
130
                            length: inex.exon_length(),
1✔
131
                        };
132
                        self.curr_start += inex.length();
2✔
133
                        Some(ex)
1✔
134
                    }
135
                    None => None,
1✔
136
                },
137
            }
138
        }
139
    }
140

141
    // Represent just the start (relative to the start of the location) and length of introns
142
    // Useful for internal coordinate math
143
    #[derive(Debug, Clone, Hash, PartialEq, Eq)]
144
    pub struct In {
145
        start: usize,
146
        length: usize,
147
    }
148

149
    #[allow(dead_code)]
150
    impl In {
151
        pub fn start(&self) -> usize {
×
152
            self.start
×
153
        }
154
        pub fn length(&self) -> usize {
×
155
            self.length
×
156
        }
157
        pub fn end(&self) -> usize {
×
158
            self.start + self.length
×
159
        }
160
    }
161

162
    // Iterator over the Ex introns from lowest to highest coordinate
163
    pub struct Ins<'a> {
164
        curr_start: usize,
165
        rest: Iter<'a, InEx>,
166
    }
167

168
    impl<'a> Ins<'a> {
169
        #[allow(dead_code)]
170
        pub fn new(exon_0_length: usize, inexes: &'a [InEx]) -> Self {
×
171
            Ins {
172
                curr_start: exon_0_length,
173
                rest: inexes.iter(),
×
174
            }
175
        }
176
    }
177

178
    impl<'a> Iterator for Ins<'a> {
179
        type Item = In;
180

181
        fn next(&mut self) -> Option<In> {
×
182
            match self.rest.next() {
×
183
                Some(inex) => {
×
184
                    let intr = In {
185
                        start: self.curr_start,
×
186
                        length: inex.intron_length(),
×
187
                    };
188
                    self.curr_start += inex.length();
×
189
                    Some(intr)
×
190
                }
191
                None => None,
×
192
            }
193
        }
194
    }
195
}
196

197
/// Spliced sequence annotation on a particular, named sequence
198
/// (e.g. a chromosome).
199
///
200
/// Parameterized over the type of the reference sequence identifier
201
/// and over the strandedness of the position.
202
///
203
/// The display format for a `Spliced` is
204
/// _chr:start_0-end_0;start_1-end_1;...;start_N-end_N(+/-/.)_. The
205
/// boundaries for each individual exon are given as a half-open
206
/// 0-based interval, like the Rust `Range` and BED format.
207
///
208
/// ```
209
/// # use bio_types::strand::ReqStrand;
210
/// # use bio_types::annot::AnnotError;
211
/// # use bio_types::annot::spliced::{Spliced,SplicingError};
212
/// # fn try_main() -> Result<(), Box<SplicingError>> {
213
/// let tad3 = Spliced::with_lengths_starts("chrXII".to_owned(), 765265,
214
///                                         &vec![808,52,109], &vec![0,864,984],
215
///                                         ReqStrand::Reverse)?;
216
/// assert_eq!(tad3.to_string(), "chrXII:765265-766073;766129-766181;766249-766358(-)");
217
/// let tad3_exons = tad3.exon_contigs();
218
/// assert_eq!(tad3_exons.len(), 3);
219
/// assert_eq!(tad3_exons[0].to_string(), "chrXII:766249-766358(-)");
220
/// assert_eq!(tad3_exons[1].to_string(), "chrXII:766129-766181(-)");
221
/// assert_eq!(tad3_exons[2].to_string(), "chrXII:765265-766073(-)");
222
/// # Ok(())
223
/// # }
224
/// # fn main() { try_main().unwrap(); }
225
/// ```
226
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
227
pub struct Spliced<R, S> {
228
    refid: R,
229
    start: isize,
230
    exon_0_length: usize,
231
    inexes: Vec<inex::InEx>,
232
    strand: S,
233
}
234

235
impl<R, S> Spliced<R, S> {
236
    /// Construct a new, single-exon "spliced" location
237
    ///
238
    /// ```
239
    /// use std::rc::Rc;
240
    /// use bio_types::annot::spliced::Spliced;
241
    /// use bio_types::strand::ReqStrand;
242
    /// let chr = Rc::new("chrX".to_owned());
243
    /// let tma22 = Spliced::new(chr, 461829, 462426 - 461829, ReqStrand::Forward);
244
    /// ```
245
    pub fn new(refid: R, start: isize, exon_0_length: usize, strand: S) -> Self {
×
246
        Spliced {
247
            refid,
248
            start,
249
            exon_0_length,
250
            inexes: Vec::new(),
×
251
            strand,
252
        }
253
    }
254

255
    /// Construct a multi-exon "spliced" location using BED-style exon
256
    /// starts and lengths.
257
    pub fn with_lengths_starts(
1✔
258
        refid: R,
259
        start: isize,
260
        exon_lengths: &[usize],
261
        exon_starts: &[usize],
262
        strand: S,
263
    ) -> Result<Self, SplicingError> {
264
        if exon_starts.is_empty() {
2✔
265
            return Err(SplicingError::NoExons);
×
266
        } else if exon_starts[0] != 0 {
2✔
267
            return Err(SplicingError::BlockStart);
×
268
        } else if exon_starts.len() != exon_lengths.len() {
1✔
269
            return Err(SplicingError::BlockMismatch);
×
270
        }
271

272
        let exon_0_length = exon_lengths[0];
2✔
273
        let mut intron_start = exon_0_length;
1✔
274
        let mut inexes = Vec::new();
1✔
275
        for exno in 1..exon_starts.len() {
4✔
276
            let exon_start = exon_starts[exno];
1✔
277
            if intron_start >= exon_start {
1✔
278
                return Err(SplicingError::BlockOverlap);
×
279
            }
280
            let intron_length = exon_start - intron_start;
2✔
281
            let exon_length = exon_lengths[exno];
2✔
282
            if exon_length == 0 {
1✔
283
                return Err(SplicingError::ExonLength);
×
284
            }
285
            inexes.push(inex::InEx::new(intron_length, exon_length)?);
1✔
286
            intron_start = exon_start + exon_length;
1✔
287
        }
288

289
        Ok(Spliced {
1✔
290
            refid,
1✔
291
            start,
×
292
            exon_0_length,
×
293
            inexes,
1✔
294
            strand,
×
295
        })
296
    }
297

298
    /// Number of exons
299
    pub fn exon_count(&self) -> usize {
×
300
        self.inexes.len() + 1
×
301
    }
302

303
    // This is the unique representation for zero-length Spliced
304
    // locations, because InEx pairs have positive lengths for both
305
    // introns and exons.
306
    #[allow(dead_code)]
307
    fn is_zero_length(&self) -> bool {
×
308
        self.exon_0_length == 0 && self.inexes.is_empty()
×
309
    }
310

311
    fn exes(&self) -> inex::Exes {
1✔
312
        inex::Exes::new(self.exon_0_length, &self.inexes)
1✔
313
    }
314

315
    /// Vector of exon starting positions, relative to the start of
316
    /// the location overall.
317
    ///
318
    /// These positions run from left to right on the reference
319
    /// sequence, regardless of the location's strand.
320
    pub fn exon_starts(&self) -> Vec<usize> {
1✔
321
        let mut starts = vec![0];
1✔
322
        let mut intron_start = self.exon_0_length;
1✔
323
        for inex in self.inexes.iter() {
4✔
324
            starts.push(intron_start + inex.intron_length());
1✔
325
            intron_start += inex.length();
1✔
326
        }
327
        starts
1✔
328
    }
329

330
    /// Vector of exon lengths.
331
    ///
332
    /// Exon lengths are given from left to right on the reference
333
    /// sequence, regardless of the location's strand.
334
    pub fn exon_lengths(&self) -> Vec<usize> {
1✔
335
        let mut lengths = vec![self.exon_0_length];
1✔
336
        for inex in self.inexes.iter() {
3✔
337
            lengths.push(inex.exon_length());
1✔
338
        }
339
        lengths
1✔
340
    }
341

342
    /// Total length of exons only.
343
    ///
344
    /// The `length` method from the `Loc` trait returns the total
345
    /// length spanned by the annotation, including both introns and
346
    /// exons.
347
    pub fn exon_total_length(&self) -> usize {
1✔
348
        self.exes().map(|e| e.length()).sum()
3✔
349
    }
350

351
    /// Convert into a stranded sequence location on the specified strand
352
    pub fn into_stranded(self, strand: ReqStrand) -> Spliced<R, ReqStrand> {
×
353
        Spliced {
354
            refid: self.refid,
×
355
            start: self.start,
×
356
            exon_0_length: self.exon_0_length,
×
357
            inexes: self.inexes,
×
358
            strand,
359
        }
360
    }
361

362
    pub fn contig_cover(self) -> Contig<R, S> {
×
363
        let length = self.length();
×
364
        Contig::new(self.refid, self.start, length, self.strand)
×
365
    }
366

367
    // Get exons in reference sequence order.
368
    fn exon_contigs_vec(&self) -> Vec<Contig<R, S>>
1✔
369
    where
370
        R: Clone,
371
        S: Copy,
372
    {
373
        let mut exons = Vec::new();
1✔
374

375
        for ex in self.exes() {
3✔
376
            exons.push(Contig::new(
3✔
377
                self.refid().clone(),
1✔
378
                self.start + ex.start() as isize,
2✔
379
                ex.length(),
1✔
380
                self.strand,
1✔
381
            ));
382
        }
383

384
        exons
1✔
385
    }
386
}
387

388
impl<R> Spliced<R, ReqStrand> {
389
    pub fn exon_contigs(&self) -> Vec<Contig<R, ReqStrand>>
1✔
390
    where
391
        R: Clone,
392
    {
393
        let mut exons = self.exon_contigs_vec();
1✔
394
        if self.strand == ReqStrand::Reverse {
2✔
395
            exons.reverse()
1✔
396
        }
397
        exons
1✔
398
    }
399
}
400

401
impl<R, S> Loc for Spliced<R, S> {
402
    type RefID = R;
403
    type Strand = S;
404
    fn refid(&self) -> &R {
1✔
405
        &self.refid
×
406
    }
407
    fn start(&self) -> isize {
×
408
        self.start
×
409
    }
410
    fn length(&self) -> usize {
×
411
        let mut len = self.exon_0_length;
×
412
        for inex in self.inexes.iter() {
×
413
            len += inex.length()
×
414
        }
415
        len
×
416
    }
417
    fn strand(&self) -> S
1✔
418
    where
419
        S: Copy,
420
    {
421
        self.strand
1✔
422
    }
423

424
    fn pos_into<T>(&self, pos: &Pos<Self::RefID, T>) -> Option<Pos<(), T>>
1✔
425
    where
426
        Self::RefID: Eq,
427
        Self::Strand: Into<ReqStrand> + Copy,
428
        T: Neg<Output = T> + Copy,
429
    {
430
        if (self.refid != *pos.refid()) || pos.pos() < self.start {
1✔
431
            return None;
1✔
432
        }
433

434
        let pos_offset = (pos.pos() - self.start) as usize;
2✔
435

436
        let mut offset_before = 0;
1✔
437
        for ex in self.exes() {
4✔
438
            if pos_offset >= ex.start() && pos_offset < ex.end() {
1✔
439
                let offset = (offset_before + pos_offset - ex.start()) as isize;
1✔
440
                let into = match self.strand().into() {
1✔
441
                    ReqStrand::Forward => Pos::new((), offset, pos.strand()),
1✔
442
                    ReqStrand::Reverse => Pos::new(
443
                        (),
444
                        self.exon_total_length() as isize - (offset + 1),
1✔
445
                        -pos.strand(),
1✔
446
                    ),
447
                };
448
                return Some(into);
1✔
449
            }
450
            offset_before += ex.length();
2✔
451
        }
452

453
        None
1✔
454
    }
455

456
    fn pos_outof<Q, T>(&self, pos: &Pos<Q, T>) -> Option<Pos<Self::RefID, T>>
1✔
457
    where
458
        Self::RefID: Clone,
459
        Self::Strand: Into<ReqStrand> + Copy,
460
        T: Neg<Output = T> + Copy,
461
    {
462
        let mut offset = match self.strand().into() {
1✔
463
            ReqStrand::Forward => pos.pos(),
1✔
464
            ReqStrand::Reverse => self.exon_total_length() as isize - (pos.pos() + 1),
1✔
465
        };
466

467
        if offset < 0 {
1✔
468
            return None;
1✔
469
        }
470

471
        for ex in self.exes() {
4✔
472
            if offset < ex.length() as isize {
1✔
473
                return Some(Pos::new(
2✔
474
                    self.refid.clone(),
1✔
475
                    self.start + ex.start() as isize + offset,
2✔
476
                    self.strand.into().on_strand(pos.strand()),
2✔
477
                ));
478
            }
479
            offset -= ex.length() as isize;
2✔
480
        }
481

482
        None
1✔
483
    }
484

485
    fn contig_intersection<T>(&self, contig: &Contig<Self::RefID, T>) -> Option<Self>
1✔
486
    where
487
        Self::RefID: PartialEq + Clone,
488
        Self::Strand: Copy,
489
    {
490
        if self.refid() != contig.refid() {
1✔
491
            return None;
×
492
        }
493

494
        let contig_rel_start = if contig.start() < self.start {
2✔
495
            0
1✔
496
        } else {
497
            (contig.start() - self.start) as usize
2✔
498
        };
499
        let contig_end = contig.start() + contig.length() as isize;
2✔
500
        let contig_rel_end = if contig_end < self.start {
2✔
501
            0
1✔
502
        } else {
503
            (contig_end - self.start) as usize
2✔
504
        };
505

506
        let mut exon_lengths = Vec::new();
1✔
507
        let mut exon_starts = Vec::new();
1✔
508

509
        for ex in self.exes() {
3✔
510
            let start = max(contig_rel_start, ex.start());
1✔
511
            let end = min(contig_rel_end, ex.end());
1✔
512

513
            if start < end {
1✔
514
                exon_starts.push(start - contig_rel_start);
1✔
515
                exon_lengths.push(end - start);
1✔
516
            }
517
        }
518

519
        if !exon_starts.is_empty() {
3✔
520
            let first_start = exon_starts[0];
1✔
521
            for start in exon_starts.iter_mut() {
3✔
522
                *start -= first_start;
1✔
523
            }
524
            let ixn = Self::with_lengths_starts(
525
                self.refid.clone(),
2✔
526
                max(self.start, contig.start()) + first_start as isize,
2✔
527
                &exon_lengths,
1✔
528
                &exon_starts,
1✔
529
                self.strand,
1✔
530
            )
531
            .unwrap_or_else(|e| {
1✔
532
                panic!(
×
533
                    "Creating intersection spliced: {:?} for {:?} {:?}",
×
534
                    e, exon_lengths, exon_starts
×
535
                )
536
            });
537

538
            Some(ixn)
1✔
539
        } else {
540
            None
1✔
541
        }
542
    }
543
}
544

545
impl<R, S> Display for Spliced<R, S>
546
where
547
    R: Display,
548
    S: Display + Clone + Into<Strand>,
549
{
550
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1✔
551
        write!(f, "{}:", self.refid)?;
1✔
552

553
        let mut sep = false;
1✔
554
        for ex in self.exes() {
4✔
555
            let ex_start = self.start + ex.start() as isize;
1✔
556
            write!(
1✔
557
                f,
×
558
                "{}{}-{}",
559
                if sep { ";" } else { "" },
2✔
560
                ex_start,
×
561
                ex_start + ex.length() as isize
1✔
562
            )?;
563
            sep = true;
1✔
564
        }
565
        let strand: Strand = self.strand.clone().into();
1✔
566
        if !strand.is_unknown() {
1✔
567
            write!(f, "({})", self.strand)?;
3✔
568
        }
569
        Ok(())
1✔
570
    }
571
}
572

573
impl<R, S> FromStr for Spliced<R, S>
574
where
575
    R: From<String>,
576
    S: FromStr<Err = StrandError>,
577
{
578
    type Err = ParseAnnotError;
579

580
    fn from_str(s: &str) -> Result<Self, Self::Err> {
1✔
581
        lazy_static! {
×
582
            static ref SPLICED_RE: Regex =
×
583
                Regex::new(r"^(.*):(\d+)-(\d+)((?:;\d+-\d+)*)(\([+-]\))?$").unwrap();
1✔
584
            static ref EXON_RE: Regex = Regex::new(r";(\d+)-(\d+)").unwrap();
1✔
585
        }
586

587
        let cap = SPLICED_RE.captures(s).ok_or(ParseAnnotError::BadAnnot)?;
1✔
588

589
        let mut starts = Vec::new();
1✔
590
        let mut lengths = Vec::new();
1✔
591

592
        let first_start = cap[2].parse::<isize>().map_err(ParseAnnotError::ParseInt)?;
2✔
593
        let first_end = cap[3].parse::<isize>().map_err(ParseAnnotError::ParseInt)?;
2✔
594
        let strand = cap[5].parse::<S>().map_err(ParseAnnotError::ParseStrand)?;
2✔
595

596
        starts.push(0);
1✔
597
        lengths.push((first_end - first_start) as usize);
1✔
598

599
        let exon_caps = EXON_RE.captures_iter(&cap[4]);
1✔
600

601
        for exon_cap in exon_caps {
1✔
602
            let next_start = exon_cap[1]
2✔
603
                .parse::<isize>()
604
                .map_err(ParseAnnotError::ParseInt)?;
×
605
            let next_end = exon_cap[2]
2✔
606
                .parse::<isize>()
607
                .map_err(ParseAnnotError::ParseInt)?;
×
608
            starts.push((next_start - first_start) as usize);
2✔
609
            lengths.push((next_end - next_start) as usize);
1✔
610
        }
611

612
        let spliced = Spliced::with_lengths_starts(
613
            R::from(cap[1].to_owned()),
2✔
614
            first_start,
×
615
            lengths.as_slice(),
1✔
616
            starts.as_slice(),
1✔
617
            strand,
1✔
618
        )
619
        .map_err(ParseAnnotError::Splicing)?;
×
620
        Ok(spliced)
1✔
621
    }
622
}
623

624
impl<R> From<Spliced<R, ReqStrand>> for Spliced<R, Strand> {
625
    fn from(x: Spliced<R, ReqStrand>) -> Self {
×
626
        Spliced {
627
            refid: x.refid,
×
628
            start: x.start,
×
629
            exon_0_length: x.exon_0_length,
×
630
            inexes: x.inexes,
×
631
            strand: match x.strand {
×
632
                ReqStrand::Forward => Strand::Forward,
633
                ReqStrand::Reverse => Strand::Reverse,
634
            },
635
        }
636
    }
637
}
638

639
impl<R> From<Spliced<R, NoStrand>> for Spliced<R, Strand> {
640
    fn from(x: Spliced<R, NoStrand>) -> Self {
×
641
        Spliced {
642
            refid: x.refid,
×
643
            start: x.start,
×
644
            exon_0_length: x.exon_0_length,
×
645
            inexes: x.inexes,
×
646
            strand: Strand::Unknown,
647
        }
648
    }
649
}
650

651
impl<R> From<Spliced<R, Strand>> for Spliced<R, NoStrand> {
652
    fn from(x: Spliced<R, Strand>) -> Self {
×
653
        Spliced {
654
            refid: x.refid,
×
655
            start: x.start,
×
656
            exon_0_length: x.exon_0_length,
×
657
            inexes: x.inexes,
×
658
            strand: NoStrand::Unknown,
659
        }
660
    }
661
}
662

663
impl<R> From<Spliced<R, ReqStrand>> for Spliced<R, NoStrand> {
664
    fn from(x: Spliced<R, ReqStrand>) -> Self {
×
665
        Spliced {
666
            refid: x.refid,
×
667
            start: x.start,
×
668
            exon_0_length: x.exon_0_length,
×
669
            inexes: x.inexes,
×
670
            strand: NoStrand::Unknown,
671
        }
672
    }
673
}
674

675
/// Default stranded sequence position on a reference sequence named
676
/// by a `String`.
677
pub type SeqSplicedStranded = Spliced<String, ReqStrand>;
678

679
/// Default unstranded sequence position on a reference sequence named
680
/// by a `String`
681
pub type SeqSplicedUnstranded = Spliced<String, NoStrand>;
682

683
#[derive(Error, Debug)]
684
pub enum SplicingError {
685
    #[error("Invalid (non-positive) intron length")]
686
    IntronLength,
687
    #[error("Invalid (non-positive) exon length")]
688
    ExonLength,
689
    #[error("No exons")]
690
    NoExons,
691
    #[error("Exons do not start at position 0")]
692
    BlockStart,
693
    #[error("Number of exon starts != number of exon lengths")]
694
    BlockMismatch,
695
    #[error("Exon blocks overlap")]
696
    BlockOverlap,
697
}
698

699
#[cfg(test)]
700
mod tests {
701
    use super::*;
702

703
    #[test]
704
    fn length_start_to_contig() {
705
        //chrV    166236  166885  YER007C-A       0       -       166236  166885  0       2       535,11, 0,638,
706
        let tma20 = Spliced::with_lengths_starts(
707
            "chrV".to_owned(),
708
            166236,
709
            &vec![535, 11],
710
            &vec![0, 638],
711
            ReqStrand::Reverse,
712
        )
713
        .unwrap();
714
        assert_eq!(tma20.exon_starts(), vec![0, 638]);
715
        assert_eq!(tma20.exon_lengths(), vec![535, 11]);
716
        assert_eq!(tma20.to_string(), "chrV:166236-166771;166874-166885(-)");
717
        assert_eq!(
718
            tma20,
719
            tma20
720
                .to_string()
721
                .parse::<Spliced<String, ReqStrand>>()
722
                .unwrap()
723
        );
724
        let tma20_exons = tma20.exon_contigs();
725
        assert_eq!(tma20_exons.len(), 2);
726
        assert_eq!(tma20_exons[0].to_string(), "chrV:166874-166885(-)");
727
        assert_eq!(tma20_exons[1].to_string(), "chrV:166236-166771(-)");
728

729
        //chrXVI  173151  174702  YPL198W 0       +       173151  174702  0       3       11,94,630,      0,420,921,
730
        let rpl7b = Spliced::with_lengths_starts(
731
            "chrXVI".to_owned(),
732
            173151,
733
            &vec![11, 94, 630],
734
            &vec![0, 420, 921],
735
            ReqStrand::Forward,
736
        )
737
        .unwrap();
738
        assert_eq!(
739
            rpl7b.to_string(),
740
            "chrXVI:173151-173162;173571-173665;174072-174702(+)"
741
        );
742
        assert_eq!(
743
            rpl7b,
744
            rpl7b
745
                .to_string()
746
                .parse::<Spliced<String, ReqStrand>>()
747
                .unwrap()
748
        );
749
        let rpl7b_exons = rpl7b.exon_contigs();
750
        assert_eq!(rpl7b_exons.len(), 3);
751
        assert_eq!(rpl7b_exons[0].to_string(), "chrXVI:173151-173162(+)");
752
        assert_eq!(rpl7b_exons[1].to_string(), "chrXVI:173571-173665(+)");
753
        assert_eq!(rpl7b_exons[2].to_string(), "chrXVI:174072-174702(+)");
754

755
        //chrXII  765265  766358  YLR316C 0       -       765265  766358  0       3       808,52,109,     0,864,984,
756
        let tad3 = Spliced::with_lengths_starts(
757
            "chrXII".to_owned(),
758
            765265,
759
            &vec![808, 52, 109],
760
            &vec![0, 864, 984],
761
            ReqStrand::Reverse,
762
        )
763
        .unwrap();
764
        assert_eq!(
765
            tad3.to_string(),
766
            "chrXII:765265-766073;766129-766181;766249-766358(-)"
767
        );
768
        assert_eq!(
769
            tad3,
770
            tad3.to_string()
771
                .parse::<Spliced<String, ReqStrand>>()
772
                .unwrap()
773
        );
774
        let tad3_exons = tad3.exon_contigs();
775
        assert_eq!(tad3_exons.len(), 3);
776
        assert_eq!(tad3_exons[0].to_string(), "chrXII:766249-766358(-)");
777
        assert_eq!(tad3_exons[1].to_string(), "chrXII:766129-766181(-)");
778
        assert_eq!(tad3_exons[2].to_string(), "chrXII:765265-766073(-)");
779
    }
780

781
    fn test_into_outof(
782
        loc: &Spliced<String, ReqStrand>,
783
        outstr: &str,
784
        in_offset: isize,
785
        in_strand: ReqStrand,
786
    ) -> () {
787
        let p0 = outstr.parse::<Pos<String, ReqStrand>>().unwrap();
788
        let p0_into_expected = Pos::new((), in_offset, in_strand);
789
        let p0_into_actual = loc.pos_into(&p0);
790
        let p0_back_out_actual = loc.pos_outof(&p0_into_expected);
791
        println!(
792
            "{}\t{}\t{:?}\t{:?}\t{:?}",
793
            outstr, p0, p0_into_expected, p0_into_actual, p0_back_out_actual
794
        );
795
        assert!(Some(p0_into_expected).same(&p0_into_actual));
796
        assert!(Some(p0).same(&p0_back_out_actual));
797
    }
798

799
    fn test_no_into(loc: &Spliced<String, ReqStrand>, outstr: &str) -> () {
800
        let p0 = outstr.parse::<Pos<String, ReqStrand>>().unwrap();
801
        assert!(None.same(&loc.pos_into(&p0)));
802
    }
803

804
    #[test]
805
    fn into_outof() {
806
        //chrXVI  173151  174702  YPL198W 0       +       173151  174702  0       3       11,94,630,      0,420,921,
807
        let rpl7b = Spliced::with_lengths_starts(
808
            "chrXVI".to_owned(),
809
            173151,
810
            &vec![11, 94, 630],
811
            &vec![0, 420, 921],
812
            ReqStrand::Forward,
813
        )
814
        .unwrap();
815
        let p0_into = Pos::new((), -1, ReqStrand::Forward);
816
        assert!(None.same(&rpl7b.pos_outof(&p0_into)));
817

818
        test_no_into(&rpl7b, "chrXVI:173150(+)");
819
        test_into_outof(&rpl7b, "chrXVI:173151(+)", 0, ReqStrand::Forward);
820
        test_into_outof(&rpl7b, "chrXVI:173152(-)", 1, ReqStrand::Reverse);
821
        test_into_outof(&rpl7b, "chrXVI:173161(+)", 10, ReqStrand::Forward);
822
        test_no_into(&rpl7b, "chrXVI:173162(+)");
823
        test_no_into(&rpl7b, "chrXVI:173570(+)");
824
        test_into_outof(&rpl7b, "chrXVI:173571(+)", 11, ReqStrand::Forward);
825
        test_into_outof(&rpl7b, "chrXVI:173664(+)", 104, ReqStrand::Forward);
826
        test_no_into(&rpl7b, "chrXVI:173665(+)");
827
        test_no_into(&rpl7b, "chrXVI:174071(+)");
828
        test_into_outof(&rpl7b, "chrXVI:174072(+)", 105, ReqStrand::Forward);
829
        test_into_outof(&rpl7b, "chrXVI:174701(+)", 734, ReqStrand::Forward);
830
        test_no_into(&rpl7b, "chrXVI:174702(+)");
831

832
        let p0_into = Pos::new((), 735, ReqStrand::Forward);
833
        assert!(None.same(&rpl7b.pos_outof(&p0_into)));
834

835
        //chrXII  765265  766358  YLR316C 0       -       765265  766358  0       3       808,52,109,     0,864,984,
836
        let tad3 = Spliced::with_lengths_starts(
837
            "chrXII".to_owned(),
838
            765265,
839
            &vec![808, 52, 109],
840
            &vec![0, 864, 984],
841
            ReqStrand::Reverse,
842
        )
843
        .unwrap();
844

845
        let p0_into = Pos::new((), -1, ReqStrand::Forward);
846
        assert!(None.same(&tad3.pos_outof(&p0_into)));
847

848
        test_no_into(&tad3, "chrXII:765264(-)");
849
        test_into_outof(&tad3, "chrXII:765265(-)", 968, ReqStrand::Forward);
850
        test_into_outof(&tad3, "chrXII:765266(+)", 967, ReqStrand::Reverse);
851
        test_into_outof(&tad3, "chrXII:766072(-)", 161, ReqStrand::Forward);
852
        test_no_into(&tad3, "chrXII:766073(-)");
853

854
        test_no_into(&tad3, "chrXII:766128(-)");
855
        test_into_outof(&tad3, "chrXII:766129(-)", 160, ReqStrand::Forward);
856
        test_into_outof(&tad3, "chrXII:766180(-)", 109, ReqStrand::Forward);
857
        test_no_into(&tad3, "chrXII:766181(-)");
858

859
        test_no_into(&tad3, "chrXII:766248(-)");
860
        test_into_outof(&tad3, "chrXII:766249(-)", 108, ReqStrand::Forward);
861
        test_into_outof(&tad3, "chrXII:766357(-)", 0, ReqStrand::Forward);
862
        test_no_into(&tad3, "chrXII:766358(-)");
863

864
        let p0_into = Pos::new((), 969, ReqStrand::Forward);
865
        assert!(None.same(&tad3.pos_outof(&p0_into)));
866
    }
867

868
    fn test_contig_ixn(
869
        spl: &Spliced<String, ReqStrand>,
870
        cb_str: &str,
871
        cab_str: Option<String>,
872
    ) -> () {
873
        let cb = cb_str.parse::<Contig<String, ReqStrand>>().unwrap();
874
        match spl.contig_intersection(&cb) {
875
            None => assert_eq!(None, cab_str),
876
            Some(cab) => assert_eq!(Some(cab.to_string()), cab_str),
877
        };
878
    }
879

880
    #[test]
881
    fn intersection() {
882
        //chrXVI  173151  174702  YPL198W 0       +       173151  174702  0       3       11,94,630,      0,420,921,
883
        let rpl7b = Spliced::with_lengths_starts(
884
            "chrXVI".to_owned(),
885
            173151,
886
            &vec![11, 94, 630],
887
            &vec![0, 420, 921],
888
            ReqStrand::Forward,
889
        )
890
        .unwrap();
891

892
        test_contig_ixn(
893
            &rpl7b,
894
            "chrXVI:173000-175000(+)",
895
            Some("chrXVI:173151-173162;173571-173665;174072-174702(+)".to_owned()),
896
        );
897

898
        test_contig_ixn(
899
            &rpl7b,
900
            "chrXVI:173150-175000(+)",
901
            Some("chrXVI:173151-173162;173571-173665;174072-174702(+)".to_owned()),
902
        );
903
        test_contig_ixn(
904
            &rpl7b,
905
            "chrXVI:173151-175000(+)",
906
            Some("chrXVI:173151-173162;173571-173665;174072-174702(+)".to_owned()),
907
        );
908
        test_contig_ixn(
909
            &rpl7b,
910
            "chrXVI:173152-175000(+)",
911
            Some("chrXVI:173152-173162;173571-173665;174072-174702(+)".to_owned()),
912
        );
913
        test_contig_ixn(
914
            &rpl7b,
915
            "chrXVI:173155-175000(+)",
916
            Some("chrXVI:173155-173162;173571-173665;174072-174702(+)".to_owned()),
917
        );
918
        test_contig_ixn(
919
            &rpl7b,
920
            "chrXVI:173161-175000(+)",
921
            Some("chrXVI:173161-173162;173571-173665;174072-174702(+)".to_owned()),
922
        );
923
        test_contig_ixn(
924
            &rpl7b,
925
            "chrXVI:173162-175000(+)",
926
            Some("chrXVI:173571-173665;174072-174702(+)".to_owned()),
927
        );
928
        test_contig_ixn(
929
            &rpl7b,
930
            "chrXVI:173500-175000(+)",
931
            Some("chrXVI:173571-173665;174072-174702(+)".to_owned()),
932
        );
933
        test_contig_ixn(
934
            &rpl7b,
935
            "chrXVI:173570-175000(+)",
936
            Some("chrXVI:173571-173665;174072-174702(+)".to_owned()),
937
        );
938
        test_contig_ixn(
939
            &rpl7b,
940
            "chrXVI:173571-175000(+)",
941
            Some("chrXVI:173571-173665;174072-174702(+)".to_owned()),
942
        );
943
        test_contig_ixn(
944
            &rpl7b,
945
            "chrXVI:173572-175000(+)",
946
            Some("chrXVI:173572-173665;174072-174702(+)".to_owned()),
947
        );
948
        test_contig_ixn(
949
            &rpl7b,
950
            "chrXVI:173600-175000(+)",
951
            Some("chrXVI:173600-173665;174072-174702(+)".to_owned()),
952
        );
953
        test_contig_ixn(
954
            &rpl7b,
955
            "chrXVI:173664-175000(+)",
956
            Some("chrXVI:173664-173665;174072-174702(+)".to_owned()),
957
        );
958
        test_contig_ixn(
959
            &rpl7b,
960
            "chrXVI:173665-175000(+)",
961
            Some("chrXVI:174072-174702(+)".to_owned()),
962
        );
963
        test_contig_ixn(
964
            &rpl7b,
965
            "chrXVI:174100-175000(+)",
966
            Some("chrXVI:174100-174702(+)".to_owned()),
967
        );
968
        test_contig_ixn(&rpl7b, "chrXVI:174800-175000(+)", None);
969

970
        test_contig_ixn(
971
            &rpl7b,
972
            "chrXVI:173150-174703(+)",
973
            Some("chrXVI:173151-173162;173571-173665;174072-174702(+)".to_owned()),
974
        );
975
        test_contig_ixn(
976
            &rpl7b,
977
            "chrXVI:173150-174702(+)",
978
            Some("chrXVI:173151-173162;173571-173665;174072-174702(+)".to_owned()),
979
        );
980
        test_contig_ixn(
981
            &rpl7b,
982
            "chrXVI:173150-174701(+)",
983
            Some("chrXVI:173151-173162;173571-173665;174072-174701(+)".to_owned()),
984
        );
985
        test_contig_ixn(
986
            &rpl7b,
987
            "chrXVI:173000-174500(+)",
988
            Some("chrXVI:173151-173162;173571-173665;174072-174500(+)".to_owned()),
989
        );
990
        test_contig_ixn(
991
            &rpl7b,
992
            "chrXVI:173000-174072(+)",
993
            Some("chrXVI:173151-173162;173571-173665(+)".to_owned()),
994
        );
995
        test_contig_ixn(
996
            &rpl7b,
997
            "chrXVI:173000-173800(+)",
998
            Some("chrXVI:173151-173162;173571-173665(+)".to_owned()),
999
        );
1000
        test_contig_ixn(
1001
            &rpl7b,
1002
            "chrXVI:173000-173666(+)",
1003
            Some("chrXVI:173151-173162;173571-173665(+)".to_owned()),
1004
        );
1005
        test_contig_ixn(
1006
            &rpl7b,
1007
            "chrXVI:173000-173665(+)",
1008
            Some("chrXVI:173151-173162;173571-173665(+)".to_owned()),
1009
        );
1010
        test_contig_ixn(
1011
            &rpl7b,
1012
            "chrXVI:173000-173664(+)",
1013
            Some("chrXVI:173151-173162;173571-173664(+)".to_owned()),
1014
        );
1015
        test_contig_ixn(
1016
            &rpl7b,
1017
            "chrXVI:173000-173600(+)",
1018
            Some("chrXVI:173151-173162;173571-173600(+)".to_owned()),
1019
        );
1020
        test_contig_ixn(
1021
            &rpl7b,
1022
            "chrXVI:173000-173571(+)",
1023
            Some("chrXVI:173151-173162(+)".to_owned()),
1024
        );
1025
        test_contig_ixn(
1026
            &rpl7b,
1027
            "chrXVI:173000-173300(+)",
1028
            Some("chrXVI:173151-173162(+)".to_owned()),
1029
        );
1030
        test_contig_ixn(
1031
            &rpl7b,
1032
            "chrXVI:173000-173155(+)",
1033
            Some("chrXVI:173151-173155(+)".to_owned()),
1034
        );
1035
        test_contig_ixn(&rpl7b, "chrXVI:173000-173100(+)", None);
1036

1037
        test_contig_ixn(
1038
            &rpl7b,
1039
            "chrXVI:173155-174500(+)",
1040
            Some("chrXVI:173155-173162;173571-173665;174072-174500(+)".to_owned()),
1041
        );
1042
        test_contig_ixn(
1043
            &rpl7b,
1044
            "chrXVI:173600-174500(+)",
1045
            Some("chrXVI:173600-173665;174072-174500(+)".to_owned()),
1046
        );
1047
        test_contig_ixn(
1048
            &rpl7b,
1049
            "chrXVI:173155-173600(+)",
1050
            Some("chrXVI:173155-173162;173571-173600(+)".to_owned()),
1051
        );
1052
        test_contig_ixn(
1053
            &rpl7b,
1054
            "chrXVI:173590-173610(+)",
1055
            Some("chrXVI:173590-173610(+)".to_owned()),
1056
        );
1057

1058
        test_contig_ixn(
1059
            &rpl7b,
1060
            "chrXVI:173155-173160(+)",
1061
            Some("chrXVI:173155-173160(+)".to_owned()),
1062
        );
1063
        test_contig_ixn(
1064
            &rpl7b,
1065
            "chrXVI:174400-174500(+)",
1066
            Some("chrXVI:174400-174500(+)".to_owned()),
1067
        );
1068

1069
        test_contig_ixn(&rpl7b, "chrXVI:173200-173300(+)", None);
1070
        test_contig_ixn(&rpl7b, "chrXVI:173800-174000(+)", None);
1071
    }
1072
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc