• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

rust-bio / rust-bio-types / 5550351717

pending completion
5550351717

Pull #63

github

web-flow
Merge be9eac796 into 907b3d9da
Pull Request #63: fix: pretty print with suffix xclip or yclip

2 of 2 new or added lines in 1 file covered. (100.0%)

474 of 746 relevant lines covered (63.54%)

0.91 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

55.75
/src/annot/contig.rs
1
// Copyright 2017 Nicholas Ingolia
2
// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3
// This file may not be copied, modified, or distributed
4
// except according to those terms.
5

6
//! Contiguous region on a named sequence, e.g., chromosome XI
7
//! 334,915-334,412.
8

9
use std::cmp::{max, min};
10
use std::convert::Into;
11
use std::fmt::{self, Display, Formatter};
12
use std::ops::Neg;
13
use std::str::FromStr;
14

15
use regex::Regex;
16

17
use crate::annot::loc::Loc;
18
use crate::annot::pos::Pos;
19
use crate::annot::*;
20
use crate::strand::*;
21

22
/// Contiguous sequence region on a particular, named sequence (e.g. a
23
/// chromosome)
24
///
25
/// Parameterized over the type of the reference sequence identifier
26
/// and over the strandedness of the position.
27
///
28
/// The display format for a `Contig` is _chr:start-end(+/-/.)_. The
29
/// boundaries are given as a half-open 0-based interval, like the
30
/// Rust `Range` and BED format.
31
///
32
/// ```
33
/// # use bio_types::annot::ParseAnnotError;
34
/// # fn try_main() -> Result<(), Box<ParseAnnotError>> {
35
/// use bio_types::annot::contig::Contig;
36
/// use bio_types::strand::ReqStrand;
37
/// let tma19 = Contig::new("chrXI".to_owned(), 334412, (334916 - 334412), ReqStrand::Reverse);
38
/// let tma19_str = tma19.to_string();
39
/// assert_eq!(tma19_str, "chrXI:334412-334916(-)");
40
/// let tma19_str_loc = tma19_str.parse()?;
41
/// assert_eq!(tma19, tma19_str_loc);
42
/// # Ok(())
43
/// # }
44
/// # fn main() { try_main().unwrap(); }
45
/// ```
46
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
47
pub struct Contig<R, S> {
48
    /// Identifier of the reference sequence the region lies on.
    refid: R,
49
    /// Lowest coordinate covered by the region (0-based, inclusive).
    start: isize,
50
    /// Number of positions covered; the exclusive end is `start + length`.
    length: usize,
51
    /// Strand annotation of the region.
    strand: S,
52
}
53

54
impl<R, S> Contig<R, S> {
55
    /// Construct a new sequence contig location
56
    ///
57
    /// ```
58
    /// use std::rc::Rc;
59
    /// use bio_types::annot::contig::Contig;
60
    /// use bio_types::strand::ReqStrand;
61
    /// let chr = Rc::new("chrX".to_owned());
62
    /// let tma22 = Contig::new(chr, 461829, 462426 - 461829, ReqStrand::Forward);
63
    /// ```
64
    pub fn new(refid: R, start: isize, length: usize, strand: S) -> Self {
1✔
65
        Contig {
66
            refid,
67
            start,
68
            length,
69
            strand,
70
        }
71
    }
72

73
    /// Construct a new sequence contig location from a starting
74
    /// position and length.
75
    ///
76
    /// In general, the starting position must have a "strandedness",
77
    /// and reverse-strand starting positions will extend towards
78
    /// lower coordinates from the starting position.
79
    ///
80
    ///
81
    ///
82
    /// ```
83
    /// # use bio_types::annot::AnnotError;
84
    /// # fn try_main() -> Result<(), Box<AnnotError>> {
85
    /// use bio_types::annot::contig::Contig;
86
    /// use bio_types::annot::pos::Pos;
87
    /// use bio_types::strand::ReqStrand;
88
    ///
89
    /// let tma22_first = Pos::new("chrX".to_string(), 461829, ReqStrand::Forward);
90
    /// let tma22 = Contig::with_first_length(&tma22_first, 462426 - 461829)?;
91
    /// assert_eq!(tma22.to_string(), "chrX:461829-462426(+)");
92
    ///
93
    /// let tma19_first = Pos::new("chrXI".to_string(), 335015, ReqStrand::Reverse);
94
    /// let tma19 = Contig::with_first_length(&tma19_first, 335016 - 334412)?;
95
    /// assert_eq!(tma19.to_string(), "chrXI:334412-335016(-)");
96
    /// # Ok(())
97
    /// # }
98
    /// # fn main() { try_main().unwrap(); }
99
    /// ```
100
    pub fn with_first_length(pos: &Pos<R, S>, length: usize) -> Result<Self, AnnotError>
1✔
101
    where
102
        R: Clone,
103
        S: Into<Option<ReqStrand>> + Copy,
104
    {
105
        if length < 2 {
2✔
106
            Ok(Contig {
×
107
                refid: pos.refid().clone(),
×
108
                start: pos.start(),
×
109
                length,
×
110
                strand: pos.strand(),
×
111
            })
112
        } else {
113
            let start = match pos.strand().into() {
3✔
114
                None => Err(AnnotError::NoStrand),
×
115
                Some(ReqStrand::Forward) => Ok(pos.start()),
1✔
116
                Some(ReqStrand::Reverse) => Ok(1 + pos.start() - length as isize),
1✔
117
            }?;
118

119
            Ok(Contig {
1✔
120
                refid: pos.refid().clone(),
1✔
121
                start,
×
122
                length,
×
123
                strand: pos.strand(),
1✔
124
            })
125
        }
126
    }
127

128
    /// Convert into a stranded sequence location on the specified strand
129
    pub fn into_stranded(self, strand: ReqStrand) -> Contig<R, ReqStrand> {
×
130
        Contig {
131
            refid: self.refid,
×
132
            start: self.start,
×
133
            length: self.length,
×
134
            strand,
135
        }
136
    }
137
}
138

139
impl<R> Contig<R, ReqStrand> {
140
    /// Extend the annotation by `dist` in the upstream direction on the
141
    /// annotated strand.
142
    ///
143
    /// # Arguments
144
    ///
145
    /// * `dist` specifies the offset for sliding the position. The
146
    /// left, 5'-most end of the contig will expand for forward-strand
147
    /// annotations and the right, 3'-most end will expand for
148
    /// reverse-strand annotations.
149
    ///
150
    /// ```
151
    /// use bio_types::annot::contig::Contig;
152
    /// use bio_types::strand::ReqStrand;
153
    /// let mut tma22 = Contig::new("chrX".to_owned(), 461829, 462426 - 461829, ReqStrand::Forward);
154
    /// tma22.extend_upstream(100);
155
    /// assert_eq!(tma22.to_string(), "chrX:461729-462426(+)");
156
    /// let mut tma19 = Contig::new("chrXI".to_owned(), 334412, 334916 - 334412, ReqStrand::Reverse);
157
    /// tma19.extend_upstream(100);
158
    /// assert_eq!(tma19.to_string(), "chrXI:334412-335016(-)");
159
    /// ```
160
    pub fn extend_upstream(&mut self, dist: usize) {
×
161
        self.length += dist;
×
162
        if self.strand == ReqStrand::Forward {
×
163
            self.start -= dist as isize;
×
164
        }
165
    }
166

167
    /// Extend the annotation by `dist` in the downstream direction on the
168
    /// annotated strand.
169
    ///
170
    /// # Arguments
171
    ///
172
    /// * `dist` specifies the offset for sliding the position. The
173
    /// right, 3'-most end of the contig will expand for
174
    /// forward-strand annotations and the left, 5'-most end will
175
    /// expand for reverse-strand annotations.
176
    ///
177
    /// ```
178
    /// use bio_types::annot::contig::Contig;
179
    /// use bio_types::strand::ReqStrand;
180
    /// let mut tma22 = Contig::new("chrX".to_owned(), 461829, 462426 - 461829, ReqStrand::Forward);
181
    /// tma22.extend_downstream(100);
182
    /// assert_eq!(tma22.to_string(), "chrX:461829-462526(+)");
183
    /// let mut tma19 = Contig::new("chrXI".to_owned(), 334412, 334916 - 334412, ReqStrand::Reverse);
184
    /// tma19.extend_downstream(100);
185
    /// assert_eq!(tma19.to_string(), "chrXI:334312-334916(-)");
186
    /// ```
187
    pub fn extend_downstream(&mut self, dist: usize) {
×
188
        self.length += dist;
×
189
        if self.strand == ReqStrand::Reverse {
×
190
            self.start -= dist as isize;
×
191
        }
192
    }
193
}
194

195
impl<R, S> Loc for Contig<R, S> {
196
    type RefID = R;
197
    type Strand = S;
198
    fn refid(&self) -> &R {
1✔
199
        &self.refid
×
200
    }
201
    fn start(&self) -> isize {
1✔
202
        self.start
1✔
203
    }
204
    fn length(&self) -> usize {
1✔
205
        self.length
1✔
206
    }
207
    fn strand(&self) -> S
1✔
208
    where
209
        S: Copy,
210
    {
211
        self.strand
1✔
212
    }
213

214
    fn pos_into<T>(&self, pos: &Pos<Self::RefID, T>) -> Option<Pos<(), T>>
1✔
215
    where
216
        Self::RefID: Eq,
217
        Self::Strand: Into<ReqStrand> + Copy,
218
        T: Neg<Output = T> + Copy,
219
    {
220
        if self.refid != *pos.refid() {
1✔
221
            None
1✔
222
        } else {
223
            let offset = pos.pos() - self.start;
2✔
224
            if offset < 0 || offset >= self.length as isize {
4✔
225
                None
1✔
226
            } else {
227
                Some(match self.strand().into() {
2✔
228
                    ReqStrand::Forward => Pos::new((), offset, pos.strand()),
1✔
229
                    ReqStrand::Reverse => {
×
230
                        Pos::new((), self.length as isize - (offset + 1), -pos.strand())
×
231
                    }
232
                })
233
            }
234
        }
235
    }
236

237
    fn pos_outof<Q, T>(&self, pos: &Pos<Q, T>) -> Option<Pos<Self::RefID, T>>
1✔
238
    where
239
        Self::RefID: Clone,
240
        Self::Strand: Into<ReqStrand> + Copy,
241
        T: Neg<Output = T> + Copy,
242
    {
243
        let offset = match self.strand().into() {
1✔
244
            ReqStrand::Forward => pos.pos(),
1✔
245
            ReqStrand::Reverse => self.length as isize - (pos.pos() + 1),
×
246
        };
247

248
        if offset >= 0 && offset < self.length as isize {
2✔
249
            Some(Pos::new(
2✔
250
                self.refid.clone(),
1✔
251
                self.start + offset,
1✔
252
                self.strand().into().on_strand(pos.strand()),
2✔
253
            ))
254
        } else {
255
            None
×
256
        }
257
    }
258

259
    fn contig_intersection<T>(&self, contig: &Contig<Self::RefID, T>) -> Option<Self>
1✔
260
    where
261
        Self::RefID: PartialEq + Clone,
262
        Self::Strand: Copy,
263
    {
264
        if self.refid() != contig.refid() {
1✔
265
            return None;
1✔
266
        }
267

268
        let start = max(self.start, contig.start);
1✔
269
        let end = min(
270
            self.start + self.length as isize,
1✔
271
            contig.start + contig.length as isize,
1✔
272
        );
273

274
        if start <= end {
3✔
275
            Some(Self::new(
2✔
276
                self.refid.clone(),
1✔
277
                start,
×
278
                (end - start) as usize,
1✔
279
                self.strand,
1✔
280
            ))
281
        } else {
282
            None
1✔
283
        }
284
    }
285
}
286

287
impl<R, S> Display for Contig<R, S>
288
where
289
    R: Display,
290
    S: Display + Clone + Into<Strand>,
291
{
292
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1✔
293
        write!(
2✔
294
            f,
×
295
            "{}:{}-{}",
296
            self.refid,
×
297
            self.start,
×
298
            self.start + self.length as isize
1✔
299
        )?;
300
        let strand: Strand = self.strand.clone().into();
1✔
301
        if !strand.is_unknown() {
1✔
302
            write!(f, "({})", strand)?;
2✔
303
        }
304
        Ok(())
1✔
305
    }
306
}
307

308
impl<R, S> FromStr for Contig<R, S>
309
where
310
    R: From<String>,
311
    S: FromStr<Err = StrandError>,
312
{
313
    type Err = ParseAnnotError;
314

315
    fn from_str(s: &str) -> Result<Self, Self::Err> {
1✔
316
        lazy_static! {
×
317
            static ref CONTIG_RE: Regex = Regex::new(r"^(.*):(\d+)-(\d+)(\([+-]\))?$").unwrap();
1✔
318
        }
319

320
        let cap = CONTIG_RE.captures(s).ok_or(ParseAnnotError::BadAnnot)?;
1✔
321

322
        let start = cap[2].parse::<isize>().map_err(ParseAnnotError::ParseInt)?;
2✔
323
        let end = cap[3].parse::<isize>().map_err(ParseAnnotError::ParseInt)?;
2✔
324
        let strand = cap
2✔
325
            .get(4)
326
            .map_or("", |m| m.as_str())
2✔
327
            .parse::<S>()
328
            .map_err(ParseAnnotError::ParseStrand)?;
×
329

330
        if start <= end {
2✔
331
            Ok(Contig::new(
2✔
332
                R::from(cap[1].to_owned()),
3✔
333
                start,
×
334
                (end - start) as usize,
1✔
335
                strand,
1✔
336
            ))
337
        } else {
338
            Err(ParseAnnotError::EndBeforeStart)
×
339
        }
340
    }
341
}
342

343
impl<R> From<Contig<R, ReqStrand>> for Contig<R, Strand> {
344
    fn from(x: Contig<R, ReqStrand>) -> Self {
×
345
        Contig {
346
            refid: x.refid,
×
347
            start: x.start,
×
348
            length: x.length,
×
349
            strand: match x.strand {
×
350
                ReqStrand::Forward => Strand::Forward,
351
                ReqStrand::Reverse => Strand::Reverse,
352
            },
353
        }
354
    }
355
}
356

357
impl<R> From<Contig<R, NoStrand>> for Contig<R, Strand> {
358
    fn from(x: Contig<R, NoStrand>) -> Self {
×
359
        Contig {
360
            refid: x.refid,
×
361
            start: x.start,
×
362
            length: x.length,
×
363
            strand: Strand::Unknown,
364
        }
365
    }
366
}
367

368
impl<R> From<Contig<R, Strand>> for Contig<R, NoStrand> {
369
    fn from(x: Contig<R, Strand>) -> Self {
×
370
        Contig {
371
            refid: x.refid,
×
372
            start: x.start,
×
373
            length: x.length,
×
374
            strand: NoStrand::Unknown,
375
        }
376
    }
377
}
378

379
impl<R> From<Contig<R, ReqStrand>> for Contig<R, NoStrand> {
380
    fn from(x: Contig<R, ReqStrand>) -> Self {
×
381
        Contig {
382
            refid: x.refid,
×
383
            start: x.start,
×
384
            length: x.length,
×
385
            strand: NoStrand::Unknown,
386
        }
387
    }
388
}
389

390
/// Default stranded contiguous sequence region on a reference sequence
391
/// named by a `String`.
392
pub type SeqContigStranded = Contig<String, ReqStrand>;
393

394
/// Default unstranded contiguous sequence region on a reference sequence
395
/// named by a `String`.
396
pub type SeqContigUnstranded = Contig<String, NoStrand>;
397

398
#[cfg(test)]
mod tests {
    use super::*;

    /// First and last positions of forward- and reverse-strand contigs,
    /// and construction from a first position plus a length.
    #[test]
    fn first_and_last() {
        let tma22 = "chrX:461829-462426(+)"
            .parse::<SeqContigStranded>()
            .unwrap();
        let first = tma22.first_pos();
        assert_eq!(first.to_string(), "chrX:461829(+)");
        let last = tma22.last_pos();
        assert_eq!(last.to_string(), "chrX:462425(+)");

        let tma19 = "chrXI:334412-334916(-)"
            .parse::<SeqContigStranded>()
            .unwrap();
        let first = tma19.first_pos();
        assert_eq!(first.to_string(), "chrXI:334915(-)");
        let last = tma19.last_pos();
        assert_eq!(last.to_string(), "chrXI:334412(-)");

        let tma22_first = Pos::new("chrX".to_string(), 461829, ReqStrand::Forward);
        let tma22 = Contig::with_first_length(&tma22_first, 462426 - 461829).unwrap();
        assert_eq!(tma22.to_string(), "chrX:461829-462426(+)");

        let tma19_first = Pos::new("chrXI".to_string(), 335015, ReqStrand::Reverse);
        let tma19 = Contig::with_first_length(&tma19_first, 335016 - 334412).unwrap();
        assert_eq!(tma19.to_string(), "chrXI:334412-335016(-)");
    }

    /// Round-trips positions through `pos_into` / `pos_outof` and checks
    /// that positions outside the contig map to `None`.
    #[test]
    fn into_outof() {
        let tma22 = "chrX:461829-462426(+)"
            .parse::<SeqContigStranded>()
            .unwrap();

        // First position of the contig.
        let p0 = "chrX:461829(+)".parse::<Pos<String, ReqStrand>>().unwrap();
        let p0_into = tma22.pos_into(&p0);
        assert!(Some(Pos::new((), 0, ReqStrand::Forward)).same(&p0_into));
        let p0_outof = tma22.pos_outof(&p0_into.unwrap());
        assert!(Some(p0).same(&p0_outof));

        // Interior position on the opposite strand.
        let p0 = "chrX:461839(-)".parse::<Pos<String, ReqStrand>>().unwrap();
        let p0_into = tma22.pos_into(&p0);
        assert!(Some(Pos::new((), 10, ReqStrand::Reverse)).same(&p0_into));
        let p0_outof = tma22.pos_outof(&p0_into.unwrap());
        assert!(Some(p0).same(&p0_outof));

        // Last position of the contig.
        let p0 = "chrX:462425(+)".parse::<Pos<String, ReqStrand>>().unwrap();
        let p0_into = tma22.pos_into(&p0);
        assert!(Some(Pos::new((), 596, ReqStrand::Forward)).same(&p0_into));
        let p0_outof = tma22.pos_outof(&p0_into.unwrap());
        assert!(Some(p0).same(&p0_outof));

        // One position before the start.
        let p0 = "chrX:461828(+)".parse::<Pos<String, ReqStrand>>().unwrap();
        let p0_into = tma22.pos_into(&p0);
        assert!(None.same(&p0_into));

        // Matching coordinate on a different reference sequence.
        let p0 = "chrV:461829(+)".parse::<Pos<String, ReqStrand>>().unwrap();
        let p0_into = tma22.pos_into(&p0);
        assert!(None.same(&p0_into));

        // One position past the last one, i.e. the exclusive end, on the
        // same reference sequence. This must be `chrX`: with another
        // reference the refid check would reject it before the end
        // boundary is ever examined.
        let p0 = "chrX:462426(+)".parse::<Pos<String, ReqStrand>>().unwrap();
        let p0_into = tma22.pos_into(&p0);
        assert!(None.same(&p0_into));
    }

    /// Parse two contigs, intersect the first with the second, and
    /// compare the displayed result with the expected string.
    fn test_contig_ixn(ca_str: &str, cb_str: &str, cab_str: Option<String>) {
        let ca = ca_str.parse::<SeqContigStranded>().unwrap();
        let cb = cb_str.parse::<SeqContigStranded>().unwrap();
        match ca.contig_intersection(&cb) {
            None => assert_eq!(None, cab_str),
            Some(cab) => assert_eq!(Some(cab.to_string()), cab_str),
        };
    }

    /// `Display` output of a reverse-strand contig.
    #[test]
    fn test_display_fmt() {
        let tma19 = Contig::new(
            "chrXI".to_owned(),
            334412,
            334916 - 334412,
            ReqStrand::Reverse,
        );
        assert_eq!(format!("{}", tma19), "chrXI:334412-334916(-)");
    }

    /// Overlapping, nested, disjoint and cross-chromosome intersections.
    #[test]
    fn intersection() {
        // Partial overlap on the left; the result keeps the strand of the
        // first contig regardless of the second contig's strand.
        test_contig_ixn(
            "chrX:461829-462426(+)",
            "chrX:461800-461900(+)",
            Some("chrX:461829-461900(+)".to_owned()),
        );
        test_contig_ixn(
            "chrX:461829-462426(-)",
            "chrX:461800-461900(+)",
            Some("chrX:461829-461900(-)".to_owned()),
        );
        test_contig_ixn(
            "chrX:461829-462426(+)",
            "chrX:461800-461900(-)",
            Some("chrX:461829-461900(+)".to_owned()),
        );

        // Partial overlap on the right, full containment either way.
        test_contig_ixn(
            "chrX:461829-462426(+)",
            "chrX:462000-463000(+)",
            Some("chrX:462000-462426(+)".to_owned()),
        );
        test_contig_ixn(
            "chrX:461829-462426(+)",
            "chrX:461000-463000(+)",
            Some("chrX:461829-462426(+)".to_owned()),
        );
        test_contig_ixn(
            "chrX:461829-462426(+)",
            "chrX:462000-462100(+)",
            Some("chrX:462000-462100(+)".to_owned()),
        );

        // Disjoint regions and a different reference sequence.
        test_contig_ixn("chrX:461829-462426(+)", "chrX:461000-461500(+)", None);
        test_contig_ixn("chrX:461829-462426(+)", "chrX:463000-463500(+)", None);
        test_contig_ixn("chrX:461829-462426(+)", "chrV:461000-463000(+)", None);
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc