• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

jlab / rust-debruijn / 14357227430

09 Apr 2025 12:32PM UTC coverage: 84.104% (-0.03%) from 84.13%
14357227430

push

github

evolp
fix " fontcolor

1 of 1 new or added line in 1 file covered. (100.0%)

18 existing lines in 1 file now uncovered.

6386 of 7593 relevant lines covered (84.1%)

4012867.31 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

66.2
/src/graph.rs
1
// Copyright 2017 10x Genomics
2

3
//! Containers for path-compressed De Bruijn graphs
4

5
use bit_set::BitSet;
6
use indicatif::ProgressBar;
7
use indicatif::ProgressIterator;
8
use indicatif::ProgressStyle;
9
use itertools::enumerate;
10
use log::{debug, trace};
11
use rayon::prelude::*;
12
use rayon::current_num_threads;
13
use serde_derive::{Deserialize, Serialize};
14
use smallvec::SmallVec;
15
use std::borrow::Borrow;
16

17
use std::collections::HashSet;
18
use std::collections::VecDeque;
19
use std::f32;
20
use std::fmt::{self, Debug, Display};
21
use std::fs::{remove_file, File};
22
use std::hash::Hash;
23
use std::io::BufWriter;
24
use std::io::{BufReader, Error, Read};
25
use std::io::Write;
26
use std::iter::FromIterator;
27
use std::marker::PhantomData;
28
use std::path::Path;
29

30
use boomphf::hashmap::BoomHashMap;
31

32
use serde_json;
33
use serde_json::Value;
34

35
type SmallVec4<T> = SmallVec<[T; 4]>;
36
type SmallVec8<T> = SmallVec<[T; 8]>;
37

38
use crate::colors::Colors;
39
use crate::compression::CompressionSpec;
40
use crate::dna_string::{DnaString, DnaStringSlice, PackedDnaStringSet};
41
use crate::summarizer::SummaryConfig;
42
use crate::summarizer::SummaryData;
43
use crate::BUF;
44
use crate::{Dir, Exts, Kmer, Mer, Vmer};
45

46
/// A compressed DeBruijn graph carrying auxiliary data on each node of type `D`.
47
/// This type does not carry the sorted index arrays the allow the graph
48
/// to be walked efficiently. The `DeBruijnGraph` type wraps this type and add those
49
/// vectors.
50
#[derive(Serialize, Deserialize, Clone, Debug)]
51
pub struct BaseGraph<K, D> {
52
    pub sequences: PackedDnaStringSet,
53
    pub exts: Vec<Exts>,
54
    pub data: Vec<D>,
55
    pub stranded: bool,
56
    phantom: PhantomData<K>,
57
}
58

59
impl<K, D> BaseGraph<K, D> {
60
    pub fn new(stranded: bool) -> Self {
2,360✔
61
        BaseGraph {
2,360✔
62
            sequences: PackedDnaStringSet::new(),
2,360✔
63
            exts: Vec::new(),
2,360✔
64
            data: Vec::new(),
2,360✔
65
            phantom: PhantomData,
2,360✔
66
            stranded,
2,360✔
67
        }
2,360✔
68
    }
2,360✔
69

70
    pub fn len(&self) -> usize {
820,171✔
71
        self.sequences.len()
820,171✔
72
    }
820,171✔
73

74
    pub fn is_empty(&self) -> bool {
1✔
75
        self.sequences.is_empty()
1✔
76
    }
1✔
77

78
    pub fn combine<I: Iterator<Item = BaseGraph<K, D>>>(graphs: I) -> Self {
13✔
79
        let mut sequences = PackedDnaStringSet::new();
13✔
80
        let mut exts = Vec::new();
13✔
81
        let mut data = Vec::new();
13✔
82
        let mut stranded = Vec::new();
13✔
83

84
        for g in graphs {
2,014✔
85
            for s in 0..g.sequences.len() {
10,758✔
86
                sequences.add(&g.sequences.get(s));
10,758✔
87
            }
10,758✔
88

89
            exts.extend(g.exts);
2,001✔
90
            data.extend(g.data);
2,001✔
91
            stranded.push(g.stranded);
2,001✔
92
        }
93

94
        let out_stranded = stranded.iter().all(|x| *x);
13✔
95

13✔
96
        if !out_stranded && !stranded.iter().all(|x| !*x) {
2,001✔
97
            panic!("attempted to combine stranded and unstranded graphs");
×
98
        }
13✔
99

13✔
100
        BaseGraph {
13✔
101
            sequences,
13✔
102
            stranded: out_stranded,
13✔
103
            exts,
13✔
104
            data,
13✔
105
            phantom: PhantomData,
13✔
106
        }
13✔
107
    }
13✔
108
}
109

110
impl<K: Kmer, D> BaseGraph<K, D> {
111
    pub fn add<R: Borrow<u8>, S: IntoIterator<Item = R>>(
784,811✔
112
        &mut self,
784,811✔
113
        sequence: S,
784,811✔
114
        exts: Exts,
784,811✔
115
        data: D,
784,811✔
116
    ) {
784,811✔
117
        self.sequences.add(sequence);
784,811✔
118
        self.exts.push(exts);
784,811✔
119
        self.data.push(data);
784,811✔
120
    }
784,811✔
121
}
122

123
impl<K: Kmer + Send + Sync, D> BaseGraph<K, D> {
124
    pub fn finish(self) -> DebruijnGraph<K, D> {
359✔
125
        let indices: Vec<u32> = (0..self.len() as u32).collect();
359✔
126

127
        let left_order = {
359✔
128
            let mut kmers: Vec<K> = Vec::with_capacity(self.len());
359✔
129
            for idx in &indices {
784,437✔
130
                kmers.push(self.sequences.get(*idx as usize).first_kmer());
784,078✔
131
                
784,078✔
132
            }
784,078✔
133
            
134
            BoomHashMap::new_parallel(kmers, indices.clone())
359✔
135
        };
136

137
        let right_order = {
359✔
138
            let mut kmers: Vec<K> = Vec::with_capacity(self.len());
359✔
139
            for idx in &indices {
784,437✔
140
                kmers.push(self.sequences.get(*idx as usize).last_kmer());
784,078✔
141
            }
784,078✔
142
            
143
            BoomHashMap::new_parallel(kmers, indices)
359✔
144
        };
359✔
145
        debug!("finish graph loops: 2x {}", self.len());
359✔
146

147
        DebruijnGraph {
359✔
148
            base: self,
359✔
149
            left_order,
359✔
150
            right_order,
359✔
151
        }
359✔
152
    }
359✔
153
}
154

155
impl<K: Kmer, D> BaseGraph<K, D> {
156
    pub fn finish_serial(self) -> DebruijnGraph<K, D> {
×
157
        let indices: Vec<u32> = (0..self.len() as u32).collect();
×
158

159
        let left_order = {
×
160
            let mut kmers: Vec<K> = Vec::with_capacity(self.len());
×
161
            let mut sequences: Vec<String> = Vec::new();
×
162
            for idx in &indices {
×
163
                kmers.push(self.sequences.get(*idx as usize).first_kmer());
×
164
                sequences.push(self.sequences.get(*idx as usize).to_dna_string());
×
165
            }
×
166
            println!("left kmers: {:?}", kmers);
×
167
            println!("left seqs: {:?}", sequences);
×
168
            BoomHashMap::new(kmers, indices.clone())
×
169
        };
170

171
        let right_order = {
×
172
            let mut kmers: Vec<K> = Vec::with_capacity(self.len());
×
173
            let mut sequences: Vec<String> = Vec::new();
×
174
            for idx in &indices {
×
175
                kmers.push(self.sequences.get(*idx as usize).last_kmer());
×
176
                sequences.push(self.sequences.get(*idx as usize).to_dna_string());
×
177
            }
×
178
            println!("right kmers: {:?}", kmers);
×
179
            println!("right seqs: {:?}", sequences);
×
180
            BoomHashMap::new(kmers, indices)
×
181
        };
×
182

×
183
        DebruijnGraph {
×
184
            base: self,
×
185
            left_order,
×
186
            right_order,
×
187
        }
×
188
    }
×
189

190
    pub fn remove_duplicate_kmers(self) {
×
191
        
×
192
    }
×
193
}
194

195
/// A compressed DeBruijn graph carrying auxiliary data on each node of type `D`.
196
/// The struct carries sorted index arrays the allow the graph
197
/// to be walked efficiently.
198
#[derive(Serialize, Deserialize, Debug)]
199
pub struct DebruijnGraph<K: Hash, D> {
200
    pub base: BaseGraph<K, D>,
201
    left_order: BoomHashMap<K, u32>,
202
    right_order: BoomHashMap<K, u32>,
203
}
204

205
impl<K: Kmer, D: Debug> DebruijnGraph<K, D> {
206
    /// Total number of nodes in the DeBruijn graph
207
    pub fn len(&self) -> usize {
819,070✔
208
        self.base.len()
819,070✔
209
    }
819,070✔
210

211
    pub fn is_empty(&self) -> bool {
1✔
212
        self.base.is_empty()
1✔
213
    }
1✔
214

215
    /// Get a node given it's `node_id`
216
    pub fn get_node(&self, node_id: usize) -> Node<K, D> {
4,892,888✔
217
        Node {
4,892,888✔
218
            node_id,
4,892,888✔
219
            graph: self,
4,892,888✔
220
        }
4,892,888✔
221
    }
4,892,888✔
222

223
    /// Get a node given it's `node_id`
224
    pub fn get_node_kmer(&self, node_id: usize) -> NodeKmer<K, D> {
×
225
        let node = self.get_node(node_id);
×
226
        let node_seq = node.sequence();
×
227

×
228
        NodeKmer {
×
229
            node_id,
×
230
            node_seq_slice: node_seq,
×
231
            phantom_d: PhantomData,
×
232
            phantom_k: PhantomData,
×
233
        }
×
234
    }
×
235

236
    /// Return an iterator over all nodes in the graph
237
    pub fn iter_nodes(&self) -> NodeIter<K, D> {
129✔
238
        NodeIter {
129✔
239
            graph: self,
129✔
240
            node_id: 0,
129✔
241
        }
129✔
242
    }
129✔
243

244
    /// Find the edges leaving node `node_id` in direction `Dir`. Should generally be
245
    /// accessed via a Node wrapper object
246
    fn find_edges(&self, node_id: usize, dir: Dir) -> SmallVec4<(u8, usize, Dir, bool)> {
480,519✔
247
        let exts = self.base.exts[node_id];
480,519✔
248
        let sequence = self.base.sequences.get(node_id);
480,519✔
249
        let kmer: K = sequence.term_kmer(dir);
480,519✔
250
        let mut edges = SmallVec4::new();
480,519✔
251

252
        for i in 0..4 {
2,402,595✔
253
            if exts.has_ext(dir, i) {
1,922,076✔
254
                let link = self.find_link(kmer.extend(i, dir), dir); //.expect("missing link");
810,398✔
255
                if let Some(l) = link {
810,398✔
256
                    edges.push((i, l.0, l.1, l.2));
810,398✔
257
                }
810,398✔
258
                // Otherwise, this edge doesn't exist within this shard, so ignore it.
259
                // NOTE: this should be allowed in a 'complete' DBG
260
            }
1,111,678✔
261
        }
262

263
        edges
480,519✔
264
    }
480,519✔
265

266
    /// Seach for the kmer `kmer`, appearing at the given `side` of a node sequence.
267
    fn search_kmer(&self, kmer: K, side: Dir) -> Option<usize> {
4,483,024✔
268
        match side {
4,483,024✔
269
            Dir::Left => self.left_order.get(&kmer).map(|pos| *pos as usize),
2,239,913✔
270
            Dir::Right => self.right_order.get(&kmer).map(|pos| *pos as usize),
2,243,111✔
271
        }
272
    }
4,483,024✔
273

274
    /// Find a link in the graph, possibly handling a RC switch.
275
    pub fn find_link(&self, kmer: K, dir: Dir) -> Option<(usize, Dir, bool)> {
3,128,396✔
276
        // Only test self-consistent paths through
3,128,396✔
277
        // the edges
3,128,396✔
278
        // Avoids problems due to single kmer edges
3,128,396✔
279
        // (dir, next_side_incoming, rc)
3,128,396✔
280
        // (Dir::Left, Dir::Right, false) => true,
3,128,396✔
281
        // (Dir::Left, Dir::Left,  true) => true,
3,128,396✔
282
        // (Dir::Right, Dir::Left, false) => true,
3,128,396✔
283
        // (Dir::Right, Dir::Right, true) => true,
3,128,396✔
284

3,128,396✔
285
        let rc = kmer.rc();
3,128,396✔
286

3,128,396✔
287
        match dir {
3,128,396✔
288
            Dir::Left => {
289
                if let Some(idx) = self.search_kmer(kmer, Dir::Right) {
1,567,156✔
290
                    return Some((idx, Dir::Right, false));
888,483✔
291
                }
678,673✔
292

678,673✔
293
                if !self.base.stranded {
678,673✔
294
                    if let Some(idx) = self.search_kmer(rc, Dir::Left) {
678,673✔
295
                        return Some((idx, Dir::Left, true));
678,673✔
296
                    }
×
297
                }
×
298
            }
299

300
            Dir::Right => {
301
                if let Some(idx) = self.search_kmer(kmer, Dir::Left) {
1,561,240✔
302
                    return Some((idx, Dir::Left, false));
885,285✔
303
                }
675,955✔
304

675,955✔
305
                if !self.base.stranded {
675,955✔
306
                    if let Some(idx) = self.search_kmer(rc, Dir::Right) {
675,955✔
307
                        return Some((idx, Dir::Right, true));
675,955✔
308
                    }
×
309
                }
×
310
            }
311
        }
312

313
        None
×
314
    }
3,128,396✔
315

316
    /// Check whether the graph is fully compressed. Return `None` if it's compressed,
317
    /// otherwise return `Some(node1, node2)` representing a pair of node that could
318
    /// be collapsed. Probably only useful for testing.
319
    pub fn is_compressed<S: CompressionSpec<D>>(&self, spec: &S) -> Option<(usize, usize)> {
567✔
320
        for i in 0..self.len() {
102,276✔
321
            let n = self.get_node(i);
102,276✔
322

323
            for dir in &[Dir::Left, Dir::Right] {
306,828✔
324
                let dir_edges = n.edges(*dir);
204,552✔
325
                if dir_edges.len() == 1 {
204,552✔
326
                    let (_, next_id, return_dir, _) = dir_edges[0];
145,201✔
327
                    let next = self.get_node(next_id);
145,201✔
328

145,201✔
329
                    let ret_edges = next.edges(return_dir);
145,201✔
330
                    if ret_edges.len() == 1 {
145,201✔
331
                        // Test for us being a palindrome: this makes it OK
332
                        if n.len() == K::k() && n.sequence().first_kmer::<K>().is_palindrome() {
18✔
333
                            continue;
12✔
334
                        }
6✔
335

6✔
336
                        // Test for a neighbor being a palindrome: this makes it OK
6✔
337
                        if next.len() == K::k() && next.sequence().first_kmer::<K>().is_palindrome()
6✔
338
                        {
339
                            continue;
6✔
UNCOV
340
                        }
×
UNCOV
341

×
UNCOV
342
                        // Test for this edge representing a smooth circle (biting it's own tail):
×
UNCOV
343
                        if n.node_id == next_id {
×
UNCOV
344
                            continue;
×
345
                        }
×
346

×
347
                        if spec.join_test(n.data(), next.data()) {
×
348
                            // Found a unbranched edge that should have been eliminated
349
                            return Some((i, next_id));
×
350
                        }
×
351
                    }
145,183✔
352
                }
59,351✔
353
            }
354
        }
355

356
        None
567✔
357
    }
567✔
358

359
    /// Remove non-existent extensions that may be created due to filtered kmers
360
    pub fn fix_exts(&mut self, valid_nodes: Option<&BitSet>) {
246✔
361
        for i in 0..self.len() {
764,223✔
362
            let valid_exts = self.get_valid_exts(i, valid_nodes);
764,223✔
363
            self.base.exts[i] = valid_exts;
764,223✔
364
        }
764,223✔
365
    }
246✔
366

367
    pub fn get_valid_exts(&self, node_id: usize, valid_nodes: Option<&BitSet>) -> Exts {
764,223✔
368
        let mut new_exts = Exts::empty();
764,223✔
369
        let node = self.get_node(node_id);
764,223✔
370
        let exts = node.exts();
764,223✔
371
        let l_kmer: K = node.sequence().first_kmer();
764,223✔
372
        let r_kmer: K = node.sequence().last_kmer();
764,223✔
373

764,223✔
374
        let check_node = |id| match valid_nodes {
1,567,050✔
375
            Some(bs) => bs.contains(id),
1,501,972✔
376
            None => true,
65,078✔
377
        };
1,567,050✔
378

379
        for i in 0..4 {
3,821,115✔
380
            if exts.has_ext(Dir::Left, i) {
3,056,892✔
381
                match self.find_link(l_kmer.extend_left(i), Dir::Left) {
783,511✔
382
                    Some((target, _, _)) if check_node(target) => {
783,511✔
383
                        new_exts = new_exts.set(Dir::Left, i)
783,511✔
384
                    }
385
                    _ => (),
×
386
                }
387
            }
2,273,381✔
388

389
            if exts.has_ext(Dir::Right, i) {
3,056,892✔
390
                match self.find_link(r_kmer.extend_right(i), Dir::Right) {
783,539✔
391
                    Some((target, _, _)) if check_node(target) => {
783,539✔
392
                        new_exts = new_exts.set(Dir::Right, i)
783,539✔
393
                    }
394
                    _ => (),
×
395
                }
396
            }
2,273,353✔
397
        }
398

399
        new_exts
764,223✔
400
    }
764,223✔
401

402
    /// Find the highest-scoring, unambiguous path in the graph. Each node get a score
403
    /// given by `score`. Any node where `solid_path(node) == True` are valid paths -
404
    /// paths will be terminated if there are multiple valid paths emanating from a node.
405
    pub fn max_path<F, F2>(&self, score: F, solid_path: F2) -> Vec<(usize, Dir)>
×
406
    where
×
407
        F: Fn(&D) -> f32,
×
408
        F2: Fn(&D) -> bool,
×
409
    {
×
410
        if self.is_empty() {
×
411
            return Vec::default();
×
412
        }
×
413

×
414
        let mut best_node = 0;
×
415
        let mut best_score = f32::MIN;
×
416
        for i in 0..self.len() {
×
417
            let node = self.get_node(i);
×
418
            let node_score = score(node.data());
×
419

×
420
            if node_score > best_score {
×
421
                best_node = i;
×
422
                best_score = node_score;
×
423
            }
×
424
        }
425

426
        let oscore = |state| match state {
×
427
            None => 0.0,
×
428
            Some((id, _)) => score(self.get_node(id).data()),
×
429
        };
×
430

431
        let osolid_path = |state| match state {
×
432
            None => false,
×
433
            Some((id, _)) => solid_path(self.get_node(id).data()),
×
434
        };
×
435

436
        // Now expand in each direction, greedily taking the best path. Stop if we hit a node we've
437
        // already put into the path
438
        let mut used_nodes = HashSet::new();
×
439
        let mut path = VecDeque::new();
×
440

×
441
        // Start w/ initial state
×
442
        used_nodes.insert(best_node);
×
443
        path.push_front((best_node, Dir::Left));
×
444

445
        for init in [(best_node, Dir::Left, false), (best_node, Dir::Right, true)].iter() {
×
446
            let &(start_node, dir, do_flip) = init;
×
447
            let mut current = (start_node, dir);
×
448

449
            loop {
450
                let mut next = None;
×
451
                let (cur_id, incoming_dir) = current;
×
452
                let node = self.get_node(cur_id);
×
453
                let edges = node.edges(incoming_dir.flip());
×
454

×
455
                let mut solid_paths = 0;
×
456
                for (_, id, dir, _) in edges {
×
457
                    let cand = Some((id, dir));
×
458
                    if osolid_path(cand) {
×
459
                        solid_paths += 1;
×
460
                    }
×
461

462
                    if oscore(cand) > oscore(next) {
×
463
                        next = cand;
×
464
                    }
×
465
                }
466

467
                if solid_paths > 1 {
×
468
                    break;
×
469
                }
×
470

471
                match next {
×
472
                    Some((next_id, next_incoming)) if !used_nodes.contains(&next_id) => {
×
473
                        if do_flip {
×
474
                            path.push_front((next_id, next_incoming.flip()));
×
475
                        } else {
×
476
                            path.push_back((next_id, next_incoming));
×
477
                        }
×
478

479
                        used_nodes.insert(next_id);
×
480
                        current = (next_id, next_incoming);
×
481
                    }
482
                    _ => break,
×
483
                }
484
            }
485
        }
486

487
        Vec::from_iter(path)
×
488
    }
×
489

490

491
    /// Find the highest-scoring, unambiguous path in the graph. Each node get a score
492
    /// given by `score`. Any node where `solid_path(node) == True` are valid paths -
493
    /// paths will be terminated if there are multiple valid paths emanating from a node.
494
    /// Returns vec with path for each component
495
    pub fn max_path_comp<F, F2>(&self, score: F, solid_path: F2) -> Vec<VecDeque<(usize, Dir)>>
1✔
496
    where
1✔
497
        F: Fn(&D) -> f32,
1✔
498
        F2: Fn(&D) -> bool,
1✔
499
    {
1✔
500
        if self.is_empty() {
1✔
501
            let vec: Vec<VecDeque<(usize, Dir)>> = Vec::new();
×
502
            return vec;
×
503
        }
1✔
504

1✔
505
        let components = self.iter_components();
1✔
506
        let mut paths: Vec<VecDeque<(usize, Dir)>> = Vec::new();
1✔
507

508
        for component in components {
3✔
509

510
            let current_comp = &component;
2✔
511
            
2✔
512

2✔
513
            let mut best_node = current_comp[0];
2✔
514
            let mut best_score = f32::MIN;
2✔
515
            for c in current_comp.iter() {
6✔
516
                let node = self.get_node(*c);
6✔
517
                let node_score = score(node.data());
6✔
518

6✔
519
                if node_score > best_score {
6✔
520
                    best_node = *c;
2✔
521
                    best_score = node_score;
2✔
522
                }
4✔
523
            }
524

525
            let oscore = |state| match state {
4✔
526
                None => 0.0,
2✔
527
                Some((id, _)) => score(self.get_node(id).data()),
2✔
528
            };
4✔
529

530
            let osolid_path = |state| match state {
2✔
531
                None => false,
×
532
                Some((id, _)) => solid_path(self.get_node(id).data()),
2✔
533
            };
2✔
534

535
            // Now expand in each direction, greedily taking the best path. Stop if we hit a node we've
536
            // already put into the path
537
            let mut used_nodes = HashSet::new();
2✔
538
            let mut path = VecDeque::new();
2✔
539

2✔
540
            // Start w/ initial state
2✔
541
            used_nodes.insert(best_node);
2✔
542
            path.push_front((best_node, Dir::Left));
2✔
543

544
            for init in [(best_node, Dir::Left, false), (best_node, Dir::Right, true)].iter() {
4✔
545
                let &(start_node, dir, do_flip) = init;
4✔
546
                let mut current = (start_node, dir);
4✔
547

548
                loop {
549
                    let mut next = None;
6✔
550
                    let (cur_id, incoming_dir) = current;
6✔
551
                    let node = self.get_node(cur_id);
6✔
552
                    let edges = node.edges(incoming_dir.flip());
6✔
553

6✔
554
                    let mut solid_paths = 0;
6✔
555
                    for (_, id, dir, _) in edges {
8✔
556
                        let cand = Some((id, dir));
2✔
557
                        if osolid_path(cand) {
2✔
558
                            solid_paths += 1;
2✔
559
                        }
2✔
560

561
                        if oscore(cand) > oscore(next) {
2✔
562
                            next = cand;
2✔
563
                        }
2✔
564
                    }
565

566
                    if solid_paths > 1 {
6✔
UNCOV
567
                        break;
×
568
                    }
6✔
569

570
                    match next {
2✔
571
                        Some((next_id, next_incoming)) if !used_nodes.contains(&next_id) => {
2✔
572
                            if do_flip {
2✔
573
                                path.push_front((next_id, next_incoming.flip()));
2✔
574
                            } else {
2✔
575
                                path.push_back((next_id, next_incoming));
×
576
                            }
×
577

578
                            used_nodes.insert(next_id);
2✔
579
                            current = (next_id, next_incoming);
2✔
580
                        }
581
                        _ => break,
4✔
582
                    }
583
                }
584
            }
585
            
586
            paths.push(path);
2✔
587
            //paths.push(Vec::from_iter(path));
588
        }
589

590
        paths
1✔
591
    
592
    }
1✔
593

594
    pub fn iter_max_path_comp<F, F2>(&self, score: F, solid_path: F2) -> PathCompIter<K, D, F, F2> 
2✔
595
    where 
2✔
596
    F: Fn(&D) -> f32,
2✔
597
    F2: Fn(&D) -> bool
2✔
598
    {
2✔
599
        let component_iterator = self.iter_components();
2✔
600
        PathCompIter { graph: self, component_iterator, graph_pos: 0, score, solid_path }
2✔
601
    }
2✔
602

603
    /// write the paths from `iter_max_path_comp` to a fasta file
604
    pub fn path_to_fasta<F, F2>(&self, f: &mut dyn std::io::Write, path_iter: PathCompIter<K, D, F, F2>, return_lens: bool) -> (Vec<usize>, Vec<usize>)
1✔
605
    where 
1✔
606
    F: Fn(&D) -> f32,
1✔
607
    F2: Fn(&D) -> bool
1✔
608
    {
1✔
609
        // width of fasta lines
1✔
610
        let columns = 80;
1✔
611

1✔
612
        // sizes of components and of paths
1✔
613
        let mut comp_sizes = Vec::new();
1✔
614
        let mut path_lens = Vec::new();
1✔
615

616
        for (seq_counter, (component, path)) in path_iter.enumerate() {
2✔
617
            // get dna sequence from path
618
            let seq = self.sequence_of_path(path.iter());
2✔
619

2✔
620
            //write header with length & start node
2✔
621
            writeln!(f, ">path{} len={} start_node={}", seq_counter, seq.len(), path[0].0).unwrap();
2✔
622

2✔
623
            // calculate how sequence has to be split up
2✔
624
            let slices = (seq.len() / columns) + 1;
2✔
625
            let mut ranges = Vec::with_capacity(slices);
2✔
626

2✔
627
            let mut start = 0;
2✔
628
            while start < seq.len() {
5✔
629
                ranges.push(start..start + columns);
3✔
630
                start += columns;
3✔
631
            }
3✔
632

633
            let last_start = ranges.pop().expect("no kmers in parallel ranges").start;
2✔
634
            ranges.push(last_start..seq.len());
2✔
635

636
            // split up sequence and write to file accordingly
637
            for range in ranges {
5✔
638
                writeln!(f, "{:?}", seq.slice(range.start, range.end)).unwrap();
3✔
639
            }
3✔
640

641
            if return_lens {
2✔
642
                comp_sizes.push(component.len());
×
643
                path_lens.push(path.len());
×
644
            }
2✔
645
        }    
646

647
        (comp_sizes, path_lens)
1✔
648
        
1✔
649
    }
1✔
650

651

652
    /// Get the sequence of a path through the graph. The path is given as a sequence of node_id integers
653
    pub fn sequence_of_path<'a, I: 'a + Iterator<Item = &'a (usize, Dir)>>(
22,894✔
654
        &self,
22,894✔
655
        path: I,
22,894✔
656
    ) -> DnaString {
22,894✔
657
        let mut seq = DnaString::new();
22,894✔
658

659
        for (idx, &(node_id, dir)) in path.enumerate() {
741,347✔
660
            let start = if idx == 0 { 0 } else { K::k() - 1 };
741,347✔
661

662
            let node_seq = match dir {
741,347✔
663
                Dir::Left => self.get_node(node_id).sequence(),
385,481✔
664
                Dir::Right => self.get_node(node_id).sequence().rc(),
355,866✔
665
            };
666

667
            for p in start..node_seq.len() {
1,523,803✔
668
                seq.push(node_seq.get(p))
1,523,803✔
669
            }
670
        }
671

672
        seq
22,894✔
673
    }
22,894✔
674

675
    /// write a node to a dot file
676
    /// 
677
    /// ### Arguments: 
678
    /// 
679
    /// * `node`: [`Node<K, D>`] which will be written to a dot file
680
    /// * `node_label`: closure taking [`Node<K, D>`] and returning a string containing commands for dot nodes 
681
    /// * `edge_label`: closure taking [`Node<K, D>`], the base as a [`u8`], the incoming [`Dir`] of the edge 
682
    ///    and if the neighbor is flipped - returns a string containing commands for dot edges, 
683
    /// * `f`: writer
684
    fn node_to_dot<FN: Fn(&Node<K, D>) -> String, FE: Fn(&Node<K, D>, u8, Dir, bool) -> String>(
65,316✔
685
        &self,
65,316✔
686
        node: &Node<'_, K, D>,
65,316✔
687
        node_label: &FN,
65,316✔
688
        edge_label: &FE,
65,316✔
689
        f: &mut dyn Write,
65,316✔
690
    ) {
65,316✔
691
        writeln!(f, "n{} {}", node.node_id, node_label(node)).unwrap();
65,316✔
692

693
        for (base, id, incoming_dir, flipped) in node.l_edges() {
65,316✔
694

65,058✔
695
            writeln!(f, "n{} -> n{} {}", id, node.node_id, edge_label(node, base, incoming_dir, flipped)).unwrap();
65,058✔
696
        }
65,058✔
697

698
        for (base, id, incoming_dir, flipped) in node.r_edges() {
65,316✔
699
            writeln!(f, "n{} -> n{} {}", node.node_id, id, edge_label(node, base, incoming_dir, flipped)).unwrap();
64,774✔
700
        }
64,774✔
701
    }
65,316✔
702

703
    /// Write the graph to a dot file
704
    /// 
705
    /// ### Arguments: 
706
    /// 
707
    /// * `path`: path to the output file
708
    /// * `node_label`: closure taking [`Node<K, D>`] and returning a string containing commands for dot nodes 
709
    /// * `edge_label`: closure taking [`Node<K, D>`], the base as a [`u8`], the incoming [`Dir`] of the edge 
710
    ///    and if the neighbor is flipped - returns a string containing commands for dot edges, 
711
    pub fn to_dot<P, FN, FE>(&self, path: P, node_label: &FN, edge_label: &FE) 
1✔
712
    where 
1✔
713
    P: AsRef<Path>,
1✔
714
    FN: Fn(&Node<K, D>) -> String,
1✔
715
    FE: Fn(&Node<K, D>, u8, Dir, bool) -> String,
1✔
716
    {
1✔
717
        let mut f = BufWriter::with_capacity(BUF, File::create(path).expect("error creating dot file"));
1✔
718

1✔
719
        let pb = ProgressBar::new(self.len() as u64);
1✔
720
        pb.set_style(ProgressStyle::with_template("{msg} [{elapsed_precise}] {bar:60.cyan/blue} ({pos}/{len})").unwrap().progress_chars("#/-"));
1✔
721
        pb.set_message(format!("{:<32}", "writing graph to DOT file"));
1✔
722

1✔
723
        writeln!(&mut f, "digraph {{\nrankdir=\"LR\"\nmodel=subset\noverlap=scalexy").unwrap();
1✔
724
        for i in (0..self.len()).progress_with(pb) {
32,658✔
725
            self.node_to_dot(&self.get_node(i), node_label, edge_label, &mut f);
32,658✔
726
        }
32,658✔
727
        writeln!(&mut f, "}}").unwrap();
1✔
728
        
1✔
729
        f.flush().unwrap();
1✔
730
        debug!("large to dot loop: {}", self.len());
1✔
731
    }
1✔
732

733
    /// Write the graph to a dot file in parallel
734
    /// Will write in to n_threads files simultaniously,
735
    /// then go though the files and add the contents to a larger file, 
736
    /// and delete the small files.
737
    /// 
738
    /// ### Arguments: 
739
    /// 
740
    /// * `path`: path to the output file
741
    /// * `node_label`: closure taking [`Node<K, D>`] and returning a string containing commands for dot nodes 
742
    /// * `edge_label`: closure taking [`Node<K, D>`], the base as a [`u8`], the incoming [`Dir`] of the edge 
743
    ///    and if the neighbor is flipped - returns a string containing commands for dot edges, 
744
    pub fn to_dot_parallel<P, FN, FE>(&self, path: P, node_label: &FN, edge_label: &FE) 
1✔
745
    where 
1✔
746
        D: Sync,
1✔
747
        K: Sync,
1✔
748
        P: AsRef<Path> + Display + Sync,
1✔
749
        FN: Fn(&Node<K, D>) -> String + Sync,
1✔
750
        FE: Fn(&Node<K, D>, u8, Dir, bool) -> String + Sync,
1✔
751
    {        
1✔
752
        let slices = current_num_threads();
1✔
753
        let n_nodes = self.len();
1✔
754
        let sz = n_nodes / slices + 1;
1✔
755

1✔
756
        debug!("n_nodes: {}", n_nodes);
1✔
757
        debug!("sz: {}", sz);
1✔
758

759
        let mut parallel_ranges = Vec::with_capacity(slices);
1✔
760
        let mut start = 0;
1✔
761
        while start < n_nodes {
5✔
762
            parallel_ranges.push(start..start + sz);
4✔
763
            start += sz;
4✔
764
        }
4✔
765

766
        let last_start = parallel_ranges.pop().expect("no kmers in parallel ranges").start;
1✔
767
        parallel_ranges.push(last_start..n_nodes);
1✔
768
        debug!("parallel ranges: {:?}", parallel_ranges);
1✔
769

770
        let mut files = Vec::with_capacity(current_num_threads());
1✔
771

772
        for i in 0..parallel_ranges.len() {
4✔
773
            files.push(format!("{}-{}.dot", path, i));
4✔
774
        } 
4✔
775

776
        let pb = ProgressBar::new(self.len() as u64);
1✔
777
        pb.set_style(ProgressStyle::with_template("{msg} [{elapsed_precise}] {bar:60.cyan/blue} ({pos}/{len})").unwrap().progress_chars("#/-"));
1✔
778
        pb.set_message(format!("{:<32}", "writing graph to DOT files"));
1✔
779
    
1✔
780
        parallel_ranges.into_par_iter().enumerate().for_each(|(i, range)| {
4✔
781
            let mut f = BufWriter::with_capacity(BUF, File::create(&files[i]).expect("error creating parallel dot file"));
4✔
782

783
            for i in range {
32,662✔
784
                self.node_to_dot(&self.get_node(i), node_label, edge_label, &mut f);
32,658✔
785
                pb.inc(1);
32,658✔
786
            }
32,658✔
787

788
            f.flush().unwrap();
4✔
789
        });
4✔
790
        pb.finish_and_clear();
1✔
791

1✔
792
        let mut out_file = BufWriter::with_capacity(BUF, File::create(path).expect("error creating combined dot file"));
1✔
793

1✔
794
        writeln!(&mut out_file, "digraph {{\nrankdir=\"LR\"\nmodel=subset\noverlap=scalexy").unwrap();
1✔
795

1✔
796
        let pb = ProgressBar::new(files.len() as u64);
1✔
797
        pb.set_style(ProgressStyle::with_template("{msg} [{elapsed_precise}] {bar:60.cyan/blue} ({pos}/{len})").unwrap().progress_chars("#/-"));
1✔
798
        pb.set_message(format!("{:<32}", "combining files"));
1✔
799

800
        for file in files.iter().progress_with(pb) {
4✔
801
            let open_file = File::open(file).expect("error opening parallel dot file");
4✔
802
            let mut reader = BufReader::new(open_file);
4✔
803
            let mut buffer = [0; BUF];
4✔
804

805
            loop {
806
                let linecount = reader.read(&mut buffer).unwrap();
220✔
807
                if linecount == 0 { break }
220✔
808
                out_file.write_all(&buffer[..linecount]).unwrap();
216✔
809
            }
810

811
            remove_file(file).unwrap();
4✔
812
        }
813

814
        writeln!(&mut out_file, "}}").unwrap();
1✔
815

1✔
816
        out_file.flush().unwrap();
1✔
817

1✔
818

1✔
819
    }
1✔
820

821
    /// Write part of the graph to a dot file
822
    /// 
823
    /// ### Arguments: 
824
    /// 
825
    /// * `path`: path to the output file
826
    /// * `node_label`: closure taking [`Node<K, D>`] and returning a string containing commands for dot nodes 
827
    /// * `edge_label`: closure taking [`Node<K, D>`], the base as a [`u8`], the incoming [`Dir`] of the edge 
828
    ///    and if the neighbor is flipped - returns a string containing commands for dot edges, 
829
    /// * `nodes`: [`Vec<usize>`] listing all IDs of nodes which should be included
830
    pub fn to_dot_partial<P, FN, FE>(&self, path: P, node_label: &FN, edge_label: &FE, nodes: Vec<usize>) 
×
831
    where 
×
832
        P: AsRef<Path>,
×
833
        FN: Fn(&Node<K, D>) -> String,
×
834
        FE: Fn(&Node<K, D>, u8, Dir, bool) -> String,
×
835
    {
×
836
        let mut f = BufWriter::with_capacity(BUF, File::create(path).expect("error creating dot file"));
×
837

×
838
        let pb = ProgressBar::new(nodes.len() as u64);
×
839
        pb.set_style(ProgressStyle::with_template("{msg} [{elapsed_precise}] {bar:60.cyan/blue} ({pos}/{len})").unwrap().progress_chars("#/-"));
×
840
        pb.set_message(format!("{:<32}", "writing graph to DOT file"));
×
841

×
842
        writeln!(&mut f, "digraph {{\nrankdir=\"LR\"\nmodel=subset\noverlap=scalexy").unwrap();
×
843
        for i in nodes.into_iter().progress_with(pb) {
×
844
            self.node_to_dot(&self.get_node(i), node_label, edge_label, &mut f);
×
845
        }
×
846
        writeln!(&mut f, "}}").unwrap();
×
847

×
848
        f.flush().unwrap();
×
849

×
850
        debug!("large to dot loop: {}", self.len());
×
851
    }
×
852

853
    fn node_to_gfa<F: Fn(&Node<'_, K, D>) -> String>(
12✔
854
        &self,
12✔
855
        node: &Node<'_, K, D>,
12✔
856
        w: &mut dyn Write,
12✔
857
        tag_func: Option<&F>,
12✔
858
    ) -> Result<(), Error> {
12✔
859
        match tag_func {
12✔
860
            Some(f) => {
12✔
861
                let tags = (f)(node);
12✔
862
                writeln!(
12✔
863
                    w,
12✔
864
                    "S\t{}\t{}\t{}",
12✔
865
                    node.node_id,
12✔
866
                    node.sequence().to_dna_string(),
12✔
867
                    tags
12✔
868
                )?;
12✔
869
            }
870
            _ => writeln!(
×
871
                w,
×
872
                "S\t{}\t{}",
×
873
                node.node_id,
×
874
                node.sequence().to_dna_string()
×
875
            )?,
×
876
        }
877

878
        for (_, target, dir, _) in node.l_edges() {
12✔
879
            if target >= node.node_id {
12✔
880
                let to_dir = match dir {
8✔
881
                    Dir::Left => "+",
4✔
882
                    Dir::Right => "-",
4✔
883
                };
884
                writeln!(
8✔
885
                    w,
8✔
886
                    "L\t{}\t-\t{}\t{}\t{}M",
8✔
887
                    node.node_id,
8✔
888
                    target,
8✔
889
                    to_dir,
8✔
890
                    K::k() - 1
8✔
891
                )?;
8✔
892
            }
4✔
893
        }
894

895
        for (_, target, dir, _) in node.r_edges() {
12✔
896
            if target > node.node_id {
4✔
UNCOV
897
                let to_dir = match dir {
×
UNCOV
898
                    Dir::Left => "+",
×
UNCOV
899
                    Dir::Right => "-",
×
900
                };
UNCOV
901
                writeln!(
×
UNCOV
902
                    w,
×
UNCOV
903
                    "L\t{}\t+\t{}\t{}\t{}M",
×
UNCOV
904
                    node.node_id,
×
UNCOV
905
                    target,
×
UNCOV
906
                    to_dir,
×
UNCOV
907
                    K::k() - 1
×
UNCOV
908
                )?;
×
909
            }
4✔
910
        }
911

912
        Ok(())
12✔
913
    }
12✔
914

915
    /// Write the graph to GFA format
916
    pub fn to_gfa<P: AsRef<Path>>(&self, gfa_out: P) -> Result<(), Error> {
×
917
        let wtr = BufWriter::with_capacity(BUF, File::create(gfa_out).expect("error creating gfa file"));
×
918
        self.write_gfa(&mut std::io::BufWriter::new(wtr))
×
919
    }
×
920

921
    pub fn write_gfa(&self, wtr: &mut impl Write) -> Result<(), Error> {
×
922
        writeln!(wtr, "H\tVN:Z:debruijn-rs")?;
×
923

924
        type DummyFn<K, D> = fn(&Node<'_, K, D>) -> String;
925
        let dummy_opt: Option<&DummyFn<K, D>> = None;
×
926

×
927
        let pb = ProgressBar::new(self.len() as u64);
×
928
        pb.set_style(ProgressStyle::with_template("{msg} [{elapsed_precise}] {bar:60.cyan/blue} ({pos}/{len})").unwrap().progress_chars("#/-"));
×
929
        pb.set_message(format!("{:<32}", "writing graph to GFA file"));
×
930

931
        for i in (0..self.len()).progress_with(pb) {
×
932
            let n = self.get_node(i);
×
933
            self.node_to_gfa(&n, wtr, dummy_opt)?;
×
934
        }
935

936
        wtr.flush().unwrap();
×
937

×
938
        Ok(())
×
939
    }
×
940

941
    /// Write the graph to GFA format
942
    pub fn to_gfa_with_tags<P: AsRef<Path>, F: Fn(&Node<'_, K, D>) -> String>(
1✔
943
        &self,
1✔
944
        gfa_out: P,
1✔
945
        tag_func: F,
1✔
946
    ) -> Result<(), Error> {
1✔
947
        let mut wtr = BufWriter::with_capacity(BUF, File::create(gfa_out).expect("error creatinf gfa file"));
1✔
948
        writeln!(wtr, "H\tVN:Z:debruijn-rs")?;
1✔
949

950
        let pb = ProgressBar::new(self.len() as u64);
1✔
951
        pb.set_style(ProgressStyle::with_template("{msg} [{elapsed_precise}] {bar:60.cyan/blue} ({pos}/{len})").unwrap().progress_chars("#/-"));
1✔
952
        pb.set_message(format!("{:<32}", "writing graph to GFA file"));
1✔
953

954
        for i in (0..self.len()).progress_with(pb) {
6✔
955
            let n = self.get_node(i);
6✔
956
            self.node_to_gfa(&n, &mut wtr, Some(&tag_func))?;
6✔
957
        }
958

959
        wtr.flush().unwrap();
1✔
960

1✔
961
        Ok(())
1✔
962
    }
1✔
963

964
    /// Write the graph to GFA format, with multithreading, 
965
    /// pass `tag_func=None` to write without tags
966
    pub fn to_gfa_otags_parallel<P: AsRef<Path> + Display + Sync, F: Fn(&Node<'_, K, D>) -> String + Sync>(
1✔
967
        &self,
1✔
968
        gfa_out: P,
1✔
969
        tag_func: Option<&F>,
1✔
970
    ) -> Result<(), Error> 
1✔
971
    where 
1✔
972
    K: Sync,
1✔
973
    D: Sync,
1✔
974
    {
1✔
975
        // split into ranges according to thread count
1✔
976
        let slices = current_num_threads();
1✔
977
        let n_nodes = self.len();
1✔
978
        let sz = n_nodes / slices + 1;
1✔
979

1✔
980
        debug!("n_nodes: {}", n_nodes);
1✔
981
        debug!("sz: {}", sz);
1✔
982

983
        let mut parallel_ranges = Vec::with_capacity(slices);
1✔
984
        let mut start = 0;
1✔
985
        while start < n_nodes {
4✔
986
            parallel_ranges.push(start..start + sz);
3✔
987
            start += sz;
3✔
988
        }
3✔
989

990
        let last_start = parallel_ranges.pop().expect("no kmers in parallel ranges").start;
1✔
991
        parallel_ranges.push(last_start..n_nodes);
1✔
992
        debug!("parallel ranges: {:?}", parallel_ranges);
1✔
993

994
        let mut files = Vec::with_capacity(current_num_threads());
1✔
995

996
        for i in 0..parallel_ranges.len() {
3✔
997
            files.push(format!("{}-{}.gfa", gfa_out, i));
3✔
998
        } 
3✔
999

1000
        let pb = ProgressBar::new(self.len() as u64);
1✔
1001
        pb.set_style(ProgressStyle::with_template("{msg} [{elapsed_precise}] {bar:60.cyan/blue} ({pos}/{len})").unwrap().progress_chars("#/-"));
1✔
1002
        pb.set_message(format!("{:<32}", "writing graph to GFA file"));
1✔
1003
        
1✔
1004
        
1✔
1005
        parallel_ranges.into_par_iter().enumerate().for_each(|(i, range)| {
3✔
1006
            let mut wtr = BufWriter::with_capacity(BUF, File::create(&files[i]).expect("error creating parallel gfa file"));
3✔
1007

1008
            for i in range {
9✔
1009
                let n = self.get_node(i);
6✔
1010
                self.node_to_gfa(&n, &mut wtr, tag_func).unwrap();
6✔
1011
                pb.inc(1);
6✔
1012
            }
6✔
1013

1014
            wtr.flush().unwrap();
3✔
1015
        });
3✔
1016

1✔
1017
        pb.finish_and_clear();
1✔
1018

1✔
1019
        // combine files
1✔
1020
        let mut out_file = BufWriter::with_capacity(BUF, File::create(format!("{}.gfa", gfa_out)).expect("error creating combined gfa file"));
1✔
1021
        writeln!(out_file, "H\tVN:Z:debruijn-rs")?;
1✔
1022

1023
        let pb = ProgressBar::new(files.len() as u64);
1✔
1024
        pb.set_style(ProgressStyle::with_template("{msg} [{elapsed_precise}] {bar:60.cyan/blue} ({pos}/{len})").unwrap().progress_chars("#/-"));
1✔
1025
        pb.set_message(format!("{:<32}", "combining files"));
1✔
1026

1027
        for file in files.iter() {
3✔
1028
            let open_file = File::open(file).expect("error opening parallel gfa file");
3✔
1029
            let mut reader = BufReader::new(open_file);
3✔
1030
            let mut buffer = [0; BUF];
3✔
1031

1032
            loop {
1033
                let linecount = reader.read(&mut buffer).unwrap();
6✔
1034
                if linecount == 0 { break }
6✔
1035
                out_file.write_all(&buffer[..linecount]).unwrap();
3✔
1036
            }
1037

1038
            remove_file(file).unwrap();
3✔
1039
        }
1040

1041
        out_file.flush().unwrap();
1✔
1042

1✔
1043
        Ok(())
1✔
1044
    }
1✔
1045

1046
    /// Write the graph to GFA format
1047
    pub fn to_gfa_partial<P: AsRef<Path>, F: Fn(&Node<'_, K, D>) -> String>(&self, gfa_out: P, tag_func: Option<&F>, nodes: Vec<usize>) -> Result<(), Error> {
×
1048
        let mut wtr = BufWriter::with_capacity(BUF, File::create(gfa_out).expect("error creating gfa file"));
×
1049
        writeln!(wtr, "H\tVN:Z:debruijn-rs")?;
×
1050

1051
        let pb = ProgressBar::new(self.len() as u64);
×
1052
        pb.set_style(ProgressStyle::with_template("{msg} [{elapsed_precise}] {bar:60.cyan/blue} ({pos}/{len})").unwrap().progress_chars("#/-"));
×
1053
        pb.set_message(format!("{:<32}", "writing graph to GFA file"));
×
1054

1055
        for i in nodes.into_iter().progress_with(pb) {
×
1056
            let n = self.get_node(i);
×
1057
            self.node_to_gfa(&n, &mut wtr, tag_func)?;
×
1058
        }
1059

1060
        wtr.flush().unwrap();
×
1061

×
1062
        Ok(())    
×
1063
    }
×
1064

1065
    pub fn to_json_rest<W: Write, F: Fn(&D) -> Value>(
×
1066
        &self,
×
1067
        fmt_func: F,
×
1068
        mut writer: &mut W,
×
1069
        rest: Option<Value>,
×
1070
    ) {
×
1071
        writeln!(writer, "{{\n\"nodes\": [").unwrap();
×
1072
        for i in 0..self.len() {
×
1073
            let node = self.get_node(i);
×
1074
            node.to_json(&fmt_func, writer);
×
1075
            if i == self.len() - 1 {
×
1076
                writeln!(writer).unwrap();
×
1077
            } else {
×
1078
                writeln!(writer, ",").unwrap();
×
1079
            }
×
1080
        }
1081
        writeln!(writer, "],").unwrap();
×
1082

×
1083
        writeln!(writer, "\"links\": [").unwrap();
×
1084
        for i in 0..self.len() {
×
1085
            let node = self.get_node(i);
×
1086
            match node.edges_to_json(writer) {
×
1087
                true => {
1088
                    if i == self.len() - 1 {
×
1089
                        writeln!(writer).unwrap();
×
1090
                    } else {
×
1091
                        writeln!(writer, ",").unwrap();
×
1092
                    }
×
1093
                }
1094
                _ => continue,
×
1095
            }
1096
        }
1097
        writeln!(writer, "]").unwrap();
×
1098

1099
        match rest {
×
1100
            Some(Value::Object(v)) => {
×
1101
                for (k, v) in v.iter() {
×
1102
                    writeln!(writer, ",").expect("io error");
×
1103
                    write!(writer, "\"{}\": ", k).expect("io error");
×
1104
                    serde_json::to_writer(&mut writer, v).expect("io error");
×
1105
                    writeln!(writer).expect("io error");
×
1106
                }
×
1107
            }
1108
            _ => {
×
1109
                writeln!(writer).expect("io error");
×
1110
            }
×
1111
        }
1112

1113
        writeln!(writer, "}}").expect("io error");
×
1114
    }
×
1115

1116
    /// Write the graph to JSON
1117
    pub fn to_json<W: Write, F: Fn(&D) -> Value, RF: Fn(&mut W)>(
×
1118
        &self,
×
1119
        fmt_func: F,
×
1120
        writer: &mut W,
×
1121
    ) {
×
1122
        self.to_json_rest(fmt_func, writer, None);
×
1123
    }
×
1124

1125
    /// Print a text representation of the graph.
1126
    pub fn print(&self) {
2✔
1127
        println!("DebruijnGraph {{ len: {}, K: {} }} :", self.len(), K::k());
2✔
1128
        for node in self.iter_nodes() {
8✔
1129
            println!("{:?}", node);
8✔
1130
        }
8✔
1131
    }
2✔
1132

1133
    pub fn print_with_data(&self) {
×
1134
        println!("DebruijnGraph {{ len: {}, K: {} }} :", self.len(), K::k());
×
1135
        for node in self.iter_nodes() {
×
1136
            println!("{:?} ({:?})", node, node.data());
×
1137
        }
×
1138
    }
×
1139

1140
    pub fn max_path_beam<F, F2>(&self, beam: usize, score: F, _solid_path: F2) -> Vec<(usize, Dir)>
×
1141
    where
×
1142
        F: Fn(&D) -> f32,
×
1143
        F2: Fn(&D) -> bool,
×
1144
    {
×
1145
        if self.is_empty() {
×
1146
            return Vec::default();
×
1147
        }
×
1148

×
1149
        let mut states = Vec::new();
×
1150

1151
        for i in 0..self.len() {
×
1152
            let node = self.get_node(i);
×
1153

×
1154
            // Initialize beam search on terminal nodes
×
1155
            if node.exts().num_exts_l() == 0 || node.exts().num_exts_r() == 0 {
×
1156
                let dir = if node.exts().num_exts_l() > 0 {
×
1157
                    Dir::Right
×
1158
                } else {
1159
                    Dir::Left
×
1160
                };
1161

1162
                let status = if node.exts().num_exts_l() == 0 && node.exts().num_exts_r() == 0 {
×
1163
                    Status::End
×
1164
                } else {
1165
                    Status::Active
×
1166
                };
1167

1168
                let mut path = SmallVec8::new();
×
1169
                path.push((i as u32, dir));
×
1170

×
1171
                let s = State {
×
1172
                    path,
×
1173
                    status,
×
1174
                    score: score(node.data()),
×
1175
                };
×
1176
                states.push(s);
×
1177
            }
×
1178
        }
1179

1180
        // No end nodes -- just start on the first node
1181
        if states.is_empty() {
×
1182
            // Make a start
×
1183
            let node = self.get_node(0);
×
1184
            let mut path = SmallVec8::new();
×
1185
            path.push((0, Dir::Left));
×
1186
            states.push(State {
×
1187
                path,
×
1188
                status: Status::Active,
×
1189
                score: score(node.data()),
×
1190
            });
×
1191
        }
×
1192

1193
        // Beam search until we can't find any more expansions
1194
        let mut active = true;
×
1195
        while active {
×
1196
            let mut new_states = Vec::with_capacity(states.len());
×
1197
            active = false;
×
1198

1199
            for s in states {
×
1200
                if s.status == Status::Active {
×
1201
                    active = true;
×
1202
                    let expanded = self.expand_state(&s, &score);
×
1203
                    new_states.extend(expanded);
×
1204
                } else {
×
1205
                    new_states.push(s)
×
1206
                }
1207
            }
1208

1209
            // workaround to sort by descending score - will panic if there are NaN scores
1210
            new_states.sort_by(|a, b| (-(a.score)).partial_cmp(&-(b.score)).unwrap());
×
1211
            new_states.truncate(beam);
×
1212
            states = new_states;
×
1213
        }
×
1214

1215
        for (i, state) in states.iter().take(5).enumerate() {
×
1216
            trace!("i:{}  -- {:?}", i, state);
×
1217
        }
1218

1219
        // convert back to using usize for node_id
1220
        states[0]
×
1221
            .path
×
1222
            .iter()
×
1223
            .map(|&(node, dir)| (node as usize, dir))
×
1224
            .collect()
×
1225
    }
×
1226

1227
    fn expand_state<F>(&self, state: &State, score: &F) -> SmallVec4<State>
×
1228
    where
×
1229
        F: Fn(&D) -> f32,
×
1230
    {
×
1231
        if state.status != Status::Active {
×
1232
            panic!("only attempt to expand active states")
×
1233
        }
×
1234

×
1235
        let (node_id, dir) = state.path[state.path.len() - 1];
×
1236
        let node = self.get_node(node_id as usize);
×
1237
        let mut new_states = SmallVec4::new();
×
1238

1239
        for (_, next_node_id, incoming_dir, _) in node.edges(dir.flip()) {
×
1240
            let next_node = self.get_node(next_node_id);
×
1241
            let new_score = state.score + score(next_node.data());
×
1242

×
1243
            let cycle = state
×
1244
                .path
×
1245
                .iter()
×
1246
                .any(|&(prev_node, _)| prev_node == (next_node_id as u32));
×
1247

1248
            let status = if cycle {
×
1249
                Status::Cycle
×
1250
            } else if next_node.edges(incoming_dir.flip()).is_empty() {
×
1251
                Status::End
×
1252
            } else {
1253
                Status::Active
×
1254
            };
1255

1256
            let mut new_path = state.path.clone();
×
1257
            new_path.push((next_node_id as u32, incoming_dir));
×
1258

×
1259
            let next_state = State {
×
1260
                path: new_path,
×
1261
                score: new_score,
×
1262
                status,
×
1263
            };
×
1264

×
1265
            new_states.push(next_state);
×
1266
        }
1267

1268
        new_states
×
1269
    }
×
1270

1271

1272
    pub fn iter_components(&self) -> IterComponents<K, D> {
4✔
1273
        let mut visited: Vec<bool> = Vec::with_capacity(self.len());
4✔
1274
        let pos = 0;
4✔
1275

1276
        for _i in 0..self.len() {
24✔
1277
            visited.push(false);
24✔
1278
        }
24✔
1279

1280
        IterComponents { 
4✔
1281
            graph: self, 
4✔
1282
            visited, 
4✔
1283
            pos }
4✔
1284
    }
4✔
1285

1286

1287
    /// iteratively returns 2D Vec with node_ids grouped according to the connected components they form
1288
    pub fn components_i(&self) -> Vec<Vec<usize>> {
1✔
1289
        let mut components: Vec<Vec<usize>> = Vec::with_capacity(self.len());
1✔
1290
        let mut visited: Vec<bool> = Vec::with_capacity(self.len());
1✔
1291

1292
        for _i in 0..self.len() {
6✔
1293
            visited.push(false);
6✔
1294
        }
6✔
1295

1296
        for i in 0..self.len() {
6✔
1297
            if !visited[i] {
6✔
1298
                let comp = self.component_i(&mut visited, i);
2✔
1299
                components.push(comp);
2✔
1300
            }
4✔
1301
        }
1302

1303
        components
1✔
1304
    }
1✔
1305

1306
    /// recursively detects which nodes form separate graph components
1307
    /// returns 2D vector with node ids per component
1308
    /// (may lead to stack overflow)
1309
    pub fn components_r(&self) -> Vec<Vec<usize>> {
2✔
1310
        let mut components: Vec<Vec<usize>> = Vec::with_capacity(self.len());
2✔
1311
        let mut visited: Vec<bool> = Vec::with_capacity(self.len());
2✔
1312

1313
        for _i in 0..self.len() {
8✔
1314
            visited.push(false);
8✔
1315
        }
8✔
1316

1317
        for i in 0..self.len() {
8✔
1318
            if !visited[i] {
8✔
1319
                let mut comp: Vec<usize> = Vec::new();
4✔
1320
                self.component_r(&mut visited, i, &mut comp);
4✔
1321
                components.push(comp);
4✔
1322
            }
4✔
1323
        }
1324

1325
        components
2✔
1326

2✔
1327
    }
2✔
1328

1329
    fn component_r<'a>(&'a self, visited: &'a mut Vec<bool>, i: usize, comp: &'a mut Vec<usize>) {
8✔
1330
        
8✔
1331
        visited[i] = true;
8✔
1332
        comp.push(i);
8✔
1333
        let mut edges = self.find_edges(i, Dir::Left);
8✔
1334
        let mut r_edges = self.find_edges(i, Dir::Right);
8✔
1335

8✔
1336
        edges.append(&mut r_edges);
8✔
1337

1338
        for (_, edge, _, _) in edges.iter() {
8✔
1339
            if !visited[*edge] {
8✔
1340
                self.component_r(visited, *edge, comp);
4✔
1341
            }
4✔
1342
        }
1343
    }
8✔
1344

1345
    fn component_i<'a>(&'a self, visited: &'a mut [bool], i: usize) -> Vec<usize> {
10✔
1346
        let mut edges: Vec<usize> = Vec::new();
10✔
1347
        let mut comp: Vec<usize> = Vec::new();
10✔
1348

10✔
1349
        edges.push(i);
10✔
1350

1351
        while let Some(current_edge) = edges.pop() {
40✔
1352
            if !visited[current_edge] { 
30✔
1353
                comp.push(current_edge);
30✔
1354
                visited[current_edge] = true;
30✔
1355

30✔
1356
                let mut l_edges = self.find_edges(current_edge, Dir::Left);
30✔
1357
                let mut r_edges = self.find_edges(current_edge, Dir::Right);
30✔
1358

30✔
1359
                l_edges.append(&mut r_edges);
30✔
1360

1361
                for (_, new_edge, _, _) in l_edges.into_iter() {
40✔
1362
                    if !visited[new_edge] {
40✔
1363
                        edges.push(new_edge);
20✔
1364
                    }
20✔
1365
                }
1366
            }
×
1367
        }
1368
        comp
10✔
1369
    }
10✔
1370

1371
    pub fn find_bad_nodes<F: Fn(&Node<'_, K, D>) -> bool>(&self, valid: F) -> Vec<usize> {
×
1372
        let mut bad_nodes = Vec::new();
×
1373

1374
        for (i, node) in enumerate(self.iter_nodes()) {
×
1375
            if !valid(&node) { bad_nodes.push(i); }
×
1376
        }
1377
        
1378
        bad_nodes
×
1379
    }
×
1380
}
1381

1382
impl<K: Kmer, SD: SummaryData<u8> + Debug> DebruijnGraph<K, SD> {
1383
    pub fn create_colors(&self, config: &SummaryConfig) -> Colors<SD> {
×
1384
        Colors::new(self, config)
×
1385
    }
×
1386
}
1387

1388
#[derive(Debug, Eq, PartialEq)]
1389
enum Status {
1390
    Active,
1391
    End,
1392
    Cycle,
1393
}
1394

1395
#[derive(Debug)]
1396
struct State {
1397
    path: SmallVec8<(u32, Dir)>,
1398
    score: f32,
1399
    status: Status,
1400
}
1401

1402
impl State {}
1403

1404
/// Iterator over nodes in a `DeBruijnGraph`
1405
pub struct NodeIter<'a, K: Kmer + 'a, D: Debug + 'a> {
1406
    graph: &'a DebruijnGraph<K, D>,
1407
    node_id: usize,
1408
}
1409

1410
impl<'a, K: Kmer + 'a, D: Debug + 'a> Iterator for NodeIter<'a, K, D> {
1411
    type Item = Node<'a, K, D>;
1412

1413
    fn next(&mut self) -> Option<Node<'a, K, D>> {
542,512✔
1414
        if self.node_id < self.graph.len() {
542,512✔
1415
            let node = self.graph.get_node(self.node_id);
542,383✔
1416
            self.node_id += 1;
542,383✔
1417
            Some(node)
542,383✔
1418
        } else {
1419
            None
129✔
1420
        }
1421
    }
542,512✔
1422
}
1423

1424
impl<'a, K: Kmer + 'a, D: Debug + 'a> IntoIterator for &'a DebruijnGraph<K, D> {
1425
    type Item = NodeKmer<'a, K, D>;
1426
    type IntoIter = NodeIntoIter<'a, K, D>;
1427

1428
    fn into_iter(self) -> Self::IntoIter {
2,238✔
1429
        NodeIntoIter {
2,238✔
1430
            graph: self,
2,238✔
1431
            node_id: 0,
2,238✔
1432
        }
2,238✔
1433
    }
2,238✔
1434
}
1435

1436
/// Iterator over nodes in a `DeBruijnGraph`
1437
pub struct NodeIntoIter<'a, K: Kmer + 'a, D: Debug + 'a> {
1438
    graph: &'a DebruijnGraph<K, D>,
1439
    node_id: usize,
1440
}
1441

1442
impl<'a, K: Kmer + 'a, D: Debug + 'a> Iterator for NodeIntoIter<'a, K, D> {
1443
    type Item = NodeKmer<'a, K, D>;
1444

1445
    fn next(&mut self) -> Option<Self::Item> {
275,560✔
1446
        if self.node_id < self.graph.len() {
275,560✔
1447
            let node_id = self.node_id;
275,560✔
1448
            let node = self.graph.get_node(node_id);
275,560✔
1449
            let node_seq = node.sequence();
275,560✔
1450

275,560✔
1451
            self.node_id += 1;
275,560✔
1452
            Some(NodeKmer {
275,560✔
1453
                node_id,
275,560✔
1454
                node_seq_slice: node_seq,
275,560✔
1455
                phantom_d: PhantomData,
275,560✔
1456
                phantom_k: PhantomData,
275,560✔
1457
            })
275,560✔
1458
        } else {
1459
            None
×
1460
        }
1461
    }
275,560✔
1462
}
1463

1464
/// A `DebruijnGraph` node with a reference to the sequence of the node.
1465
#[derive(Clone)]
1466
pub struct NodeKmer<'a, K: Kmer + 'a, D: Debug + 'a> {
1467
    pub node_id: usize,
1468
    node_seq_slice: DnaStringSlice<'a>,
1469
    phantom_k: PhantomData<K>,
1470
    phantom_d: PhantomData<D>,
1471
}
1472

1473
/// An iterator over the kmers in a `DeBruijn graph node`
1474
pub struct NodeKmerIter<'a, K: Kmer + 'a, D: Debug + 'a> {
1475
    kmer_id: usize,
1476
    kmer: K,
1477
    num_kmers: usize,
1478
    node_seq_slice: DnaStringSlice<'a>,
1479
    phantom_k: PhantomData<K>,
1480
    phantom_d: PhantomData<D>,
1481
}
1482

1483
impl<'a, K: Kmer + 'a, D: Debug + 'a> IntoIterator for NodeKmer<'a, K, D> {
1484
    type Item = K;
1485
    type IntoIter = NodeKmerIter<'a, K, D>;
1486

1487
    fn into_iter(self) -> Self::IntoIter {
551,120✔
1488
        let num_kmers = self.node_seq_slice.len() - K::k() + 1;
551,120✔
1489

1490
        let kmer = if num_kmers > 0 {
551,120✔
1491
            self.node_seq_slice.get_kmer::<K>(0)
551,120✔
1492
        } else {
1493
            K::empty()
×
1494
        };
1495

1496
        NodeKmerIter {
551,120✔
1497
            kmer_id: 0,
551,120✔
1498
            kmer,
551,120✔
1499
            num_kmers,
551,120✔
1500
            node_seq_slice: self.node_seq_slice,
551,120✔
1501
            phantom_k: PhantomData,
551,120✔
1502
            phantom_d: PhantomData,
551,120✔
1503
        }
551,120✔
1504
    }
551,120✔
1505
}
1506

1507
impl<'a, K: Kmer + 'a, D: Debug + 'a> Iterator for NodeKmerIter<'a, K, D> {
1508
    type Item = K;
1509

1510
    fn next(&mut self) -> Option<Self::Item> {
3,626,034✔
1511
        if self.num_kmers == self.kmer_id {
3,626,034✔
1512
            None
×
1513
        } else {
1514
            let current_kmer = self.kmer;
3,626,034✔
1515

3,626,034✔
1516
            self.kmer_id += 1;
3,626,034✔
1517
            if self.kmer_id < self.num_kmers {
3,626,034✔
1518
                let next_base = self.node_seq_slice.get(self.kmer_id + K::k() - 1);
3,555,428✔
1519
                let new_kmer = self.kmer.extend_right(next_base);
3,555,428✔
1520
                self.kmer = new_kmer;
3,555,428✔
1521
            }
3,555,428✔
1522

1523
            Some(current_kmer)
3,626,034✔
1524
        }
1525
    }
3,626,034✔
1526

1527
    fn size_hint(&self) -> (usize, Option<usize>) {
275,560✔
1528
        (self.num_kmers, Some(self.num_kmers))
275,560✔
1529
    }
275,560✔
1530

1531
    /// Provide a 'fast-forward' capability for this iterator
1532
    /// MPHF will use this to reduce the number of kmers that
1533
    /// need to be produced.
1534
    fn nth(&mut self, n: usize) -> Option<Self::Item> {
2,611,674✔
1535
        if n <= 4 {
2,611,674✔
1536
            // for small skips forward, shift one base at a time
1537
            for _ in 0..n {
2,349,694✔
1538
                self.next();
1,014,360✔
1539
            }
1,014,360✔
1540
        } else {
261,980✔
1541
            self.kmer_id += n;
261,980✔
1542
            self.kmer = self.node_seq_slice.get_kmer::<K>(self.kmer_id);
261,980✔
1543
        }
261,980✔
1544

1545
        self.next()
2,611,674✔
1546
    }
2,611,674✔
1547
}
1548

1549
/// Marker signifying that NodeKmerIter has a known size.
1550
impl<'a, K: Kmer + 'a, D: Debug + 'a> ExactSizeIterator for NodeKmerIter<'a, K, D> {}
1551

1552
/// Unbranched sequence in the DeBruijn graph
1553
pub struct Node<'a, K: Kmer + 'a, D: 'a> {
1554
    pub node_id: usize,
1555
    pub graph: &'a DebruijnGraph<K, D>,
1556
}
1557

1558
impl<'a, K: Kmer, D: Debug> Node<'a, K, D> {
1559
    /// Length of the sequence of this node
1560
    pub fn len(&self) -> usize {
1,632,566✔
1561
        self.graph.base.sequences.get(self.node_id).len()
1,632,566✔
1562
    }
1,632,566✔
1563

1564
    pub fn is_empty(&self) -> bool {
×
1565
        self.graph.base.sequences.get(self.node_id).is_empty()
×
1566
    }
×
1567

1568
    /// Sequence of the node
1569
    pub fn sequence(&self) -> DnaStringSlice<'a> {
3,394,782✔
1570
        self.graph.base.sequences.get(self.node_id)
3,394,782✔
1571
    }
3,394,782✔
1572

1573
    /// Reference to auxiliarly data associated with the node
1574
    pub fn data(&self) -> &'a D {
3,002,577✔
1575
        &self.graph.base.data[self.node_id]
3,002,577✔
1576
    }
3,002,577✔
1577

1578
    /// Extension bases from this node
1579
    pub fn exts(&self) -> Exts {
2,279,402✔
1580
        self.graph.base.exts[self.node_id]
2,279,402✔
1581
    }
2,279,402✔
1582

1583
    /// Edges leaving the left side of the node in the format
1584
    //// (target_node id, incoming side of target node, whether target node has is flipped)
1585
    pub fn l_edges(&self) -> SmallVec4<(u8, usize, Dir, bool)> {
65,336✔
1586
        self.graph.find_edges(self.node_id, Dir::Left)
65,336✔
1587
    }
65,336✔
1588

1589
    /// Edges leaving the right side of the node in the format
1590
    //// (target_node id, incoming side of target node, whether target node has is flipped)
1591
    pub fn r_edges(&self) -> SmallVec4<(u8, usize, Dir, bool)> {
65,336✔
1592
        self.graph.find_edges(self.node_id, Dir::Right)
65,336✔
1593
    }
65,336✔
1594

1595
    /// Edges leaving the 'dir' side of the node in the format
1596
    //// (target_node id, incoming side of target node, whether target node has is flipped)
1597
    pub fn edges(&self, dir: Dir) -> SmallVec4<(u8, usize, Dir, bool)> {
349,771✔
1598
        self.graph.find_edges(self.node_id, dir)
349,771✔
1599
    }
349,771✔
1600

1601
    fn to_json<F: Fn(&D) -> Value>(&self, func: &F, f: &mut dyn Write) {
×
1602
        write!(
×
1603
            f,
×
1604
            "{{\"id\":\"{}\",\"L\":{},\"D\":{},\"Se\":\"{:?}\"}}",
×
1605
            self.node_id,
×
1606
            self.sequence().len(),
×
1607
            (func)(self.data()),
×
1608
            self.sequence(),
×
1609
        )
×
1610
        .unwrap();
×
1611
    }
×
1612

1613
    fn edges_to_json(&self, f: &mut dyn Write) -> bool {
×
1614
        let mut wrote = false;
×
1615
        let edges = self.r_edges();
×
1616
        for (idx, &(_, id, incoming_dir, _)) in edges.iter().enumerate() {
×
1617
            write!(
×
1618
                f,
×
1619
                "{{\"source\":\"{}\",\"target\":\"{}\",\"D\":\"{}\"}}",
×
1620
                self.node_id,
×
1621
                id,
×
1622
                match incoming_dir {
×
1623
                    Dir::Left => "L",
×
1624
                    Dir::Right => "R",
×
1625
                }
1626
            )
1627
            .unwrap();
×
1628

×
1629
            if idx < edges.len() - 1 {
×
1630
                write!(f, ",").unwrap();
×
1631
            }
×
1632

1633
            wrote = true;
×
1634
        }
1635
        wrote
×
1636
    }
×
1637
}
1638

1639
// TODO make generic instead of u8 (u8 is sufficient for dbg)
1640
impl<K: Kmer, SD: SummaryData<u8> + Debug> Node<'_, K, SD>  {
1641
    /// get default format for dot edges based on node data
1642
    pub fn edge_dot_default(&self, colors: &Colors<SD>, base: u8, incoming_dir: Dir, flipped: bool) -> String {
129,837✔
1643
        // set color based on dir
1644
        let color = match incoming_dir {
129,837✔
1645
            Dir::Left => "blue",
65,059✔
1646
            Dir::Right => "red"
64,778✔
1647
        };
1648
        
1649
        if let Some(em) = self.data().edge_mults() {
129,837✔
1650
            
1651
            let dir = if flipped { 
129,837✔
1652
                incoming_dir 
51,537✔
1653
            } else {
1654
                incoming_dir.flip()
78,300✔
1655
            };
1656

1657
            // set penwidth based on count
1658
            let count = em.edge_mult(base, dir);
129,837✔
1659
            let penwidth = colors.edge_width(count);
129,837✔
1660

129,837✔
1661
            format!("[color={color}, penwidth={penwidth}, label=\"{count}\"]")
129,837✔
1662
        } else {
1663
            format!("[color={color}]")
×
1664
        }
1665
    }
129,837✔
1666

1667
    /// get default format for dot nodes, based on node data
1668
    pub fn node_dot_default(&self, colors: &Colors<SD>, config: &SummaryConfig, tag_translator: &bimap::BiHashMap<String, u8> , outline: bool) -> String {
65,321✔
1669
        // set color based on labels/fold change/p-value
65,321✔
1670
        let color = colors.node_color(self.data(), config, outline);
65,321✔
1671

65,321✔
1672
        let data_info = self.data().print(tag_translator, config);
65,321✔
1673
        const MIN_TEXT_WIDTH: usize = 40;
1674
        let wrap = if self.len() > MIN_TEXT_WIDTH { self.len() } else { MIN_TEXT_WIDTH };
65,321✔
1675

1676
        let label = textwrap::fill(&format!("id: {}, len: {}, seq: {}, {}", 
65,321✔
1677
            self.node_id,
65,321✔
1678
            self.len(),
65,321✔
1679
            self.sequence(),
65,321✔
1680
            data_info
65,321✔
1681
        ), wrap);
65,321✔
1682

65,321✔
1683
        format!("[style=filled, {color}, label=\"{label}\"]")
65,321✔
1684
    }
65,321✔
1685
}
1686

1687
impl<K: Kmer, D> fmt::Debug for Node<'_, K, D>
1688
where
1689
    D: Debug,
1690
{
1691
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
8✔
1692
        write!(
8✔
1693
            f,
8✔
1694
            "Node {{ id:{}, Exts: {:?}, L:{:?} R:{:?}, Seq: {:?}, Data: {:?} }}",
8✔
1695
            self.node_id,
8✔
1696
            self.exts(),
8✔
1697
            self.l_edges(),
8✔
1698
            self.r_edges(),
8✔
1699
            self.sequence().len(),
8✔
1700
            self.data()
8✔
1701
        )
8✔
1702
    }
8✔
1703
}
1704

1705
pub struct IterComponents<'a, K: Kmer, D> {
1706
    graph: &'a DebruijnGraph<K, D>,
1707
    visited: Vec<bool>,
1708
    pos: usize,
1709
}
1710

1711
impl<K: Kmer, D: Debug> Iterator for IterComponents<'_, K, D> {
1712
    type Item = Vec<usize>;
1713
    fn next(&mut self) -> Option<Self::Item> {
12✔
1714
        while self.pos < self.graph.len() {
28✔
1715
            if !self.visited[self.pos] {
24✔
1716
                let comp = self.graph.component_i(&mut self.visited, self.pos);
8✔
1717
                self.pos += 1;
8✔
1718
                return Some(comp)
8✔
1719
            } else {
16✔
1720
                self.pos += 1;
16✔
1721
            }
16✔
1722
        }
1723
        assert!(self.visited.iter().map(|x| *x as usize).sum::<usize>() == self.graph.len());
24✔
1724
        None
4✔
1725
    }
12✔
1726
    
1727
}
1728

1729
pub struct PathCompIter<'a, K: Kmer, D: Debug, F, F2> 
1730
where 
1731
F: Fn(&D) -> f32,
1732
F2: Fn(&D) -> bool
1733
{
1734
    graph: &'a DebruijnGraph<K, D>,
1735
    component_iterator: IterComponents<'a, K, D>,
1736
    graph_pos: usize,
1737
    score: F,
1738
    solid_path: F2,
1739
}
1740

1741
/// returns the component and the "best" path in the component
1742
impl<K: Kmer, D: Debug, F, F2> Iterator for PathCompIter<'_, K, D, F, F2> 
1743
where 
1744
F: Fn(&D) -> f32,
1745
F2: Fn(&D) -> bool
1746
{
1747
    type Item = (Vec<usize>, VecDeque<(usize, Dir)>,);
1748
    fn next(&mut self) -> Option<Self::Item> {
6✔
1749
        match self.component_iterator.next() {
6✔
1750
            Some(component) => {
4✔
1751
                let current_comp = component;
4✔
1752
                
4✔
1753
    
4✔
1754
                let mut best_node = current_comp[0];
4✔
1755
                let mut best_score = f32::MIN;
4✔
1756
                for c in current_comp.iter() {
12✔
1757
                    let node = self.graph.get_node(*c);
12✔
1758
                    let node_score = (self.score)(node.data());
12✔
1759
    
12✔
1760
                    if node_score > best_score {
12✔
1761
                        best_node = *c;
4✔
1762
                        best_score = node_score;
4✔
1763
                    }
8✔
1764
                }
1765
    
1766
                let oscore = |state| match state {
16✔
1767
                    None => 0.0,
4✔
1768
                    Some((id, _)) => (self.score)(self.graph.get_node(id).data()),
12✔
1769
                };
16✔
1770
    
1771
                let osolid_path = |state| match state {
4✔
1772
                    None => false,
×
1773
                    Some((id, _)) => (self.solid_path)(self.graph.get_node(id).data()),
4✔
1774
                };
4✔
1775
    
1776
                // Now expand in each direction, greedily taking the best path. Stop if we hit a node we've
1777
                // already put into the path
1778
                let mut used_nodes = HashSet::new();
4✔
1779
                let mut path = VecDeque::new();
4✔
1780
    
4✔
1781
                // Start w/ initial state
4✔
1782
                used_nodes.insert(best_node);
4✔
1783
                path.push_front((best_node, Dir::Left));
4✔
1784
    
1785
                for init in [(best_node, Dir::Left, false), (best_node, Dir::Right, true)].iter() {
8✔
1786
                    let &(start_node, dir, do_flip) = init;
8✔
1787
                    let mut current = (start_node, dir);
8✔
1788
    
1789
                    loop {
1790
                        let mut next = None;
12✔
1791
                        let (cur_id, incoming_dir) = current;
12✔
1792
                        let node = self.graph.get_node(cur_id);
12✔
1793
                        let edges = node.edges(incoming_dir.flip());
12✔
1794
    
12✔
1795
                        let mut solid_paths = 0;
12✔
1796
                        for (_, id, dir, _) in edges {
16✔
1797
                            let cand = Some((id, dir));
4✔
1798
                            if osolid_path(cand) {
4✔
1799
                                solid_paths += 1;
4✔
1800

4✔
1801
                                // second if clause is outside of first in original code (see max_path) 
4✔
1802
                                // but would basically ignore path validity.
4✔
1803
                                if oscore(cand) > oscore(next) {
4✔
1804
                                    next = cand;
4✔
1805
                                }
4✔
1806
                            }
×
1807
    
1808
                            if oscore(cand) > oscore(next) {
4✔
1809
                                next = cand;
×
1810
                            }
4✔
1811
                        }
1812
    
1813
                        if solid_paths > 1 {
12✔
UNCOV
1814
                            break;
×
1815
                        }
12✔
1816
    
1817
                        match next {
4✔
1818
                            Some((next_id, next_incoming)) if !used_nodes.contains(&next_id) => {
4✔
1819
                                if do_flip {
4✔
1820
                                    path.push_front((next_id, next_incoming.flip()));
4✔
1821
                                } else {
4✔
1822
                                    path.push_back((next_id, next_incoming));
×
1823
                                }
×
1824
    
1825
                                used_nodes.insert(next_id);
4✔
1826
                                current = (next_id, next_incoming);
4✔
1827
                            }
1828
                            _ => break,
8✔
1829
                        }
1830
                    }
1831
                }
1832
                
1833
                
1834
                Some((current_comp, path))
4✔
1835
            }, 
1836
            None => {
1837
                // should technically not need graph_pos after this 
1838
                self.graph_pos += 1;
2✔
1839
                None
2✔
1840
            }
1841
        }
1842
    }
6✔
1843
}
1844

1845
#[cfg(test)]
1846
mod test {
1847
    use std::{fs::File, io::BufReader};
1848

1849
    use crate::{kmer::Kmer16, summarizer::TagsCountsSumData};
1850

1851
    use super::DebruijnGraph;
1852

1853
    #[test]
1854
    #[cfg(not(feature = "sample128"))]
1855
    fn test_components() {
1856
        use crate::{summarizer::SummaryData, Dir, BUF};
1857

1858
        let path = "test_data/400.graph.dbg";
1859
        let file = BufReader::with_capacity(BUF, File::open(path).unwrap());
1860

1861
        let (graph, _, _): (DebruijnGraph<Kmer16, TagsCountsSumData>, Vec<String>, crate::summarizer::SummaryConfig) = 
1862
            bincode::deserialize_from(file).expect("error deserializing graph");
1863

1864
        let components = graph.iter_components();
1865

1866
        let check_components = [
1867
            vec![3, 67, 130, 133, 59, 119, 97, 110, 68, 137, 29, 84, 131, 43, 30, 91, 14, 70, 79, 142, 136, 105, 103, 62, 
1868
                141, 104, 134, 88, 38, 81, 108, 92, 135, 96, 116, 121, 63, 124, 106, 129, 132, 126, 93, 109, 83, 112, 118, 
1869
                123, 125, 78, 122, 115, 75, 128, 140, 111, 26, 143, 113],
1870
            vec![41, 138, 100, 139, 86],
1871
            vec![53, 117, 127],
1872
            vec![69, 144, 77, 120, 114, 107, 101],
1873
        ];
1874

1875
        let mut counter = 0;
1876

1877
        for component in components {
1878
            if component.len() > 1 { 
1879
                println!("component: {:?}", component);
1880
                assert_eq!(component, check_components[counter]);
1881
                counter += 1;
1882
            }
1883
        }
1884
        assert_eq!(vec![(139, Dir::Left)], graph.max_path(|data| data.score(), |_| true));
1885
    }
1886
}
1887

STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc