• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

jlab / rust-debruijn / 16073895693

04 Jul 2025 12:34PM UTC coverage: 87.158% (-0.3%) from 87.44%
16073895693

push

github

evolp
fix test_colors

7683 of 8815 relevant lines covered (87.16%)

4076677.86 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

72.02
/src/graph.rs
1
// Copyright 2017 10x Genomics
2

3
//! Containers for path-compressed De Bruijn graphs
4

5
use bit_set::BitSet;
6
use indicatif::ProgressBar;
7
use indicatif::ProgressIterator;
8
use indicatif::ProgressStyle;
9
use itertools::enumerate;
10
use log::{debug, trace};
11
use rayon::prelude::*;
12
use rayon::current_num_threads;
13
use serde_derive::{Deserialize, Serialize};
14
use smallvec::SmallVec;
15
use std::borrow::Borrow;
16

17
use std::collections::HashMap;
18
use std::collections::HashSet;
19
use std::collections::VecDeque;
20
use std::f32;
21
use std::fmt::{self, Debug, Display};
22
use std::fs::{remove_file, File};
23
use std::hash::Hash;
24
use std::io::BufWriter;
25
use std::io::{BufReader, Error, Read};
26
use std::io::Write;
27
use std::iter::FromIterator;
28
use std::marker::PhantomData;
29
use std::path::Path;
30

31
use boomphf::hashmap::BoomHashMap;
32

33
use serde_json;
34
use serde_json::Value;
35

36
type SmallVec4<T> = SmallVec<[T; 4]>;
37
type SmallVec8<T> = SmallVec<[T; 8]>;
38

39
use crate::bits_to_base;
40
use crate::colors::ColorMode;
41
use crate::colors::Colors;
42
use crate::compression::CompressionSpec;
43
use crate::dna_string::{DnaString, DnaStringSlice, PackedDnaStringSet};
44
use crate::graph;
45
use crate::summarizer::SummaryConfig;
46
use crate::summarizer::SummaryData;
47
use crate::summarizer::Translator;
48
use crate::BUF;
49
use crate::PROGRESS_STYLE;
50
use crate::{Dir, Exts, Kmer, Mer, Vmer};
51

52
/// A compressed DeBruijn graph carrying auxiliary data on each node of type `D`.
53
/// This type does not carry the sorted index arrays the allow the graph
54
/// to be walked efficiently. The `DeBruijnGraph` type wraps this type and add those
55
/// vectors.
56
#[derive(Serialize, Deserialize, Clone, Debug)]
57
pub struct BaseGraph<K, D> {
58
    pub sequences: PackedDnaStringSet,
59
    pub exts: Vec<Exts>,
60
    pub data: Vec<D>,
61
    pub stranded: bool,
62
    phantom: PhantomData<K>,
63
}
64

65
impl<K, D> BaseGraph<K, D> {
66
    pub fn new(stranded: bool) -> Self {
2,788✔
67
        BaseGraph {
2,788✔
68
            sequences: PackedDnaStringSet::new(),
2,788✔
69
            exts: Vec::new(),
2,788✔
70
            data: Vec::new(),
2,788✔
71
            phantom: PhantomData,
2,788✔
72
            stranded,
2,788✔
73
        }
2,788✔
74
    }
2,788✔
75

76
    pub fn len(&self) -> usize {
719,970✔
77
        self.sequences.len()
719,970✔
78
    }
719,970✔
79

80
    pub fn is_empty(&self) -> bool {
1✔
81
        self.sequences.is_empty()
1✔
82
    }
1✔
83

84
    pub fn combine<I: Iterator<Item = BaseGraph<K, D>>>(graphs: I) -> Self {
12✔
85
        let mut sequences = PackedDnaStringSet::new();
12✔
86
        let mut exts = Vec::new();
12✔
87
        let mut data = Vec::new();
12✔
88
        let mut stranded = Vec::new();
12✔
89

90
        for g in graphs {
2,108✔
91
            for s in 0..g.sequences.len() {
8,114✔
92
                sequences.add(&g.sequences.get(s));
8,114✔
93
            }
8,114✔
94

95
            exts.extend(g.exts);
2,096✔
96
            data.extend(g.data);
2,096✔
97
            stranded.push(g.stranded);
2,096✔
98
        }
99

100
        let out_stranded = stranded.iter().all(|x| *x);
12✔
101

12✔
102
        if !out_stranded && !stranded.iter().all(|x| !*x) {
2,096✔
103
            panic!("attempted to combine stranded and unstranded graphs");
×
104
        }
12✔
105

12✔
106
        BaseGraph {
12✔
107
            sequences,
12✔
108
            stranded: out_stranded,
12✔
109
            exts,
12✔
110
            data,
12✔
111
            phantom: PhantomData,
12✔
112
        }
12✔
113
    }
12✔
114
}
115

116
impl<K: Kmer, D> BaseGraph<K, D> {
117
    pub fn add<R: Borrow<u8>, S: IntoIterator<Item = R>>(
1,177,381✔
118
        &mut self,
1,177,381✔
119
        sequence: S,
1,177,381✔
120
        exts: Exts,
1,177,381✔
121
        data: D,
1,177,381✔
122
    ) {
1,177,381✔
123
        self.sequences.add(sequence);
1,177,381✔
124
        self.exts.push(exts);
1,177,381✔
125
        self.data.push(data);
1,177,381✔
126
    }
1,177,381✔
127
}
128

129
impl<K: Kmer + Send + Sync, D> BaseGraph<K, D> {
130
    pub fn finish(self) -> DebruijnGraph<K, D> {
581✔
131
        let indices: Vec<u32> = (0..self.len() as u32).collect();
581✔
132

133
        let left_order = {
581✔
134
            let mut kmers: Vec<K> = Vec::with_capacity(self.len());
581✔
135
            for idx in &indices {
620,013✔
136
                kmers.push(self.sequences.get(*idx as usize).first_kmer());
619,432✔
137
                
619,432✔
138
            }
619,432✔
139
            
140
            BoomHashMap::new_parallel(kmers, indices.clone())
581✔
141
        };
142

143
        let right_order = {
581✔
144
            let mut kmers: Vec<K> = Vec::with_capacity(self.len());
581✔
145
            for idx in &indices {
620,013✔
146
                kmers.push(self.sequences.get(*idx as usize).last_kmer());
619,432✔
147
            }
619,432✔
148
            
149
            BoomHashMap::new_parallel(kmers, indices)
581✔
150
        };
581✔
151
        debug!("finish graph loops: 2x {}", self.len());
581✔
152

153
        DebruijnGraph {
581✔
154
            base: self,
581✔
155
            left_order,
581✔
156
            right_order,
581✔
157
        }
581✔
158
    }
581✔
159
}
160

161
impl<K: Kmer, D> BaseGraph<K, D> {
162
    pub fn finish_serial(self) -> DebruijnGraph<K, D> {
111✔
163
        let indices: Vec<u32> = (0..self.len() as u32).collect();
111✔
164

165
        let left_order = {
111✔
166
            let mut kmers: Vec<K> = Vec::with_capacity(self.len());
111✔
167
            let mut sequences: Vec<String> = Vec::new();
111✔
168
            for idx in &indices {
557,119✔
169
                kmers.push(self.sequences.get(*idx as usize).first_kmer());
557,008✔
170
                sequences.push(self.sequences.get(*idx as usize).to_dna_string());
557,008✔
171
            }
557,008✔
172
            println!("left kmers: {:?}", kmers);
111✔
173
            println!("left seqs: {:?}", sequences);
111✔
174
            BoomHashMap::new(kmers, indices.clone())
111✔
175
        };
176

177
        let right_order = {
111✔
178
            let mut kmers: Vec<K> = Vec::with_capacity(self.len());
111✔
179
            let mut sequences: Vec<String> = Vec::new();
111✔
180
            for idx in &indices {
557,119✔
181
                kmers.push(self.sequences.get(*idx as usize).last_kmer());
557,008✔
182
                sequences.push(self.sequences.get(*idx as usize).to_dna_string());
557,008✔
183
            }
557,008✔
184
            println!("right kmers: {:?}", kmers);
111✔
185
            println!("right seqs: {:?}", sequences);
111✔
186
            BoomHashMap::new(kmers, indices)
111✔
187
        };
111✔
188

111✔
189
        DebruijnGraph {
111✔
190
            base: self,
111✔
191
            left_order,
111✔
192
            right_order,
111✔
193
        }
111✔
194
    }
111✔
195
}
196

197
/// A compressed DeBruijn graph carrying auxiliary data on each node of type `D`.
198
/// The struct carries sorted index arrays the allow the graph
199
/// to be walked efficiently.
200
#[derive(Serialize, Deserialize, Debug)]
201
pub struct DebruijnGraph<K: Hash, D> {
202
    pub base: BaseGraph<K, D>,
203
    left_order: BoomHashMap<K, u32>,
204
    right_order: BoomHashMap<K, u32>,
205
}
206

207
impl<K: Kmer, D: Debug> DebruijnGraph<K, D> {
208
    /// Total number of nodes in the DeBruijn graph
209
    pub fn len(&self) -> usize {
717,870✔
210
        self.base.len()
717,870✔
211
    }
717,870✔
212

213
    pub fn is_empty(&self) -> bool {
1✔
214
        self.base.is_empty()
1✔
215
    }
1✔
216

217
    /// Get a node given it's `node_id`
218
    pub fn get_node(&self, node_id: usize) -> Node<K, D> {
3,967,834✔
219
        Node {
3,967,834✔
220
            node_id,
3,967,834✔
221
            graph: self,
3,967,834✔
222
        }
3,967,834✔
223
    }
3,967,834✔
224

225
    /// Get a node given it's `node_id`
226
    pub fn get_node_kmer(&self, node_id: usize) -> NodeKmer<K, D> {
×
227
        let node = self.get_node(node_id);
×
228
        let node_seq = node.sequence();
×
229

×
230
        NodeKmer {
×
231
            node_id,
×
232
            node_seq_slice: node_seq,
×
233
            phantom_d: PhantomData,
×
234
            phantom_k: PhantomData,
×
235
        }
×
236
    }
×
237

238
    /// Return an iterator over all nodes in the graph
239
    pub fn iter_nodes(&self) -> NodeIter<K, D> {
129✔
240
        NodeIter {
129✔
241
            graph: self,
129✔
242
            node_id: 0,
129✔
243
        }
129✔
244
    }
129✔
245

246
    /// Find the edges leaving node `node_id` in direction `Dir`. Should generally be
247
    /// accessed via a Node wrapper object
248
    fn find_edges(&self, node_id: usize, dir: Dir) -> SmallVec4<(u8, usize, Dir, bool)> {
777,542✔
249
        let exts = self.base.exts[node_id];
777,542✔
250
        let sequence = self.base.sequences.get(node_id);
777,542✔
251
        let kmer: K = sequence.term_kmer(dir);
777,542✔
252
        let mut edges = SmallVec4::new();
777,542✔
253

254
        for i in 0..4 {
3,887,710✔
255
            if exts.has_ext(dir, i) {
3,110,168✔
256
                let link = self.find_link(kmer.extend(i, dir), dir).expect("missing link");
1,072,022✔
257
                edges.push((i, link.0, link.1, link.2));
1,072,022✔
258
            }
2,038,146✔
259
        }
260

261
        edges
777,542✔
262
    }
777,542✔
263

264
    /// Find the edges leaving node `node_id` in direction `Dir`. Should generally be
265
    /// accessed via a Node wrapper object
266
    /// 
267
    /// allows missing links
268
    fn _find_edges_sharded(&self, node_id: usize, dir: Dir) -> SmallVec4<(u8, usize, Dir, bool)> {
×
269
        let exts = self.base.exts[node_id];
×
270
        let sequence = self.base.sequences.get(node_id);
×
271
        let kmer: K = sequence.term_kmer(dir);
×
272
        let mut edges = SmallVec4::new();
×
273

274
        for i in 0..4 {
×
275
            if exts.has_ext(dir, i) {
×
276
                let link = self.find_link(kmer.extend(i, dir), dir); //.expect("missing link");
×
277
                if let Some(l) = link {
×
278
                    edges.push((i, l.0, l.1, l.2));
×
279
                }
×
280
                // Otherwise, this edge doesn't exist within this shard, so ignore it.
281
                // NOTE: this should be allowed in a 'complete' DBG
282
            }
×
283
        }
284

285
        edges
×
286
    }
×
287

288
    /// Seach for the kmer `kmer`, appearing at the given `side` of a node sequence.
289
    fn search_kmer(&self, kmer: K, side: Dir) -> Option<usize> {
4,044,663✔
290
        match side {
4,044,663✔
291
            Dir::Left => self.left_order.get(&kmer).map(|pos| *pos as usize),
2,021,688✔
292
            Dir::Right => self.right_order.get(&kmer).map(|pos| *pos as usize),
2,022,975✔
293
        }
294
    }
4,044,663✔
295

296
    /// Find a link in the graph, possibly handling a RC switch.
297
    pub fn find_link(&self, kmer: K, dir: Dir) -> Option<(usize, Dir, bool)> {
2,828,042✔
298
        // Only test self-consistent paths through
2,828,042✔
299
        // the edges
2,828,042✔
300
        // Avoids problems due to single kmer edges
2,828,042✔
301
        // (dir, next_side_incoming, rc)
2,828,042✔
302
        // (Dir::Left, Dir::Right, false) => true,
2,828,042✔
303
        // (Dir::Left, Dir::Left,  true) => true,
2,828,042✔
304
        // (Dir::Right, Dir::Left, false) => true,
2,828,042✔
305
        // (Dir::Right, Dir::Right, true) => true,
2,828,042✔
306

2,828,042✔
307
        let rc = kmer.rc();
2,828,042✔
308

2,828,042✔
309
        match dir {
2,828,042✔
310
            Dir::Left => {
311
                if let Some(idx) = self.search_kmer(kmer, Dir::Right) {
1,415,157✔
312
                    return Some((idx, Dir::Right, false));
806,354✔
313
                }
608,803✔
314

608,803✔
315
                if !self.base.stranded {
608,803✔
316
                    if let Some(idx) = self.search_kmer(rc, Dir::Left) {
608,803✔
317
                        return Some((idx, Dir::Left, true));
608,803✔
318
                    }
×
319
                }
×
320
            }
321

322
            Dir::Right => {
323
                if let Some(idx) = self.search_kmer(kmer, Dir::Left) {
1,412,885✔
324
                    return Some((idx, Dir::Left, false));
805,067✔
325
                }
607,818✔
326

607,818✔
327
                if !self.base.stranded {
607,818✔
328
                    if let Some(idx) = self.search_kmer(rc, Dir::Right) {
607,818✔
329
                        return Some((idx, Dir::Right, true));
607,818✔
330
                    }
×
331
                }
×
332
            }
333
        }
334

335
        None
×
336
    }
2,828,042✔
337

338
    /// Check whether the graph is fully compressed. Return `None` if it's compressed,
339
    /// otherwise return `Some(node1, node2)` representing a pair of node that could
340
    /// be collapsed. Probably only useful for testing.
341
    pub fn is_compressed<S: CompressionSpec<D>>(&self, spec: &S) -> Option<(usize, usize)> {
789✔
342
        for i in 0..self.len() {
93,743✔
343
            let n = self.get_node(i);
93,743✔
344

345
            for dir in &[Dir::Left, Dir::Right] {
281,229✔
346
                let dir_edges = n.edges(*dir);
187,486✔
347
                if dir_edges.len() == 1 {
187,486✔
348
                    let (_, next_id, return_dir, _) = dir_edges[0];
132,713✔
349
                    let next = self.get_node(next_id);
132,713✔
350

132,713✔
351
                    let ret_edges = next.edges(return_dir);
132,713✔
352
                    if ret_edges.len() == 1 {
132,713✔
353
                        // Test for us being a palindrome: this makes it OK
354
                        if n.len() == K::k() && n.sequence().first_kmer::<K>().is_palindrome() {
24✔
355
                            continue;
16✔
356
                        }
8✔
357

8✔
358
                        // Test for a neighbor being a palindrome: this makes it OK
8✔
359
                        if next.len() == K::k() && next.sequence().first_kmer::<K>().is_palindrome()
8✔
360
                        {
361
                            continue;
8✔
362
                        }
×
363

×
364
                        // Test for this edge representing a smooth circle (biting it's own tail):
×
365
                        if n.node_id == next_id {
×
366
                            continue;
×
367
                        }
×
368

×
369
                        if spec.join_test(n.data(), next.data()) {
×
370
                            // Found a unbranched edge that should have been eliminated
371
                            return Some((i, next_id));
×
372
                        }
×
373
                    }
132,689✔
374
                }
54,773✔
375
            }
376
        }
377

378
        None
789✔
379
    }
789✔
380

381
    /// Remove non-existent extensions that may be created due to filtered kmers
382
    /// 
383
    /// if `valid_nodes` if `None`, all nodes are valid
384
    pub fn fix_exts(&mut self, valid_nodes: Option<&BitSet>) {
247✔
385
        for i in 0..self.len() {
579,978✔
386
            let valid_exts = self.get_valid_exts(i, valid_nodes);
579,978✔
387
            self.base.exts[i] = valid_exts;
579,978✔
388
        }
579,978✔
389
    }
247✔
390

391
    pub fn get_valid_exts(&self, node_id: usize, valid_nodes: Option<&BitSet>) -> Exts {
579,978✔
392
        let mut new_exts = Exts::empty();
579,978✔
393
        let node = self.get_node(node_id);
579,978✔
394
        let exts = node.exts();
579,978✔
395
        let l_kmer: K = node.sequence().first_kmer();
579,978✔
396
        let r_kmer: K = node.sequence().last_kmer();
579,978✔
397

579,978✔
398
        let check_node = |id| match valid_nodes {
1,184,732✔
399
            Some(bs) => bs.contains(id),
1,142,634✔
400
            None => true,
42,098✔
401
        };
1,184,732✔
402

403
        for i in 0..4 {
2,899,890✔
404
            if exts.has_ext(Dir::Left, i) {
2,319,912✔
405
                match self.find_link(l_kmer.extend_left(i), Dir::Left) {
592,434✔
406
                    Some((target, _, _)) if check_node(target) => {
592,434✔
407
                        new_exts = new_exts.set(Dir::Left, i)
592,434✔
408
                    }
409
                    _ => (),
×
410
                }
411
            }
1,727,478✔
412

413
            if exts.has_ext(Dir::Right, i) {
2,319,912✔
414
                match self.find_link(r_kmer.extend_right(i), Dir::Right) {
592,298✔
415
                    Some((target, _, _)) if check_node(target) => {
592,298✔
416
                        new_exts = new_exts.set(Dir::Right, i)
592,298✔
417
                    }
418
                    _ => (),
×
419
                }
420
            }
1,727,614✔
421
        }
422

423
        new_exts
579,978✔
424
    }
579,978✔
425

426
    /// Find the highest-scoring, unambiguous path in the graph. Each node get a score
427
    /// given by `score`. Any node where `solid_path(node) == True` are valid paths -
428
    /// paths will be terminated if there are multiple valid paths emanating from a node.
429
    pub fn max_path<F, F2>(&self, score: F, solid_path: F2) -> Vec<(usize, Dir)>
×
430
    where
×
431
        F: Fn(&D) -> f32,
×
432
        F2: Fn(&D) -> bool,
×
433
    {
×
434
        if self.is_empty() {
×
435
            return Vec::default();
×
436
        }
×
437

×
438
        let mut best_node = 0;
×
439
        let mut best_score = f32::MIN;
×
440
        for i in 0..self.len() {
×
441
            let node = self.get_node(i);
×
442
            let node_score = score(node.data());
×
443

×
444
            if node_score > best_score {
×
445
                best_node = i;
×
446
                best_score = node_score;
×
447
            }
×
448
        }
449

450
        let oscore = |state| match state {
×
451
            None => 0.0,
×
452
            Some((id, _)) => score(self.get_node(id).data()),
×
453
        };
×
454

455
        let osolid_path = |state| match state {
×
456
            None => false,
×
457
            Some((id, _)) => solid_path(self.get_node(id).data()),
×
458
        };
×
459

460
        // Now expand in each direction, greedily taking the best path. Stop if we hit a node we've
461
        // already put into the path
462
        let mut used_nodes = HashSet::new();
×
463
        let mut path = VecDeque::new();
×
464

×
465
        // Start w/ initial state
×
466
        used_nodes.insert(best_node);
×
467
        path.push_front((best_node, Dir::Left));
×
468

469
        for init in [(best_node, Dir::Left, false), (best_node, Dir::Right, true)].iter() {
×
470
            let &(start_node, dir, do_flip) = init;
×
471
            let mut current = (start_node, dir);
×
472

473
            loop {
474
                let mut next = None;
×
475
                let (cur_id, incoming_dir) = current;
×
476
                let node = self.get_node(cur_id);
×
477
                let edges = node.edges(incoming_dir.flip());
×
478

×
479
                let mut solid_paths = 0;
×
480
                for (_, id, dir, _) in edges {
×
481
                    let cand = Some((id, dir));
×
482
                    if osolid_path(cand) {
×
483
                        solid_paths += 1;
×
484
                    }
×
485

486
                    if oscore(cand) > oscore(next) {
×
487
                        next = cand;
×
488
                    }
×
489
                }
490

491
                if solid_paths > 1 {
×
492
                    break;
×
493
                }
×
494

495
                match next {
×
496
                    Some((next_id, next_incoming)) if !used_nodes.contains(&next_id) => {
×
497
                        if do_flip {
×
498
                            path.push_front((next_id, next_incoming.flip()));
×
499
                        } else {
×
500
                            path.push_back((next_id, next_incoming));
×
501
                        }
×
502

503
                        used_nodes.insert(next_id);
×
504
                        current = (next_id, next_incoming);
×
505
                    }
506
                    _ => break,
×
507
                }
508
            }
509
        }
510

511
        Vec::from_iter(path)
×
512
    }
×
513

514

515
    /// Find the highest-scoring, unambiguous path in the graph. Each node get a score
516
    /// given by `score`. Any node where `solid_path(node) == True` are valid paths -
517
    /// paths will be terminated if there are multiple valid paths emanating from a node.
518
    /// Returns vec with path for each component
519
    pub fn max_path_comp<F, F2>(&self, score: F, solid_path: F2) -> Vec<VecDeque<(usize, Dir)>>
1✔
520
    where
1✔
521
        F: Fn(&D) -> f32,
1✔
522
        F2: Fn(&D) -> bool,
1✔
523
    {
1✔
524
        if self.is_empty() {
1✔
525
            let vec: Vec<VecDeque<(usize, Dir)>> = Vec::new();
×
526
            return vec;
×
527
        }
1✔
528

1✔
529
        let components = self.iter_components();
1✔
530
        let mut paths: Vec<VecDeque<(usize, Dir)>> = Vec::new();
1✔
531

532
        for component in components {
3✔
533

534
            let current_comp = &component;
2✔
535
            
2✔
536

2✔
537
            let mut best_node = current_comp[0];
2✔
538
            let mut best_score = f32::MIN;
2✔
539
            for c in current_comp.iter() {
6✔
540
                let node = self.get_node(*c);
6✔
541
                let node_score = score(node.data());
6✔
542

6✔
543
                if node_score > best_score {
6✔
544
                    best_node = *c;
2✔
545
                    best_score = node_score;
2✔
546
                }
4✔
547
            }
548

549
            let oscore = |state| match state {
6✔
550
                None => 0.0,
2✔
551
                Some((id, _)) => score(self.get_node(id).data()),
4✔
552
            };
6✔
553

554
            let osolid_path = |state| match state {
3✔
555
                None => false,
×
556
                Some((id, _)) => solid_path(self.get_node(id).data()),
3✔
557
            };
3✔
558

559
            // Now expand in each direction, greedily taking the best path. Stop if we hit a node we've
560
            // already put into the path
561
            let mut used_nodes = HashSet::new();
2✔
562
            let mut path = VecDeque::new();
2✔
563

2✔
564
            // Start w/ initial state
2✔
565
            used_nodes.insert(best_node);
2✔
566
            path.push_front((best_node, Dir::Left));
2✔
567

568
            for init in [(best_node, Dir::Left, false), (best_node, Dir::Right, true)].iter() {
4✔
569
                let &(start_node, dir, do_flip) = init;
4✔
570
                let mut current = (start_node, dir);
4✔
571

572
                loop {
573
                    let mut next = None;
5✔
574
                    let (cur_id, incoming_dir) = current;
5✔
575
                    let node = self.get_node(cur_id);
5✔
576
                    let edges = node.edges(incoming_dir.flip());
5✔
577

5✔
578
                    let mut solid_paths = 0;
5✔
579
                    for (_, id, dir, _) in edges {
8✔
580
                        let cand = Some((id, dir));
3✔
581
                        if osolid_path(cand) {
3✔
582
                            solid_paths += 1;
3✔
583
                        }
3✔
584

585
                        if oscore(cand) > oscore(next) {
3✔
586
                            next = cand;
2✔
587
                        }
2✔
588
                    }
589

590
                    if solid_paths > 1 {
5✔
591
                        break;
1✔
592
                    }
4✔
593

594
                    match next {
1✔
595
                        Some((next_id, next_incoming)) if !used_nodes.contains(&next_id) => {
1✔
596
                            if do_flip {
1✔
597
                                path.push_front((next_id, next_incoming.flip()));
×
598
                            } else {
1✔
599
                                path.push_back((next_id, next_incoming));
1✔
600
                            }
1✔
601

602
                            used_nodes.insert(next_id);
1✔
603
                            current = (next_id, next_incoming);
1✔
604
                        }
605
                        _ => break,
3✔
606
                    }
607
                }
608
            }
609
            
610
            paths.push(path);
2✔
611
            //paths.push(Vec::from_iter(path));
612
        }
613

614
        paths
1✔
615
    
616
    }
1✔
617

618
    pub fn iter_max_path_comp<F, F2>(&self, score: F, solid_path: F2) -> PathCompIter<K, D, F, F2> 
2✔
619
    where 
2✔
620
    F: Fn(&D) -> f32,
2✔
621
    F2: Fn(&D) -> bool
2✔
622
    {
2✔
623
        let component_iterator = self.iter_components();
2✔
624
        PathCompIter { graph: self, component_iterator, graph_pos: 0, score, solid_path }
2✔
625
    }
2✔
626

627
    /// write the paths from `iter_max_path_comp` to a fasta file
628
    pub fn path_to_fasta<F, F2>(&self, f: &mut dyn std::io::Write, path_iter: PathCompIter<K, D, F, F2>, return_lens: bool) -> (Vec<usize>, Vec<usize>)
1✔
629
    where 
1✔
630
    F: Fn(&D) -> f32,
1✔
631
    F2: Fn(&D) -> bool
1✔
632
    {
1✔
633
        // width of fasta lines
1✔
634
        let columns = 80;
1✔
635

1✔
636
        // sizes of components and of paths
1✔
637
        let mut comp_sizes = Vec::new();
1✔
638
        let mut path_lens = Vec::new();
1✔
639

640
        for (seq_counter, (component, path)) in path_iter.enumerate() {
2✔
641
            // get dna sequence from path
642
            let seq = self.sequence_of_path(path.iter());
2✔
643

2✔
644
            //write header with length & start node
2✔
645
            writeln!(f, ">path{} len={} start_node={}", seq_counter, seq.len(), path[0].0).unwrap();
2✔
646

2✔
647
            // calculate how sequence has to be split up
2✔
648
            let slices = (seq.len() / columns) + 1;
2✔
649
            let mut ranges = Vec::with_capacity(slices);
2✔
650

2✔
651
            let mut start = 0;
2✔
652
            while start < seq.len() {
5✔
653
                ranges.push(start..start + columns);
3✔
654
                start += columns;
3✔
655
            }
3✔
656

657
            let last_start = ranges.pop().expect("no kmers in parallel ranges").start;
2✔
658
            ranges.push(last_start..seq.len());
2✔
659

660
            // split up sequence and write to file accordingly
661
            for range in ranges {
5✔
662
                writeln!(f, "{:?}", seq.slice(range.start, range.end)).unwrap();
3✔
663
            }
3✔
664

665
            if return_lens {
2✔
666
                comp_sizes.push(component.len());
×
667
                path_lens.push(path.len());
×
668
            }
2✔
669
        }    
670

671
        (comp_sizes, path_lens)
1✔
672
        
1✔
673
    }
1✔
674

675

676
    /// Get the sequence of a path through the graph. The path is given as a sequence of node_id integers
677
    pub fn sequence_of_path<'a, I: 'a + Iterator<Item = &'a (usize, Dir)>>(
14,856✔
678
        &self,
14,856✔
679
        path: I,
14,856✔
680
    ) -> DnaString {
14,856✔
681
        let mut seq = DnaString::new();
14,856✔
682

683
        for (idx, &(node_id, dir)) in path.enumerate() {
565,131✔
684
            let start = if idx == 0 { 0 } else { K::k() - 1 };
565,131✔
685

686
            let node_seq = match dir {
565,131✔
687
                Dir::Left => self.get_node(node_id).sequence(),
292,328✔
688
                Dir::Right => self.get_node(node_id).sequence().rc(),
272,803✔
689
            };
690

691
            for p in start..node_seq.len() {
1,090,943✔
692
                seq.push(node_seq.get(p))
1,090,943✔
693
            }
694
        }
695

696
        seq
14,856✔
697
    }
14,856✔
698

699
    /// write a node to a dot file
700
    /// 
701
    /// ### Arguments: 
702
    /// 
703
    /// * `node`: [`Node<K, D>`] which will be written to a dot file
704
    /// * `node_label`: closure taking [`Node<K, D>`] and returning a string containing commands for dot nodes 
705
    /// * `edge_label`: closure taking [`Node<K, D>`], the base as a [`u8`], the incoming [`Dir`] of the edge 
706
    ///    and if the neighbor is flipped - returns a string containing commands for dot edges, 
707
    /// * `f`: writer
708
    fn node_to_dot<FN: Fn(&Node<K, D>) -> String, FE: Fn(&Node<K, D>, u8, Dir, bool) -> String>(
65,320✔
709
        &self,
65,320✔
710
        node: &Node<'_, K, D>,
65,320✔
711
        node_label: &FN,
65,320✔
712
        edge_label: &FE,
65,320✔
713
        f: &mut dyn Write,
65,320✔
714
    ) {
65,320✔
715
        writeln!(f, "n{} {}", node.node_id, node_label(node)).unwrap();
65,320✔
716
        assert_eq!(node.exts().val.count_ones() as usize, node.l_edges().len() + node.r_edges().len());
65,320✔
717

718
        for (base, id, incoming_dir, flipped) in node.l_edges() {
65,320✔
719
            writeln!(f, "n{} -> n{} {}", id, node.node_id, edge_label(node, base, incoming_dir, flipped)).unwrap();
65,062✔
720
        }
65,062✔
721

722
        for (base, id, incoming_dir, flipped) in node.r_edges() {
65,320✔
723
            writeln!(f, "n{} -> n{} {}", node.node_id, id, edge_label(node, base, incoming_dir, flipped)).unwrap();
64,778✔
724
        }
64,778✔
725
    }
65,320✔
726

727
    /// Write the graph to a dot file
728
    /// 
729
    /// ### Arguments: 
730
    /// 
731
    /// * `path`: path to the output file
732
    /// * `node_label`: closure taking [`Node<K, D>`] and returning a string containing commands for dot nodes 
733
    /// * `edge_label`: closure taking [`Node<K, D>`], the base as a [`u8`], the incoming [`Dir`] of the edge 
734
    ///    and if the neighbor is flipped - returns a string containing commands for dot edges, 
735
    pub fn to_dot<P, FN, FE>(&self, path: P, node_label: &FN, edge_label: &FE) 
1✔
736
    where 
1✔
737
    P: AsRef<Path>,
1✔
738
    FN: Fn(&Node<K, D>) -> String,
1✔
739
    FE: Fn(&Node<K, D>, u8, Dir, bool) -> String,
1✔
740
    {
1✔
741
        let mut f = BufWriter::with_capacity(BUF, File::create(path).expect("error creating dot file"));
1✔
742

1✔
743
        let pb = ProgressBar::new(self.len() as u64);
1✔
744
        pb.set_style(ProgressStyle::with_template(PROGRESS_STYLE).unwrap().progress_chars("#/-"));
1✔
745
        pb.set_message(format!("{:<32}", "writing graph to DOT file"));
1✔
746

1✔
747
        writeln!(&mut f, "digraph {{\nrankdir=\"LR\"\nmodel=subset\noverlap=scalexy").unwrap();
1✔
748
        for i in (0..self.len()).progress_with(pb) {
32,658✔
749
            self.node_to_dot(&self.get_node(i), node_label, edge_label, &mut f);
32,658✔
750
        }
32,658✔
751
        writeln!(&mut f, "}}").unwrap();
1✔
752
        
1✔
753
        f.flush().unwrap();
1✔
754
        debug!("large to dot loop: {}", self.len());
1✔
755
    }
1✔
756

757
    /// Write the graph to a dot file in parallel
758
    /// Will write in to n_threads files simultaniously,
759
    /// then go though the files and add the contents to a larger file, 
760
    /// and delete the small files.
761
    /// 
762
    /// ### Arguments: 
763
    /// 
764
    /// * `path`: path to the output file
765
    /// * `node_label`: closure taking [`Node<K, D>`] and returning a string containing commands for dot nodes 
766
    /// * `edge_label`: closure taking [`Node<K, D>`], the base as a [`u8`], the incoming [`Dir`] of the edge 
767
    ///    and if the neighbor is flipped - returns a string containing commands for dot edges, 
768
    pub fn to_dot_parallel<P, FN, FE>(&self, path: P, node_label: &FN, edge_label: &FE) 
1✔
769
    where 
1✔
770
        D: Sync,
1✔
771
        K: Sync,
1✔
772
        P: AsRef<Path> + Display + Sync,
1✔
773
        FN: Fn(&Node<K, D>) -> String + Sync,
1✔
774
        FE: Fn(&Node<K, D>, u8, Dir, bool) -> String + Sync,
1✔
775
    {        
1✔
776
        let slices = current_num_threads();
1✔
777
        let n_nodes = self.len();
1✔
778
        let sz = n_nodes / slices + 1;
1✔
779

1✔
780
        debug!("n_nodes: {}", n_nodes);
1✔
781
        debug!("sz: {}", sz);
1✔
782

783
        let mut parallel_ranges = Vec::with_capacity(slices);
1✔
784
        let mut start = 0;
1✔
785
        while start < n_nodes {
5✔
786
            parallel_ranges.push(start..start + sz);
4✔
787
            start += sz;
4✔
788
        }
4✔
789

790
        let last_start = parallel_ranges.pop().expect("no kmers in parallel ranges").start;
1✔
791
        parallel_ranges.push(last_start..n_nodes);
1✔
792
        debug!("parallel ranges: {:?}", parallel_ranges);
1✔
793

794
        let mut files = Vec::with_capacity(current_num_threads());
1✔
795

796
        for i in 0..parallel_ranges.len() {
4✔
797
            files.push(format!("{}-{}.dot", path, i));
4✔
798
        } 
4✔
799

800
        let pb = ProgressBar::new(self.len() as u64);
1✔
801
        pb.set_style(ProgressStyle::with_template(PROGRESS_STYLE).unwrap().progress_chars("#/-"));
1✔
802
        pb.set_message(format!("{:<32}", "writing graph to DOT files"));
1✔
803
    
1✔
804
        parallel_ranges.into_par_iter().enumerate().for_each(|(i, range)| {
4✔
805
            let mut f = BufWriter::with_capacity(BUF, File::create(&files[i]).expect("error creating parallel dot file"));
4✔
806

807
            for i in range {
32,662✔
808
                self.node_to_dot(&self.get_node(i), node_label, edge_label, &mut f);
32,658✔
809
                pb.inc(1);
32,658✔
810
            }
32,658✔
811

812
            f.flush().unwrap();
4✔
813
        });
4✔
814
        pb.finish_and_clear();
1✔
815

1✔
816
        let mut out_file = BufWriter::with_capacity(BUF, File::create(path).expect("error creating combined dot file"));
1✔
817

1✔
818
        writeln!(&mut out_file, "digraph {{\nrankdir=\"LR\"\nmodel=subset\noverlap=scalexy").unwrap();
1✔
819

1✔
820
        let pb = ProgressBar::new(files.len() as u64);
1✔
821
        pb.set_style(ProgressStyle::with_template(PROGRESS_STYLE).unwrap().progress_chars("#/-"));
1✔
822
        pb.set_message(format!("{:<32}", "combining files"));
1✔
823

824
        for file in files.iter().progress_with(pb) {
4✔
825
            let open_file = File::open(file).expect("error opening parallel dot file");
4✔
826
            let mut reader = BufReader::new(open_file);
4✔
827
            let mut buffer = [0; BUF];
4✔
828

829
            loop {
830
                let linecount = reader.read(&mut buffer).unwrap();
219✔
831
                if linecount == 0 { break }
219✔
832
                out_file.write_all(&buffer[..linecount]).unwrap();
215✔
833
            }
834

835
            remove_file(file).unwrap();
4✔
836
        }
837

838
        writeln!(&mut out_file, "}}").unwrap();
1✔
839

1✔
840
        out_file.flush().unwrap();
1✔
841

1✔
842

1✔
843
    }
1✔
844

845
    /// Write part of the graph to a dot file
846
    /// 
847
    /// ### Arguments: 
848
    /// 
849
    /// * `path`: path to the output file
850
    /// * `node_label`: closure taking [`Node<K, D>`] and returning a string containing commands for dot nodes 
851
    /// * `edge_label`: closure taking [`Node<K, D>`], the base as a [`u8`], the incoming [`Dir`] of the edge 
852
    ///    and if the neighbor is flipped - returns a string containing commands for dot edges, 
853
    /// * `nodes`: [`Vec<usize>`] listing all IDs of nodes which should be included
854
    pub fn to_dot_partial<P, FN, FE>(&self, path: P, node_label: &FN, edge_label: &FE, nodes: Vec<usize>) 
1✔
855
    where 
1✔
856
        P: AsRef<Path>,
1✔
857
        FN: Fn(&Node<K, D>) -> String,
1✔
858
        FE: Fn(&Node<K, D>, u8, Dir, bool) -> String,
1✔
859
    {
1✔
860
        let mut f = BufWriter::with_capacity(BUF, File::create(path).expect("error creating dot file"));
1✔
861

1✔
862
        let pb = ProgressBar::new(nodes.len() as u64);
1✔
863
        pb.set_style(ProgressStyle::with_template(PROGRESS_STYLE).unwrap().progress_chars("#/-"));
1✔
864
        pb.set_message(format!("{:<32}", "writing graph to DOT file"));
1✔
865

1✔
866
        writeln!(&mut f, "# {:?}", nodes).unwrap();
1✔
867
        writeln!(&mut f, "digraph {{\nrankdir=\"LR\"\nmodel=subset\noverlap=scalexy").unwrap();
1✔
868
        for i in nodes.into_iter().progress_with(pb) {
4✔
869
            self.node_to_dot(&self.get_node(i), node_label, edge_label, &mut f);
4✔
870
        }
4✔
871
        writeln!(&mut f, "}}").unwrap();
1✔
872

1✔
873
        f.flush().unwrap();
1✔
874

1✔
875
        debug!("large to dot loop: {}", self.len());
1✔
876
    }
1✔
877

878
    fn node_to_gfa<F: Fn(&Node<'_, K, D>) -> String>(
97,978✔
879
        &self,
97,978✔
880
        node: &Node<'_, K, D>,
97,978✔
881
        w: &mut dyn Write,
97,978✔
882
        tag_func: Option<&F>,
97,978✔
883
    ) -> Result<(), Error> {
97,978✔
884
        match tag_func {
97,978✔
885
            Some(f) => {
65,320✔
886
                let tags = (f)(node);
65,320✔
887
                writeln!(
65,320✔
888
                    w,
65,320✔
889
                    "S\t{}\t{}\t{}",
65,320✔
890
                    node.node_id,
65,320✔
891
                    node.sequence().to_dna_string(),
65,320✔
892
                    tags
65,320✔
893
                )?;
65,320✔
894
            }
895
            _ => writeln!(
32,658✔
896
                w,
32,658✔
897
                "S\t{}\t{}",
32,658✔
898
                node.node_id,
32,658✔
899
                node.sequence().to_dna_string()
32,658✔
900
            )?,
32,658✔
901
        }
902

903
        for (_, target, dir, _) in node.l_edges() {
97,978✔
904
            if target >= node.node_id {
97,591✔
905
                let to_dir = match dir {
48,748✔
906
                    Dir::Left => "+",
19,433✔
907
                    Dir::Right => "-",
29,315✔
908
                };
909
                writeln!(
48,748✔
910
                    w,
48,748✔
911
                    "L\t{}\t-\t{}\t{}\t{}M",
48,748✔
912
                    node.node_id,
48,748✔
913
                    target,
48,748✔
914
                    to_dir,
48,748✔
915
                    K::k() - 1
48,748✔
916
                )?;
48,748✔
917
            }
48,843✔
918
        }
919

920
        for (_, target, dir, _) in node.r_edges() {
97,978✔
921
            if target > node.node_id {
97,165✔
922
                let to_dir = match dir {
48,634✔
923
                    Dir::Left => "+",
29,415✔
924
                    Dir::Right => "-",
19,219✔
925
                };
926
                writeln!(
48,634✔
927
                    w,
48,634✔
928
                    "L\t{}\t+\t{}\t{}\t{}M",
48,634✔
929
                    node.node_id,
48,634✔
930
                    target,
48,634✔
931
                    to_dir,
48,634✔
932
                    K::k() - 1
48,634✔
933
                )?;
48,634✔
934
            }
48,531✔
935
        }
936

937
        Ok(())
97,978✔
938
    }
97,978✔
939

940
    /// Write the graph to GFA format
941
    pub fn to_gfa<P: AsRef<Path>>(&self, gfa_out: P) -> Result<(), Error> {
1✔
942
        let wtr = BufWriter::with_capacity(BUF, File::create(gfa_out).expect("error creating gfa file"));
1✔
943
        self.write_gfa(&mut std::io::BufWriter::new(wtr))
1✔
944
    }
1✔
945

946
    pub fn write_gfa(&self, wtr: &mut impl Write) -> Result<(), Error> {
1✔
947
        writeln!(wtr, "H\tVN:Z:debruijn-rs")?;
1✔
948

949
        type DummyFn<K, D> = fn(&Node<'_, K, D>) -> String;
950
        let dummy_opt: Option<&DummyFn<K, D>> = None;
1✔
951

1✔
952
        let pb = ProgressBar::new(self.len() as u64);
1✔
953
        pb.set_style(ProgressStyle::with_template(PROGRESS_STYLE).unwrap().progress_chars("#/-"));
1✔
954
        pb.set_message(format!("{:<32}", "writing graph to GFA file"));
1✔
955

956
        for i in (0..self.len()).progress_with(pb) {
32,658✔
957
            let n = self.get_node(i);
32,658✔
958
            self.node_to_gfa(&n, wtr, dummy_opt)?;
32,658✔
959
        }
960

961
        wtr.flush().unwrap();
1✔
962

1✔
963
        Ok(())
1✔
964
    }
1✔
965

966
    /// Write the graph to GFA format
967
    pub fn to_gfa_with_tags<P: AsRef<Path>, F: Fn(&Node<'_, K, D>) -> String>(
1✔
968
        &self,
1✔
969
        gfa_out: P,
1✔
970
        tag_func: F,
1✔
971
    ) -> Result<(), Error> {
1✔
972
        let mut wtr = BufWriter::with_capacity(BUF, File::create(gfa_out).expect("error creatinf gfa file"));
1✔
973
        writeln!(wtr, "H\tVN:Z:debruijn-rs")?;
1✔
974

975
        let pb = ProgressBar::new(self.len() as u64);
1✔
976
        pb.set_style(ProgressStyle::with_template(PROGRESS_STYLE).unwrap().progress_chars("#/-"));
1✔
977
        pb.set_message(format!("{:<32}", "writing graph to GFA file"));
1✔
978

979
        for i in (0..self.len()).progress_with(pb) {
32,658✔
980
            let n = self.get_node(i);
32,658✔
981
            self.node_to_gfa(&n, &mut wtr, Some(&tag_func))?;
32,658✔
982
        }
983

984
        wtr.flush().unwrap();
1✔
985

1✔
986
        Ok(())
1✔
987
    }
1✔
988

989
    /// Write the graph to GFA format, with multithreading, 
990
    /// pass `tag_func=None` to write without tags
991
    pub fn to_gfa_otags_parallel<P: AsRef<Path> + Display + Sync, F: Fn(&Node<'_, K, D>) -> String + Sync>(
1✔
992
        &self,
1✔
993
        gfa_out: P,
1✔
994
        tag_func: Option<&F>,
1✔
995
    ) -> Result<(), Error> 
1✔
996
    where 
1✔
997
    K: Sync,
1✔
998
    D: Sync,
1✔
999
    {
1✔
1000
        // split into ranges according to thread count
1✔
1001
        let slices = current_num_threads();
1✔
1002
        let n_nodes = self.len();
1✔
1003
        let sz = n_nodes / slices + 1;
1✔
1004

1✔
1005
        debug!("n_nodes: {}", n_nodes);
1✔
1006
        debug!("sz: {}", sz);
1✔
1007

1008
        let mut parallel_ranges = Vec::with_capacity(slices);
1✔
1009
        let mut start = 0;
1✔
1010
        while start < n_nodes {
5✔
1011
            parallel_ranges.push(start..start + sz);
4✔
1012
            start += sz;
4✔
1013
        }
4✔
1014

1015
        let last_start = parallel_ranges.pop().expect("no kmers in parallel ranges").start;
1✔
1016
        parallel_ranges.push(last_start..n_nodes);
1✔
1017
        debug!("parallel ranges: {:?}", parallel_ranges);
1✔
1018

1019
        let mut files = Vec::with_capacity(current_num_threads());
1✔
1020

1021
        for i in 0..parallel_ranges.len() {
4✔
1022
            files.push(format!("{}-{}.gfa", gfa_out, i));
4✔
1023
        } 
4✔
1024

1025
        let pb = ProgressBar::new(self.len() as u64);
1✔
1026
        pb.set_style(ProgressStyle::with_template(PROGRESS_STYLE).unwrap().progress_chars("#/-"));
1✔
1027
        pb.set_message(format!("{:<32}", "writing graph to GFA file"));
1✔
1028
        
1✔
1029
        
1✔
1030
        parallel_ranges.into_par_iter().enumerate().for_each(|(i, range)| {
4✔
1031
            let mut wtr = BufWriter::with_capacity(BUF, File::create(&files[i]).expect("error creating parallel gfa file"));
4✔
1032

1033
            for i in range {
32,662✔
1034
                let n = self.get_node(i);
32,658✔
1035
                self.node_to_gfa(&n, &mut wtr, tag_func).unwrap();
32,658✔
1036
                pb.inc(1);
32,658✔
1037
            }
32,658✔
1038

1039
            wtr.flush().unwrap();
4✔
1040
        });
4✔
1041

1✔
1042
        pb.finish_and_clear();
1✔
1043

1✔
1044
        // combine files
1✔
1045
        let mut out_file = BufWriter::with_capacity(BUF, File::create(format!("{}.gfa", gfa_out)).expect("error creating combined gfa file"));
1✔
1046
        writeln!(out_file, "H\tVN:Z:debruijn-rs")?;
1✔
1047

1048
        let pb = ProgressBar::new(files.len() as u64);
1✔
1049
        pb.set_style(ProgressStyle::with_template(PROGRESS_STYLE).unwrap().progress_chars("#/-"));
1✔
1050
        pb.set_message(format!("{:<32}", "combining files"));
1✔
1051

1052
        for file in files.iter() {
4✔
1053
            let open_file = File::open(file).expect("error opening parallel gfa file");
4✔
1054
            let mut reader = BufReader::new(open_file);
4✔
1055
            let mut buffer = [0; BUF];
4✔
1056

1057
            loop {
1058
                let linecount = reader.read(&mut buffer).unwrap();
113✔
1059
                if linecount == 0 { break }
113✔
1060
                out_file.write_all(&buffer[..linecount]).unwrap();
109✔
1061
            }
1062

1063
            remove_file(file).unwrap();
4✔
1064
        }
1065

1066
        out_file.flush().unwrap();
1✔
1067

1✔
1068
        Ok(())
1✔
1069
    }
1✔
1070

1071
    /// Write the graph to GFA format
1072
    pub fn to_gfa_partial<P: AsRef<Path>, F: Fn(&Node<'_, K, D>) -> String>(&self, gfa_out: P, tag_func: Option<&F>, nodes: Vec<usize>) -> Result<(), Error> {
1✔
1073
        let mut wtr = BufWriter::with_capacity(BUF, File::create(gfa_out).expect("error creating gfa file"));
1✔
1074
        writeln!(wtr, "H\tVN:Z:debruijn-rs")?;
1✔
1075

1076
        let pb = ProgressBar::new(self.len() as u64);
1✔
1077
        pb.set_style(ProgressStyle::with_template(PROGRESS_STYLE).unwrap().progress_chars("#/-"));
1✔
1078
        pb.set_message(format!("{:<32}", "writing graph to GFA file"));
1✔
1079

1080
        for i in nodes.into_iter().progress_with(pb) {
4✔
1081
            let n = self.get_node(i);
4✔
1082
            self.node_to_gfa(&n, &mut wtr, tag_func)?;
4✔
1083
        }
1084

1085
        wtr.flush().unwrap();
1✔
1086

1✔
1087
        Ok(())    
1✔
1088
    }
1✔
1089

1090
    pub fn to_json_rest<W: Write, F: Fn(&D) -> Value>(
×
1091
        &self,
×
1092
        fmt_func: F,
×
1093
        mut writer: &mut W,
×
1094
        rest: Option<Value>,
×
1095
    ) {
×
1096
        writeln!(writer, "{{\n\"nodes\": [").unwrap();
×
1097
        for i in 0..self.len() {
×
1098
            let node = self.get_node(i);
×
1099
            node.to_json(&fmt_func, writer);
×
1100
            if i == self.len() - 1 {
×
1101
                writeln!(writer).unwrap();
×
1102
            } else {
×
1103
                writeln!(writer, ",").unwrap();
×
1104
            }
×
1105
        }
1106
        writeln!(writer, "],").unwrap();
×
1107

×
1108
        writeln!(writer, "\"links\": [").unwrap();
×
1109
        for i in 0..self.len() {
×
1110
            let node = self.get_node(i);
×
1111
            match node.edges_to_json(writer) {
×
1112
                true => {
1113
                    if i == self.len() - 1 {
×
1114
                        writeln!(writer).unwrap();
×
1115
                    } else {
×
1116
                        writeln!(writer, ",").unwrap();
×
1117
                    }
×
1118
                }
1119
                _ => continue,
×
1120
            }
1121
        }
1122
        writeln!(writer, "]").unwrap();
×
1123

1124
        match rest {
×
1125
            Some(Value::Object(v)) => {
×
1126
                for (k, v) in v.iter() {
×
1127
                    writeln!(writer, ",").expect("io error");
×
1128
                    write!(writer, "\"{}\": ", k).expect("io error");
×
1129
                    serde_json::to_writer(&mut writer, v).expect("io error");
×
1130
                    writeln!(writer).expect("io error");
×
1131
                }
×
1132
            }
1133
            _ => {
×
1134
                writeln!(writer).expect("io error");
×
1135
            }
×
1136
        }
1137

1138
        writeln!(writer, "}}").expect("io error");
×
1139
    }
×
1140

1141
    /// Write the graph to JSON
1142
    pub fn to_json<W: Write, F: Fn(&D) -> Value, RF: Fn(&mut W)>(
×
1143
        &self,
×
1144
        fmt_func: F,
×
1145
        writer: &mut W,
×
1146
    ) {
×
1147
        self.to_json_rest(fmt_func, writer, None);
×
1148
    }
×
1149

1150
    /// Print a text representation of the graph.
1151
    pub fn print(&self) {
2✔
1152
        println!("DebruijnGraph {{ len: {}, K: {} }} :", self.len(), K::k());
2✔
1153
        for node in self.iter_nodes() {
8✔
1154
            println!("{:?}", node);
8✔
1155
        }
8✔
1156
    }
2✔
1157

1158
    pub fn print_with_data(&self) {
×
1159
        println!("DebruijnGraph {{ len: {}, K: {} }} :", self.len(), K::k());
×
1160
        for node in self.iter_nodes() {
×
1161
            println!("{:?} ({:?})", node, node.data());
×
1162
        }
×
1163
    }
×
1164

1165
    pub fn max_path_beam<F, F2>(&self, beam: usize, score: F, _solid_path: F2) -> Vec<(usize, Dir)>
×
1166
    where
×
1167
        F: Fn(&D) -> f32,
×
1168
        F2: Fn(&D) -> bool,
×
1169
    {
×
1170
        if self.is_empty() {
×
1171
            return Vec::default();
×
1172
        }
×
1173

×
1174
        let mut states = Vec::new();
×
1175

1176
        for i in 0..self.len() {
×
1177
            let node = self.get_node(i);
×
1178

×
1179
            // Initialize beam search on terminal nodes
×
1180
            if node.exts().num_exts_l() == 0 || node.exts().num_exts_r() == 0 {
×
1181
                let dir = if node.exts().num_exts_l() > 0 {
×
1182
                    Dir::Right
×
1183
                } else {
1184
                    Dir::Left
×
1185
                };
1186

1187
                let status = if node.exts().num_exts_l() == 0 && node.exts().num_exts_r() == 0 {
×
1188
                    Status::End
×
1189
                } else {
1190
                    Status::Active
×
1191
                };
1192

1193
                let mut path = SmallVec8::new();
×
1194
                path.push((i as u32, dir));
×
1195

×
1196
                let s = State {
×
1197
                    path,
×
1198
                    status,
×
1199
                    score: score(node.data()),
×
1200
                };
×
1201
                states.push(s);
×
1202
            }
×
1203
        }
1204

1205
        // No end nodes -- just start on the first node
1206
        if states.is_empty() {
×
1207
            // Make a start
×
1208
            let node = self.get_node(0);
×
1209
            let mut path = SmallVec8::new();
×
1210
            path.push((0, Dir::Left));
×
1211
            states.push(State {
×
1212
                path,
×
1213
                status: Status::Active,
×
1214
                score: score(node.data()),
×
1215
            });
×
1216
        }
×
1217

1218
        // Beam search until we can't find any more expansions
1219
        let mut active = true;
×
1220
        while active {
×
1221
            let mut new_states = Vec::with_capacity(states.len());
×
1222
            active = false;
×
1223

1224
            for s in states {
×
1225
                if s.status == Status::Active {
×
1226
                    active = true;
×
1227
                    let expanded = self.expand_state(&s, &score);
×
1228
                    new_states.extend(expanded);
×
1229
                } else {
×
1230
                    new_states.push(s)
×
1231
                }
1232
            }
1233

1234
            // workaround to sort by descending score - will panic if there are NaN scores
1235
            new_states.sort_by(|a, b| (-(a.score)).partial_cmp(&-(b.score)).unwrap());
×
1236
            new_states.truncate(beam);
×
1237
            states = new_states;
×
1238
        }
×
1239

1240
        for (i, state) in states.iter().take(5).enumerate() {
×
1241
            trace!("i:{}  -- {:?}", i, state);
×
1242
        }
1243

1244
        // convert back to using usize for node_id
1245
        states[0]
×
1246
            .path
×
1247
            .iter()
×
1248
            .map(|&(node, dir)| (node as usize, dir))
×
1249
            .collect()
×
1250
    }
×
1251

1252
    fn expand_state<F>(&self, state: &State, score: &F) -> SmallVec4<State>
×
1253
    where
×
1254
        F: Fn(&D) -> f32,
×
1255
    {
×
1256
        if state.status != Status::Active {
×
1257
            panic!("only attempt to expand active states")
×
1258
        }
×
1259

×
1260
        let (node_id, dir) = state.path[state.path.len() - 1];
×
1261
        let node = self.get_node(node_id as usize);
×
1262
        let mut new_states = SmallVec4::new();
×
1263

1264
        for (_, next_node_id, incoming_dir, _) in node.edges(dir.flip()) {
×
1265
            let next_node = self.get_node(next_node_id);
×
1266
            let new_score = state.score + score(next_node.data());
×
1267

×
1268
            let cycle = state
×
1269
                .path
×
1270
                .iter()
×
1271
                .any(|&(prev_node, _)| prev_node == (next_node_id as u32));
×
1272

1273
            let status = if cycle {
×
1274
                Status::Cycle
×
1275
            } else if next_node.edges(incoming_dir.flip()).is_empty() {
×
1276
                Status::End
×
1277
            } else {
1278
                Status::Active
×
1279
            };
1280

1281
            let mut new_path = state.path.clone();
×
1282
            new_path.push((next_node_id as u32, incoming_dir));
×
1283

×
1284
            let next_state = State {
×
1285
                path: new_path,
×
1286
                score: new_score,
×
1287
                status,
×
1288
            };
×
1289

×
1290
            new_states.push(next_state);
×
1291
        }
1292

1293
        new_states
×
1294
    }
×
1295

1296

1297
    pub fn iter_components(&self) -> IterComponents<K, D> {
4✔
1298
        let mut visited: Vec<bool> = Vec::with_capacity(self.len());
4✔
1299
        let pos = 0;
4✔
1300

1301
        for _i in 0..self.len() {
24✔
1302
            visited.push(false);
24✔
1303
        }
24✔
1304

1305
        IterComponents { 
4✔
1306
            graph: self, 
4✔
1307
            visited, 
4✔
1308
            pos }
4✔
1309
    }
4✔
1310

1311

1312
    /// iteratively returns 2D Vec with node_ids grouped according to the connected components they form
1313
    pub fn components_i(&self) -> Vec<Vec<usize>> {
1✔
1314
        let mut components: Vec<Vec<usize>> = Vec::with_capacity(self.len());
1✔
1315
        let mut visited: Vec<bool> = Vec::with_capacity(self.len());
1✔
1316

1317
        for _i in 0..self.len() {
6✔
1318
            visited.push(false);
6✔
1319
        }
6✔
1320

1321
        for i in 0..self.len() {
6✔
1322
            if !visited[i] {
6✔
1323
                let comp = self.component_i(&mut visited, i);
2✔
1324
                components.push(comp);
2✔
1325
            }
4✔
1326
        }
1327

1328
        components
1✔
1329
    }
1✔
1330

1331
    /// recursively detects which nodes form separate graph components
1332
    /// returns 2D vector with node ids per component
1333
    /// (may lead to stack overflow)
1334
    pub fn components_r(&self) -> Vec<Vec<usize>> {
2✔
1335
        let mut components: Vec<Vec<usize>> = Vec::with_capacity(self.len());
2✔
1336
        let mut visited: Vec<bool> = Vec::with_capacity(self.len());
2✔
1337

1338
        for _i in 0..self.len() {
8✔
1339
            visited.push(false);
8✔
1340
        }
8✔
1341

1342
        for i in 0..self.len() {
8✔
1343
            if !visited[i] {
8✔
1344
                let mut comp: Vec<usize> = Vec::new();
4✔
1345
                self.component_r(&mut visited, i, &mut comp);
4✔
1346
                components.push(comp);
4✔
1347
            }
4✔
1348
        }
1349

1350
        components
2✔
1351

2✔
1352
    }
2✔
1353

1354
    fn component_r<'a>(&'a self, visited: &'a mut Vec<bool>, i: usize, comp: &'a mut Vec<usize>) {
8✔
1355
        
8✔
1356
        visited[i] = true;
8✔
1357
        comp.push(i);
8✔
1358
        let mut edges = self.find_edges(i, Dir::Left);
8✔
1359
        let mut r_edges = self.find_edges(i, Dir::Right);
8✔
1360

8✔
1361
        edges.append(&mut r_edges);
8✔
1362

1363
        for (_, edge, _, _) in edges.iter() {
8✔
1364
            if !visited[*edge] {
8✔
1365
                self.component_r(visited, *edge, comp);
4✔
1366
            }
4✔
1367
        }
1368
    }
8✔
1369

1370
    fn component_i<'a>(&'a self, visited: &'a mut [bool], i: usize) -> Vec<usize> {
10✔
1371
        let mut edges: Vec<usize> = Vec::new();
10✔
1372
        let mut comp: Vec<usize> = Vec::new();
10✔
1373

10✔
1374
        edges.push(i);
10✔
1375

1376
        while let Some(current_edge) = edges.pop() {
40✔
1377
            if !visited[current_edge] { 
30✔
1378
                comp.push(current_edge);
30✔
1379
                visited[current_edge] = true;
30✔
1380

30✔
1381
                let mut l_edges = self.find_edges(current_edge, Dir::Left);
30✔
1382
                let mut r_edges = self.find_edges(current_edge, Dir::Right);
30✔
1383

30✔
1384
                l_edges.append(&mut r_edges);
30✔
1385

1386
                for (_, new_edge, _, _) in l_edges.into_iter() {
40✔
1387
                    if !visited[new_edge] {
40✔
1388
                        edges.push(new_edge);
20✔
1389
                    }
20✔
1390
                }
1391
            }
×
1392
        }
1393
        comp
10✔
1394
    }
10✔
1395

1396
    /// iterate over all edges of the graph, item: (node, ext base, ext dir, target node)
1397
    pub fn iter_edges(&self) -> EdgeIter<'_, K, D> {
×
1398
        EdgeIter::new(self)
×
1399
    }
×
1400

1401
    pub fn find_bad_nodes<F: Fn(&Node<'_, K, D>) -> bool>(&self, valid: F) -> Vec<usize> {
×
1402
        let mut bad_nodes = Vec::new();
×
1403

1404
        for (i, node) in enumerate(self.iter_nodes()) {
×
1405
            if !valid(&node) { bad_nodes.push(i); }
×
1406
        }
1407
        
1408
        bad_nodes
×
1409
    }
×
1410
}
1411

1412
impl<K: Kmer, SD: Debug> DebruijnGraph<K, SD> {
1413
    pub fn create_colors<'a, 'b: 'a, DI>(&'a self, config: &SummaryConfig, color_mode: ColorMode<'b>) -> Colors<'b, SD, DI> 
×
1414
    where 
×
1415
    SD: SummaryData<DI>,
×
1416
    {
×
1417
        Colors::new(self, config, color_mode)
×
1418
    }
×
1419
    
1420
    /// edge mults will contain hanging edges if the nodes were filtered
1421
    pub fn fix_edge_mults<DI>(&mut self) 
1✔
1422
    where 
1✔
1423
        SD: SummaryData<DI>
1✔
1424
    {
1✔
1425
        if self.get_node(0).data().edge_mults().is_some() {
1✔
1426
            for i in 0..self.len() {
×
1427
                self.base.data[i].fix_edge_mults(self.base.exts[i]);
×
1428
            }
×
1429
        }
1✔
1430
    }
1✔
1431
}
1432

1433
#[derive(Debug, Eq, PartialEq)]
1434
enum Status {
1435
    Active,
1436
    End,
1437
    Cycle,
1438
}
1439

1440
#[derive(Debug)]
1441
struct State {
1442
    path: SmallVec8<(u32, Dir)>,
1443
    score: f32,
1444
    status: Status,
1445
}
1446

1447
impl State {}
1448

1449
/// Iterator over nodes in a `DeBruijnGraph`
1450
pub struct NodeIter<'a, K: Kmer + 'a, D: Debug + 'a> {
1451
    graph: &'a DebruijnGraph<K, D>,
1452
    node_id: usize,
1453
}
1454

1455
impl<'a, K: Kmer + 'a, D: Debug + 'a> Iterator for NodeIter<'a, K, D> {
1456
    type Item = Node<'a, K, D>;
1457

1458
    fn next(&mut self) -> Option<Node<'a, K, D>> {
535,812✔
1459
        if self.node_id < self.graph.len() {
535,812✔
1460
            let node = self.graph.get_node(self.node_id);
535,683✔
1461
            self.node_id += 1;
535,683✔
1462
            Some(node)
535,683✔
1463
        } else {
1464
            None
129✔
1465
        }
1466
    }
535,812✔
1467
}
1468

1469
impl<'a, K: Kmer + 'a, D: Debug + 'a> IntoIterator for &'a DebruijnGraph<K, D> {
1470
    type Item = NodeKmer<'a, K, D>;
1471
    type IntoIter = NodeIntoIter<'a, K, D>;
1472

1473
    fn into_iter(self) -> Self::IntoIter {
2,220✔
1474
        NodeIntoIter {
2,220✔
1475
            graph: self,
2,220✔
1476
            node_id: 0,
2,220✔
1477
        }
2,220✔
1478
    }
2,220✔
1479
}
1480

1481
/// Iterator over nodes in a `DeBruijnGraph`
1482
pub struct NodeIntoIter<'a, K: Kmer + 'a, D: Debug + 'a> {
1483
    graph: &'a DebruijnGraph<K, D>,
1484
    node_id: usize,
1485
}
1486

1487
impl<'a, K: Kmer + 'a, D: Debug + 'a> Iterator for NodeIntoIter<'a, K, D> {
1488
    type Item = NodeKmer<'a, K, D>;
1489

1490
    fn next(&mut self) -> Option<Self::Item> {
180,832✔
1491
        if self.node_id < self.graph.len() {
180,832✔
1492
            let node_id = self.node_id;
180,832✔
1493
            let node = self.graph.get_node(node_id);
180,832✔
1494
            let node_seq = node.sequence();
180,832✔
1495

180,832✔
1496
            self.node_id += 1;
180,832✔
1497
            Some(NodeKmer {
180,832✔
1498
                node_id,
180,832✔
1499
                node_seq_slice: node_seq,
180,832✔
1500
                phantom_d: PhantomData,
180,832✔
1501
                phantom_k: PhantomData,
180,832✔
1502
            })
180,832✔
1503
        } else {
1504
            None
×
1505
        }
1506
    }
180,832✔
1507
}
1508

1509
/// A `DebruijnGraph` node with a reference to the sequence of the node.
1510
#[derive(Clone)]
1511
pub struct NodeKmer<'a, K: Kmer + 'a, D: Debug + 'a> {
1512
    pub node_id: usize,
1513
    node_seq_slice: DnaStringSlice<'a>,
1514
    phantom_k: PhantomData<K>,
1515
    phantom_d: PhantomData<D>,
1516
}
1517

1518
/// An iterator over the kmers in a `DeBruijn graph node`
1519
pub struct NodeKmerIter<'a, K: Kmer + 'a, D: Debug + 'a> {
1520
    kmer_id: usize,
1521
    kmer: K,
1522
    num_kmers: usize,
1523
    node_seq_slice: DnaStringSlice<'a>,
1524
    phantom_k: PhantomData<K>,
1525
    phantom_d: PhantomData<D>,
1526
}
1527

1528
impl<'a, K: Kmer + 'a, D: Debug + 'a> IntoIterator for NodeKmer<'a, K, D> {
1529
    type Item = K;
1530
    type IntoIter = NodeKmerIter<'a, K, D>;
1531

1532
    fn into_iter(self) -> Self::IntoIter {
361,664✔
1533
        let num_kmers = self.node_seq_slice.len() - K::k() + 1;
361,664✔
1534

1535
        let kmer = if num_kmers > 0 {
361,664✔
1536
            self.node_seq_slice.get_kmer::<K>(0)
361,664✔
1537
        } else {
1538
            K::empty()
×
1539
        };
1540

1541
        NodeKmerIter {
361,664✔
1542
            kmer_id: 0,
361,664✔
1543
            kmer,
361,664✔
1544
            num_kmers,
361,664✔
1545
            node_seq_slice: self.node_seq_slice,
361,664✔
1546
            phantom_k: PhantomData,
361,664✔
1547
            phantom_d: PhantomData,
361,664✔
1548
        }
361,664✔
1549
    }
361,664✔
1550
}
1551

1552
impl<'a, K: Kmer + 'a, D: Debug + 'a> Iterator for NodeKmerIter<'a, K, D> {
1553
    type Item = K;
1554

1555
    fn next(&mut self) -> Option<Self::Item> {
2,764,878✔
1556
        if self.num_kmers == self.kmer_id {
2,764,878✔
1557
            None
×
1558
        } else {
1559
            let current_kmer = self.kmer;
2,764,878✔
1560

2,764,878✔
1561
            self.kmer_id += 1;
2,764,878✔
1562
            if self.kmer_id < self.num_kmers {
2,764,878✔
1563
                let next_base = self.node_seq_slice.get(self.kmer_id + K::k() - 1);
2,717,548✔
1564
                let new_kmer = self.kmer.extend_right(next_base);
2,717,548✔
1565
                self.kmer = new_kmer;
2,717,548✔
1566
            }
2,717,548✔
1567

1568
            Some(current_kmer)
2,764,878✔
1569
        }
1570
    }
2,764,878✔
1571

1572
    fn size_hint(&self) -> (usize, Option<usize>) {
180,832✔
1573
        (self.num_kmers, Some(self.num_kmers))
180,832✔
1574
    }
180,832✔
1575

1576
    /// Provide a 'fast-forward' capability for this iterator
1577
    /// MPHF will use this to reduce the number of kmers that
1578
    /// need to be produced.
1579
    fn nth(&mut self, n: usize) -> Option<Self::Item> {
1,990,100✔
1580
        if n <= 4 {
1,990,100✔
1581
            // for small skips forward, shift one base at a time
1582
            for _ in 0..n {
1,788,918✔
1583
                self.next();
774,778✔
1584
            }
774,778✔
1585
        } else {
201,182✔
1586
            self.kmer_id += n;
201,182✔
1587
            self.kmer = self.node_seq_slice.get_kmer::<K>(self.kmer_id);
201,182✔
1588
        }
201,182✔
1589

1590
        self.next()
1,990,100✔
1591
    }
1,990,100✔
1592
}
1593

1594
/// Marker signifying that NodeKmerIter has a known size.
1595
impl<'a, K: Kmer + 'a, D: Debug + 'a> ExactSizeIterator for NodeKmerIter<'a, K, D> {}
1596

1597
/// Unbranched sequence in the DeBruijn graph
1598
pub struct Node<'a, K: Kmer + 'a, D: 'a> {
1599
    pub node_id: usize,
1600
    pub graph: &'a DebruijnGraph<K, D>,
1601
}
1602

1603
impl<'a, K: Kmer, D: Debug> Node<'a, K, D> {
1604
    /// Length of the sequence of this node
1605
    pub fn len(&self) -> usize {
1,273,288✔
1606
        self.graph.base.sequences.get(self.node_id).len()
1,273,288✔
1607
    }
1,273,288✔
1608

1609
    pub fn is_empty(&self) -> bool {
×
1610
        self.graph.base.sequences.get(self.node_id).is_empty()
×
1611
    }
×
1612

1613
    /// Sequence of the node
1614
    pub fn sequence(&self) -> DnaStringSlice<'a> {
2,662,384✔
1615
        self.graph.base.sequences.get(self.node_id)
2,662,384✔
1616
    }
2,662,384✔
1617

1618
    /// Reference to auxiliarly data associated with the node
1619
    pub fn data(&self) -> &'a D {
3,118,913✔
1620
        &self.graph.base.data[self.node_id]
3,118,913✔
1621
    }
3,118,913✔
1622

1623
    /// Extension bases from this node
1624
    pub fn exts(&self) -> Exts {
1,861,902✔
1625
        self.graph.base.exts[self.node_id]
1,861,902✔
1626
    }
1,861,902✔
1627

1628
    /// Edges leaving the left side of the node in the format
1629
    /// (base, target_node id, incoming side of target node, whether target node is flipped)
1630
    pub fn l_edges(&self) -> SmallVec4<(u8, usize, Dir, bool)> {
228,626✔
1631
        self.graph.find_edges(self.node_id, Dir::Left)
228,626✔
1632
    }
228,626✔
1633

1634
    /// Edges leaving the right side of the node in the format
1635
    /// (base, target_node id, incoming side of target node, whether target node is flipped)
1636
    pub fn r_edges(&self) -> SmallVec4<(u8, usize, Dir, bool)> {
228,626✔
1637
        self.graph.find_edges(self.node_id, Dir::Right)
228,626✔
1638
    }
228,626✔
1639

1640
    /// Edges leaving the 'dir' side of the node in the format
1641
    /// (base, target_node id, incoming side of target node, whether target node is flipped)
1642
    pub fn edges(&self, dir: Dir) -> SmallVec4<(u8, usize, Dir, bool)> {
320,214✔
1643
        self.graph.find_edges(self.node_id, dir)
320,214✔
1644
    }
320,214✔
1645

1646
    fn to_json<F: Fn(&D) -> Value>(&self, func: &F, f: &mut dyn Write) {
×
1647
        write!(
×
1648
            f,
×
1649
            "{{\"id\":\"{}\",\"L\":{},\"D\":{},\"Se\":\"{:?}\"}}",
×
1650
            self.node_id,
×
1651
            self.sequence().len(),
×
1652
            (func)(self.data()),
×
1653
            self.sequence(),
×
1654
        )
×
1655
        .unwrap();
×
1656
    }
×
1657

1658
    fn edges_to_json(&self, f: &mut dyn Write) -> bool {
×
1659
        let mut wrote = false;
×
1660
        let edges = self.r_edges();
×
1661
        for (idx, &(_, id, incoming_dir, _)) in edges.iter().enumerate() {
×
1662
            write!(
×
1663
                f,
×
1664
                "{{\"source\":\"{}\",\"target\":\"{}\",\"D\":\"{}\"}}",
×
1665
                self.node_id,
×
1666
                id,
×
1667
                match incoming_dir {
×
1668
                    Dir::Left => "L",
×
1669
                    Dir::Right => "R",
×
1670
                }
1671
            )
1672
            .unwrap();
×
1673

×
1674
            if idx < edges.len() - 1 {
×
1675
                write!(f, ",").unwrap();
×
1676
            }
×
1677

1678
            wrote = true;
×
1679
        }
1680
        wrote
×
1681
    }
×
1682
}
1683

1684
// TODO make generic instead of u8 (u8 is sufficient for dbg)
1685
impl<K: Kmer, SD: Debug> Node<'_, K, SD>  {
1686
    /// get default format for dot edges based on node data
1687
    pub fn edge_dot_default<DI>(&self, colors: &Colors<SD, DI>, base: u8, incoming_dir: Dir, flipped: bool) -> String 
129,845✔
1688
    where SD: SummaryData<DI>
129,845✔
1689
    {
129,845✔
1690
        // set color based on dir
1691
        let color = match incoming_dir {
129,845✔
1692
            Dir::Left => "blue",
65,064✔
1693
            Dir::Right => "red"
64,781✔
1694
        };
1695
        
1696
        if let Some(em) = self.data().edge_mults() {
129,845✔
1697
            
1698
            let dir = if flipped { 
129,845✔
1699
                incoming_dir 
51,540✔
1700
            } else {
1701
                incoming_dir.flip()
78,305✔
1702
            };
1703

1704
            // set penwidth based on count
1705
            let count = em.edge_mult(base, dir);
129,845✔
1706
            let penwidth = colors.edge_width(count);
129,845✔
1707

129,845✔
1708
            format!("[color={color}, penwidth={penwidth}, label=\"{}: {count}\"]", bits_to_base(base))
129,845✔
1709
        } else {
1710
            format!("[color={color}, penwidth={}]", colors.edge_width(1)) // since there should be no edge mults, this will return default value
×
1711
        }
1712
    }
129,845✔
1713

1714
    /// get default format for dot nodes, based on node data
1715
    pub fn node_dot_default<DI>(&self, colors: &Colors<SD, DI>, config: &SummaryConfig, translator: &Translator, outline: bool) -> String
65,336✔
1716
    where SD: SummaryData<DI>
65,336✔
1717
    {
65,336✔
1718
        // set color based on labels/fold change/p-value
65,336✔
1719
        let color = colors.node_color(self.data(), config, outline);
65,336✔
1720

65,336✔
1721
        let data_info = self.data().print(translator, config);
65,336✔
1722
        const MIN_TEXT_WIDTH: usize = 40;
1723
        let wrap = if self.len() > MIN_TEXT_WIDTH { self.len() } else { MIN_TEXT_WIDTH };
65,336✔
1724

1725
        let label = textwrap::fill(&format!("id: {}, len: {}, exts: {:?}, seq: {}\n{}", 
65,336✔
1726
            self.node_id,
65,336✔
1727
            self.len(),
65,336✔
1728
            self.exts(),
65,336✔
1729
            self.sequence(),
65,336✔
1730
            data_info
65,336✔
1731
        ), wrap);
65,336✔
1732

65,336✔
1733
        format!("[style=filled, {color}, label=\"{label}\"]")
65,336✔
1734
    }
65,336✔
1735
}
1736

1737
impl<K: Kmer, D> fmt::Debug for Node<'_, K, D>
1738
where
1739
    D: Debug,
1740
{
1741
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
8✔
1742
        write!(
8✔
1743
            f,
8✔
1744
            "Node {{ id:{}, Exts: {:?}, L:{:?} R:{:?}, Seq: {:?}, Data: {:?} }}",
8✔
1745
            self.node_id,
8✔
1746
            self.exts(),
8✔
1747
            self.l_edges(),
8✔
1748
            self.r_edges(),
8✔
1749
            self.sequence().len(),
8✔
1750
            self.data()
8✔
1751
        )
8✔
1752
    }
8✔
1753
}
1754

1755
pub struct IterComponents<'a, K: Kmer, D> {
1756
    graph: &'a DebruijnGraph<K, D>,
1757
    visited: Vec<bool>,
1758
    pos: usize,
1759
}
1760

1761
impl<K: Kmer, D: Debug> Iterator for IterComponents<'_, K, D> {
1762
    type Item = Vec<usize>;
1763
    fn next(&mut self) -> Option<Self::Item> {
12✔
1764
        while self.pos < self.graph.len() {
28✔
1765
            if !self.visited[self.pos] {
24✔
1766
                let comp = self.graph.component_i(&mut self.visited, self.pos);
8✔
1767
                self.pos += 1;
8✔
1768
                return Some(comp)
8✔
1769
            } else {
16✔
1770
                self.pos += 1;
16✔
1771
            }
16✔
1772
        }
1773
        assert!(self.visited.iter().map(|x| *x as usize).sum::<usize>() == self.graph.len());
24✔
1774
        None
4✔
1775
    }
12✔
1776
    
1777
}
1778

1779
pub struct PathCompIter<'a, K: Kmer, D: Debug, F, F2> 
1780
where 
1781
F: Fn(&D) -> f32,
1782
F2: Fn(&D) -> bool
1783
{
1784
    graph: &'a DebruijnGraph<K, D>,
1785
    component_iterator: IterComponents<'a, K, D>,
1786
    graph_pos: usize,
1787
    score: F,
1788
    solid_path: F2,
1789
}
1790

1791
/// returns the component and the "best" path in the component
1792
impl<K: Kmer, D: Debug, F, F2> Iterator for PathCompIter<'_, K, D, F, F2> 
1793
where 
1794
F: Fn(&D) -> f32,
1795
F2: Fn(&D) -> bool
1796
{
1797
    type Item = (Vec<usize>, VecDeque<(usize, Dir)>,);
1798
    fn next(&mut self) -> Option<Self::Item> {
6✔
1799
        match self.component_iterator.next() {
6✔
1800
            Some(component) => {
4✔
1801
                let current_comp = component;
4✔
1802
                
4✔
1803
    
4✔
1804
                let mut best_node = current_comp[0];
4✔
1805
                let mut best_score = f32::MIN;
4✔
1806
                for c in current_comp.iter() {
12✔
1807
                    let node = self.graph.get_node(*c);
12✔
1808
                    let node_score = (self.score)(node.data());
12✔
1809
    
12✔
1810
                    if node_score > best_score {
12✔
1811
                        best_node = *c;
4✔
1812
                        best_score = node_score;
4✔
1813
                    }
8✔
1814
                }
1815
    
1816
                let oscore = |state| match state {
12✔
1817
                    None => 0.0,
4✔
1818
                    Some((id, _)) => (self.score)(self.graph.get_node(id).data()),
8✔
1819
                };
12✔
1820
    
1821
                let osolid_path = |state| match state {
6✔
1822
                    None => false,
×
1823
                    Some((id, _)) => (self.solid_path)(self.graph.get_node(id).data()),
6✔
1824
                };
6✔
1825
    
1826
                // Now expand in each direction, greedily taking the best path. Stop if we hit a node we've
1827
                // already put into the path
1828
                let mut used_nodes = HashSet::new();
4✔
1829
                let mut path = VecDeque::new();
4✔
1830
    
4✔
1831
                // Start w/ initial state
4✔
1832
                used_nodes.insert(best_node);
4✔
1833
                path.push_front((best_node, Dir::Left));
4✔
1834
    
1835
                for init in [(best_node, Dir::Left, false), (best_node, Dir::Right, true)].iter() {
8✔
1836
                    let &(start_node, dir, do_flip) = init;
8✔
1837
                    let mut current = (start_node, dir);
8✔
1838
    
1839
                    loop {
1840
                        let mut next = None;
10✔
1841
                        let (cur_id, incoming_dir) = current;
10✔
1842
                        let node = self.graph.get_node(cur_id);
10✔
1843
                        let edges = node.edges(incoming_dir.flip());
10✔
1844
    
10✔
1845
                        let mut solid_paths = 0;
10✔
1846
                        for (_, id, dir, _) in edges {
16✔
1847
                            let cand = Some((id, dir));
6✔
1848
                            if osolid_path(cand) {
6✔
1849
                                solid_paths += 1;
6✔
1850

6✔
1851
                                // second if clause is outside of first in original code (see max_path) 
6✔
1852
                                // but would basically ignore path validity.
6✔
1853
                                if oscore(cand) > oscore(next) {
6✔
1854
                                    next = cand;
4✔
1855
                                }
4✔
1856
                            }
×
1857
    
1858
                            /* if oscore(cand) > oscore(next) {
1859
                                next = cand;
1860
                            } */
1861
                        }
1862
    
1863
                        if solid_paths > 1 {
10✔
1864
                            break;
2✔
1865
                        }
8✔
1866
    
1867
                        match next {
2✔
1868
                            Some((next_id, next_incoming)) if !used_nodes.contains(&next_id) => {
2✔
1869
                                if do_flip {
2✔
1870
                                    path.push_front((next_id, next_incoming.flip()));
×
1871
                                } else {
2✔
1872
                                    path.push_back((next_id, next_incoming));
2✔
1873
                                }
2✔
1874
    
1875
                                used_nodes.insert(next_id);
2✔
1876
                                current = (next_id, next_incoming);
2✔
1877
                            }
1878
                            _ => break,
6✔
1879
                        }
1880
                    }
1881
                }
1882
                
1883
                
1884
                Some((current_comp, path))
4✔
1885
            }, 
1886
            None => {
1887
                // should technically not need graph_pos after this 
1888
                self.graph_pos += 1;
2✔
1889
                None
2✔
1890
            }
1891
        }
1892
    }
6✔
1893
}
1894

1895

1896
/// iterator over the edges of the de bruijn graph
1897
pub struct EdgeIter<'a, K: Kmer, D: Debug> {
1898
    graph: &'a DebruijnGraph<K, D>,
1899
    visited_edges: HashSet<(usize, usize)>,
1900
    current_node: usize,
1901
    current_dir: Dir,
1902
    node_edge_iter: smallvec::IntoIter<[(u8, usize, Dir, bool); 4]>
1903
}
1904

1905
impl<K: Kmer, D: Debug> EdgeIter<'_, K, D> {
1906
    pub fn new(graph: &DebruijnGraph<K, D>) -> EdgeIter<'_, K, D>{
×
1907
        let node_edge_iter = graph.get_node(0).l_edges().into_iter();
×
1908

×
1909
        EdgeIter { 
×
1910
            graph, 
×
1911
            visited_edges: HashSet::new(), 
×
1912
            current_node: 0, 
×
1913
            current_dir: Dir::Left, 
×
1914
            node_edge_iter
×
1915
        }
×
1916
    }
×
1917
}
1918

1919
impl<K: Kmer, D: Debug> Iterator for EdgeIter<'_, K, D> {
1920
    type Item = (usize, Dir, u8, usize); // node, direction leaving node, base, target node
1921

1922
    fn next(&mut self) -> Option<Self::Item> {
×
1923
        loop {
1924
            if let Some((base, nb_node_id, _, _)) = self.node_edge_iter.next() {
×
1925
                let edge = if self.current_node > nb_node_id { (nb_node_id, self.current_node) } else { (self.current_node, nb_node_id) };
×
1926

1927
                if self.visited_edges.insert(edge) { return Some((self.current_node, self.current_dir, base, nb_node_id)); } // else simply skip and move on
×
1928

1929
            } else {
1930
                match self.current_dir {
×
1931
                Dir::Left => {
×
1932
                    // no left edges, switch to right edges
×
1933
                    self.current_dir = Dir::Right;
×
1934
                    self.node_edge_iter = self.graph.get_node(self.current_node).r_edges().into_iter();
×
1935
                    
×
1936
                }
×
1937
                Dir::Right => {
1938
                    // no right edges, switch to next node left edges
1939
                    self.current_node += 1;
×
1940

×
1941
                    // quit if end of graph is reached
×
1942
                    if self.current_node == self.graph.len() { return None }
×
1943

×
1944
                    self.current_dir = Dir::Left;
×
1945
                    self.node_edge_iter = self.graph.get_node(self.current_node).l_edges().into_iter();
×
1946
                }
1947
            }
1948
            }
1949
            
1950
        }
1951
    }
×
1952
}
1953

1954
#[cfg(test)]
1955
mod test {
1956
    use std::{fs::File, io::BufReader};
1957

1958
    use crate::{kmer::Kmer16, summarizer::TagsCountsSumData};
1959

1960
    use super::DebruijnGraph;
1961
    use crate::{summarizer::SummaryData, Dir, BUF};
1962

1963

1964
    #[test]
1965
    #[cfg(not(feature = "sample128"))]
1966
    fn test_components() {
1967

1968
        let path = "test_data/400.graph.dbg";
1969
        let file = BufReader::with_capacity(BUF, File::open(path).unwrap());
1970

1971
        let (graph, _, _): (DebruijnGraph<Kmer16, TagsCountsSumData>, Vec<String>, crate::summarizer::SummaryConfig) = 
1972
            bincode::deserialize_from(file).expect("error deserializing graph");
1973

1974
        let components = graph.iter_components();
1975

1976
        let check_components = [
1977
            vec![3, 67, 130, 133, 59, 119, 97, 110, 68, 137, 29, 84, 131, 43, 30, 91, 14, 70, 79, 142, 136, 105, 103, 62, 
1978
                141, 104, 134, 88, 38, 81, 108, 92, 135, 96, 116, 121, 63, 124, 106, 129, 132, 126, 93, 109, 83, 112, 118, 
1979
                123, 125, 78, 122, 115, 75, 128, 140, 111, 26, 143, 113],
1980
            vec![41, 138, 100, 139, 86],
1981
            vec![53, 117, 127],
1982
            vec![69, 144, 77, 120, 114, 107, 101],
1983
        ];
1984

1985
        let mut counter = 0;
1986

1987
        for component in components {
1988
            if component.len() > 1 { 
1989
                println!("component: {:?}", component);
1990
                assert_eq!(component, check_components[counter]);
1991
                counter += 1;
1992
            }
1993
        }
1994
        assert_eq!(vec![(139, Dir::Left)], graph.max_path(|data| data.sum().unwrap_or(1) as f32, |_| true));
1995
    }
1996

1997
    #[test]
1998
    #[cfg(not(feature = "sample128"))]
1999
    fn test_iter_edges() {
2000
        let path = "test_data/400.graph.dbg";
2001
        let file = BufReader::with_capacity(BUF, File::open(path).unwrap());
2002

2003
        let (graph, _, _): (DebruijnGraph<Kmer16, TagsCountsSumData>, Vec<String>, crate::summarizer::SummaryConfig) = 
2004
            bincode::deserialize_from(file).expect("error deserializing graph");
2005

2006
        let check_edges = vec![(3, Dir::Left, 2, 134), (3, Dir::Right, 2, 67), (14, Dir::Left, 0, 91), 
2007
            (14, Dir::Right, 0, 70), (26, Dir::Left, 2, 111), (29, Dir::Left, 1, 131), (29, Dir::Left, 3, 84), (29, Dir::Right, 1, 137), 
2008
            (30, Dir::Left, 3, 43), (30, Dir::Right, 2, 91), (38, Dir::Left, 3, 81), (38, Dir::Right, 1, 88), (41, Dir::Left, 0, 138), 
2009
            (43, Dir::Left, 0, 131), (53, Dir::Left, 0, 127), (53, Dir::Right, 1, 117), (59, Dir::Left, 3, 133), (59, Dir::Right, 0, 119), 
2010
            (62, Dir::Left, 3, 119), (62, Dir::Right, 3, 103), (63, Dir::Left, 1, 121), (63, Dir::Right, 0, 124), (67, Dir::Left, 3, 130), 
2011
            (68, Dir::Left, 0, 137), (68, Dir::Right, 3, 110), (69, Dir::Left, 1, 114), (69, Dir::Left, 3, 77), (69, Dir::Right, 1, 144), 
2012
            (70, Dir::Right, 0, 79), (75, Dir::Left, 2, 121), (75, Dir::Right, 2, 128), (77, Dir::Left, 1, 120), (78, Dir::Left, 2, 122), 
2013
            (78, Dir::Right, 2, 125), (79, Dir::Right, 2, 142), (81, Dir::Left, 0, 113), (81, Dir::Right, 0, 143), (81, Dir::Right, 3, 108), 
2014
            (83, Dir::Right, 3, 109), (86, Dir::Left, 1, 139), (88, Dir::Left, 0, 113), (88, Dir::Right, 3, 134), (91, Dir::Left, 3, 141), 
2015
            (92, Dir::Left, 3, 135), (92, Dir::Right, 0, 108), (93, Dir::Left, 0, 109), (93, Dir::Right, 0, 126), (96, Dir::Left, 0, 116), 
2016
            (96, Dir::Right, 0, 135), (97, Dir::Left, 1, 119), (97, Dir::Right, 0, 110), (100, Dir::Left, 2, 139), (100, Dir::Right, 0, 138), 
2017
            (101, Dir::Right, 0, 120), (103, Dir::Right, 3, 105), (104, Dir::Left, 3, 110), (104, Dir::Right, 2, 119), (105, Dir::Right, 0, 136), 
2018
            (106, Dir::Left, 3, 124), (106, Dir::Right, 3, 129), (107, Dir::Right, 0, 120), (111, Dir::Right, 2, 140), (112, Dir::Left, 2, 115), 
2019
            (112, Dir::Left, 3, 118), (112, Dir::Right, 0, 124), (114, Dir::Left, 1, 120), (115, Dir::Right, 0, 123), (116, Dir::Left, 2, 121), 
2020
            (118, Dir::Right, 0, 123), (121, Dir::Right, 2, 132), (123, Dir::Right, 0, 125), (126, Dir::Right, 0, 132), (128, Dir::Left, 1, 140), 
2021
            (129, Dir::Right, 0, 132), (130, Dir::Left, 0, 133), (136, Dir::Right, 3, 142), (138, Dir::Left, 0, 138), (139, Dir::Left, 1, 139)];
2022

2023
        let edges = graph.iter_edges().collect::<Vec<_>>();
2024

2025
        assert_eq!(check_edges, edges);
2026
    }
2027
}
2028

STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc