• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

jlab / rust-debruijn / 14991022630

13 May 2025 07:42AM UTC coverage: 85.949% (+1.2%) from 84.785%
14991022630

push

github

web-flow
Merge pull request #18 from jlab/dev

`EdgeMult` compression 

- add `EdgeMult` compatibility with graph compression
- remove parallel compression methods

296 of 311 new or added lines in 7 files covered. (95.18%)

3 existing lines in 1 file now uncovered.

6643 of 7729 relevant lines covered (85.95%)

4663221.41 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

70.8
/src/graph.rs
1
// Copyright 2017 10x Genomics
2

3
//! Containers for path-compressed De Bruijn graphs
4

5
use bit_set::BitSet;
6
use indicatif::ProgressBar;
7
use indicatif::ProgressIterator;
8
use indicatif::ProgressStyle;
9
use itertools::enumerate;
10
use log::{debug, trace};
11
use rayon::prelude::*;
12
use rayon::current_num_threads;
13
use serde_derive::{Deserialize, Serialize};
14
use smallvec::SmallVec;
15
use std::borrow::Borrow;
16

17
use std::collections::HashSet;
18
use std::collections::VecDeque;
19
use std::f32;
20
use std::fmt::{self, Debug, Display};
21
use std::fs::{remove_file, File};
22
use std::hash::Hash;
23
use std::io::BufWriter;
24
use std::io::{BufReader, Error, Read};
25
use std::io::Write;
26
use std::iter::FromIterator;
27
use std::marker::PhantomData;
28
use std::path::Path;
29

30
use boomphf::hashmap::BoomHashMap;
31

32
use serde_json;
33
use serde_json::Value;
34

35
type SmallVec4<T> = SmallVec<[T; 4]>;
36
type SmallVec8<T> = SmallVec<[T; 8]>;
37

38
use crate::bits_to_base;
39
use crate::colors::Colors;
40
use crate::compression::CompressionSpec;
41
use crate::dna_string::{DnaString, DnaStringSlice, PackedDnaStringSet};
42
use crate::summarizer::SummaryConfig;
43
use crate::summarizer::SummaryData;
44
use crate::BUF;
45
use crate::PROGRESS_STYLE;
46
use crate::{Dir, Exts, Kmer, Mer, Vmer};
47

48
/// A compressed DeBruijn graph carrying auxiliary data on each node of type `D`.
49
/// This type does not carry the sorted index arrays the allow the graph
50
/// to be walked efficiently. The `DeBruijnGraph` type wraps this type and add those
51
/// vectors.
52
#[derive(Serialize, Deserialize, Clone, Debug)]
53
pub struct BaseGraph<K, D> {
54
    pub sequences: PackedDnaStringSet,
55
    pub exts: Vec<Exts>,
56
    pub data: Vec<D>,
57
    pub stranded: bool,
58
    phantom: PhantomData<K>,
59
}
60

61
impl<K, D> BaseGraph<K, D> {
62
    pub fn new(stranded: bool) -> Self {
2,722✔
63
        BaseGraph {
2,722✔
64
            sequences: PackedDnaStringSet::new(),
2,722✔
65
            exts: Vec::new(),
2,722✔
66
            data: Vec::new(),
2,722✔
67
            phantom: PhantomData,
2,722✔
68
            stranded,
2,722✔
69
        }
2,722✔
70
    }
2,722✔
71

72
    pub fn len(&self) -> usize {
850,550✔
73
        self.sequences.len()
850,550✔
74
    }
850,550✔
75

76
    pub fn is_empty(&self) -> bool {
1✔
77
        self.sequences.is_empty()
1✔
78
    }
1✔
79

80
    pub fn combine<I: Iterator<Item = BaseGraph<K, D>>>(graphs: I) -> Self {
12✔
81
        let mut sequences = PackedDnaStringSet::new();
12✔
82
        let mut exts = Vec::new();
12✔
83
        let mut data = Vec::new();
12✔
84
        let mut stranded = Vec::new();
12✔
85

86
        for g in graphs {
2,042✔
87
            for s in 0..g.sequences.len() {
7,647✔
88
                sequences.add(&g.sequences.get(s));
7,647✔
89
            }
7,647✔
90

91
            exts.extend(g.exts);
2,030✔
92
            data.extend(g.data);
2,030✔
93
            stranded.push(g.stranded);
2,030✔
94
        }
95

96
        let out_stranded = stranded.iter().all(|x| *x);
12✔
97

12✔
98
        if !out_stranded && !stranded.iter().all(|x| !*x) {
2,030✔
99
            panic!("attempted to combine stranded and unstranded graphs");
×
100
        }
12✔
101

12✔
102
        BaseGraph {
12✔
103
            sequences,
12✔
104
            stranded: out_stranded,
12✔
105
            exts,
12✔
106
            data,
12✔
107
            phantom: PhantomData,
12✔
108
        }
12✔
109
    }
12✔
110
}
111

112
impl<K: Kmer, D> BaseGraph<K, D> {
113
    pub fn add<R: Borrow<u8>, S: IntoIterator<Item = R>>(
1,586,656✔
114
        &mut self,
1,586,656✔
115
        sequence: S,
1,586,656✔
116
        exts: Exts,
1,586,656✔
117
        data: D,
1,586,656✔
118
    ) {
1,586,656✔
119
        self.sequences.add(sequence);
1,586,656✔
120
        self.exts.push(exts);
1,586,656✔
121
        self.data.push(data);
1,586,656✔
122
    }
1,586,656✔
123
}
124

125
impl<K: Kmer + Send + Sync, D> BaseGraph<K, D> {
126
    pub fn finish(self) -> DebruijnGraph<K, D> {
581✔
127
        let indices: Vec<u32> = (0..self.len() as u32).collect();
581✔
128

129
        let left_order = {
581✔
130
            let mut kmers: Vec<K> = Vec::with_capacity(self.len());
581✔
131
            for idx in &indices {
841,337✔
132
                kmers.push(self.sequences.get(*idx as usize).first_kmer());
840,756✔
133
                
840,756✔
134
            }
840,756✔
135
            
136
            BoomHashMap::new_parallel(kmers, indices.clone())
581✔
137
        };
138

139
        let right_order = {
581✔
140
            let mut kmers: Vec<K> = Vec::with_capacity(self.len());
581✔
141
            for idx in &indices {
841,337✔
142
                kmers.push(self.sequences.get(*idx as usize).last_kmer());
840,756✔
143
            }
840,756✔
144
            
145
            BoomHashMap::new_parallel(kmers, indices)
581✔
146
        };
581✔
147
        debug!("finish graph loops: 2x {}", self.len());
581✔
148

149
        DebruijnGraph {
581✔
150
            base: self,
581✔
151
            left_order,
581✔
152
            right_order,
581✔
153
        }
581✔
154
    }
581✔
155
}
156

157
impl<K: Kmer, D> BaseGraph<K, D> {
158
    pub fn finish_serial(self) -> DebruijnGraph<K, D> {
111✔
159
        let indices: Vec<u32> = (0..self.len() as u32).collect();
111✔
160

161
        let left_order = {
111✔
162
            let mut kmers: Vec<K> = Vec::with_capacity(self.len());
111✔
163
            let mut sequences: Vec<String> = Vec::new();
111✔
164
            for idx in &indices {
744,238✔
165
                kmers.push(self.sequences.get(*idx as usize).first_kmer());
744,127✔
166
                sequences.push(self.sequences.get(*idx as usize).to_dna_string());
744,127✔
167
            }
744,127✔
168
            println!("left kmers: {:?}", kmers);
111✔
169
            println!("left seqs: {:?}", sequences);
111✔
170
            BoomHashMap::new(kmers, indices.clone())
111✔
171
        };
172

173
        let right_order = {
111✔
174
            let mut kmers: Vec<K> = Vec::with_capacity(self.len());
111✔
175
            let mut sequences: Vec<String> = Vec::new();
111✔
176
            for idx in &indices {
744,238✔
177
                kmers.push(self.sequences.get(*idx as usize).last_kmer());
744,127✔
178
                sequences.push(self.sequences.get(*idx as usize).to_dna_string());
744,127✔
179
            }
744,127✔
180
            println!("right kmers: {:?}", kmers);
111✔
181
            println!("right seqs: {:?}", sequences);
111✔
182
            BoomHashMap::new(kmers, indices)
111✔
183
        };
111✔
184

111✔
185
        DebruijnGraph {
111✔
186
            base: self,
111✔
187
            left_order,
111✔
188
            right_order,
111✔
189
        }
111✔
190
    }
111✔
191
}
192

193
/// A compressed DeBruijn graph carrying auxiliary data on each node of type `D`.
194
/// The struct carries sorted index arrays the allow the graph
195
/// to be walked efficiently.
196
#[derive(Serialize, Deserialize, Debug)]
197
pub struct DebruijnGraph<K: Hash, D> {
198
    pub base: BaseGraph<K, D>,
199
    left_order: BoomHashMap<K, u32>,
200
    right_order: BoomHashMap<K, u32>,
201
}
202

203
impl<K: Kmer, D: Debug> DebruijnGraph<K, D> {
204
    /// Total number of nodes in the DeBruijn graph
205
    pub fn len(&self) -> usize {
848,450✔
206
        self.base.len()
848,450✔
207
    }
848,450✔
208

209
    pub fn is_empty(&self) -> bool {
1✔
210
        self.base.is_empty()
1✔
211
    }
1✔
212

213
    /// Get a node given it's `node_id`
214
    pub fn get_node(&self, node_id: usize) -> Node<K, D> {
5,101,489✔
215
        Node {
5,101,489✔
216
            node_id,
5,101,489✔
217
            graph: self,
5,101,489✔
218
        }
5,101,489✔
219
    }
5,101,489✔
220

221
    /// Get a node given it's `node_id`
222
    pub fn get_node_kmer(&self, node_id: usize) -> NodeKmer<K, D> {
×
223
        let node = self.get_node(node_id);
×
224
        let node_seq = node.sequence();
×
225

×
226
        NodeKmer {
×
227
            node_id,
×
228
            node_seq_slice: node_seq,
×
229
            phantom_d: PhantomData,
×
230
            phantom_k: PhantomData,
×
231
        }
×
232
    }
×
233

234
    /// Return an iterator over all nodes in the graph
235
    pub fn iter_nodes(&self) -> NodeIter<K, D> {
129✔
236
        NodeIter {
129✔
237
            graph: self,
129✔
238
            node_id: 0,
129✔
239
        }
129✔
240
    }
129✔
241

242
    /// Find the edges leaving node `node_id` in direction `Dir`. Should generally be
243
    /// accessed via a Node wrapper object
244
    fn find_edges(&self, node_id: usize, dir: Dir) -> SmallVec4<(u8, usize, Dir, bool)> {
789,648✔
245
        let exts = self.base.exts[node_id];
789,648✔
246
        let sequence = self.base.sequences.get(node_id);
789,648✔
247
        let kmer: K = sequence.term_kmer(dir);
789,648✔
248
        let mut edges = SmallVec4::new();
789,648✔
249

250
        for i in 0..4 {
3,948,240✔
251
            if exts.has_ext(dir, i) {
3,158,592✔
252
                let link = self.find_link(kmer.extend(i, dir), dir).expect("missing link");
1,286,926✔
253
                edges.push((i, link.0, link.1, link.2));
1,286,926✔
254
            }
1,871,666✔
255
        }
256

257
        edges
789,648✔
258
    }
789,648✔
259

260
    /// Find the edges leaving node `node_id` in direction `Dir`. Should generally be
261
    /// accessed via a Node wrapper object
262
    /// 
263
    /// allows missing links
264
    fn _find_edges_sharded(&self, node_id: usize, dir: Dir) -> SmallVec4<(u8, usize, Dir, bool)> {
×
265
        let exts = self.base.exts[node_id];
×
266
        let sequence = self.base.sequences.get(node_id);
×
267
        let kmer: K = sequence.term_kmer(dir);
×
268
        let mut edges = SmallVec4::new();
×
269

270
        for i in 0..4 {
×
271
            if exts.has_ext(dir, i) {
×
272
                let link = self.find_link(kmer.extend(i, dir), dir); //.expect("missing link");
×
273
                if let Some(l) = link {
×
274
                    edges.push((i, l.0, l.1, l.2));
×
275
                }
×
276
                // Otherwise, this edge doesn't exist within this shard, so ignore it.
277
                // NOTE: this should be allowed in a 'complete' DBG
278
            }
×
279
        }
280

281
        edges
×
282
    }
×
283

284
    /// Seach for the kmer `kmer`, appearing at the given `side` of a node sequence.
285
    fn search_kmer(&self, kmer: K, side: Dir) -> Option<usize> {
5,219,523✔
286
        match side {
5,219,523✔
287
            Dir::Left => self.left_order.get(&kmer).map(|pos| *pos as usize),
2,608,240✔
288
            Dir::Right => self.right_order.get(&kmer).map(|pos| *pos as usize),
2,611,283✔
289
        }
290
    }
5,219,523✔
291

292
    /// Find a link in the graph, possibly handling a RC switch.
293
    pub fn find_link(&self, kmer: K, dir: Dir) -> Option<(usize, Dir, bool)> {
3,638,272✔
294
        // Only test self-consistent paths through
3,638,272✔
295
        // the edges
3,638,272✔
296
        // Avoids problems due to single kmer edges
3,638,272✔
297
        // (dir, next_side_incoming, rc)
3,638,272✔
298
        // (Dir::Left, Dir::Right, false) => true,
3,638,272✔
299
        // (Dir::Left, Dir::Left,  true) => true,
3,638,272✔
300
        // (Dir::Right, Dir::Left, false) => true,
3,638,272✔
301
        // (Dir::Right, Dir::Right, true) => true,
3,638,272✔
302

3,638,272✔
303
        let rc = kmer.rc();
3,638,272✔
304

3,638,272✔
305
        match dir {
3,638,272✔
306
            Dir::Left => {
307
                if let Some(idx) = self.search_kmer(kmer, Dir::Right) {
1,822,050✔
308
                    return Some((idx, Dir::Right, false));
1,030,032✔
309
                }
792,018✔
310

792,018✔
311
                if !self.base.stranded {
792,018✔
312
                    if let Some(idx) = self.search_kmer(rc, Dir::Left) {
792,018✔
313
                        return Some((idx, Dir::Left, true));
792,018✔
314
                    }
×
315
                }
×
316
            }
317

318
            Dir::Right => {
319
                if let Some(idx) = self.search_kmer(kmer, Dir::Left) {
1,816,222✔
320
                    return Some((idx, Dir::Left, false));
1,026,989✔
321
                }
789,233✔
322

789,233✔
323
                if !self.base.stranded {
789,233✔
324
                    if let Some(idx) = self.search_kmer(rc, Dir::Right) {
789,233✔
325
                        return Some((idx, Dir::Right, true));
789,233✔
326
                    }
×
327
                }
×
328
            }
329
        }
330

331
        None
×
332
    }
3,638,272✔
333

334
    /// Check whether the graph is fully compressed. Return `None` if it's compressed,
335
    /// otherwise return `Some(node1, node2)` representing a pair of node that could
336
    /// be collapsed. Probably only useful for testing.
337
    pub fn is_compressed<S: CompressionSpec<D>>(&self, spec: &S) -> Option<(usize, usize)> {
789✔
338
        for i in 0..self.len() {
154,566✔
339
            let n = self.get_node(i);
154,566✔
340

341
            for dir in &[Dir::Left, Dir::Right] {
463,698✔
342
                let dir_edges = n.edges(*dir);
309,132✔
343
                if dir_edges.len() == 1 {
309,132✔
344
                    let (_, next_id, return_dir, _) = dir_edges[0];
219,102✔
345
                    let next = self.get_node(next_id);
219,102✔
346

219,102✔
347
                    let ret_edges = next.edges(return_dir);
219,102✔
348
                    if ret_edges.len() == 1 {
219,102✔
349
                        // Test for us being a palindrome: this makes it OK
350
                        if n.len() == K::k() && n.sequence().first_kmer::<K>().is_palindrome() {
24✔
351
                            continue;
16✔
352
                        }
8✔
353

8✔
354
                        // Test for a neighbor being a palindrome: this makes it OK
8✔
355
                        if next.len() == K::k() && next.sequence().first_kmer::<K>().is_palindrome()
8✔
356
                        {
357
                            continue;
8✔
358
                        }
×
359

×
360
                        // Test for this edge representing a smooth circle (biting it's own tail):
×
361
                        if n.node_id == next_id {
×
362
                            continue;
×
363
                        }
×
364

×
365
                        if spec.join_test(n.data(), next.data()) {
×
366
                            // Found a unbranched edge that should have been eliminated
367
                            return Some((i, next_id));
×
368
                        }
×
369
                    }
219,078✔
370
                }
90,030✔
371
            }
372
        }
373

374
        None
789✔
375
    }
789✔
376

377
    /// Remove non-existent extensions that may be created due to filtered kmers
378
    /// 
379
    /// if `valid_nodes` if `None`, all nodes are valid
380
    pub fn fix_exts(&mut self, valid_nodes: Option<&BitSet>) {
247✔
381
        for i in 0..self.len() {
775,122✔
382
            let valid_exts = self.get_valid_exts(i, valid_nodes);
775,122✔
383
            self.base.exts[i] = valid_exts;
775,122✔
384
        }
775,122✔
385
    }
247✔
386

387
    pub fn get_valid_exts(&self, node_id: usize, valid_nodes: Option<&BitSet>) -> Exts {
775,122✔
388
        let mut new_exts = Exts::empty();
775,122✔
389
        let node = self.get_node(node_id);
775,122✔
390
        let exts = node.exts();
775,122✔
391
        let l_kmer: K = node.sequence().first_kmer();
775,122✔
392
        let r_kmer: K = node.sequence().last_kmer();
775,122✔
393

775,122✔
394
        let check_node = |id| match valid_nodes {
1,589,820✔
395
            Some(bs) => bs.contains(id),
1,523,338✔
396
            None => true,
66,482✔
397
        };
1,589,820✔
398

399
        for i in 0..4 {
3,875,610✔
400
            if exts.has_ext(Dir::Left, i) {
3,100,488✔
401
                match self.find_link(l_kmer.extend_left(i), Dir::Left) {
795,003✔
402
                    Some((target, _, _)) if check_node(target) => {
795,003✔
403
                        new_exts = new_exts.set(Dir::Left, i)
795,003✔
404
                    }
405
                    _ => (),
×
406
                }
407
            }
2,305,485✔
408

409
            if exts.has_ext(Dir::Right, i) {
3,100,488✔
410
                match self.find_link(r_kmer.extend_right(i), Dir::Right) {
794,817✔
411
                    Some((target, _, _)) if check_node(target) => {
794,817✔
412
                        new_exts = new_exts.set(Dir::Right, i)
794,817✔
413
                    }
414
                    _ => (),
×
415
                }
416
            }
2,305,671✔
417
        }
418

419
        new_exts
775,122✔
420
    }
775,122✔
421

422
    /// Find the highest-scoring, unambiguous path in the graph. Each node get a score
423
    /// given by `score`. Any node where `solid_path(node) == True` are valid paths -
424
    /// paths will be terminated if there are multiple valid paths emanating from a node.
425
    pub fn max_path<F, F2>(&self, score: F, solid_path: F2) -> Vec<(usize, Dir)>
×
426
    where
×
427
        F: Fn(&D) -> f32,
×
428
        F2: Fn(&D) -> bool,
×
429
    {
×
430
        if self.is_empty() {
×
431
            return Vec::default();
×
432
        }
×
433

×
434
        let mut best_node = 0;
×
435
        let mut best_score = f32::MIN;
×
436
        for i in 0..self.len() {
×
437
            let node = self.get_node(i);
×
438
            let node_score = score(node.data());
×
439

×
440
            if node_score > best_score {
×
441
                best_node = i;
×
442
                best_score = node_score;
×
443
            }
×
444
        }
445

446
        let oscore = |state| match state {
×
447
            None => 0.0,
×
448
            Some((id, _)) => score(self.get_node(id).data()),
×
449
        };
×
450

451
        let osolid_path = |state| match state {
×
452
            None => false,
×
453
            Some((id, _)) => solid_path(self.get_node(id).data()),
×
454
        };
×
455

456
        // Now expand in each direction, greedily taking the best path. Stop if we hit a node we've
457
        // already put into the path
458
        let mut used_nodes = HashSet::new();
×
459
        let mut path = VecDeque::new();
×
460

×
461
        // Start w/ initial state
×
462
        used_nodes.insert(best_node);
×
463
        path.push_front((best_node, Dir::Left));
×
464

465
        for init in [(best_node, Dir::Left, false), (best_node, Dir::Right, true)].iter() {
×
466
            let &(start_node, dir, do_flip) = init;
×
467
            let mut current = (start_node, dir);
×
468

469
            loop {
470
                let mut next = None;
×
471
                let (cur_id, incoming_dir) = current;
×
472
                let node = self.get_node(cur_id);
×
473
                let edges = node.edges(incoming_dir.flip());
×
474

×
475
                let mut solid_paths = 0;
×
476
                for (_, id, dir, _) in edges {
×
477
                    let cand = Some((id, dir));
×
478
                    if osolid_path(cand) {
×
479
                        solid_paths += 1;
×
480
                    }
×
481

482
                    if oscore(cand) > oscore(next) {
×
483
                        next = cand;
×
484
                    }
×
485
                }
486

487
                if solid_paths > 1 {
×
488
                    break;
×
489
                }
×
490

491
                match next {
×
492
                    Some((next_id, next_incoming)) if !used_nodes.contains(&next_id) => {
×
493
                        if do_flip {
×
494
                            path.push_front((next_id, next_incoming.flip()));
×
495
                        } else {
×
496
                            path.push_back((next_id, next_incoming));
×
497
                        }
×
498

499
                        used_nodes.insert(next_id);
×
500
                        current = (next_id, next_incoming);
×
501
                    }
502
                    _ => break,
×
503
                }
504
            }
505
        }
506

507
        Vec::from_iter(path)
×
508
    }
×
509

510

511
    /// Find the highest-scoring, unambiguous path in the graph. Each node get a score
512
    /// given by `score`. Any node where `solid_path(node) == True` are valid paths -
513
    /// paths will be terminated if there are multiple valid paths emanating from a node.
514
    /// Returns vec with path for each component
515
    pub fn max_path_comp<F, F2>(&self, score: F, solid_path: F2) -> Vec<VecDeque<(usize, Dir)>>
1✔
516
    where
1✔
517
        F: Fn(&D) -> f32,
1✔
518
        F2: Fn(&D) -> bool,
1✔
519
    {
1✔
520
        if self.is_empty() {
1✔
521
            let vec: Vec<VecDeque<(usize, Dir)>> = Vec::new();
×
522
            return vec;
×
523
        }
1✔
524

1✔
525
        let components = self.iter_components();
1✔
526
        let mut paths: Vec<VecDeque<(usize, Dir)>> = Vec::new();
1✔
527

528
        for component in components {
3✔
529

530
            let current_comp = &component;
2✔
531
            
2✔
532

2✔
533
            let mut best_node = current_comp[0];
2✔
534
            let mut best_score = f32::MIN;
2✔
535
            for c in current_comp.iter() {
6✔
536
                let node = self.get_node(*c);
6✔
537
                let node_score = score(node.data());
6✔
538

6✔
539
                if node_score > best_score {
6✔
540
                    best_node = *c;
2✔
541
                    best_score = node_score;
2✔
542
                }
4✔
543
            }
544

545
            let oscore = |state| match state {
4✔
546
                None => 0.0,
2✔
547
                Some((id, _)) => score(self.get_node(id).data()),
2✔
548
            };
4✔
549

550
            let osolid_path = |state| match state {
2✔
551
                None => false,
×
552
                Some((id, _)) => solid_path(self.get_node(id).data()),
2✔
553
            };
2✔
554

555
            // Now expand in each direction, greedily taking the best path. Stop if we hit a node we've
556
            // already put into the path
557
            let mut used_nodes = HashSet::new();
2✔
558
            let mut path = VecDeque::new();
2✔
559

2✔
560
            // Start w/ initial state
2✔
561
            used_nodes.insert(best_node);
2✔
562
            path.push_front((best_node, Dir::Left));
2✔
563

564
            for init in [(best_node, Dir::Left, false), (best_node, Dir::Right, true)].iter() {
4✔
565
                let &(start_node, dir, do_flip) = init;
4✔
566
                let mut current = (start_node, dir);
4✔
567

568
                loop {
569
                    let mut next = None;
6✔
570
                    let (cur_id, incoming_dir) = current;
6✔
571
                    let node = self.get_node(cur_id);
6✔
572
                    let edges = node.edges(incoming_dir.flip());
6✔
573

6✔
574
                    let mut solid_paths = 0;
6✔
575
                    for (_, id, dir, _) in edges {
8✔
576
                        let cand = Some((id, dir));
2✔
577
                        if osolid_path(cand) {
2✔
578
                            solid_paths += 1;
2✔
579
                        }
2✔
580

581
                        if oscore(cand) > oscore(next) {
2✔
582
                            next = cand;
2✔
583
                        }
2✔
584
                    }
585

586
                    if solid_paths > 1 {
6✔
UNCOV
587
                        break;
×
588
                    }
6✔
589

590
                    match next {
2✔
591
                        Some((next_id, next_incoming)) if !used_nodes.contains(&next_id) => {
2✔
592
                            if do_flip {
2✔
593
                                path.push_front((next_id, next_incoming.flip()));
1✔
594
                            } else {
1✔
595
                                path.push_back((next_id, next_incoming));
1✔
596
                            }
1✔
597

598
                            used_nodes.insert(next_id);
2✔
599
                            current = (next_id, next_incoming);
2✔
600
                        }
601
                        _ => break,
4✔
602
                    }
603
                }
604
            }
605
            
606
            paths.push(path);
2✔
607
            //paths.push(Vec::from_iter(path));
608
        }
609

610
        paths
1✔
611
    
612
    }
1✔
613

614
    pub fn iter_max_path_comp<F, F2>(&self, score: F, solid_path: F2) -> PathCompIter<K, D, F, F2> 
2✔
615
    where 
2✔
616
    F: Fn(&D) -> f32,
2✔
617
    F2: Fn(&D) -> bool
2✔
618
    {
2✔
619
        let component_iterator = self.iter_components();
2✔
620
        PathCompIter { graph: self, component_iterator, graph_pos: 0, score, solid_path }
2✔
621
    }
2✔
622

623
    /// write the paths from `iter_max_path_comp` to a fasta file
624
    pub fn path_to_fasta<F, F2>(&self, f: &mut dyn std::io::Write, path_iter: PathCompIter<K, D, F, F2>, return_lens: bool) -> (Vec<usize>, Vec<usize>)
1✔
625
    where 
1✔
626
    F: Fn(&D) -> f32,
1✔
627
    F2: Fn(&D) -> bool
1✔
628
    {
1✔
629
        // width of fasta lines
1✔
630
        let columns = 80;
1✔
631

1✔
632
        // sizes of components and of paths
1✔
633
        let mut comp_sizes = Vec::new();
1✔
634
        let mut path_lens = Vec::new();
1✔
635

636
        for (seq_counter, (component, path)) in path_iter.enumerate() {
2✔
637
            // get dna sequence from path
638
            let seq = self.sequence_of_path(path.iter());
2✔
639

2✔
640
            //write header with length & start node
2✔
641
            writeln!(f, ">path{} len={} start_node={}", seq_counter, seq.len(), path[0].0).unwrap();
2✔
642

2✔
643
            // calculate how sequence has to be split up
2✔
644
            let slices = (seq.len() / columns) + 1;
2✔
645
            let mut ranges = Vec::with_capacity(slices);
2✔
646

2✔
647
            let mut start = 0;
2✔
648
            while start < seq.len() {
6✔
649
                ranges.push(start..start + columns);
4✔
650
                start += columns;
4✔
651
            }
4✔
652

653
            let last_start = ranges.pop().expect("no kmers in parallel ranges").start;
2✔
654
            ranges.push(last_start..seq.len());
2✔
655

656
            // split up sequence and write to file accordingly
657
            for range in ranges {
6✔
658
                writeln!(f, "{:?}", seq.slice(range.start, range.end)).unwrap();
4✔
659
            }
4✔
660

661
            if return_lens {
2✔
662
                comp_sizes.push(component.len());
×
663
                path_lens.push(path.len());
×
664
            }
2✔
665
        }    
666

667
        (comp_sizes, path_lens)
1✔
668
        
1✔
669
    }
1✔
670

671

672
    /// Get the sequence of a path through the graph. The path is given as a sequence of node_id integers
673
    pub fn sequence_of_path<'a, I: 'a + Iterator<Item = &'a (usize, Dir)>>(
23,348✔
674
        &self,
23,348✔
675
        path: I,
23,348✔
676
    ) -> DnaString {
23,348✔
677
        let mut seq = DnaString::new();
23,348✔
678

679
        for (idx, &(node_id, dir)) in path.enumerate() {
751,786✔
680
            let start = if idx == 0 { 0 } else { K::k() - 1 };
751,786✔
681

682
            let node_seq = match dir {
751,786✔
683
                Dir::Left => self.get_node(node_id).sequence(),
390,956✔
684
                Dir::Right => self.get_node(node_id).sequence().rc(),
360,830✔
685
            };
686

687
            for p in start..node_seq.len() {
1,531,460✔
688
                seq.push(node_seq.get(p))
1,531,460✔
689
            }
690
        }
691

692
        seq
23,348✔
693
    }
23,348✔
694

695
    /// write a node to a dot file
696
    /// 
697
    /// ### Arguments: 
698
    /// 
699
    /// * `node`: [`Node<K, D>`] which will be written to a dot file
700
    /// * `node_label`: closure taking [`Node<K, D>`] and returning a string containing commands for dot nodes 
701
    /// * `edge_label`: closure taking [`Node<K, D>`], the base as a [`u8`], the incoming [`Dir`] of the edge 
702
    ///    and if the neighbor is flipped - returns a string containing commands for dot edges, 
703
    /// * `f`: writer
704
    fn node_to_dot<FN: Fn(&Node<K, D>) -> String, FE: Fn(&Node<K, D>, u8, Dir, bool) -> String>(
65,320✔
705
        &self,
65,320✔
706
        node: &Node<'_, K, D>,
65,320✔
707
        node_label: &FN,
65,320✔
708
        edge_label: &FE,
65,320✔
709
        f: &mut dyn Write,
65,320✔
710
    ) {
65,320✔
711
        writeln!(f, "n{} {}", node.node_id, node_label(node)).unwrap();
65,320✔
712
        assert_eq!(node.exts().val.count_ones() as usize, node.l_edges().len() + node.r_edges().len());
65,320✔
713

714
        for (base, id, incoming_dir, flipped) in node.l_edges() {
65,320✔
715
            writeln!(f, "n{} -> n{} {}", id, node.node_id, edge_label(node, base, incoming_dir, flipped)).unwrap();
65,062✔
716
        }
65,062✔
717

718
        for (base, id, incoming_dir, flipped) in node.r_edges() {
65,320✔
719
            writeln!(f, "n{} -> n{} {}", node.node_id, id, edge_label(node, base, incoming_dir, flipped)).unwrap();
64,778✔
720
        }
64,778✔
721
    }
65,320✔
722

723
    /// Write the graph to a dot file
724
    /// 
725
    /// ### Arguments: 
726
    /// 
727
    /// * `path`: path to the output file
728
    /// * `node_label`: closure taking [`Node<K, D>`] and returning a string containing commands for dot nodes 
729
    /// * `edge_label`: closure taking [`Node<K, D>`], the base as a [`u8`], the incoming [`Dir`] of the edge 
730
    ///    and if the neighbor is flipped - returns a string containing commands for dot edges, 
731
    pub fn to_dot<P, FN, FE>(&self, path: P, node_label: &FN, edge_label: &FE) 
1✔
732
    where 
1✔
733
    P: AsRef<Path>,
1✔
734
    FN: Fn(&Node<K, D>) -> String,
1✔
735
    FE: Fn(&Node<K, D>, u8, Dir, bool) -> String,
1✔
736
    {
1✔
737
        let mut f = BufWriter::with_capacity(BUF, File::create(path).expect("error creating dot file"));
1✔
738

1✔
739
        let pb = ProgressBar::new(self.len() as u64);
1✔
740
        pb.set_style(ProgressStyle::with_template(PROGRESS_STYLE).unwrap().progress_chars("#/-"));
1✔
741
        pb.set_message(format!("{:<32}", "writing graph to DOT file"));
1✔
742

1✔
743
        writeln!(&mut f, "digraph {{\nrankdir=\"LR\"\nmodel=subset\noverlap=scalexy").unwrap();
1✔
744
        for i in (0..self.len()).progress_with(pb) {
32,658✔
745
            self.node_to_dot(&self.get_node(i), node_label, edge_label, &mut f);
32,658✔
746
        }
32,658✔
747
        writeln!(&mut f, "}}").unwrap();
1✔
748
        
1✔
749
        f.flush().unwrap();
1✔
750
        debug!("large to dot loop: {}", self.len());
1✔
751
    }
1✔
752

753
    /// Write the graph to a dot file in parallel
754
    /// Will write in to n_threads files simultaniously,
755
    /// then go though the files and add the contents to a larger file, 
756
    /// and delete the small files.
757
    /// 
758
    /// ### Arguments: 
759
    /// 
760
    /// * `path`: path to the output file
761
    /// * `node_label`: closure taking [`Node<K, D>`] and returning a string containing commands for dot nodes 
762
    /// * `edge_label`: closure taking [`Node<K, D>`], the base as a [`u8`], the incoming [`Dir`] of the edge 
763
    ///    and if the neighbor is flipped - returns a string containing commands for dot edges, 
764
    pub fn to_dot_parallel<P, FN, FE>(&self, path: P, node_label: &FN, edge_label: &FE) 
1✔
765
    where 
1✔
766
        D: Sync,
1✔
767
        K: Sync,
1✔
768
        P: AsRef<Path> + Display + Sync,
1✔
769
        FN: Fn(&Node<K, D>) -> String + Sync,
1✔
770
        FE: Fn(&Node<K, D>, u8, Dir, bool) -> String + Sync,
1✔
771
    {        
1✔
772
        let slices = current_num_threads();
1✔
773
        let n_nodes = self.len();
1✔
774
        let sz = n_nodes / slices + 1;
1✔
775

1✔
776
        debug!("n_nodes: {}", n_nodes);
1✔
777
        debug!("sz: {}", sz);
1✔
778

779
        let mut parallel_ranges = Vec::with_capacity(slices);
1✔
780
        let mut start = 0;
1✔
781
        while start < n_nodes {
5✔
782
            parallel_ranges.push(start..start + sz);
4✔
783
            start += sz;
4✔
784
        }
4✔
785

786
        let last_start = parallel_ranges.pop().expect("no kmers in parallel ranges").start;
1✔
787
        parallel_ranges.push(last_start..n_nodes);
1✔
788
        debug!("parallel ranges: {:?}", parallel_ranges);
1✔
789

790
        let mut files = Vec::with_capacity(current_num_threads());
1✔
791

792
        for i in 0..parallel_ranges.len() {
4✔
793
            files.push(format!("{}-{}.dot", path, i));
4✔
794
        } 
4✔
795

796
        let pb = ProgressBar::new(self.len() as u64);
1✔
797
        pb.set_style(ProgressStyle::with_template(PROGRESS_STYLE).unwrap().progress_chars("#/-"));
1✔
798
        pb.set_message(format!("{:<32}", "writing graph to DOT files"));
1✔
799
    
1✔
800
        parallel_ranges.into_par_iter().enumerate().for_each(|(i, range)| {
4✔
801
            let mut f = BufWriter::with_capacity(BUF, File::create(&files[i]).expect("error creating parallel dot file"));
4✔
802

803
            for i in range {
32,662✔
804
                self.node_to_dot(&self.get_node(i), node_label, edge_label, &mut f);
32,658✔
805
                pb.inc(1);
32,658✔
806
            }
32,658✔
807

808
            f.flush().unwrap();
4✔
809
        });
4✔
810
        pb.finish_and_clear();
1✔
811

1✔
812
        let mut out_file = BufWriter::with_capacity(BUF, File::create(path).expect("error creating combined dot file"));
1✔
813

1✔
814
        writeln!(&mut out_file, "digraph {{\nrankdir=\"LR\"\nmodel=subset\noverlap=scalexy").unwrap();
1✔
815

1✔
816
        let pb = ProgressBar::new(files.len() as u64);
1✔
817
        pb.set_style(ProgressStyle::with_template(PROGRESS_STYLE).unwrap().progress_chars("#/-"));
1✔
818
        pb.set_message(format!("{:<32}", "combining files"));
1✔
819

820
        for file in files.iter().progress_with(pb) {
4✔
821
            let open_file = File::open(file).expect("error opening parallel dot file");
4✔
822
            let mut reader = BufReader::new(open_file);
4✔
823
            let mut buffer = [0; BUF];
4✔
824

825
            loop {
826
                let linecount = reader.read(&mut buffer).unwrap();
228✔
827
                if linecount == 0 { break }
228✔
828
                out_file.write_all(&buffer[..linecount]).unwrap();
224✔
829
            }
830

831
            remove_file(file).unwrap();
4✔
832
        }
833

834
        writeln!(&mut out_file, "}}").unwrap();
1✔
835

1✔
836
        out_file.flush().unwrap();
1✔
837

1✔
838

1✔
839
    }
1✔
840

841
    /// Write part of the graph to a dot file
842
    /// 
843
    /// ### Arguments: 
844
    /// 
845
    /// * `path`: path to the output file
846
    /// * `node_label`: closure taking [`Node<K, D>`] and returning a string containing commands for dot nodes 
847
    /// * `edge_label`: closure taking [`Node<K, D>`], the base as a [`u8`], the incoming [`Dir`] of the edge 
848
    ///    and if the neighbor is flipped - returns a string containing commands for dot edges, 
849
    /// * `nodes`: [`Vec<usize>`] listing all IDs of nodes which should be included
850
    pub fn to_dot_partial<P, FN, FE>(&self, path: P, node_label: &FN, edge_label: &FE, nodes: Vec<usize>) 
1✔
851
    where 
1✔
852
        P: AsRef<Path>,
1✔
853
        FN: Fn(&Node<K, D>) -> String,
1✔
854
        FE: Fn(&Node<K, D>, u8, Dir, bool) -> String,
1✔
855
    {
1✔
856
        let mut f = BufWriter::with_capacity(BUF, File::create(path).expect("error creating dot file"));
1✔
857

1✔
858
        let pb = ProgressBar::new(nodes.len() as u64);
1✔
859
        pb.set_style(ProgressStyle::with_template(PROGRESS_STYLE).unwrap().progress_chars("#/-"));
1✔
860
        pb.set_message(format!("{:<32}", "writing graph to DOT file"));
1✔
861

1✔
862
        writeln!(&mut f, "# {:?}", nodes).unwrap();
1✔
863
        writeln!(&mut f, "digraph {{\nrankdir=\"LR\"\nmodel=subset\noverlap=scalexy").unwrap();
1✔
864
        for i in nodes.into_iter().progress_with(pb) {
4✔
865
            self.node_to_dot(&self.get_node(i), node_label, edge_label, &mut f);
4✔
866
        }
4✔
867
        writeln!(&mut f, "}}").unwrap();
1✔
868

1✔
869
        f.flush().unwrap();
1✔
870

1✔
871
        debug!("large to dot loop: {}", self.len());
1✔
872
    }
1✔
873

874
    fn node_to_gfa<F: Fn(&Node<'_, K, D>) -> String>(
12✔
875
        &self,
12✔
876
        node: &Node<'_, K, D>,
12✔
877
        w: &mut dyn Write,
12✔
878
        tag_func: Option<&F>,
12✔
879
    ) -> Result<(), Error> {
12✔
880
        match tag_func {
12✔
881
            Some(f) => {
12✔
882
                let tags = (f)(node);
12✔
883
                writeln!(
12✔
884
                    w,
12✔
885
                    "S\t{}\t{}\t{}",
12✔
886
                    node.node_id,
12✔
887
                    node.sequence().to_dna_string(),
12✔
888
                    tags
12✔
889
                )?;
12✔
890
            }
891
            _ => writeln!(
×
892
                w,
×
893
                "S\t{}\t{}",
×
894
                node.node_id,
×
895
                node.sequence().to_dna_string()
×
896
            )?,
×
897
        }
898

899
        for (_, target, dir, _) in node.l_edges() {
12✔
900
            if target >= node.node_id {
8✔
901
                let to_dir = match dir {
4✔
902
                    Dir::Left => "+",
×
903
                    Dir::Right => "-",
4✔
904
                };
905
                writeln!(
4✔
906
                    w,
4✔
907
                    "L\t{}\t-\t{}\t{}\t{}M",
4✔
908
                    node.node_id,
4✔
909
                    target,
4✔
910
                    to_dir,
4✔
911
                    K::k() - 1
4✔
912
                )?;
4✔
913
            }
4✔
914
        }
915

916
        for (_, target, dir, _) in node.r_edges() {
12✔
917
            if target > node.node_id {
8✔
918
                let to_dir = match dir {
4✔
919
                    Dir::Left => "+",
4✔
UNCOV
920
                    Dir::Right => "-",
×
921
                };
922
                writeln!(
4✔
923
                    w,
4✔
924
                    "L\t{}\t+\t{}\t{}\t{}M",
4✔
925
                    node.node_id,
4✔
926
                    target,
4✔
927
                    to_dir,
4✔
928
                    K::k() - 1
4✔
929
                )?;
4✔
930
            }
4✔
931
        }
932

933
        Ok(())
12✔
934
    }
12✔
935

936
    /// Write the graph to GFA format
937
    pub fn to_gfa<P: AsRef<Path>>(&self, gfa_out: P) -> Result<(), Error> {
×
938
        let wtr = BufWriter::with_capacity(BUF, File::create(gfa_out).expect("error creating gfa file"));
×
939
        self.write_gfa(&mut std::io::BufWriter::new(wtr))
×
940
    }
×
941

942
    pub fn write_gfa(&self, wtr: &mut impl Write) -> Result<(), Error> {
×
943
        writeln!(wtr, "H\tVN:Z:debruijn-rs")?;
×
944

945
        type DummyFn<K, D> = fn(&Node<'_, K, D>) -> String;
946
        let dummy_opt: Option<&DummyFn<K, D>> = None;
×
947

×
948
        let pb = ProgressBar::new(self.len() as u64);
×
949
        pb.set_style(ProgressStyle::with_template(PROGRESS_STYLE).unwrap().progress_chars("#/-"));
×
950
        pb.set_message(format!("{:<32}", "writing graph to GFA file"));
×
951

952
        for i in (0..self.len()).progress_with(pb) {
×
953
            let n = self.get_node(i);
×
954
            self.node_to_gfa(&n, wtr, dummy_opt)?;
×
955
        }
956

957
        wtr.flush().unwrap();
×
958

×
959
        Ok(())
×
960
    }
×
961

962
    /// Write the graph to GFA format
963
    pub fn to_gfa_with_tags<P: AsRef<Path>, F: Fn(&Node<'_, K, D>) -> String>(
1✔
964
        &self,
1✔
965
        gfa_out: P,
1✔
966
        tag_func: F,
1✔
967
    ) -> Result<(), Error> {
1✔
968
        let mut wtr = BufWriter::with_capacity(BUF, File::create(gfa_out).expect("error creatinf gfa file"));
1✔
969
        writeln!(wtr, "H\tVN:Z:debruijn-rs")?;
1✔
970

971
        let pb = ProgressBar::new(self.len() as u64);
1✔
972
        pb.set_style(ProgressStyle::with_template(PROGRESS_STYLE).unwrap().progress_chars("#/-"));
1✔
973
        pb.set_message(format!("{:<32}", "writing graph to GFA file"));
1✔
974

975
        for i in (0..self.len()).progress_with(pb) {
6✔
976
            let n = self.get_node(i);
6✔
977
            self.node_to_gfa(&n, &mut wtr, Some(&tag_func))?;
6✔
978
        }
979

980
        wtr.flush().unwrap();
1✔
981

1✔
982
        Ok(())
1✔
983
    }
1✔
984

985
    /// Write the graph to GFA format, with multithreading, 
986
    /// pass `tag_func=None` to write without tags
987
    pub fn to_gfa_otags_parallel<P: AsRef<Path> + Display + Sync, F: Fn(&Node<'_, K, D>) -> String + Sync>(
1✔
988
        &self,
1✔
989
        gfa_out: P,
1✔
990
        tag_func: Option<&F>,
1✔
991
    ) -> Result<(), Error> 
1✔
992
    where 
1✔
993
    K: Sync,
1✔
994
    D: Sync,
1✔
995
    {
1✔
996
        // split into ranges according to thread count
1✔
997
        let slices = current_num_threads();
1✔
998
        let n_nodes = self.len();
1✔
999
        let sz = n_nodes / slices + 1;
1✔
1000

1✔
1001
        debug!("n_nodes: {}", n_nodes);
1✔
1002
        debug!("sz: {}", sz);
1✔
1003

1004
        let mut parallel_ranges = Vec::with_capacity(slices);
1✔
1005
        let mut start = 0;
1✔
1006
        while start < n_nodes {
4✔
1007
            parallel_ranges.push(start..start + sz);
3✔
1008
            start += sz;
3✔
1009
        }
3✔
1010

1011
        let last_start = parallel_ranges.pop().expect("no kmers in parallel ranges").start;
1✔
1012
        parallel_ranges.push(last_start..n_nodes);
1✔
1013
        debug!("parallel ranges: {:?}", parallel_ranges);
1✔
1014

1015
        let mut files = Vec::with_capacity(current_num_threads());
1✔
1016

1017
        for i in 0..parallel_ranges.len() {
3✔
1018
            files.push(format!("{}-{}.gfa", gfa_out, i));
3✔
1019
        } 
3✔
1020

1021
        let pb = ProgressBar::new(self.len() as u64);
1✔
1022
        pb.set_style(ProgressStyle::with_template(PROGRESS_STYLE).unwrap().progress_chars("#/-"));
1✔
1023
        pb.set_message(format!("{:<32}", "writing graph to GFA file"));
1✔
1024
        
1✔
1025
        
1✔
1026
        parallel_ranges.into_par_iter().enumerate().for_each(|(i, range)| {
3✔
1027
            let mut wtr = BufWriter::with_capacity(BUF, File::create(&files[i]).expect("error creating parallel gfa file"));
3✔
1028

1029
            for i in range {
9✔
1030
                let n = self.get_node(i);
6✔
1031
                self.node_to_gfa(&n, &mut wtr, tag_func).unwrap();
6✔
1032
                pb.inc(1);
6✔
1033
            }
6✔
1034

1035
            wtr.flush().unwrap();
3✔
1036
        });
3✔
1037

1✔
1038
        pb.finish_and_clear();
1✔
1039

1✔
1040
        // combine files
1✔
1041
        let mut out_file = BufWriter::with_capacity(BUF, File::create(format!("{}.gfa", gfa_out)).expect("error creating combined gfa file"));
1✔
1042
        writeln!(out_file, "H\tVN:Z:debruijn-rs")?;
1✔
1043

1044
        let pb = ProgressBar::new(files.len() as u64);
1✔
1045
        pb.set_style(ProgressStyle::with_template(PROGRESS_STYLE).unwrap().progress_chars("#/-"));
1✔
1046
        pb.set_message(format!("{:<32}", "combining files"));
1✔
1047

1048
        for file in files.iter() {
3✔
1049
            let open_file = File::open(file).expect("error opening parallel gfa file");
3✔
1050
            let mut reader = BufReader::new(open_file);
3✔
1051
            let mut buffer = [0; BUF];
3✔
1052

1053
            loop {
1054
                let linecount = reader.read(&mut buffer).unwrap();
6✔
1055
                if linecount == 0 { break }
6✔
1056
                out_file.write_all(&buffer[..linecount]).unwrap();
3✔
1057
            }
1058

1059
            remove_file(file).unwrap();
3✔
1060
        }
1061

1062
        out_file.flush().unwrap();
1✔
1063

1✔
1064
        Ok(())
1✔
1065
    }
1✔
1066

1067
    /// Write the graph to GFA format
1068
    pub fn to_gfa_partial<P: AsRef<Path>, F: Fn(&Node<'_, K, D>) -> String>(&self, gfa_out: P, tag_func: Option<&F>, nodes: Vec<usize>) -> Result<(), Error> {
×
1069
        let mut wtr = BufWriter::with_capacity(BUF, File::create(gfa_out).expect("error creating gfa file"));
×
1070
        writeln!(wtr, "H\tVN:Z:debruijn-rs")?;
×
1071

1072
        let pb = ProgressBar::new(self.len() as u64);
×
1073
        pb.set_style(ProgressStyle::with_template(PROGRESS_STYLE).unwrap().progress_chars("#/-"));
×
1074
        pb.set_message(format!("{:<32}", "writing graph to GFA file"));
×
1075

1076
        for i in nodes.into_iter().progress_with(pb) {
×
1077
            let n = self.get_node(i);
×
1078
            self.node_to_gfa(&n, &mut wtr, tag_func)?;
×
1079
        }
1080

1081
        wtr.flush().unwrap();
×
1082

×
1083
        Ok(())    
×
1084
    }
×
1085

1086
    pub fn to_json_rest<W: Write, F: Fn(&D) -> Value>(
×
1087
        &self,
×
1088
        fmt_func: F,
×
1089
        mut writer: &mut W,
×
1090
        rest: Option<Value>,
×
1091
    ) {
×
1092
        writeln!(writer, "{{\n\"nodes\": [").unwrap();
×
1093
        for i in 0..self.len() {
×
1094
            let node = self.get_node(i);
×
1095
            node.to_json(&fmt_func, writer);
×
1096
            if i == self.len() - 1 {
×
1097
                writeln!(writer).unwrap();
×
1098
            } else {
×
1099
                writeln!(writer, ",").unwrap();
×
1100
            }
×
1101
        }
1102
        writeln!(writer, "],").unwrap();
×
1103

×
1104
        writeln!(writer, "\"links\": [").unwrap();
×
1105
        for i in 0..self.len() {
×
1106
            let node = self.get_node(i);
×
1107
            match node.edges_to_json(writer) {
×
1108
                true => {
1109
                    if i == self.len() - 1 {
×
1110
                        writeln!(writer).unwrap();
×
1111
                    } else {
×
1112
                        writeln!(writer, ",").unwrap();
×
1113
                    }
×
1114
                }
1115
                _ => continue,
×
1116
            }
1117
        }
1118
        writeln!(writer, "]").unwrap();
×
1119

1120
        match rest {
×
1121
            Some(Value::Object(v)) => {
×
1122
                for (k, v) in v.iter() {
×
1123
                    writeln!(writer, ",").expect("io error");
×
1124
                    write!(writer, "\"{}\": ", k).expect("io error");
×
1125
                    serde_json::to_writer(&mut writer, v).expect("io error");
×
1126
                    writeln!(writer).expect("io error");
×
1127
                }
×
1128
            }
1129
            _ => {
×
1130
                writeln!(writer).expect("io error");
×
1131
            }
×
1132
        }
1133

1134
        writeln!(writer, "}}").expect("io error");
×
1135
    }
×
1136

1137
    /// Write the graph to JSON
1138
    pub fn to_json<W: Write, F: Fn(&D) -> Value, RF: Fn(&mut W)>(
×
1139
        &self,
×
1140
        fmt_func: F,
×
1141
        writer: &mut W,
×
1142
    ) {
×
1143
        self.to_json_rest(fmt_func, writer, None);
×
1144
    }
×
1145

1146
    /// Print a text representation of the graph.
1147
    pub fn print(&self) {
2✔
1148
        println!("DebruijnGraph {{ len: {}, K: {} }} :", self.len(), K::k());
2✔
1149
        for node in self.iter_nodes() {
8✔
1150
            println!("{:?}", node);
8✔
1151
        }
8✔
1152
    }
2✔
1153

1154
    pub fn print_with_data(&self) {
×
1155
        println!("DebruijnGraph {{ len: {}, K: {} }} :", self.len(), K::k());
×
1156
        for node in self.iter_nodes() {
×
1157
            println!("{:?} ({:?})", node, node.data());
×
1158
        }
×
1159
    }
×
1160

1161
    pub fn max_path_beam<F, F2>(&self, beam: usize, score: F, _solid_path: F2) -> Vec<(usize, Dir)>
×
1162
    where
×
1163
        F: Fn(&D) -> f32,
×
1164
        F2: Fn(&D) -> bool,
×
1165
    {
×
1166
        if self.is_empty() {
×
1167
            return Vec::default();
×
1168
        }
×
1169

×
1170
        let mut states = Vec::new();
×
1171

1172
        for i in 0..self.len() {
×
1173
            let node = self.get_node(i);
×
1174

×
1175
            // Initialize beam search on terminal nodes
×
1176
            if node.exts().num_exts_l() == 0 || node.exts().num_exts_r() == 0 {
×
1177
                let dir = if node.exts().num_exts_l() > 0 {
×
1178
                    Dir::Right
×
1179
                } else {
1180
                    Dir::Left
×
1181
                };
1182

1183
                let status = if node.exts().num_exts_l() == 0 && node.exts().num_exts_r() == 0 {
×
1184
                    Status::End
×
1185
                } else {
1186
                    Status::Active
×
1187
                };
1188

1189
                let mut path = SmallVec8::new();
×
1190
                path.push((i as u32, dir));
×
1191

×
1192
                let s = State {
×
1193
                    path,
×
1194
                    status,
×
1195
                    score: score(node.data()),
×
1196
                };
×
1197
                states.push(s);
×
1198
            }
×
1199
        }
1200

1201
        // No end nodes -- just start on the first node
1202
        if states.is_empty() {
×
1203
            // Make a start
×
1204
            let node = self.get_node(0);
×
1205
            let mut path = SmallVec8::new();
×
1206
            path.push((0, Dir::Left));
×
1207
            states.push(State {
×
1208
                path,
×
1209
                status: Status::Active,
×
1210
                score: score(node.data()),
×
1211
            });
×
1212
        }
×
1213

1214
        // Beam search until we can't find any more expansions
1215
        let mut active = true;
×
1216
        while active {
×
1217
            let mut new_states = Vec::with_capacity(states.len());
×
1218
            active = false;
×
1219

1220
            for s in states {
×
1221
                if s.status == Status::Active {
×
1222
                    active = true;
×
1223
                    let expanded = self.expand_state(&s, &score);
×
1224
                    new_states.extend(expanded);
×
1225
                } else {
×
1226
                    new_states.push(s)
×
1227
                }
1228
            }
1229

1230
            // workaround to sort by descending score - will panic if there are NaN scores
1231
            new_states.sort_by(|a, b| (-(a.score)).partial_cmp(&-(b.score)).unwrap());
×
1232
            new_states.truncate(beam);
×
1233
            states = new_states;
×
1234
        }
×
1235

1236
        for (i, state) in states.iter().take(5).enumerate() {
×
1237
            trace!("i:{}  -- {:?}", i, state);
×
1238
        }
1239

1240
        // convert back to using usize for node_id
1241
        states[0]
×
1242
            .path
×
1243
            .iter()
×
1244
            .map(|&(node, dir)| (node as usize, dir))
×
1245
            .collect()
×
1246
    }
×
1247

1248
    fn expand_state<F>(&self, state: &State, score: &F) -> SmallVec4<State>
×
1249
    where
×
1250
        F: Fn(&D) -> f32,
×
1251
    {
×
1252
        if state.status != Status::Active {
×
1253
            panic!("only attempt to expand active states")
×
1254
        }
×
1255

×
1256
        let (node_id, dir) = state.path[state.path.len() - 1];
×
1257
        let node = self.get_node(node_id as usize);
×
1258
        let mut new_states = SmallVec4::new();
×
1259

1260
        for (_, next_node_id, incoming_dir, _) in node.edges(dir.flip()) {
×
1261
            let next_node = self.get_node(next_node_id);
×
1262
            let new_score = state.score + score(next_node.data());
×
1263

×
1264
            let cycle = state
×
1265
                .path
×
1266
                .iter()
×
1267
                .any(|&(prev_node, _)| prev_node == (next_node_id as u32));
×
1268

1269
            let status = if cycle {
×
1270
                Status::Cycle
×
1271
            } else if next_node.edges(incoming_dir.flip()).is_empty() {
×
1272
                Status::End
×
1273
            } else {
1274
                Status::Active
×
1275
            };
1276

1277
            let mut new_path = state.path.clone();
×
1278
            new_path.push((next_node_id as u32, incoming_dir));
×
1279

×
1280
            let next_state = State {
×
1281
                path: new_path,
×
1282
                score: new_score,
×
1283
                status,
×
1284
            };
×
1285

×
1286
            new_states.push(next_state);
×
1287
        }
1288

1289
        new_states
×
1290
    }
×
1291

1292

1293
    pub fn iter_components(&self) -> IterComponents<K, D> {
4✔
1294
        let mut visited: Vec<bool> = Vec::with_capacity(self.len());
4✔
1295
        let pos = 0;
4✔
1296

1297
        for _i in 0..self.len() {
24✔
1298
            visited.push(false);
24✔
1299
        }
24✔
1300

1301
        IterComponents { 
4✔
1302
            graph: self, 
4✔
1303
            visited, 
4✔
1304
            pos }
4✔
1305
    }
4✔
1306

1307

1308
    /// iteratively returns 2D Vec with node_ids grouped according to the connected components they form
1309
    pub fn components_i(&self) -> Vec<Vec<usize>> {
1✔
1310
        let mut components: Vec<Vec<usize>> = Vec::with_capacity(self.len());
1✔
1311
        let mut visited: Vec<bool> = Vec::with_capacity(self.len());
1✔
1312

1313
        for _i in 0..self.len() {
6✔
1314
            visited.push(false);
6✔
1315
        }
6✔
1316

1317
        for i in 0..self.len() {
6✔
1318
            if !visited[i] {
6✔
1319
                let comp = self.component_i(&mut visited, i);
2✔
1320
                components.push(comp);
2✔
1321
            }
4✔
1322
        }
1323

1324
        components
1✔
1325
    }
1✔
1326

1327
    /// recursively detects which nodes form separate graph components
1328
    /// returns 2D vector with node ids per component
1329
    /// (may lead to stack overflow)
1330
    pub fn components_r(&self) -> Vec<Vec<usize>> {
2✔
1331
        let mut components: Vec<Vec<usize>> = Vec::with_capacity(self.len());
2✔
1332
        let mut visited: Vec<bool> = Vec::with_capacity(self.len());
2✔
1333

1334
        for _i in 0..self.len() {
8✔
1335
            visited.push(false);
8✔
1336
        }
8✔
1337

1338
        for i in 0..self.len() {
8✔
1339
            if !visited[i] {
8✔
1340
                let mut comp: Vec<usize> = Vec::new();
4✔
1341
                self.component_r(&mut visited, i, &mut comp);
4✔
1342
                components.push(comp);
4✔
1343
            }
4✔
1344
        }
1345

1346
        components
2✔
1347

2✔
1348
    }
2✔
1349

1350
    fn component_r<'a>(&'a self, visited: &'a mut Vec<bool>, i: usize, comp: &'a mut Vec<usize>) {
8✔
1351
        
8✔
1352
        visited[i] = true;
8✔
1353
        comp.push(i);
8✔
1354
        let mut edges = self.find_edges(i, Dir::Left);
8✔
1355
        let mut r_edges = self.find_edges(i, Dir::Right);
8✔
1356

8✔
1357
        edges.append(&mut r_edges);
8✔
1358

1359
        for (_, edge, _, _) in edges.iter() {
8✔
1360
            if !visited[*edge] {
8✔
1361
                self.component_r(visited, *edge, comp);
4✔
1362
            }
4✔
1363
        }
1364
    }
8✔
1365

1366
    fn component_i<'a>(&'a self, visited: &'a mut [bool], i: usize) -> Vec<usize> {
10✔
1367
        let mut edges: Vec<usize> = Vec::new();
10✔
1368
        let mut comp: Vec<usize> = Vec::new();
10✔
1369

10✔
1370
        edges.push(i);
10✔
1371

1372
        while let Some(current_edge) = edges.pop() {
40✔
1373
            if !visited[current_edge] { 
30✔
1374
                comp.push(current_edge);
30✔
1375
                visited[current_edge] = true;
30✔
1376

30✔
1377
                let mut l_edges = self.find_edges(current_edge, Dir::Left);
30✔
1378
                let mut r_edges = self.find_edges(current_edge, Dir::Right);
30✔
1379

30✔
1380
                l_edges.append(&mut r_edges);
30✔
1381

1382
                for (_, new_edge, _, _) in l_edges.into_iter() {
40✔
1383
                    if !visited[new_edge] {
40✔
1384
                        edges.push(new_edge);
20✔
1385
                    }
20✔
1386
                }
1387
            }
×
1388
        }
1389
        comp
10✔
1390
    }
10✔
1391

1392
    pub fn find_bad_nodes<F: Fn(&Node<'_, K, D>) -> bool>(&self, valid: F) -> Vec<usize> {
×
1393
        let mut bad_nodes = Vec::new();
×
1394

1395
        for (i, node) in enumerate(self.iter_nodes()) {
×
1396
            if !valid(&node) { bad_nodes.push(i); }
×
1397
        }
1398
        
1399
        bad_nodes
×
1400
    }
×
1401
}
1402

1403
impl<K: Kmer, SD: SummaryData<u8> + Debug> DebruijnGraph<K, SD> {
1404
    pub fn create_colors(&self, config: &SummaryConfig) -> Colors<SD> {
×
1405
        Colors::new(self, config)
×
1406
    }
×
1407
    
1408
    /// edge mults will contain hanging edges if the nodes were filtered
1409
    pub fn fix_edge_mults(&mut self) {
1✔
1410
        if self.get_node(0).data().edge_mults().is_some() {
1✔
1411
            for i in 0..self.len() {
×
1412
                self.base.data[i].fix_edge_mults(self.base.exts[i]);
×
1413
            }
×
1414
        }
1✔
1415
    }
1✔
1416
}
1417

1418
#[derive(Debug, Eq, PartialEq)]
1419
enum Status {
1420
    Active,
1421
    End,
1422
    Cycle,
1423
}
1424

1425
#[derive(Debug)]
1426
struct State {
1427
    path: SmallVec8<(u32, Dir)>,
1428
    score: f32,
1429
    status: Status,
1430
}
1431

1432
impl State {}
1433

1434
/// Iterator over nodes in a `DeBruijnGraph`
1435
pub struct NodeIter<'a, K: Kmer + 'a, D: Debug + 'a> {
1436
    graph: &'a DebruijnGraph<K, D>,
1437
    node_id: usize,
1438
}
1439

1440
impl<'a, K: Kmer + 'a, D: Debug + 'a> Iterator for NodeIter<'a, K, D> {
1441
    type Item = Node<'a, K, D>;
1442

1443
    fn next(&mut self) -> Option<Node<'a, K, D>> {
544,529✔
1444
        if self.node_id < self.graph.len() {
544,529✔
1445
            let node = self.graph.get_node(self.node_id);
544,400✔
1446
            self.node_id += 1;
544,400✔
1447
            Some(node)
544,400✔
1448
        } else {
1449
            None
129✔
1450
        }
1451
    }
544,529✔
1452
}
1453

1454
impl<'a, K: Kmer + 'a, D: Debug + 'a> IntoIterator for &'a DebruijnGraph<K, D> {
1455
    type Item = NodeKmer<'a, K, D>;
1456
    type IntoIter = NodeIntoIter<'a, K, D>;
1457

1458
    fn into_iter(self) -> Self::IntoIter {
2,214✔
1459
        NodeIntoIter {
2,214✔
1460
            graph: self,
2,214✔
1461
            node_id: 0,
2,214✔
1462
        }
2,214✔
1463
    }
2,214✔
1464
}
1465

1466
/// Iterator over nodes in a `DeBruijnGraph`
1467
pub struct NodeIntoIter<'a, K: Kmer + 'a, D: Debug + 'a> {
1468
    graph: &'a DebruijnGraph<K, D>,
1469
    node_id: usize,
1470
}
1471

1472
impl<'a, K: Kmer + 'a, D: Debug + 'a> Iterator for NodeIntoIter<'a, K, D> {
1473
    type Item = NodeKmer<'a, K, D>;
1474

1475
    fn next(&mut self) -> Option<Self::Item> {
302,700✔
1476
        if self.node_id < self.graph.len() {
302,700✔
1477
            let node_id = self.node_id;
302,700✔
1478
            let node = self.graph.get_node(node_id);
302,700✔
1479
            let node_seq = node.sequence();
302,700✔
1480

302,700✔
1481
            self.node_id += 1;
302,700✔
1482
            Some(NodeKmer {
302,700✔
1483
                node_id,
302,700✔
1484
                node_seq_slice: node_seq,
302,700✔
1485
                phantom_d: PhantomData,
302,700✔
1486
                phantom_k: PhantomData,
302,700✔
1487
            })
302,700✔
1488
        } else {
1489
            None
×
1490
        }
1491
    }
302,700✔
1492
}
1493

1494
/// A `DebruijnGraph` node with a reference to the sequence of the node.
1495
#[derive(Clone)]
1496
pub struct NodeKmer<'a, K: Kmer + 'a, D: Debug + 'a> {
1497
    pub node_id: usize,
1498
    node_seq_slice: DnaStringSlice<'a>,
1499
    phantom_k: PhantomData<K>,
1500
    phantom_d: PhantomData<D>,
1501
}
1502

1503
/// An iterator over the kmers in a `DeBruijn graph node`
1504
pub struct NodeKmerIter<'a, K: Kmer + 'a, D: Debug + 'a> {
1505
    kmer_id: usize,
1506
    kmer: K,
1507
    num_kmers: usize,
1508
    node_seq_slice: DnaStringSlice<'a>,
1509
    phantom_k: PhantomData<K>,
1510
    phantom_d: PhantomData<D>,
1511
}
1512

1513
impl<'a, K: Kmer + 'a, D: Debug + 'a> IntoIterator for NodeKmer<'a, K, D> {
1514
    type Item = K;
1515
    type IntoIter = NodeKmerIter<'a, K, D>;
1516

1517
    fn into_iter(self) -> Self::IntoIter {
605,400✔
1518
        let num_kmers = self.node_seq_slice.len() - K::k() + 1;
605,400✔
1519

1520
        let kmer = if num_kmers > 0 {
605,400✔
1521
            self.node_seq_slice.get_kmer::<K>(0)
605,400✔
1522
        } else {
1523
            K::empty()
×
1524
        };
1525

1526
        NodeKmerIter {
605,400✔
1527
            kmer_id: 0,
605,400✔
1528
            kmer,
605,400✔
1529
            num_kmers,
605,400✔
1530
            node_seq_slice: self.node_seq_slice,
605,400✔
1531
            phantom_k: PhantomData,
605,400✔
1532
            phantom_d: PhantomData,
605,400✔
1533
        }
605,400✔
1534
    }
605,400✔
1535
}
1536

1537
impl<'a, K: Kmer + 'a, D: Debug + 'a> Iterator for NodeKmerIter<'a, K, D> {
1538
    type Item = K;
1539

1540
    fn next(&mut self) -> Option<Self::Item> {
3,686,536✔
1541
        if self.num_kmers == self.kmer_id {
3,686,536✔
1542
            None
×
1543
        } else {
1544
            let current_kmer = self.kmer;
3,686,536✔
1545

3,686,536✔
1546
            self.kmer_id += 1;
3,686,536✔
1547
            if self.kmer_id < self.num_kmers {
3,686,536✔
1548
                let next_base = self.node_seq_slice.get(self.kmer_id + K::k() - 1);
3,608,402✔
1549
                let new_kmer = self.kmer.extend_right(next_base);
3,608,402✔
1550
                self.kmer = new_kmer;
3,608,402✔
1551
            }
3,608,402✔
1552

1553
            Some(current_kmer)
3,686,536✔
1554
        }
1555
    }
3,686,536✔
1556

1557
    fn size_hint(&self) -> (usize, Option<usize>) {
302,700✔
1558
        (self.num_kmers, Some(self.num_kmers))
302,700✔
1559
    }
302,700✔
1560

1561
    /// Provide a 'fast-forward' capability for this iterator
1562
    /// MPHF will use this to reduce the number of kmers that
1563
    /// need to be produced.
1564
    fn nth(&mut self, n: usize) -> Option<Self::Item> {
2,655,368✔
1565
        if n <= 4 {
2,655,368✔
1566
            // for small skips forward, shift one base at a time
1567
            for _ in 0..n {
2,390,370✔
1568
                self.next();
1,031,168✔
1569
            }
1,031,168✔
1570
        } else {
264,998✔
1571
            self.kmer_id += n;
264,998✔
1572
            self.kmer = self.node_seq_slice.get_kmer::<K>(self.kmer_id);
264,998✔
1573
        }
264,998✔
1574

1575
        self.next()
2,655,368✔
1576
    }
2,655,368✔
1577
}
1578

1579
/// Marker signifying that NodeKmerIter has a known size.
1580
impl<'a, K: Kmer + 'a, D: Debug + 'a> ExactSizeIterator for NodeKmerIter<'a, K, D> {}
1581

1582
/// Unbranched sequence in the DeBruijn graph
1583
pub struct Node<'a, K: Kmer + 'a, D: 'a> {
1584
    pub node_id: usize,
1585
    pub graph: &'a DebruijnGraph<K, D>,
1586
}
1587

1588
impl<'a, K: Kmer, D: Debug> Node<'a, K, D> {
1589
    /// Length of the sequence of this node
1590
    pub fn len(&self) -> usize {
1,653,738✔
1591
        self.graph.base.sequences.get(self.node_id).len()
1,653,738✔
1592
    }
1,653,738✔
1593

1594
    pub fn is_empty(&self) -> bool {
×
1595
        self.graph.base.sequences.get(self.node_id).is_empty()
×
1596
    }
×
1597

1598
    /// Sequence of the node
1599
    pub fn sequence(&self) -> DnaStringSlice<'a> {
3,467,079✔
1600
        self.graph.base.sequences.get(self.node_id)
3,467,079✔
1601
    }
3,467,079✔
1602

1603
    /// Reference to auxiliarly data associated with the node
1604
    pub fn data(&self) -> &'a D {
3,809,115✔
1605
        &self.graph.base.data[self.node_id]
3,809,115✔
1606
    }
3,809,115✔
1607

1608
    /// Extension bases from this node
1609
    pub fn exts(&self) -> Exts {
2,442,417✔
1610
        self.graph.base.exts[self.node_id]
2,442,417✔
1611
    }
2,442,417✔
1612

1613
    /// Edges leaving the left side of the node in the format
1614
    /// (base, target_node id, incoming side of target node, whether target node is flipped)
1615
    pub fn l_edges(&self) -> SmallVec4<(u8, usize, Dir, bool)> {
130,660✔
1616
        self.graph.find_edges(self.node_id, Dir::Left)
130,660✔
1617
    }
130,660✔
1618

1619
    /// Edges leaving the right side of the node in the format
1620
    /// (base, target_node id, incoming side of target node, whether target node is flipped)
1621
    pub fn r_edges(&self) -> SmallVec4<(u8, usize, Dir, bool)> {
130,660✔
1622
        self.graph.find_edges(self.node_id, Dir::Right)
130,660✔
1623
    }
130,660✔
1624

1625
    /// Edges leaving the 'dir' side of the node in the format
1626
    /// (base, target_node id, incoming side of target node, whether target node is flipped)
1627
    pub fn edges(&self, dir: Dir) -> SmallVec4<(u8, usize, Dir, bool)> {
528,252✔
1628
        self.graph.find_edges(self.node_id, dir)
528,252✔
1629
    }
528,252✔
1630

1631
    fn to_json<F: Fn(&D) -> Value>(&self, func: &F, f: &mut dyn Write) {
×
1632
        write!(
×
1633
            f,
×
1634
            "{{\"id\":\"{}\",\"L\":{},\"D\":{},\"Se\":\"{:?}\"}}",
×
1635
            self.node_id,
×
1636
            self.sequence().len(),
×
1637
            (func)(self.data()),
×
1638
            self.sequence(),
×
1639
        )
×
1640
        .unwrap();
×
1641
    }
×
1642

1643
    fn edges_to_json(&self, f: &mut dyn Write) -> bool {
×
1644
        let mut wrote = false;
×
1645
        let edges = self.r_edges();
×
1646
        for (idx, &(_, id, incoming_dir, _)) in edges.iter().enumerate() {
×
1647
            write!(
×
1648
                f,
×
1649
                "{{\"source\":\"{}\",\"target\":\"{}\",\"D\":\"{}\"}}",
×
1650
                self.node_id,
×
1651
                id,
×
1652
                match incoming_dir {
×
1653
                    Dir::Left => "L",
×
1654
                    Dir::Right => "R",
×
1655
                }
1656
            )
1657
            .unwrap();
×
1658

×
1659
            if idx < edges.len() - 1 {
×
1660
                write!(f, ",").unwrap();
×
1661
            }
×
1662

1663
            wrote = true;
×
1664
        }
1665
        wrote
×
1666
    }
×
1667
}
1668

1669
// TODO make generic instead of u8 (u8 is sufficient for dbg)
1670
impl<K: Kmer, SD: SummaryData<u8> + Debug> Node<'_, K, SD>  {
1671
    /// get default format for dot edges based on node data
1672
    pub fn edge_dot_default(&self, colors: &Colors<SD>, base: u8, incoming_dir: Dir, flipped: bool) -> String {
129,845✔
1673
        // set color based on dir
1674
        let color = match incoming_dir {
129,845✔
1675
            Dir::Left => "blue",
65,064✔
1676
            Dir::Right => "red"
64,781✔
1677
        };
1678
        
1679
        if let Some(em) = self.data().edge_mults() {
129,845✔
1680
            
1681
            let dir = if flipped { 
129,845✔
1682
                incoming_dir 
51,540✔
1683
            } else {
1684
                incoming_dir.flip()
78,305✔
1685
            };
1686

1687
            // set penwidth based on count
1688
            let count = em.edge_mult(base, dir);
129,845✔
1689
            let penwidth = colors.edge_width(count);
129,845✔
1690

129,845✔
1691
            format!("[color={color}, penwidth={penwidth}, label=\"{}: {count}\"]", bits_to_base(base))
129,845✔
1692
        } else {
1693
            format!("[color={color}]")
×
1694
        }
1695
    }
129,845✔
1696

1697
    /// get default format for dot nodes, based on node data
1698
    pub fn node_dot_default(&self, colors: &Colors<SD>, config: &SummaryConfig, tag_translator: &bimap::BiHashMap<String, u8> , outline: bool) -> String {
65,325✔
1699
        // set color based on labels/fold change/p-value
65,325✔
1700
        let color = colors.node_color(self.data(), config, outline);
65,325✔
1701

65,325✔
1702
        let data_info = self.data().print(tag_translator, config);
65,325✔
1703
        const MIN_TEXT_WIDTH: usize = 40;
1704
        let wrap = if self.len() > MIN_TEXT_WIDTH { self.len() } else { MIN_TEXT_WIDTH };
65,325✔
1705

1706
        let label = textwrap::fill(&format!("id: {}, len: {}, exts: {:?}, seq: {}\n{}", 
65,325✔
1707
            self.node_id,
65,325✔
1708
            self.len(),
65,325✔
1709
            self.exts(),
65,325✔
1710
            self.sequence(),
65,325✔
1711
            data_info
65,325✔
1712
        ), wrap);
65,325✔
1713

65,325✔
1714
        format!("[style=filled, {color}, label=\"{label}\"]")
65,325✔
1715
    }
65,325✔
1716
}
1717

1718
impl<K: Kmer, D> fmt::Debug for Node<'_, K, D>
1719
where
1720
    D: Debug,
1721
{
1722
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
8✔
1723
        write!(
8✔
1724
            f,
8✔
1725
            "Node {{ id:{}, Exts: {:?}, L:{:?} R:{:?}, Seq: {:?}, Data: {:?} }}",
8✔
1726
            self.node_id,
8✔
1727
            self.exts(),
8✔
1728
            self.l_edges(),
8✔
1729
            self.r_edges(),
8✔
1730
            self.sequence().len(),
8✔
1731
            self.data()
8✔
1732
        )
8✔
1733
    }
8✔
1734
}
1735

1736
pub struct IterComponents<'a, K: Kmer, D> {
1737
    graph: &'a DebruijnGraph<K, D>,
1738
    visited: Vec<bool>,
1739
    pos: usize,
1740
}
1741

1742
impl<K: Kmer, D: Debug> Iterator for IterComponents<'_, K, D> {
1743
    type Item = Vec<usize>;
1744
    fn next(&mut self) -> Option<Self::Item> {
12✔
1745
        while self.pos < self.graph.len() {
28✔
1746
            if !self.visited[self.pos] {
24✔
1747
                let comp = self.graph.component_i(&mut self.visited, self.pos);
8✔
1748
                self.pos += 1;
8✔
1749
                return Some(comp)
8✔
1750
            } else {
16✔
1751
                self.pos += 1;
16✔
1752
            }
16✔
1753
        }
1754
        assert!(self.visited.iter().map(|x| *x as usize).sum::<usize>() == self.graph.len());
24✔
1755
        None
4✔
1756
    }
12✔
1757
    
1758
}
1759

1760
pub struct PathCompIter<'a, K: Kmer, D: Debug, F, F2> 
1761
where 
1762
F: Fn(&D) -> f32,
1763
F2: Fn(&D) -> bool
1764
{
1765
    graph: &'a DebruijnGraph<K, D>,
1766
    component_iterator: IterComponents<'a, K, D>,
1767
    graph_pos: usize,
1768
    score: F,
1769
    solid_path: F2,
1770
}
1771

1772
/// returns the component and the "best" path in the component
1773
impl<K: Kmer, D: Debug, F, F2> Iterator for PathCompIter<'_, K, D, F, F2> 
1774
where 
1775
F: Fn(&D) -> f32,
1776
F2: Fn(&D) -> bool
1777
{
1778
    type Item = (Vec<usize>, VecDeque<(usize, Dir)>,);
1779
    fn next(&mut self) -> Option<Self::Item> {
6✔
1780
        match self.component_iterator.next() {
6✔
1781
            Some(component) => {
4✔
1782
                let current_comp = component;
4✔
1783
                
4✔
1784
    
4✔
1785
                let mut best_node = current_comp[0];
4✔
1786
                let mut best_score = f32::MIN;
4✔
1787
                for c in current_comp.iter() {
12✔
1788
                    let node = self.graph.get_node(*c);
12✔
1789
                    let node_score = (self.score)(node.data());
12✔
1790
    
12✔
1791
                    if node_score > best_score {
12✔
1792
                        best_node = *c;
4✔
1793
                        best_score = node_score;
4✔
1794
                    }
8✔
1795
                }
1796
    
1797
                let oscore = |state| match state {
16✔
1798
                    None => 0.0,
4✔
1799
                    Some((id, _)) => (self.score)(self.graph.get_node(id).data()),
12✔
1800
                };
16✔
1801
    
1802
                let osolid_path = |state| match state {
4✔
1803
                    None => false,
×
1804
                    Some((id, _)) => (self.solid_path)(self.graph.get_node(id).data()),
4✔
1805
                };
4✔
1806
    
1807
                // Now expand in each direction, greedily taking the best path. Stop if we hit a node we've
1808
                // already put into the path
1809
                let mut used_nodes = HashSet::new();
4✔
1810
                let mut path = VecDeque::new();
4✔
1811
    
4✔
1812
                // Start w/ initial state
4✔
1813
                used_nodes.insert(best_node);
4✔
1814
                path.push_front((best_node, Dir::Left));
4✔
1815
    
1816
                for init in [(best_node, Dir::Left, false), (best_node, Dir::Right, true)].iter() {
8✔
1817
                    let &(start_node, dir, do_flip) = init;
8✔
1818
                    let mut current = (start_node, dir);
8✔
1819
    
1820
                    loop {
1821
                        let mut next = None;
12✔
1822
                        let (cur_id, incoming_dir) = current;
12✔
1823
                        let node = self.graph.get_node(cur_id);
12✔
1824
                        let edges = node.edges(incoming_dir.flip());
12✔
1825
    
12✔
1826
                        let mut solid_paths = 0;
12✔
1827
                        for (_, id, dir, _) in edges {
16✔
1828
                            let cand = Some((id, dir));
4✔
1829
                            if osolid_path(cand) {
4✔
1830
                                solid_paths += 1;
4✔
1831

4✔
1832
                                // second if clause is outside of first in original code (see max_path) 
4✔
1833
                                // but would basically ignore path validity.
4✔
1834
                                if oscore(cand) > oscore(next) {
4✔
1835
                                    next = cand;
4✔
1836
                                }
4✔
1837
                            }
×
1838
    
1839
                            if oscore(cand) > oscore(next) {
4✔
1840
                                next = cand;
×
1841
                            }
4✔
1842
                        }
1843
    
1844
                        if solid_paths > 1 {
12✔
UNCOV
1845
                            break;
×
1846
                        }
12✔
1847
    
1848
                        match next {
4✔
1849
                            Some((next_id, next_incoming)) if !used_nodes.contains(&next_id) => {
4✔
1850
                                if do_flip {
4✔
1851
                                    path.push_front((next_id, next_incoming.flip()));
2✔
1852
                                } else {
2✔
1853
                                    path.push_back((next_id, next_incoming));
2✔
1854
                                }
2✔
1855
    
1856
                                used_nodes.insert(next_id);
4✔
1857
                                current = (next_id, next_incoming);
4✔
1858
                            }
1859
                            _ => break,
8✔
1860
                        }
1861
                    }
1862
                }
1863
                
1864
                
1865
                Some((current_comp, path))
4✔
1866
            }, 
1867
            None => {
1868
                // should technically not need graph_pos after this 
1869
                self.graph_pos += 1;
2✔
1870
                None
2✔
1871
            }
1872
        }
1873
    }
6✔
1874
}
1875

1876
#[cfg(test)]
1877
mod test {
1878
    use std::{fs::File, io::BufReader};
1879

1880
    use crate::{kmer::Kmer16, summarizer::TagsCountsSumData};
1881

1882
    use super::DebruijnGraph;
1883

1884
    #[test]
1885
    #[cfg(not(feature = "sample128"))]
1886
    fn test_components() {
1887
        use crate::{summarizer::SummaryData, Dir, BUF};
1888

1889
        let path = "test_data/400.graph.dbg";
1890
        let file = BufReader::with_capacity(BUF, File::open(path).unwrap());
1891

1892
        let (graph, _, _): (DebruijnGraph<Kmer16, TagsCountsSumData>, Vec<String>, crate::summarizer::SummaryConfig) = 
1893
            bincode::deserialize_from(file).expect("error deserializing graph");
1894

1895
        let components = graph.iter_components();
1896

1897
        let check_components = [
1898
            vec![3, 67, 130, 133, 59, 119, 97, 110, 68, 137, 29, 84, 131, 43, 30, 91, 14, 70, 79, 142, 136, 105, 103, 62, 
1899
                141, 104, 134, 88, 38, 81, 108, 92, 135, 96, 116, 121, 63, 124, 106, 129, 132, 126, 93, 109, 83, 112, 118, 
1900
                123, 125, 78, 122, 115, 75, 128, 140, 111, 26, 143, 113],
1901
            vec![41, 138, 100, 139, 86],
1902
            vec![53, 117, 127],
1903
            vec![69, 144, 77, 120, 114, 107, 101],
1904
        ];
1905

1906
        let mut counter = 0;
1907

1908
        for component in components {
1909
            if component.len() > 1 { 
1910
                println!("component: {:?}", component);
1911
                assert_eq!(component, check_components[counter]);
1912
                counter += 1;
1913
            }
1914
        }
1915
        assert_eq!(vec![(139, Dir::Left)], graph.max_path(|data| data.score(), |_| true));
1916
    }
1917
}
1918

STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc