• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

jlab / rust-debruijn / 14333782752

08 Apr 2025 12:41PM UTC coverage: 80.356% (-2.3%) from 82.636%
14333782752

Pull #11

github

web-flow
Merge c1b9b83ef into 2f646dc00
Pull Request #11: Modifyable DOT edges and default formatting methods for nodes and edges

35 of 273 new or added lines in 5 files covered. (12.82%)

13 existing lines in 2 files now uncovered.

6091 of 7580 relevant lines covered (80.36%)

3917548.99 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.18
/src/compression.rs
1
// Copyright 2017 10x Genomics
2

3
//! Create compressed DeBruijn graphs from uncompressed DeBruijn graphs, or a collection of disjoint DeBruijn graphs.
4
use bit_set::BitSet;
5
use indicatif::{ProgressBar, ProgressIterator, ProgressStyle};
6
use log::debug;
7
use rayon::current_num_threads;
8
use rayon::iter::{IntoParallelIterator, ParallelIterator};
9
use std::collections::{HashMap, VecDeque};
10
use std::fmt::Debug;
11
use std::marker::PhantomData;
12
use std::mem;
13
use std::sync::{Arc, Mutex};
14
use std::time::Instant;
15

16
use crate::dna_string::{DnaString, PackedDnaStringSet};
17
use crate::graph::{BaseGraph, DebruijnGraph};
18
use crate::Dir;
19
use crate::Exts;
20
use crate::Kmer;
21
use crate::Vmer;
22
use boomphf::hashmap::BoomHashMap2;
23

24
#[derive(Copy, Clone)]
25
enum ExtMode<K: Kmer> {
26
    Unique(K, Dir, Exts),
27
    Terminal(Exts),
28
}
29

30
#[derive(Copy, Clone)]
31
enum ExtModeNode {
32
    Unique(usize, Dir, Exts),
33
    Terminal(Exts),
34
}
35

36
/// Customize the path-compression process. Implementing this trait lets the user
37
/// control how the per-kmer data (of type `D`) is summarized into a per-path
38
/// summary data (of type `DS`). It also let's the user introduce new breaks into
39
/// paths by inspecting in the per-kmer data of a proposed with `join_test_kmer`
40
/// function.
41
pub trait CompressionSpec<D> {
42
    /// combine the data of two nodes
43
    fn reduce(&self, path_object: D, kmer_object: &D) -> D;
44
    /// check if the data of two nodes can be combined
45
    fn join_test(&self, d1: &D, d2: &D) -> bool;
46
}
47

48
/// Simple implementation of `CompressionSpec` that lets you provide that data reduction function as a closure
49
pub struct SimpleCompress<D, F> {
50
    func: F,
51
    d: PhantomData<D>,
52
}
53

54
impl<D, F> SimpleCompress<D, F> {
55
    pub fn new(func: F) -> SimpleCompress<D, F> {
1,785✔
56
        SimpleCompress {
1,785✔
57
            func,
1,785✔
58
            d: PhantomData,
1,785✔
59
        }
1,785✔
60
    }
1,785✔
61
}
62

63
impl<D, F> CompressionSpec<D> for SimpleCompress<D, F>
64
where
65
    for<'r> F: Fn(D, &'r D) -> D,
66
{
67
    fn reduce(&self, d: D, other: &D) -> D {
1,532,163✔
68
        (self.func)(d, other)
1,532,163✔
69
    }
1,532,163✔
70

71
    fn join_test(&self, _: &D, _: &D) -> bool {
1,575,663✔
72
        true
1,575,663✔
73
    }
1,575,663✔
74
}
75

76
/// Extending trait CompressionSpec for compression
77
pub struct ScmapCompress<D> {
78
    d: PhantomData<D>,
79
}
80

81
impl<D> ScmapCompress<D> {
82
    pub fn new() -> ScmapCompress<D> {
1✔
83
        ScmapCompress { d: PhantomData }
1✔
84
    }
1✔
85
}
86

87
impl<D> Default for ScmapCompress<D> {
88
    fn default() -> Self {
×
89
        Self::new()
×
90
    }
×
91
}
92

93
impl<D: PartialEq> CompressionSpec<D> for ScmapCompress<D>
94
where
95
    D: Debug,
96
{
97
    fn reduce(&self, d: D, other: &D) -> D {
5,838✔
98
        if d != *other {
5,838✔
99
            panic!("{:?} != {:?}, Should not happen", d, *other);
×
100
        }
5,838✔
101
        d
5,838✔
102
    }
5,838✔
103

104
    fn join_test(&self, d1: &D, d2: &D) -> bool {
5,952✔
105
        d1 == d2
5,952✔
106
    }
5,952✔
107
}
108

109
/// CompressionSpec with check and function
110
pub struct CheckCompress<D, F1, F2> {
111
    reduce_func: F1,
112
    join_func: F2,
113
    d: PhantomData<D>
114
} 
115

116
impl<D, F1, F2> CheckCompress<D, F1, F2> {
117
    pub fn new(reduce_func: F1, join_func: F2) -> Self {
×
118
        CheckCompress {
×
119
            reduce_func,
×
120
            join_func,
×
121
            d: PhantomData,
×
122
        }
×
123
    }
×
124
}
125

126
impl<D, F1, F2> CompressionSpec<D> for CheckCompress<D, F1, F2>
127
where
128
    for<'r> F1: Fn(D, &'r D) -> D,
129
    for<'r> F2: Fn(&'r D, &'r D) -> bool
130
{
131
    fn reduce(&self, d: D, other: &D) -> D {
×
132
        (self.reduce_func)(d, other)
×
133
    }
×
134

135
    fn join_test(&self, d: &D, other: &D) -> bool {
×
136
        (self.join_func)(d, other)
×
137
    }
×
138
}
139

140

141
struct CompressFromGraph<'a, 'b, K: 'a + Kmer, D: 'a + PartialEq, S: CompressionSpec<D>> {
142
    stranded: bool,
143
    d: PhantomData<D>,
144
    spec: &'b S,
145
    available_nodes: BitSet,
146
    graph: &'a DebruijnGraph<K, D>,
147
}
148

149
impl<K, D, S> CompressFromGraph<'_, '_, K, D, S>
150
where
151
    K: Kmer + Send + Sync,
152
    D: Debug + Clone + PartialEq,
153
    S: CompressionSpec<D>,
154
{
155
    #[inline(never)]
156
    fn try_extend_node(&mut self, node: usize, dir: Dir) -> ExtModeNode {
770,874✔
157
        let node = self.graph.get_node(node);
770,874✔
158
        let bases = node.sequence();
770,874✔
159
        let exts = node.exts();
770,874✔
160

770,874✔
161
        if exts.num_ext_dir(dir) != 1
770,874✔
162
            || (!self.stranded && node.len() == K::k() && bases.get_kmer::<K>(0).is_palindrome())
757,685✔
163
        {
164
            ExtModeNode::Terminal(exts.single_dir(dir))
13,193✔
165
        } else {
166
            // Get the next kmer
167
            let ext_base = exts.get_unique_extension(dir).expect("should be unique");
757,681✔
168
            let end_kmer: K = bases.term_kmer(dir);
757,681✔
169

757,681✔
170
            let next_kmer = end_kmer.extend(ext_base, dir);
757,681✔
171
            let (next_node_id, next_side_incoming, rc) = match self.graph.find_link(next_kmer, dir)
757,681✔
172
            {
173
                Some(e) => e,
757,681✔
174
                None => {
175
                    println!("dir: {:?}, Lmer: {:?}, exts: {:?}", dir, bases, exts);
×
176
                    println!("end kmer: {:?}", end_kmer);
×
177
                    println!("No kmer: {:?}", next_kmer);
×
178
                    println!("rc: {:?}", next_kmer.min_rc());
×
179
                    panic!("No kmer: {:?}", next_kmer)
×
180
                }
181
            };
182

183
            let next_node = self.graph.get_node(next_node_id);
757,681✔
184
            let next_exts = next_node.exts();
757,681✔
185

186
            let consistent = (next_node.len() == K::k())
757,681✔
187
                || match (dir, next_side_incoming, rc) {
3,643✔
188
                    (Dir::Left, Dir::Right, false) => true,
978✔
189
                    (Dir::Left, Dir::Left, true) => true,
886✔
190
                    (Dir::Right, Dir::Left, false) => true,
897✔
191
                    (Dir::Right, Dir::Right, true) => true,
882✔
192
                    _ => {
193
                        println!("dir: {:?}, Lmer: {:?}, exts: {:?}", dir, bases, exts);
×
194
                        println!("end kmer: {:?}", end_kmer);
×
195
                        println!("next kmer: {:?}", next_kmer);
×
196
                        println!("rc: {:?}", next_kmer.min_rc());
×
197
                        println!(
×
198
                            "next bases: {:?}, next_side_incoming: {:?}, rc: {:?}",
×
199
                            next_node.sequence(),
×
200
                            next_side_incoming,
×
201
                            rc
×
202
                        );
×
203
                        false
×
204
                    }
205
                };
206
            assert!(consistent);
757,681✔
207

208
            // We can include this kmer in the line if:
209
            // a) it exists in the partition, and is still unused
210
            // b) the kmer we go to has a unique extension back in our direction
211
            // c) the new edge is not of length K and a palindrome
212
            // d) the color of the current and next node is same
213

214
            if !self.available_nodes.contains(next_node_id)
757,681✔
215
                || (!self.stranded && next_kmer.is_palindrome())
746,520✔
216
                || !self.spec.join_test(node.data(), next_node.data())
746,519✔
217
            {
218
                // Next kmer isn't in this partition,
219
                // or we've already used it,
220
                // or it's palindrom and we are not stranded
221
                // or the colors were not same
222
                return ExtModeNode::Terminal(exts.single_dir(dir));
11,162✔
223
            }
746,519✔
224

746,519✔
225
            // orientation of next edge
746,519✔
226
            let next_side_outgoing = next_side_incoming.flip();
746,519✔
227

746,519✔
228
            let incoming_count = next_exts.num_ext_dir(next_side_incoming);
746,519✔
229
            let outgoing_exts = next_exts.single_dir(next_side_outgoing);
746,519✔
230

746,519✔
231
            if incoming_count == 0 {
746,519✔
232
                println!("dir: {:?}, Lmer: {:?}, exts: {:?}", dir, bases, exts);
×
233
                println!("end kmer: {:?}", end_kmer);
×
234
                println!("next_node: {:?}", next_node);
×
235
                println!("next_node data: {:?}", next_node.sequence());
×
236
                panic!("unreachable");
×
237
            } else if incoming_count == 1 {
746,519✔
238
                // We have a unique path to next_kmer -- include it
239
                ExtModeNode::Unique(next_node_id, next_side_outgoing, outgoing_exts)
725,096✔
240
            } else {
241
                // there's more than one path
242
                // into the target kmer - don't include it
243
                ExtModeNode::Terminal(exts.single_dir(dir))
21,423✔
244
            }
245
        }
246
    }
770,874✔
247

248
    /// Generate complete unbranched edges
249
    fn extend_node(&mut self, start_node: usize, start_dir: Dir) -> (Vec<(usize, Dir)>, Exts) {
45,778✔
250
        let mut current_dir = start_dir;
45,778✔
251
        let mut current_node = start_node;
45,778✔
252
        let mut path = Vec::new();
45,778✔
253
        let final_exts: Exts; // must get set below
45,778✔
254

45,778✔
255
        self.available_nodes.remove(start_node);
45,778✔
256

257
        loop {
258
            let ext_result = self.try_extend_node(current_node, current_dir);
770,874✔
259

770,874✔
260
            match ext_result {
770,874✔
261
                ExtModeNode::Unique(next_node, next_dir_outgoing, _) => {
725,096✔
262
                    let next_dir_incoming = next_dir_outgoing.flip();
725,096✔
263
                    path.push((next_node, next_dir_incoming));
725,096✔
264
                    self.available_nodes.remove(next_node);
725,096✔
265
                    current_node = next_node;
725,096✔
266
                    current_dir = next_dir_outgoing;
725,096✔
267
                }
725,096✔
268
                ExtModeNode::Terminal(ext) => {
45,778✔
269
                    final_exts = ext;
45,778✔
270
                    break;
45,778✔
271
                }
45,778✔
272
            }
45,778✔
273
        }
45,778✔
274

45,778✔
275
        (path, final_exts)
45,778✔
276
    }
45,778✔
277

278
    // Determine the sequence and extensions of the maximal unbranched
279
    // edge, centered around the given edge number
280
    #[inline(never)]
281
    fn build_node(&mut self, seed_node: usize) -> (DnaString, Exts, VecDeque<(usize, Dir)>, D) {
22,889✔
282
        let (l_path, l_ext) = self.extend_node(seed_node, Dir::Left);
22,889✔
283
        let (r_path, r_ext) = self.extend_node(seed_node, Dir::Right);
22,889✔
284

22,889✔
285
        // Stick together edge chunks to get full edge sequence
22,889✔
286
        let mut node_path = VecDeque::new();
22,889✔
287

22,889✔
288
        let mut node_data: D = self.graph.get_node(seed_node).data().clone();
22,889✔
289
        node_path.push_back((seed_node, Dir::Left));
22,889✔
290

291
        // Add on the left path
292
        for &(next_node, incoming_dir) in l_path.iter() {
362,283✔
293
            node_path.push_front((next_node, incoming_dir.flip()));
362,283✔
294
            node_data = self
362,283✔
295
                .spec
362,283✔
296
                .reduce(node_data, self.graph.get_node(next_node).data());
362,283✔
297
        }
362,283✔
298

299
        // Add on the right path
300
        for &(next_node, incoming_dir) in r_path.iter() {
362,813✔
301
            node_path.push_back((next_node, incoming_dir));
362,813✔
302
            node_data = self
362,813✔
303
                .spec
362,813✔
304
                .reduce(node_data, self.graph.get_node(next_node).data());
362,813✔
305
        }
362,813✔
306

307
        let left_extend = match l_path.last() {
22,889✔
308
            None => l_ext,
9,978✔
309
            Some(&(_, Dir::Left)) => l_ext.complement(),
6,248✔
310
            Some(&(_, Dir::Right)) => l_ext,
6,663✔
311
        };
312

313
        let right_extend = match r_path.last() {
22,889✔
314
            None => r_ext,
9,916✔
315
            Some(&(_, Dir::Left)) => r_ext,
6,453✔
316
            Some(&(_, Dir::Right)) => r_ext.complement(),
6,520✔
317
        };
318

319
        let path_seq = self.graph.sequence_of_path(node_path.iter());
22,889✔
320

22,889✔
321
        // return sequence and extensions
22,889✔
322
        (
22,889✔
323
            path_seq,
22,889✔
324
            Exts::from_single_dirs(left_extend, right_extend),
22,889✔
325
            node_path,
22,889✔
326
            node_data,
22,889✔
327
        )
22,889✔
328
    }
22,889✔
329

330
    /// Simplify a compressed Debruijn graph by merging adjacent unbranched nodes, and optionally
331
    /// censoring some nodes
332
    fn compress_graph(
123✔
333
        stranded: bool,
123✔
334
        compression: &S,
123✔
335
        mut old_graph: DebruijnGraph<K, D>,
123✔
336
        censor_nodes: Option<Vec<usize>>,
123✔
337
    ) -> DebruijnGraph<K, D> {
123✔
338
        let n_nodes = old_graph.len();
123✔
339
        let mut available_nodes = BitSet::with_capacity(n_nodes);
123✔
340
        for i in 0..n_nodes {
747,985✔
341
            available_nodes.insert(i);
747,985✔
342
        }
747,985✔
343

344
        if let Some(c) = censor_nodes {
123✔
345
            for censor in c {
×
346
                available_nodes.remove(censor);
×
347
            }
×
348
        }
123✔
349

350
        old_graph.fix_exts(Some(&available_nodes));
123✔
351

123✔
352
        let mut comp = CompressFromGraph {
123✔
353
            spec: compression,
123✔
354
            stranded,
123✔
355
            graph: &old_graph,
123✔
356
            available_nodes,
123✔
357
            d: PhantomData,
123✔
358
        };
123✔
359

123✔
360
        // FIXME -- clarify requirements around state of extensions
123✔
361
        let mut graph = BaseGraph::new(stranded);
123✔
362

363
        for node_counter in 0..n_nodes {
747,985✔
364
            if comp.available_nodes.contains(node_counter) {
747,985✔
365
                let (seq, exts, _, data) = comp.build_node(node_counter);
22,889✔
366
                graph.add(&seq, exts, data);
22,889✔
367
            }
725,096✔
368
        }
369

370
        // We will have some hanging exts due to
371
        let mut dbg = graph.finish();
123✔
372
        dbg.fix_exts(None); // ????
123✔
373
        debug_assert!(dbg.is_compressed(compression).is_none());
123✔
374
        dbg
123✔
375
    }
123✔
376
}
377

378
/// Perform path-compression on a (possibly partially compressed) DeBruijn graph
379
pub fn compress_graph<
123✔
380
    K: Kmer + Send + Sync,
123✔
381
    D: Clone + Debug + PartialEq,
123✔
382
    S: CompressionSpec<D>,
123✔
383
>(
123✔
384
    stranded: bool,
123✔
385
    spec: &S,
123✔
386
    old_graph: DebruijnGraph<K, D>,
123✔
387
    censor_nodes: Option<Vec<usize>>,
123✔
388
) -> DebruijnGraph<K, D> {
123✔
389
    CompressFromGraph::<K, D, S>::compress_graph(stranded, spec, old_graph, censor_nodes)
123✔
390
}
123✔
391

392
//////////////////////////////
393
// Compress from Hash a new Struct
394
//////////////////////////////
395
/// Generate a compressed DeBruijn graph from hash_index
396
struct CompressFromHash<'a, 'b, K: 'a + Kmer, D: 'a, S: CompressionSpec<D>> {
397
    stranded: bool,
398
    k: PhantomData<K>,
399
    d: PhantomData<D>,
400
    spec: &'b S,
401
    available_kmers: BitSet,
402
    index: &'a BoomHashMap2<K, Exts, D>,
403
}
404

405
/// Compression of paths in Debruijn graph
406
impl<K: Kmer +  Send + Sync, D: Clone + Debug + Send + Sync, S: CompressionSpec<D> + Sync> CompressFromHash<'_, '_, K, D, S> {
407
    fn get_kmer_data(&self, kmer: &K) -> (&Exts, &D) {
2,543,028✔
408
        match self.index.get(kmer) {
2,543,028✔
409
            Some(data) => data,
2,543,028✔
410
            None => panic!("couldn't find kmer {:?}", kmer),
×
411
        }
412
    }
2,543,028✔
413

414
    fn get_kmer_id(&self, kmer: &K) -> Option<usize> {
1,709,611✔
415
        self.index.get_key_id(kmer)
1,709,611✔
416
    }
1,709,611✔
417

418
    /// Attempt to extend kmer v in direction dir. Return:
419
    ///  - Unique(nextKmer, nextDir) if a single unique extension
420
    ///    is possible.  nextDir indicates the direction to extend nextMker
421
    ///    to preserve the direction of the extension.
422
    /// - Term(ext) no unique extension possible, indicating the extensions at this end of the line
423
    fn try_extend_kmer(&self, kmer: K, dir: Dir) -> ExtMode<K> {
861,650✔
424
        // metadata of start kmer
861,650✔
425
        let (exts, ref kmer_data) = self.get_kmer_data(&kmer);
861,650✔
426

861,650✔
427
        // kmer is marked terminal if it has not one extension in one direction (if clear path always 1) 
861,650✔
428
        // or if the graph is not stranded and the kmer is a palindrome
861,650✔
429
        if exts.num_ext_dir(dir) != 1 || (!self.stranded && kmer.is_palindrome()) {
861,650✔
430
            ExtMode::Terminal(exts.single_dir(dir))
13,689✔
431
        } else {
432
            // Get the next kmer
433
            let ext_base = exts.get_unique_extension(dir).expect("should be unique");
847,961✔
434

847,961✔
435
            let mut next_kmer = kmer.extend(ext_base, dir);
847,961✔
436

847,961✔
437
            let mut do_flip = false;
847,961✔
438
            
847,961✔
439
            // decide if direction needs to be changed (how?????????) turn kmer into rc
847,961✔
440
            if !self.stranded {
847,961✔
441
                let flip_rc = next_kmer.min_rc_flip();
847,961✔
442
                do_flip = flip_rc.1;
847,961✔
443
                next_kmer = flip_rc.0;
847,961✔
444
            }
847,961✔
445

446
            let next_dir = dir.cond_flip(do_flip);
847,961✔
447
            let is_palindrome = !self.stranded && next_kmer.is_palindrome();
847,961✔
448

449
            // We can include this kmer in the line if:
450
            // a) it exists in the partition and is still unused
451
            // b) the kmer we go to has a unique extension back in our direction
452

453
            // Check condition a)
454
            match self.get_kmer_id(&next_kmer) {
847,961✔
455
                Some(id) if self.available_kmers.contains(id) => (),
840,842✔
456

457
                // This kmer isn't in this partition, or we've already used it
458
                _ => return ExtMode::Terminal(exts.single_dir(dir)),
18,674✔
459
            }
460

461
            // Check condition b)
462
            // Direction we're approaching the new kmer from
463
            let new_incoming_dir = dir.flip().cond_flip(do_flip);
829,287✔
464
            let next_kmer_r = self.get_kmer_data(&next_kmer);
829,287✔
465
            let (next_kmer_exts, ref next_kmer_data) = next_kmer_r;
829,287✔
466
            let incoming_count = next_kmer_exts.num_ext_dir(new_incoming_dir);
829,287✔
467
            let outgoing_exts = next_kmer_exts.single_dir(new_incoming_dir.flip());
829,287✔
468

829,287✔
469
            // Test if the spec let's us combine these into the same path
829,287✔
470
            let can_join = self.spec.join_test(kmer_data, next_kmer_data);
829,287✔
471

829,287✔
472
            if incoming_count == 0 && !is_palindrome {
829,287✔
473
                println!("{:?}, {:?}, {:?}", kmer, exts, kmer_data);
×
474
                println!(
×
475
                    "{:?}, {:?}, {:?}",
×
476
                    next_kmer, next_kmer_exts, next_kmer_data
×
477
                );
×
478
                panic!("unreachable");
×
479
            } else if can_join && incoming_count == 1 && !is_palindrome {
829,287✔
480
                // We have a unique path to next_kmer -- include it
481
                ExtMode::Unique(next_kmer, next_dir, outgoing_exts)
807,208✔
482
            } else {
483
                // there's more than one path
484
                // into the target kmer - don't include it
485
                ExtMode::Terminal(exts.single_dir(dir))
22,079✔
486
            }
487
        }
488
    }
861,650✔
489

490
    /// Attempt to extend kmer v in direction dir. Return:
491
    ///  - Unique(nextKmer, nextDir) if a single unique extension
492
    ///    is possible.  nextDir indicates the direction to extend nextMker
493
    ///    to preserve the direction of the extension.
494
    /// - Term(ext) no unique extension possible, indicating the extensions at this end of the line
495
    fn try_extend_kmer_par(&self, kmer: K, dir: Dir, path: &mut [(K, Dir)]) -> ExtMode<K> {
6,003✔
496
        // metadata of start kmer
6,003✔
497
        let (exts, ref kmer_data) = self.get_kmer_data(&kmer);
6,003✔
498

6,003✔
499
        // kmer is marked terminal if it has not one extension in one direction (if clear path always 1) 
6,003✔
500
        // or if the graph is not stranded and the kmer is a palindrome
6,003✔
501
        if exts.num_ext_dir(dir) != 1 || (!self.stranded && kmer.is_palindrome()) {
6,003✔
502
            ExtMode::Terminal(exts.single_dir(dir))
194✔
503
        } else {
504
            // Get the next kmer
505
            let ext_base = exts.get_unique_extension(dir).expect("should be unique");
5,809✔
506

5,809✔
507
            let mut next_kmer = kmer.extend(ext_base, dir);
5,809✔
508

5,809✔
509
            let mut do_flip = false;
5,809✔
510
            
5,809✔
511
            // decide if direction needs to be changed (how?????????) turn kmer into rc
5,809✔
512
            if !self.stranded {
5,809✔
513
                let flip_rc = next_kmer.min_rc_flip();
5,809✔
514
                do_flip = flip_rc.1;
5,809✔
515
                next_kmer = flip_rc.0;
5,809✔
516
            }
5,809✔
517

518
            let next_dir = dir.cond_flip(do_flip);
5,809✔
519
            let is_palindrome = !self.stranded && next_kmer.is_palindrome();
5,809✔
520

521
            // We can include this kmer in the line if:
522
            // a) it exists in the partition and is still unused
523
            // b) the kmer we go to has a unique extension back in our direction
524

525
            // condition a) is skipped cause of parallel process
526
            // instead check if already in path
527
            // might be time consuming, can try to replace with hashmap later
528
            // to avoid circluar graphs leading infinite loops
529
            // also check if just checking for next_dir would suffice
530

531
            if path.contains(&(next_kmer, Dir::Left)) {
5,809✔
532
                // The next kmer is already in the path, break loop
533
                return ExtMode::Terminal(exts.single_dir(dir))
×
534
            }
5,809✔
535

5,809✔
536
            if path.contains(&(next_kmer, Dir::Right)) {
5,809✔
537
                // The next kmer is already in the path, break loop
538
                return ExtMode::Terminal(exts.single_dir(dir))
×
539
            }
5,809✔
540

5,809✔
541
            // Check condition b)
5,809✔
542
            // Direction we're approaching the new kmer from
5,809✔
543
            let new_incoming_dir = dir.flip().cond_flip(do_flip);
5,809✔
544
            let next_kmer_r = self.get_kmer_data(&next_kmer);
5,809✔
545
            let (next_kmer_exts, ref next_kmer_data) = next_kmer_r;
5,809✔
546
            let incoming_count = next_kmer_exts.num_ext_dir(new_incoming_dir);
5,809✔
547
            let outgoing_exts = next_kmer_exts.single_dir(new_incoming_dir.flip());
5,809✔
548

5,809✔
549
            // Test if the spec let's us combine these into the same path
5,809✔
550
            let can_join = self.spec.join_test(kmer_data, next_kmer_data);
5,809✔
551

5,809✔
552
            if incoming_count == 0 && !is_palindrome {
5,809✔
553
                println!("{:?}, {:?}, {:?}", kmer, exts, kmer_data);
×
554
                println!(
×
555
                    "{:?}, {:?}, {:?}",
×
556
                    next_kmer, next_kmer_exts, next_kmer_data
×
557
                );
×
558
                panic!("unreachable");
×
559
            } else if can_join && incoming_count == 1 && !is_palindrome {
5,809✔
560
                // We have a unique path to next_kmer -- include it
561
                ExtMode::Unique(next_kmer, next_dir, outgoing_exts)
5,697✔
562
            } else {
563
                // there's more than one path
564
                // into the target kmer - don't include it
565
                ExtMode::Terminal(exts.single_dir(dir))
112✔
566
            }
567
        }
568
    }
6,003✔
569

570

571
    /// Build the maximal line starting at kmer in direction dir, at most max_dist long.
572
    /// Also return the extensions at the end of this line.
573
    /// Sub-lines break if their extensions are not available in this shard
574
    #[inline(never)]
575
    fn extend_kmer(&mut self, kmer: K, start_dir: Dir, path: &mut Vec<(K, Dir)>) -> Exts {
54,442✔
576
        let mut current_dir = start_dir;
54,442✔
577
        let mut current_kmer = kmer;
54,442✔
578
        path.clear();
54,442✔
579

54,442✔
580
        let final_exts: Exts; // must get set below
54,442✔
581

54,442✔
582
        // get id of kmer and remove from available kmers
54,442✔
583
        let id = self.get_kmer_id(&kmer).expect("should have this kmer");
54,442✔
584
        let _ = self.available_kmers.remove(id);
54,442✔
585

586
        loop {
587
            let ext_result = self.try_extend_kmer(current_kmer, current_dir);
861,650✔
588

861,650✔
589
            match ext_result {
861,650✔
590
                ExtMode::Unique(next_kmer, next_dir, _) => {
807,208✔
591
                    path.push((next_kmer, next_dir));
807,208✔
592
                    let next_id = self.get_kmer_id(&next_kmer).expect("should have this kmer");
807,208✔
593
                    self.available_kmers.remove(next_id);
807,208✔
594
                    current_kmer = next_kmer;
807,208✔
595
                    current_dir = next_dir;
807,208✔
596
                }
807,208✔
597
                ExtMode::Terminal(ext) => {
54,442✔
598
                    final_exts = ext;
54,442✔
599
                    break;
54,442✔
600
                }
54,442✔
601
            }
54,442✔
602
        }
54,442✔
603

54,442✔
604
        final_exts
54,442✔
605
    }
54,442✔
606

607
    fn extend_kmer_par(&mut self, kmer: K, start_dir: Dir, path: &mut Vec<(K, Dir)>) -> Exts {
306✔
608
        let mut current_dir = start_dir;
306✔
609
        let mut current_kmer = kmer;
306✔
610
        path.clear();
306✔
611

612
        let final_exts: Exts; // must get set below
613

614
        loop {
615
            let ext_result = self.try_extend_kmer_par(current_kmer, current_dir, path);
6,003✔
616

6,003✔
617
            match ext_result {
6,003✔
618
                ExtMode::Unique(next_kmer, next_dir, _) => {
5,697✔
619
                    path.push((next_kmer, next_dir));
5,697✔
620
                    //let next_id = self.get_kmer_id(&next_kmer).expect("should have this kmer");
5,697✔
621
                    //self.available_kmers.remove(next_id);
5,697✔
622
                    current_kmer = next_kmer;
5,697✔
623
                    current_dir = next_dir;
5,697✔
624
                }
5,697✔
625
                ExtMode::Terminal(ext) => {
306✔
626
                    final_exts = ext;
306✔
627
                    break;
306✔
628
                }
306✔
629
            }
306✔
630
        }
306✔
631

306✔
632
        final_exts
306✔
633
    }
306✔
634

635
    /// Build the edge surrounding a kmer
636
    #[inline(never)]
637
    fn  build_node(
27,221✔
638
        &mut self,
27,221✔
639
        seed_id: usize,
27,221✔
640
        path: &mut Vec<(K, Dir)>,
27,221✔
641
        edge_seq: &mut VecDeque<u8>,
27,221✔
642
    ) -> (Exts, D) {
27,221✔
643
        let seed: K = *self.index.get_key(seed_id).expect("Index out of bound");
27,221✔
644
        edge_seq.clear();
27,221✔
645
        for i in 0..K::k() {
850,110✔
646
            edge_seq.push_back(seed.get(i));
850,110✔
647
        }
850,110✔
648

649
        let mut node_data = self.get_kmer_data(&seed).1.clone();
27,221✔
650

27,221✔
651
        // Unique path from seed kmer with Dir Left is built
27,221✔
652
        let l_ext = self.extend_kmer(seed, Dir::Left, path);
27,221✔
653

654

655
        // Add on the left path
656
        for &(next_kmer, dir) in path.iter() {
404,052✔
657
            let kmer = match dir {
404,052✔
658
                Dir::Left => next_kmer,
204,345✔
659
                Dir::Right => next_kmer.rc(),
199,707✔
660
            };
661

662
            edge_seq.push_front(kmer.get(0));
404,052✔
663

404,052✔
664
            // Reduce the data object
404,052✔
665
            let (_, kmer_data) = self.get_kmer_data(&next_kmer);
404,052✔
666
            node_data = self.spec.reduce(node_data, kmer_data)
404,052✔
667
        }
668

669
        let left_extend = match path.last() {
27,221✔
670
            None => l_ext,
11,273✔
671
            Some(&(_, Dir::Left)) => l_ext,
8,090✔
672
            Some(&(_, Dir::Right)) => l_ext.complement(),
7,858✔
673
        };
674

675

676
        // Unique path from seed kmer with Dir Right is built
677
        let r_ext = self.extend_kmer(seed, Dir::Right, path);
27,221✔
678

679
        // Add on the right path
680
        for &(next_kmer, dir) in path.iter() {
403,156✔
681
            let kmer = match dir {
403,156✔
682
                Dir::Left => next_kmer.rc(),
198,903✔
683
                Dir::Right => next_kmer,
204,253✔
684
            };
685

686
            edge_seq.push_back(kmer.get(K::k() - 1));
403,156✔
687

403,156✔
688
            let (_, kmer_data) = self.get_kmer_data(&next_kmer);
403,156✔
689
            node_data = self.spec.reduce(node_data, kmer_data)
403,156✔
690
        }
691

692
        let right_extend = match path.last() {
27,221✔
693
            None => r_ext,
11,130✔
694
            Some(&(_, Dir::Left)) => r_ext.complement(),
7,860✔
695
            Some(&(_, Dir::Right)) => r_ext,
8,231✔
696
        };
697
        
698
        (Exts::from_single_dirs(left_extend, right_extend), node_data)
27,221✔
699
    }
27,221✔
700

701

702
    /// Build the edge surrounding a kmer
703
    #[inline(never)]
704
    fn  build_node_start(
147✔
705
        &mut self,
147✔
706
        seed_id: usize,
147✔
707
        path: &mut Vec<(K, Dir)>,
147✔
708
        edge_seq: &mut VecDeque<u8>,
147✔
709
    ) -> (K, K) {
147✔
710
        let seed: K = *self.index.get_key(seed_id).expect("Index out of bound");
147✔
711
        edge_seq.clear();
147✔
712
        for i in 0..K::k() {
4,704✔
713
            edge_seq.push_back(seed.get(i));
4,704✔
714
        }
4,704✔
715

716
        let mut node_data = self.get_kmer_data(&seed).1.clone();
147✔
717

147✔
718
        // Unique path from seed kmer with Dir Left is built
147✔
719
        let _ = self.extend_kmer_par(seed, Dir::Left, path);
147✔
720

721
        // Add on the left path
722
        for &(next_kmer, dir) in path.iter() {
2,978✔
723
            let kmer = match dir {
2,978✔
724
                Dir::Left => next_kmer,
1,404✔
725
                Dir::Right => next_kmer.rc(),
1,574✔
726
            };
727

728
            edge_seq.push_front(kmer.get(0));
2,978✔
729

2,978✔
730
            // Reduce the data object
2,978✔
731
            let (_, kmer_data) = self.get_kmer_data(&next_kmer);
2,978✔
732
            node_data = self.spec.reduce(node_data, kmer_data)
2,978✔
733
        }
734

735
        // Unique path from seed kmer with Dir Right is built
736
        let _ = self.extend_kmer_par(seed, Dir::Right, path);
147✔
737

738
        // Add on the right path
739
        for &(next_kmer, dir) in path.iter() {
2,578✔
740
            let kmer = match dir {
2,578✔
741
                Dir::Left => next_kmer.rc(),
1,174✔
742
                Dir::Right => next_kmer,
1,404✔
743
            };
744

745
            edge_seq.push_back(kmer.get(K::k() - 1));
2,578✔
746

2,578✔
747
            let (_, kmer_data) = self.get_kmer_data(&next_kmer);
2,578✔
748
            node_data = self.spec.reduce(node_data, kmer_data)
2,578✔
749
        }
750

751
        let mut path_seq = PackedDnaStringSet::new();
147✔
752
        path_seq.add(edge_seq);
147✔
753
        let first = path_seq.sequence.get_kmer::<K>(0);
147✔
754
        let min_first = first.min_rc();
147✔
755
        let last = path_seq.sequence.get_kmer::<K>(path_seq.sequence.len()-K::k());
147✔
756
        let min_last = last.min_rc();
147✔
757

147✔
758
        if min_first < min_last {
147✔
759
            (min_first, min_last)
74✔
760
        } else {
761
            (min_last, min_first)
73✔
762
        }
763
    }
147✔
764

765
    #[inline(never)]
766
    fn  build_node_par(
6✔
767
        &mut self,
6✔
768
        seed_id: usize,
6✔
769
        path: &mut Vec<(K, Dir)>,
6✔
770
        edge_seq: &mut VecDeque<u8>,
6✔
771
    ) -> (Exts, D) {
6✔
772
        let seed: K = *self.index.get_key(seed_id).expect("Index out of bound");
6✔
773
        edge_seq.clear();
6✔
774
        for i in 0..K::k() {
192✔
775
            edge_seq.push_back(seed.get(i));
192✔
776
        }
192✔
777

778
        let mut node_data = self.get_kmer_data(&seed).1.clone();
6✔
779

6✔
780
        // Unique path from seed kmer with Dir Left is built
6✔
781
        let l_ext = self.extend_kmer_par(seed, Dir::Left, path);
6✔
782

783

784
        // Add on the left path
785
        for &(next_kmer, dir) in path.iter() {
74✔
786
            let kmer = match dir {
74✔
787
                Dir::Left => next_kmer,
35✔
788
                Dir::Right => next_kmer.rc(),
39✔
789
            };
790

791
            edge_seq.push_front(kmer.get(0));
74✔
792

74✔
793
            // Reduce the data object
74✔
794
            let (_, kmer_data) = self.get_kmer_data(&next_kmer);
74✔
795
            node_data = self.spec.reduce(node_data, kmer_data)
74✔
796
        }
797

798
        let left_extend = match path.last() {
6✔
799
            None => l_ext,
2✔
800
            Some(&(_, Dir::Left)) => l_ext,
1✔
801
            Some(&(_, Dir::Right)) => l_ext.complement(),
3✔
802
        };
803

804

805
        // Unique path from seed kmer with Dir Right is built
806
        let r_ext = self.extend_kmer_par(seed, Dir::Right, path);
6✔
807

808
        // Add on the right path
809
        for &(next_kmer, dir) in path.iter() {
67✔
810
            let kmer = match dir {
67✔
811
                Dir::Left => next_kmer.rc(),
34✔
812
                Dir::Right => next_kmer,
33✔
813
            };
814

815
            edge_seq.push_back(kmer.get(K::k() - 1));
67✔
816

67✔
817
            let (_, kmer_data) = self.get_kmer_data(&next_kmer);
67✔
818
            node_data = self.spec.reduce(node_data, kmer_data)
67✔
819
        }
820

821
        let right_extend = match path.last() {
6✔
822
            None => r_ext,
4✔
823
            Some(&(_, Dir::Left)) => r_ext.complement(),
2✔
UNCOV
824
            Some(&(_, Dir::Right)) => r_ext,
×
825
        };
826
        
827
        (Exts::from_single_dirs(left_extend, right_extend), node_data)
6✔
828
    }
6✔
829

830
    /// Compress a set of kmers and their extensions and metadata into a base DeBruijn graph.
831
    #[inline(never)]
832
    pub fn compress_kmers(
1,663✔
833
        stranded: bool,
1,663✔
834
        spec: &S,
1,663✔
835
        index: &BoomHashMap2<K, Exts, D>,
1,663✔
836
        progress: bool,
1,663✔
837
    ) -> BaseGraph<K, D> {
1,663✔
838
        
1,663✔
839
        let n_kmers = index.len();
1,663✔
840
        let mut available_kmers = BitSet::with_capacity(n_kmers);
1,663✔
841
        let progress = if n_kmers < 128 { false } else { progress };
1,663✔
842

843
        for i in 0..n_kmers {
834,429✔
844
            available_kmers.insert(i);
834,429✔
845
        }
834,429✔
846

847
        let mut comp = CompressFromHash {
1,663✔
848
            stranded,
1,663✔
849
            spec,
1,663✔
850
            k: PhantomData,
1,663✔
851
            d: PhantomData,
1,663✔
852
            available_kmers,
1,663✔
853
            index,
1,663✔
854
        };
1,663✔
855

1,663✔
856
        // Path-compressed De Bruijn graph will be created here
1,663✔
857
        let mut graph = BaseGraph::new(stranded);
1,663✔
858

1,663✔
859
        // Paths will be get assembled here
1,663✔
860
        let mut path_buf = Vec::new();
1,663✔
861

1,663✔
862
        // Node sequences will get assembled here
1,663✔
863
        let mut edge_seq_buf = VecDeque::new();
1,663✔
864

1,663✔
865
        debug!("n of kmers: {}", n_kmers);
1,663✔
866

867
        let steps = n_kmers as f32 / 128.;
1,663✔
868

1,663✔
869
        if progress {
1,663✔
870
            println!("Compressing kmers");
176✔
871
            for _i in 0..127 {
22,528✔
872
                print!("-");
22,352✔
873
            }
22,352✔
874
            println!("|");
176✔
875
        }
1,487✔
876

877
        let pb = ProgressBar::new(n_kmers as u64);
1,663✔
878
        pb.set_style(ProgressStyle::with_template("{msg} [{elapsed_precise}] {bar:60.cyan/blue} ({pos}/{len})").unwrap().progress_chars("#/-"));
1,663✔
879
        pb.set_message(format!("{:<32}", "compressing graph"));
1,663✔
880

881

882
        for kmer_counter in (0..n_kmers).progress_with(pb) {
834,429✔
883
            if progress && (kmer_counter as f32 % steps >= 0.) & (kmer_counter as f32 % steps < 1.) { print!("|")}
834,429✔
884

885
            if (kmer_counter as f32 % steps >= 0.) & (kmer_counter as f32 % steps < 1.) {
834,429✔
886
                debug!("another 1/128 done: {}, data graph size: {}", (kmer_counter as f32 / steps) as i32, mem::size_of_val(&*graph.data));
59,238✔
887
            }
775,191✔
888

889
            if comp.available_kmers.contains(kmer_counter) {
834,429✔
890
                let (node_exts, node_data) =
27,221✔
891
                    comp.build_node(kmer_counter, &mut path_buf, &mut edge_seq_buf);
27,221✔
892
                graph.add(&edge_seq_buf, node_exts, node_data);
27,221✔
893
            }
807,208✔
894
        }
895

896
        if progress { println!() };
1,663✔
897

898
        graph
1,663✔
899
    }
1,663✔
900

901
    /// Compress a set of kmers and their extensions and metadata into a base DeBruijn graph, utilizing multithreading
902
    #[inline(never)]
903
    pub fn compress_kmers_parallel(
1✔
904
        stranded: bool,
1✔
905
        spec: &S,
1✔
906
        index: &BoomHashMap2<K, Exts, D>,
1✔
907
        progress: bool,
1✔
908
    ) -> BaseGraph<K, D> {
1✔
909
        
1✔
910
        let n_kmers = index.len();
1✔
911
        let progress = if n_kmers < 128 { false } else { progress };
1✔
912

913
        // calculate the slices of the workload according to the available threads
914
        debug!("current num threads: {}", current_num_threads());
1✔
915
        debug!("n of kmers: {}", n_kmers);
1✔
916

917
        let slices = current_num_threads();
1✔
918
        let sz = n_kmers / slices + 1;
1✔
919

1✔
920
        debug!("n_kmers: {}", n_kmers);
1✔
921
        debug!("sz: {}", sz);
1✔
922

923
        let mut parallel_ranges = Vec::with_capacity(slices);
1✔
924
        let mut start = 0;
1✔
925
        while start < n_kmers {
5✔
926
            parallel_ranges.push(start..start + sz);
4✔
927
            start += sz;
4✔
928
        }
4✔
929

930
        let last_start = parallel_ranges.pop().expect("no kmers in parallel ranges").start;
1✔
931
        parallel_ranges.push(last_start..n_kmers);
1✔
932
        debug!("parallel ranges: {:?}", parallel_ranges);
1✔
933

934
        let all_start_end_kmers: Arc<Mutex<HashMap<K, K>>> = Arc::new(Mutex::new(HashMap::new()));
1✔
935

1✔
936
        let steps = n_kmers as f32 / 128.;
1✔
937
        if progress {
1✔
938
            println!("Compressing kmers\nStep 1 of 2");
1✔
939
            for _i in 0..127 {
128✔
940
                print!("-");
127✔
941
            }
127✔
942
            println!("|");
1✔
943
        }
×
944
        
945
        // go through all kmers and find the start and end kmer of each compressable sequence node
946
        parallel_ranges.into_par_iter().for_each(|range| {
4✔
947

4✔
948
            let mut path_buf = Vec::new();
4✔
949
            let mut edge_seq_buf = VecDeque::new();
4✔
950
            //let mut start_end_kmers: Vec<(K, K)> = Vec::new();
4✔
951

4✔
952
            let range2 = range.clone();
4✔
953

954
            for kmer_counter in range {
151✔
955

956
                if progress && (kmer_counter as f32 % steps >= 0.) & (kmer_counter as f32 % steps < 1.) { print!("|")}
147✔
957

958
                let mut comp = CompressFromHash {
147✔
959
                    stranded,
147✔
960
                    spec,
147✔
961
                    k: PhantomData,
147✔
962
                    d: PhantomData,
147✔
963
                    available_kmers: BitSet::new(),
147✔
964
                    index,
147✔
965
                };               
147✔
966

147✔
967
                let kmers = comp.build_node_start(kmer_counter, &mut path_buf, &mut edge_seq_buf);
147✔
968

147✔
969
                let all_clone = Arc::clone(&all_start_end_kmers);
147✔
970
                let mut all_lock = all_clone.lock().expect("lock all_start_end_kmers");
147✔
971
                all_lock.entry(kmers.0).or_insert(kmers.1);
147✔
972
            }
973

974
            debug!("finished range: {:?}", range2);
4✔
975
        });
4✔
976

1✔
977
        if progress { println!() }
1✔
978

979
        // all the kmers which occurr at the beginning or end of a node are soerted and deduped
980
        // resulting in one (K, K) per node
981
        let all_start_end_kmers: Vec<(K, K)> = all_start_end_kmers.lock().expect("final lock all_start_end_kmers").clone().into_iter().collect();
1✔
982

1✔
983
        //all_start_end_kmers.sort();
1✔
984
        //all_start_end_kmers.dedup();
1✔
985

1✔
986
        //debug!("all start and end kmers: {:?}", all_start_end_kmers);
1✔
987

1✔
988
        // all graphs constructed by the respective threads are pushed into this and later combined
1✔
989
        let graphs: Arc<Mutex<Vec<BaseGraph<K, D>>>> = Arc::new(Mutex::new(Vec::with_capacity(current_num_threads())));
1✔
990
        
1✔
991
        // calculate the workload has to be sliced depending on the threads
1✔
992
        let n_starts = all_start_end_kmers.len();
1✔
993
        let slices = current_num_threads();
1✔
994
        let sz = n_starts / slices + 1;
1✔
995

1✔
996
        debug!("n start kmers: {}", all_start_end_kmers.len());
1✔
997
        debug!("sz: {}", sz);
1✔
998

999
        let mut parallel_ranges = Vec::with_capacity(slices);
1✔
1000
        let mut start = 0;
1✔
1001
        while start < n_starts {
4✔
1002
            parallel_ranges.push(start..start + sz);
3✔
1003
            start += sz;
3✔
1004
        }
3✔
1005

1006
        let last_start = parallel_ranges.pop().expect("no kmers in parallel ranges").start;
1✔
1007
        parallel_ranges.push(last_start..n_starts);
1✔
1008

1✔
1009
        debug!("parallel ranges 2: {:?}", parallel_ranges);
1✔
1010

1011
        let steps = n_starts as f32 / 128.;
1✔
1012
        if progress {
1✔
1013
            println!("Step 2 of 2");
1✔
1014
            for _i in 0..127 {
128✔
1015
                print!("-");
127✔
1016
            }
127✔
1017
            println!("|");
1✔
1018
        }
×
1019
        let short_progress = n_starts < 100;
1✔
1020

1✔
1021
        // go trough all start kmers and find the corresponding sequence, add them to the partial graph
1✔
1022
        parallel_ranges.into_par_iter().for_each(|range| {
3✔
1023

3✔
1024
            let mut graph: BaseGraph<K, D> = BaseGraph::new(stranded);
3✔
1025

3✔
1026
            let range2 = range.clone();
3✔
1027

1028
            for (i, (start, _)) in all_start_end_kmers[range].iter().enumerate() {   
6✔
1029

1030
                if progress & !short_progress && (i as f32 % steps >= 0.) & (i as f32 % steps < 1.) { print!("|")}
6✔
1031

1032
                let mut path_buf = Vec::new();
6✔
1033
                let mut edge_seq_buf = VecDeque::new();
6✔
1034
                let mut comp = CompressFromHash {
6✔
1035
                    stranded,
6✔
1036
                    spec,
6✔
1037
                    k: PhantomData,
6✔
1038
                    d: PhantomData,
6✔
1039
                    available_kmers: BitSet::new(),
6✔
1040
                    index,
6✔
1041
                };      
6✔
1042
                let (node_exts, node_data) = comp.build_node_par(comp.index.get_key_id(start).expect("get kmer id from index, should exist"), &mut path_buf, &mut edge_seq_buf);
6✔
1043
                graph.add(&edge_seq_buf, node_exts, node_data);
6✔
1044
            }
1045

1046
            debug!("finished range: {:?}, subgraph data size: {}", range2, mem::size_of_val(&*graph.data));
3✔
1047

1048
            let graphs_clone = Arc::clone(&graphs);
3✔
1049
            let mut graphs_lock = graphs_clone.lock().expect("lock graphs to push new graph");
3✔
1050
            graphs_lock.push(graph);
3✔
1051

3✔
1052
        });
3✔
1053

1✔
1054
        if progress & short_progress {
1✔
1055
            for _i in 0..128 {
129✔
1056
                print!("|");
128✔
1057
            }
128✔
1058
        }
×
1059
        
1060
        if progress { println!() }
1✔
1061

1062
        // combine the graphs and return the resulting graph
1063
        let graph = BaseGraph::combine(graphs.lock().expect("final graph lock").clone().into_iter());
1✔
1064
        graph
1✔
1065
    }
1✔
1066
}
1067

1068

1069
/// Take a BoomHash Object and build a compressed DeBruijn graph.
1070
#[inline(never)]
1071
pub fn compress_kmers_with_hash<K: Kmer + Send + Sync, D: Clone + Debug + Send + Sync, S: CompressionSpec<D> + Send + Sync>(
1,664✔
1072
    stranded: bool,
1,664✔
1073
    spec: &S,
1,664✔
1074
    index: &BoomHashMap2<K, Exts, D>,
1,664✔
1075
    time: bool,
1,664✔
1076
    parallel: bool,
1,664✔
1077
    progress: bool,
1,664✔
1078
) -> BaseGraph<K, D> {
1,664✔
1079
    let before_compression = Instant::now();
1,664✔
1080
    let graph = if !parallel {CompressFromHash::<K, D, S>::compress_kmers(stranded, spec, index, progress) } else {
1,664✔
1081
        CompressFromHash::<K, D, S>::compress_kmers_parallel(stranded, spec, index, progress)};
1✔
1082
    if time { println!("time compression (s): {}", before_compression.elapsed().as_secs_f32()) }
1,664✔
1083
    graph
1,664✔
1084
}
1,664✔
1085

1086
/// Take (make) a BoomHash Object and build a compressed DeBruijn graph.
1087
#[inline(never)]
1088
pub fn compress_kmers<K: Kmer + Send + Sync, D: Clone + Debug  + Send + Sync, S: CompressionSpec<D> + Send + Sync>(
×
1089
    stranded: bool,
×
1090
    spec: &S,
×
1091
    kmer_exts: &[(K, (Exts, D))],
×
1092
) -> BaseGraph<K, D> {
×
1093
    let mut keys = Vec::with_capacity(kmer_exts.len());
×
1094
    let mut exts = Vec::with_capacity(kmer_exts.len());
×
1095
    let mut data = Vec::with_capacity(kmer_exts.len());
×
1096

1097
    for (k, (e, d)) in kmer_exts {
×
1098
        keys.push(*k);
×
1099
        data.push(d.clone());
×
1100
        exts.push(*e);
×
1101
    }
×
1102

1103
    let index = BoomHashMap2::new(keys, exts, data);
×
1104
    CompressFromHash::<K, D, S>::compress_kmers(stranded, spec, &index, false)
×
1105
}
×
1106

1107
/// Build graph from a set of kmers with unknown extensions by finding the extensions on the fly.
1108
#[inline(never)]
1109
pub fn compress_kmers_no_exts<K: Kmer + Send + Sync, D: Clone + Debug + Send + Sync, S: CompressionSpec<D> + Send + Sync>(
×
1110
    stranded: bool,
×
1111
    spec: &S,
×
1112
    kmer_exts: &[(K, D)],
×
1113
) -> BaseGraph<K, D> {
×
1114
    let kmer_set: std::collections::HashSet<_> = kmer_exts.iter().map(|(k, _)| k).collect();
×
1115

×
1116
    let can = |k: K| k.min_rc();
×
1117

1118
    let mut keys = Vec::with_capacity(kmer_exts.len());
×
1119
    let mut exts = Vec::with_capacity(kmer_exts.len());
×
1120
    let mut data = Vec::with_capacity(kmer_exts.len());
×
1121
    for (k, d) in kmer_exts {
×
1122
        let mut e = Exts::empty();
×
1123

1124
        for l in 0..4 {
×
1125
            let new = can(k.extend_left(l));
×
1126

×
1127
            if kmer_set.contains(&new) {
×
1128
                e = e.set(Dir::Left, l);
×
1129
            }
×
1130
        }
1131

1132
        for r in 0..4 {
×
1133
            let new = can(k.extend_right(r));
×
1134

×
1135
            if kmer_set.contains(&new) {
×
1136
                e = e.set(Dir::Right, r);
×
1137
            }
×
1138
        }
1139

1140
        keys.push(*k);
×
1141
        data.push(d.clone());
×
1142
        exts.push(e);
×
1143
    }
1144

1145
    assert_eq!(kmer_set.len(), keys.len());
×
1146

1147
    let index = BoomHashMap2::new(keys, exts, data);
×
1148
    CompressFromHash::<K, D, S>::compress_kmers(stranded, spec, &index,false)
×
1149
}
×
1150

1151
/// assumes stranded = false
1152
pub fn uncompressed_graph<K: Kmer, D: Clone + Debug>(
×
1153
    index: &BoomHashMap2<K, Exts, D>,
×
1154
) -> BaseGraph<K, D> {
×
1155

×
1156
    let mut graph: BaseGraph<K, D> = BaseGraph::new(false);
×
1157
    let mut kmer_seq: VecDeque<u8> = VecDeque::with_capacity(K::k());
×
1158

UNCOV
1159
    for (kmer, exts, data) in index.into_iter() {
×
UNCOV
1160
        kmer_seq.clear();
×
1161
        for i in 0..K::k() {
×
1162
            kmer_seq.push_back(kmer.get(i));
×
1163
        }
×
NEW
1164
        graph.add(&kmer_seq, *exts, data.clone());
×
1165
    }
1166
    graph
×
1167
}
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc