• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

jlab / rust-debruijn / 14334283286

08 Apr 2025 01:05PM UTC coverage: 80.501% (-2.1%) from 82.636%
14334283286

Pull #11

github

web-flow
Merge 21c5c3c91 into 2f646dc00
Pull Request #11: Modifyable DOT edges and default formatting methods for nodes and edges

35 of 273 new or added lines in 5 files covered. (12.82%)

19 existing lines in 3 files now uncovered.

6102 of 7580 relevant lines covered (80.5%)

8056178.79 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.31
/src/compression.rs
1
// Copyright 2017 10x Genomics
2

3
//! Create compressed DeBruijn graphs from uncompressed DeBruijn graphs, or a collection of disjoint DeBruijn graphs.
4
use bit_set::BitSet;
5
use indicatif::{ProgressBar, ProgressIterator, ProgressStyle};
6
use log::debug;
7
use rayon::current_num_threads;
8
use rayon::iter::{IntoParallelIterator, ParallelIterator};
9
use std::collections::{HashMap, VecDeque};
10
use std::fmt::Debug;
11
use std::marker::PhantomData;
12
use std::mem;
13
use std::sync::{Arc, Mutex};
14
use std::time::Instant;
15

16
use crate::dna_string::{DnaString, PackedDnaStringSet};
17
use crate::graph::{BaseGraph, DebruijnGraph};
18
use crate::Dir;
19
use crate::Exts;
20
use crate::Kmer;
21
use crate::Vmer;
22
use boomphf::hashmap::BoomHashMap2;
23

24
#[derive(Copy, Clone)]
25
enum ExtMode<K: Kmer> {
26
    Unique(K, Dir, Exts),
27
    Terminal(Exts),
28
}
29

30
#[derive(Copy, Clone)]
31
enum ExtModeNode {
32
    Unique(usize, Dir, Exts),
33
    Terminal(Exts),
34
}
35

36
/// Customize the path-compression process. Implementing this trait lets the user
37
/// control how the per-kmer data (of type `D`) is summarized into a per-path
38
/// summary data (of type `DS`). It also let's the user introduce new breaks into
39
/// paths by inspecting in the per-kmer data of a proposed with `join_test_kmer`
40
/// function.
41
pub trait CompressionSpec<D> {
42
    /// combine the data of two nodes
43
    fn reduce(&self, path_object: D, kmer_object: &D) -> D;
44
    /// check if the data of two nodes can be combined
45
    fn join_test(&self, d1: &D, d2: &D) -> bool;
46
}
47

48
/// Simple implementation of `CompressionSpec` that lets you provide that data reduction function as a closure
49
pub struct SimpleCompress<D, F> {
50
    func: F,
51
    d: PhantomData<D>,
52
}
53

54
impl<D, F> SimpleCompress<D, F> {
55
    pub fn new(func: F) -> SimpleCompress<D, F> {
4,204✔
56
        SimpleCompress {
4,204✔
57
            func,
4,204✔
58
            d: PhantomData,
4,204✔
59
        }
4,204✔
60
    }
4,204✔
61
}
62

63
impl<D, F> CompressionSpec<D> for SimpleCompress<D, F>
64
where
65
    for<'r> F: Fn(D, &'r D) -> D,
66
{
67
    fn reduce(&self, d: D, other: &D) -> D {
3,051,794✔
68
        (self.func)(d, other)
3,051,794✔
69
    }
3,051,794✔
70

71
    fn join_test(&self, _: &D, _: &D) -> bool {
3,126,631✔
72
        true
3,126,631✔
73
    }
3,126,631✔
74
}
75

76
/// Extending trait CompressionSpec for compression
77
pub struct ScmapCompress<D> {
78
    d: PhantomData<D>,
79
}
80

81
impl<D> ScmapCompress<D> {
82
    pub fn new() -> ScmapCompress<D> {
2✔
83
        ScmapCompress { d: PhantomData }
2✔
84
    }
2✔
85
}
86

87
impl<D> Default for ScmapCompress<D> {
88
    fn default() -> Self {
×
89
        Self::new()
×
90
    }
×
91
}
92

93
impl<D: PartialEq> CompressionSpec<D> for ScmapCompress<D>
94
where
95
    D: Debug,
96
{
97
    fn reduce(&self, d: D, other: &D) -> D {
11,676✔
98
        if d != *other {
11,676✔
99
            panic!("{:?} != {:?}, Should not happen", d, *other);
×
100
        }
11,676✔
101
        d
11,676✔
102
    }
11,676✔
103

104
    fn join_test(&self, d1: &D, d2: &D) -> bool {
11,904✔
105
        d1 == d2
11,904✔
106
    }
11,904✔
107
}
108

109
/// CompressionSpec with check and function
110
pub struct CheckCompress<D, F1, F2> {
111
    reduce_func: F1,
112
    join_func: F2,
113
    d: PhantomData<D>
114
} 
115

116
impl<D, F1, F2> CheckCompress<D, F1, F2> {
117
    pub fn new(reduce_func: F1, join_func: F2) -> Self {
×
118
        CheckCompress {
×
119
            reduce_func,
×
120
            join_func,
×
121
            d: PhantomData,
×
122
        }
×
123
    }
×
124
}
125

126
impl<D, F1, F2> CompressionSpec<D> for CheckCompress<D, F1, F2>
127
where
128
    for<'r> F1: Fn(D, &'r D) -> D,
129
    for<'r> F2: Fn(&'r D, &'r D) -> bool
130
{
131
    fn reduce(&self, d: D, other: &D) -> D {
×
132
        (self.reduce_func)(d, other)
×
133
    }
×
134

135
    fn join_test(&self, d: &D, other: &D) -> bool {
×
136
        (self.join_func)(d, other)
×
137
    }
×
138
}
139

140

141
struct CompressFromGraph<'a, 'b, K: 'a + Kmer, D: 'a + PartialEq, S: CompressionSpec<D>> {
142
    stranded: bool,
143
    d: PhantomData<D>,
144
    spec: &'b S,
145
    available_nodes: BitSet,
146
    graph: &'a DebruijnGraph<K, D>,
147
}
148

149
impl<K, D, S> CompressFromGraph<'_, '_, K, D, S>
150
where
151
    K: Kmer + Send + Sync,
152
    D: Debug + Clone + PartialEq,
153
    S: CompressionSpec<D>,
154
{
155
    #[inline(never)]
156
    fn try_extend_node(&mut self, node: usize, dir: Dir) -> ExtModeNode {
1,477,160✔
157
        let node = self.graph.get_node(node);
1,477,160✔
158
        let bases = node.sequence();
1,477,160✔
159
        let exts = node.exts();
1,477,160✔
160

1,477,160✔
161
        if exts.num_ext_dir(dir) != 1
1,477,160✔
162
            || (!self.stranded && node.len() == K::k() && bases.get_kmer::<K>(0).is_palindrome())
1,453,864✔
163
        {
164
            ExtModeNode::Terminal(exts.single_dir(dir))
23,304✔
165
        } else {
166
            // Get the next kmer
167
            let ext_base = exts.get_unique_extension(dir).expect("should be unique");
1,453,856✔
168
            let end_kmer: K = bases.term_kmer(dir);
1,453,856✔
169

1,453,856✔
170
            let next_kmer = end_kmer.extend(ext_base, dir);
1,453,856✔
171
            let (next_node_id, next_side_incoming, rc) = match self.graph.find_link(next_kmer, dir)
1,453,856✔
172
            {
173
                Some(e) => e,
1,453,856✔
174
                None => {
175
                    println!("dir: {:?}, Lmer: {:?}, exts: {:?}", dir, bases, exts);
×
176
                    println!("end kmer: {:?}", end_kmer);
×
177
                    println!("No kmer: {:?}", next_kmer);
×
178
                    println!("rc: {:?}", next_kmer.min_rc());
×
179
                    panic!("No kmer: {:?}", next_kmer)
×
180
                }
181
            };
182

183
            let next_node = self.graph.get_node(next_node_id);
1,453,856✔
184
            let next_exts = next_node.exts();
1,453,856✔
185

186
            let consistent = (next_node.len() == K::k())
1,453,856✔
187
                || match (dir, next_side_incoming, rc) {
11,644✔
188
                    (Dir::Left, Dir::Right, false) => true,
3,025✔
189
                    (Dir::Left, Dir::Left, true) => true,
2,767✔
190
                    (Dir::Right, Dir::Left, false) => true,
3,099✔
191
                    (Dir::Right, Dir::Right, true) => true,
2,753✔
192
                    _ => {
193
                        println!("dir: {:?}, Lmer: {:?}, exts: {:?}", dir, bases, exts);
×
194
                        println!("end kmer: {:?}", end_kmer);
×
195
                        println!("next kmer: {:?}", next_kmer);
×
196
                        println!("rc: {:?}", next_kmer.min_rc());
×
197
                        println!(
×
198
                            "next bases: {:?}, next_side_incoming: {:?}, rc: {:?}",
×
199
                            next_node.sequence(),
×
200
                            next_side_incoming,
×
201
                            rc
×
202
                        );
×
203
                        false
×
204
                    }
205
                };
206
            assert!(consistent);
1,453,856✔
207

208
            // We can include this kmer in the line if:
209
            // a) it exists in the partition, and is still unused
210
            // b) the kmer we go to has a unique extension back in our direction
211
            // c) the new edge is not of length K and a palindrome
212
            // d) the color of the current and next node is same
213

214
            if !self.available_nodes.contains(next_node_id)
1,453,856✔
215
                || (!self.stranded && next_kmer.is_palindrome())
1,432,372✔
216
                || !self.spec.join_test(node.data(), next_node.data())
1,432,368✔
217
            {
218
                // Next kmer isn't in this partition,
219
                // or we've already used it,
220
                // or it's palindrom and we are not stranded
221
                // or the colors were not same
222
                return ExtModeNode::Terminal(exts.single_dir(dir));
21,488✔
223
            }
1,432,368✔
224

1,432,368✔
225
            // orientation of next edge
1,432,368✔
226
            let next_side_outgoing = next_side_incoming.flip();
1,432,368✔
227

1,432,368✔
228
            let incoming_count = next_exts.num_ext_dir(next_side_incoming);
1,432,368✔
229
            let outgoing_exts = next_exts.single_dir(next_side_outgoing);
1,432,368✔
230

1,432,368✔
231
            if incoming_count == 0 {
1,432,368✔
232
                println!("dir: {:?}, Lmer: {:?}, exts: {:?}", dir, bases, exts);
×
233
                println!("end kmer: {:?}", end_kmer);
×
234
                println!("next_node: {:?}", next_node);
×
235
                println!("next_node data: {:?}", next_node.sequence());
×
236
                panic!("unreachable");
×
237
            } else if incoming_count == 1 {
1,432,368✔
238
                // We have a unique path to next_kmer -- include it
239
                ExtModeNode::Unique(next_node_id, next_side_outgoing, outgoing_exts)
1,397,272✔
240
            } else {
241
                // there's more than one path
242
                // into the target kmer - don't include it
243
                ExtModeNode::Terminal(exts.single_dir(dir))
35,096✔
244
            }
245
        }
246
    }
1,477,160✔
247

248
    /// Generate complete unbranched edges
249
    fn extend_node(&mut self, start_node: usize, start_dir: Dir) -> (Vec<(usize, Dir)>, Exts) {
79,888✔
250
        let mut current_dir = start_dir;
79,888✔
251
        let mut current_node = start_node;
79,888✔
252
        let mut path = Vec::new();
79,888✔
253
        let final_exts: Exts; // must get set below
79,888✔
254

79,888✔
255
        self.available_nodes.remove(start_node);
79,888✔
256

257
        loop {
258
            let ext_result = self.try_extend_node(current_node, current_dir);
1,477,160✔
259

1,477,160✔
260
            match ext_result {
1,477,160✔
261
                ExtModeNode::Unique(next_node, next_dir_outgoing, _) => {
1,397,272✔
262
                    let next_dir_incoming = next_dir_outgoing.flip();
1,397,272✔
263
                    path.push((next_node, next_dir_incoming));
1,397,272✔
264
                    self.available_nodes.remove(next_node);
1,397,272✔
265
                    current_node = next_node;
1,397,272✔
266
                    current_dir = next_dir_outgoing;
1,397,272✔
267
                }
1,397,272✔
268
                ExtModeNode::Terminal(ext) => {
79,888✔
269
                    final_exts = ext;
79,888✔
270
                    break;
79,888✔
271
                }
79,888✔
272
            }
79,888✔
273
        }
79,888✔
274

79,888✔
275
        (path, final_exts)
79,888✔
276
    }
79,888✔
277

278
    // Determine the sequence and extensions of the maximal unbranched
279
    // edge, centered around the given edge number
280
    #[inline(never)]
281
    fn build_node(&mut self, seed_node: usize) -> (DnaString, Exts, VecDeque<(usize, Dir)>, D) {
39,944✔
282
        let (l_path, l_ext) = self.extend_node(seed_node, Dir::Left);
39,944✔
283
        let (r_path, r_ext) = self.extend_node(seed_node, Dir::Right);
39,944✔
284

39,944✔
285
        // Stick together edge chunks to get full edge sequence
39,944✔
286
        let mut node_path = VecDeque::new();
39,944✔
287

39,944✔
288
        let mut node_data: D = self.graph.get_node(seed_node).data().clone();
39,944✔
289
        node_path.push_back((seed_node, Dir::Left));
39,944✔
290

291
        // Add on the left path
292
        for &(next_node, incoming_dir) in l_path.iter() {
697,131✔
293
            node_path.push_front((next_node, incoming_dir.flip()));
697,131✔
294
            node_data = self
697,131✔
295
                .spec
697,131✔
296
                .reduce(node_data, self.graph.get_node(next_node).data());
697,131✔
297
        }
697,131✔
298

299
        // Add on the right path
300
        for &(next_node, incoming_dir) in r_path.iter() {
700,141✔
301
            node_path.push_back((next_node, incoming_dir));
700,141✔
302
            node_data = self
700,141✔
303
                .spec
700,141✔
304
                .reduce(node_data, self.graph.get_node(next_node).data());
700,141✔
305
        }
700,141✔
306

307
        let left_extend = match l_path.last() {
39,944✔
308
            None => l_ext,
16,790✔
309
            Some(&(_, Dir::Left)) => l_ext.complement(),
11,286✔
310
            Some(&(_, Dir::Right)) => l_ext,
11,868✔
311
        };
312

313
        let right_extend = match r_path.last() {
39,944✔
314
            None => r_ext,
16,749✔
315
            Some(&(_, Dir::Left)) => r_ext,
11,786✔
316
            Some(&(_, Dir::Right)) => r_ext.complement(),
11,409✔
317
        };
318

319
        let path_seq = self.graph.sequence_of_path(node_path.iter());
39,944✔
320

39,944✔
321
        // return sequence and extensions
39,944✔
322
        (
39,944✔
323
            path_seq,
39,944✔
324
            Exts::from_single_dirs(left_extend, right_extend),
39,944✔
325
            node_path,
39,944✔
326
            node_data,
39,944✔
327
        )
39,944✔
328
    }
39,944✔
329

330
    /// Simplify a compressed Debruijn graph by merging adjacent unbranched nodes, and optionally
331
    /// censoring some nodes
332
    fn compress_graph(
246✔
333
        stranded: bool,
246✔
334
        compression: &S,
246✔
335
        mut old_graph: DebruijnGraph<K, D>,
246✔
336
        censor_nodes: Option<Vec<usize>>,
246✔
337
    ) -> DebruijnGraph<K, D> {
246✔
338
        let n_nodes = old_graph.len();
246✔
339
        let mut available_nodes = BitSet::with_capacity(n_nodes);
246✔
340
        for i in 0..n_nodes {
1,437,216✔
341
            available_nodes.insert(i);
1,437,216✔
342
        }
1,437,216✔
343

344
        if let Some(c) = censor_nodes {
246✔
345
            for censor in c {
×
346
                available_nodes.remove(censor);
×
347
            }
×
348
        }
246✔
349

350
        old_graph.fix_exts(Some(&available_nodes));
246✔
351

246✔
352
        let mut comp = CompressFromGraph {
246✔
353
            spec: compression,
246✔
354
            stranded,
246✔
355
            graph: &old_graph,
246✔
356
            available_nodes,
246✔
357
            d: PhantomData,
246✔
358
        };
246✔
359

246✔
360
        // FIXME -- clarify requirements around state of extensions
246✔
361
        let mut graph = BaseGraph::new(stranded);
246✔
362

363
        for node_counter in 0..n_nodes {
1,437,216✔
364
            if comp.available_nodes.contains(node_counter) {
1,437,216✔
365
                let (seq, exts, _, data) = comp.build_node(node_counter);
39,944✔
366
                graph.add(&seq, exts, data);
39,944✔
367
            }
1,397,272✔
368
        }
369

370
        // We will have some hanging exts due to
371
        let mut dbg = graph.finish();
246✔
372
        dbg.fix_exts(None); // ????
246✔
373
        debug_assert!(dbg.is_compressed(compression).is_none());
246✔
374
        dbg
246✔
375
    }
246✔
376
}
377

378
/// Perform path-compression on a (possibly partially compressed) DeBruijn graph
379
pub fn compress_graph<
246✔
380
    K: Kmer + Send + Sync,
246✔
381
    D: Clone + Debug + PartialEq,
246✔
382
    S: CompressionSpec<D>,
246✔
383
>(
246✔
384
    stranded: bool,
246✔
385
    spec: &S,
246✔
386
    old_graph: DebruijnGraph<K, D>,
246✔
387
    censor_nodes: Option<Vec<usize>>,
246✔
388
) -> DebruijnGraph<K, D> {
246✔
389
    CompressFromGraph::<K, D, S>::compress_graph(stranded, spec, old_graph, censor_nodes)
246✔
390
}
246✔
391

392
//////////////////////////////
393
// Compress from Hash a new Struct
394
//////////////////////////////
395
/// Generate a compressed DeBruijn graph from hash_index
396
struct CompressFromHash<'a, 'b, K: 'a + Kmer, D: 'a, S: CompressionSpec<D>> {
397
    stranded: bool,
398
    k: PhantomData<K>,
399
    d: PhantomData<D>,
400
    spec: &'b S,
401
    available_kmers: BitSet,
402
    index: &'a BoomHashMap2<K, Exts, D>,
403
}
404

405
/// Compression of paths in Debruijn graph
406
impl<K: Kmer +  Send + Sync, D: Clone + Debug + Send + Sync, S: CompressionSpec<D> + Sync> CompressFromHash<'_, '_, K, D, S> {
407
    fn get_kmer_data(&self, kmer: &K) -> (&Exts, &D) {
5,208,660✔
408
        match self.index.get(kmer) {
5,208,660✔
409
            Some(data) => data,
5,208,660✔
410
            None => panic!("couldn't find kmer {:?}", kmer),
×
411
        }
412
    }
5,208,660✔
413

414
    fn get_kmer_id(&self, kmer: &K) -> Option<usize> {
3,509,044✔
415
        self.index.get_key_id(kmer)
3,509,044✔
416
    }
3,509,044✔
417

418
    /// Attempt to extend kmer v in direction dir. Return:
419
    ///  - Unique(nextKmer, nextDir) if a single unique extension
420
    ///    is possible.  nextDir indicates the direction to extend nextMker
421
    ///    to preserve the direction of the extension.
422
    /// - Term(ext) no unique extension possible, indicating the extensions at this end of the line
423
    fn try_extend_kmer(&self, kmer: K, dir: Dir) -> ExtMode<K> {
1,767,590✔
424
        // metadata of start kmer
1,767,590✔
425
        let (exts, ref kmer_data) = self.get_kmer_data(&kmer);
1,767,590✔
426

1,767,590✔
427
        // kmer is marked terminal if it has not one extension in one direction (if clear path always 1) 
1,767,590✔
428
        // or if the graph is not stranded and the kmer is a palindrome
1,767,590✔
429
        if exts.num_ext_dir(dir) != 1 || (!self.stranded && kmer.is_palindrome()) {
1,767,590✔
430
            ExtMode::Terminal(exts.single_dir(dir))
26,136✔
431
        } else {
432
            // Get the next kmer
433
            let ext_base = exts.get_unique_extension(dir).expect("should be unique");
1,741,454✔
434

1,741,454✔
435
            let mut next_kmer = kmer.extend(ext_base, dir);
1,741,454✔
436

1,741,454✔
437
            let mut do_flip = false;
1,741,454✔
438
            
1,741,454✔
439
            // decide if direction needs to be changed (how?????????) turn kmer into rc
1,741,454✔
440
            if !self.stranded {
1,741,454✔
441
                let flip_rc = next_kmer.min_rc_flip();
1,741,454✔
442
                do_flip = flip_rc.1;
1,741,454✔
443
                next_kmer = flip_rc.0;
1,741,454✔
444
            }
1,741,454✔
445

446
            let next_dir = dir.cond_flip(do_flip);
1,741,454✔
447
            let is_palindrome = !self.stranded && next_kmer.is_palindrome();
1,741,454✔
448

449
            // We can include this kmer in the line if:
450
            // a) it exists in the partition and is still unused
451
            // b) the kmer we go to has a unique extension back in our direction
452

453
            // Check condition a)
454
            match self.get_kmer_id(&next_kmer) {
1,741,454✔
455
                Some(id) if self.available_kmers.contains(id) => (),
1,717,919✔
456

457
                // This kmer isn't in this partition, or we've already used it
458
                _ => return ExtMode::Terminal(exts.single_dir(dir)),
46,905✔
459
            }
460

461
            // Check condition b)
462
            // Direction we're approaching the new kmer from
463
            let new_incoming_dir = dir.flip().cond_flip(do_flip);
1,694,549✔
464
            let next_kmer_r = self.get_kmer_data(&next_kmer);
1,694,549✔
465
            let (next_kmer_exts, ref next_kmer_data) = next_kmer_r;
1,694,549✔
466
            let incoming_count = next_kmer_exts.num_ext_dir(new_incoming_dir);
1,694,549✔
467
            let outgoing_exts = next_kmer_exts.single_dir(new_incoming_dir.flip());
1,694,549✔
468

1,694,549✔
469
            // Test if the spec let's us combine these into the same path
1,694,549✔
470
            let can_join = self.spec.join_test(kmer_data, next_kmer_data);
1,694,549✔
471

1,694,549✔
472
            if incoming_count == 0 && !is_palindrome {
1,694,549✔
473
                println!("{:?}, {:?}, {:?}", kmer, exts, kmer_data);
×
474
                println!(
×
475
                    "{:?}, {:?}, {:?}",
×
476
                    next_kmer, next_kmer_exts, next_kmer_data
×
477
                );
×
478
                panic!("unreachable");
×
479
            } else if can_join && incoming_count == 1 && !is_palindrome {
1,694,549✔
480
                // We have a unique path to next_kmer -- include it
481
                ExtMode::Unique(next_kmer, next_dir, outgoing_exts)
1,654,804✔
482
            } else {
483
                // there's more than one path
484
                // into the target kmer - don't include it
485
                ExtMode::Terminal(exts.single_dir(dir))
39,745✔
486
            }
487
        }
488
    }
1,767,590✔
489

490
    /// Attempt to extend kmer v in direction dir. Return:
491
    ///  - Unique(nextKmer, nextDir) if a single unique extension
492
    ///    is possible.  nextDir indicates the direction to extend nextMker
493
    ///    to preserve the direction of the extension.
494
    /// - Term(ext) no unique extension possible, indicating the extensions at this end of the line
495
    fn try_extend_kmer_par(&self, kmer: K, dir: Dir, path: &mut [(K, Dir)]) -> ExtMode<K> {
12,006✔
496
        // metadata of start kmer
12,006✔
497
        let (exts, ref kmer_data) = self.get_kmer_data(&kmer);
12,006✔
498

12,006✔
499
        // kmer is marked terminal if it has not one extension in one direction (if clear path always 1) 
12,006✔
500
        // or if the graph is not stranded and the kmer is a palindrome
12,006✔
501
        if exts.num_ext_dir(dir) != 1 || (!self.stranded && kmer.is_palindrome()) {
12,006✔
502
            ExtMode::Terminal(exts.single_dir(dir))
388✔
503
        } else {
504
            // Get the next kmer
505
            let ext_base = exts.get_unique_extension(dir).expect("should be unique");
11,618✔
506

11,618✔
507
            let mut next_kmer = kmer.extend(ext_base, dir);
11,618✔
508

11,618✔
509
            let mut do_flip = false;
11,618✔
510
            
11,618✔
511
            // decide if direction needs to be changed (how?????????) turn kmer into rc
11,618✔
512
            if !self.stranded {
11,618✔
513
                let flip_rc = next_kmer.min_rc_flip();
11,618✔
514
                do_flip = flip_rc.1;
11,618✔
515
                next_kmer = flip_rc.0;
11,618✔
516
            }
11,618✔
517

518
            let next_dir = dir.cond_flip(do_flip);
11,618✔
519
            let is_palindrome = !self.stranded && next_kmer.is_palindrome();
11,618✔
520

521
            // We can include this kmer in the line if:
522
            // a) it exists in the partition and is still unused
523
            // b) the kmer we go to has a unique extension back in our direction
524

525
            // condition a) is skipped cause of parallel process
526
            // instead check if already in path
527
            // might be time consuming, can try to replace with hashmap later
528
            // to avoid circluar graphs leading infinite loops
529
            // also check if just checking for next_dir would suffice
530

531
            if path.contains(&(next_kmer, Dir::Left)) {
11,618✔
532
                // The next kmer is already in the path, break loop
533
                return ExtMode::Terminal(exts.single_dir(dir))
×
534
            }
11,618✔
535

11,618✔
536
            if path.contains(&(next_kmer, Dir::Right)) {
11,618✔
537
                // The next kmer is already in the path, break loop
538
                return ExtMode::Terminal(exts.single_dir(dir))
×
539
            }
11,618✔
540

11,618✔
541
            // Check condition b)
11,618✔
542
            // Direction we're approaching the new kmer from
11,618✔
543
            let new_incoming_dir = dir.flip().cond_flip(do_flip);
11,618✔
544
            let next_kmer_r = self.get_kmer_data(&next_kmer);
11,618✔
545
            let (next_kmer_exts, ref next_kmer_data) = next_kmer_r;
11,618✔
546
            let incoming_count = next_kmer_exts.num_ext_dir(new_incoming_dir);
11,618✔
547
            let outgoing_exts = next_kmer_exts.single_dir(new_incoming_dir.flip());
11,618✔
548

11,618✔
549
            // Test if the spec let's us combine these into the same path
11,618✔
550
            let can_join = self.spec.join_test(kmer_data, next_kmer_data);
11,618✔
551

11,618✔
552
            if incoming_count == 0 && !is_palindrome {
11,618✔
553
                println!("{:?}, {:?}, {:?}", kmer, exts, kmer_data);
×
554
                println!(
×
555
                    "{:?}, {:?}, {:?}",
×
556
                    next_kmer, next_kmer_exts, next_kmer_data
×
557
                );
×
558
                panic!("unreachable");
×
559
            } else if can_join && incoming_count == 1 && !is_palindrome {
11,618✔
560
                // We have a unique path to next_kmer -- include it
561
                ExtMode::Unique(next_kmer, next_dir, outgoing_exts)
11,394✔
562
            } else {
563
                // there's more than one path
564
                // into the target kmer - don't include it
565
                ExtMode::Terminal(exts.single_dir(dir))
224✔
566
            }
567
        }
568
    }
12,006✔
569

570

571
    /// Build the maximal line starting at kmer in direction dir, at most max_dist long.
572
    /// Also return the extensions at the end of this line.
573
    /// Sub-lines break if their extensions are not available in this shard
574
    #[inline(never)]
575
    fn extend_kmer(&mut self, kmer: K, start_dir: Dir, path: &mut Vec<(K, Dir)>) -> Exts {
112,786✔
576
        let mut current_dir = start_dir;
112,786✔
577
        let mut current_kmer = kmer;
112,786✔
578
        path.clear();
112,786✔
579

112,786✔
580
        let final_exts: Exts; // must get set below
112,786✔
581

112,786✔
582
        // get id of kmer and remove from available kmers
112,786✔
583
        let id = self.get_kmer_id(&kmer).expect("should have this kmer");
112,786✔
584
        let _ = self.available_kmers.remove(id);
112,786✔
585

586
        loop {
587
            let ext_result = self.try_extend_kmer(current_kmer, current_dir);
1,767,590✔
588

1,767,590✔
589
            match ext_result {
1,767,590✔
590
                ExtMode::Unique(next_kmer, next_dir, _) => {
1,654,804✔
591
                    path.push((next_kmer, next_dir));
1,654,804✔
592
                    let next_id = self.get_kmer_id(&next_kmer).expect("should have this kmer");
1,654,804✔
593
                    self.available_kmers.remove(next_id);
1,654,804✔
594
                    current_kmer = next_kmer;
1,654,804✔
595
                    current_dir = next_dir;
1,654,804✔
596
                }
1,654,804✔
597
                ExtMode::Terminal(ext) => {
112,786✔
598
                    final_exts = ext;
112,786✔
599
                    break;
112,786✔
600
                }
112,786✔
601
            }
112,786✔
602
        }
112,786✔
603

112,786✔
604
        final_exts
112,786✔
605
    }
112,786✔
606

607
    fn extend_kmer_par(&mut self, kmer: K, start_dir: Dir, path: &mut Vec<(K, Dir)>) -> Exts {
612✔
608
        let mut current_dir = start_dir;
612✔
609
        let mut current_kmer = kmer;
612✔
610
        path.clear();
612✔
611

612
        let final_exts: Exts; // must get set below
613

614
        loop {
615
            let ext_result = self.try_extend_kmer_par(current_kmer, current_dir, path);
12,006✔
616

12,006✔
617
            match ext_result {
12,006✔
618
                ExtMode::Unique(next_kmer, next_dir, _) => {
11,394✔
619
                    path.push((next_kmer, next_dir));
11,394✔
620
                    //let next_id = self.get_kmer_id(&next_kmer).expect("should have this kmer");
11,394✔
621
                    //self.available_kmers.remove(next_id);
11,394✔
622
                    current_kmer = next_kmer;
11,394✔
623
                    current_dir = next_dir;
11,394✔
624
                }
11,394✔
625
                ExtMode::Terminal(ext) => {
612✔
626
                    final_exts = ext;
612✔
627
                    break;
612✔
628
                }
612✔
629
            }
612✔
630
        }
612✔
631

612✔
632
        final_exts
612✔
633
    }
612✔
634

635
    /// Build the edge surrounding a kmer
636
    #[inline(never)]
637
    fn  build_node(
56,393✔
638
        &mut self,
56,393✔
639
        seed_id: usize,
56,393✔
640
        path: &mut Vec<(K, Dir)>,
56,393✔
641
        edge_seq: &mut VecDeque<u8>,
56,393✔
642
    ) -> (Exts, D) {
56,393✔
643
        let seed: K = *self.index.get_key(seed_id).expect("Index out of bound");
56,393✔
644
        edge_seq.clear();
56,393✔
645
        for i in 0..K::k() {
1,772,179✔
646
            edge_seq.push_back(seed.get(i));
1,772,179✔
647
        }
1,772,179✔
648

649
        let mut node_data = self.get_kmer_data(&seed).1.clone();
56,393✔
650

56,393✔
651
        // Unique path from seed kmer with Dir Left is built
56,393✔
652
        let l_ext = self.extend_kmer(seed, Dir::Left, path);
56,393✔
653

654

655
        // Add on the left path
656
        for &(next_kmer, dir) in path.iter() {
828,466✔
657
            let kmer = match dir {
828,466✔
658
                Dir::Left => next_kmer,
417,890✔
659
                Dir::Right => next_kmer.rc(),
410,576✔
660
            };
661

662
            edge_seq.push_front(kmer.get(0));
828,466✔
663

828,466✔
664
            // Reduce the data object
828,466✔
665
            let (_, kmer_data) = self.get_kmer_data(&next_kmer);
828,466✔
666
            node_data = self.spec.reduce(node_data, kmer_data)
828,466✔
667
        }
668

669
        let left_extend = match path.last() {
56,393✔
670
            None => l_ext,
22,248✔
671
            Some(&(_, Dir::Left)) => l_ext,
17,134✔
672
            Some(&(_, Dir::Right)) => l_ext.complement(),
17,011✔
673
        };
674

675

676
        // Unique path from seed kmer with Dir Right is built
677
        let r_ext = self.extend_kmer(seed, Dir::Right, path);
56,393✔
678

679
        // Add on the right path
680
        for &(next_kmer, dir) in path.iter() {
826,338✔
681
            let kmer = match dir {
826,338✔
682
                Dir::Left => next_kmer.rc(),
407,946✔
683
                Dir::Right => next_kmer,
418,392✔
684
            };
685

686
            edge_seq.push_back(kmer.get(K::k() - 1));
826,338✔
687

826,338✔
688
            let (_, kmer_data) = self.get_kmer_data(&next_kmer);
826,338✔
689
            node_data = self.spec.reduce(node_data, kmer_data)
826,338✔
690
        }
691

692
        let right_extend = match path.last() {
56,393✔
693
            None => r_ext,
21,917✔
694
            Some(&(_, Dir::Left)) => r_ext.complement(),
16,517✔
695
            Some(&(_, Dir::Right)) => r_ext,
17,959✔
696
        };
697
        
698
        (Exts::from_single_dirs(left_extend, right_extend), node_data)
56,393✔
699
    }
56,393✔
700

701

702
    /// Build the edge surrounding a kmer
703
    #[inline(never)]
704
    fn  build_node_start(
294✔
705
        &mut self,
294✔
706
        seed_id: usize,
294✔
707
        path: &mut Vec<(K, Dir)>,
294✔
708
        edge_seq: &mut VecDeque<u8>,
294✔
709
    ) -> (K, K) {
294✔
710
        let seed: K = *self.index.get_key(seed_id).expect("Index out of bound");
294✔
711
        edge_seq.clear();
294✔
712
        for i in 0..K::k() {
9,408✔
713
            edge_seq.push_back(seed.get(i));
9,408✔
714
        }
9,408✔
715

716
        let mut node_data = self.get_kmer_data(&seed).1.clone();
294✔
717

294✔
718
        // Unique path from seed kmer with Dir Left is built
294✔
719
        let _ = self.extend_kmer_par(seed, Dir::Left, path);
294✔
720

721
        // Add on the left path
722
        for &(next_kmer, dir) in path.iter() {
5,738✔
723
            let kmer = match dir {
5,738✔
724
                Dir::Left => next_kmer,
2,912✔
725
                Dir::Right => next_kmer.rc(),
2,826✔
726
            };
727

728
            edge_seq.push_front(kmer.get(0));
5,738✔
729

5,738✔
730
            // Reduce the data object
5,738✔
731
            let (_, kmer_data) = self.get_kmer_data(&next_kmer);
5,738✔
732
            node_data = self.spec.reduce(node_data, kmer_data)
5,738✔
733
        }
734

735
        // Unique path from seed kmer with Dir Right is built
736
        let _ = self.extend_kmer_par(seed, Dir::Right, path);
294✔
737

738
        // Add on the right path
739
        for &(next_kmer, dir) in path.iter() {
5,374✔
740
            let kmer = match dir {
5,374✔
741
                Dir::Left => next_kmer.rc(),
2,462✔
742
                Dir::Right => next_kmer,
2,912✔
743
            };
744

745
            edge_seq.push_back(kmer.get(K::k() - 1));
5,374✔
746

5,374✔
747
            let (_, kmer_data) = self.get_kmer_data(&next_kmer);
5,374✔
748
            node_data = self.spec.reduce(node_data, kmer_data)
5,374✔
749
        }
750

751
        let mut path_seq = PackedDnaStringSet::new();
294✔
752
        path_seq.add(edge_seq);
294✔
753
        let first = path_seq.sequence.get_kmer::<K>(0);
294✔
754
        let min_first = first.min_rc();
294✔
755
        let last = path_seq.sequence.get_kmer::<K>(path_seq.sequence.len()-K::k());
294✔
756
        let min_last = last.min_rc();
294✔
757

294✔
758
        if min_first < min_last {
294✔
759
            (min_first, min_last)
154✔
760
        } else {
761
            (min_last, min_first)
140✔
762
        }
763
    }
294✔
764

765
    #[inline(never)]
766
    fn  build_node_par(
12✔
767
        &mut self,
12✔
768
        seed_id: usize,
12✔
769
        path: &mut Vec<(K, Dir)>,
12✔
770
        edge_seq: &mut VecDeque<u8>,
12✔
771
    ) -> (Exts, D) {
12✔
772
        let seed: K = *self.index.get_key(seed_id).expect("Index out of bound");
12✔
773
        edge_seq.clear();
12✔
774
        for i in 0..K::k() {
384✔
775
            edge_seq.push_back(seed.get(i));
384✔
776
        }
384✔
777

778
        let mut node_data = self.get_kmer_data(&seed).1.clone();
12✔
779

12✔
780
        // Unique path from seed kmer with Dir Left is built
12✔
781
        let l_ext = self.extend_kmer_par(seed, Dir::Left, path);
12✔
782

783

784
        // Add on the left path
785
        for &(next_kmer, dir) in path.iter() {
175✔
786
            let kmer = match dir {
175✔
787
                Dir::Left => next_kmer,
91✔
788
                Dir::Right => next_kmer.rc(),
84✔
789
            };
790

791
            edge_seq.push_front(kmer.get(0));
175✔
792

175✔
793
            // Reduce the data object
175✔
794
            let (_, kmer_data) = self.get_kmer_data(&next_kmer);
175✔
795
            node_data = self.spec.reduce(node_data, kmer_data)
175✔
796
        }
797

798
        let left_extend = match path.last() {
12✔
799
            None => l_ext,
5✔
800
            Some(&(_, Dir::Left)) => l_ext,
4✔
801
            Some(&(_, Dir::Right)) => l_ext.complement(),
3✔
802
        };
803

804

805
        // Unique path from seed kmer with Dir Right is built
806
        let r_ext = self.extend_kmer_par(seed, Dir::Right, path);
12✔
807

808
        // Add on the right path
809
        for &(next_kmer, dir) in path.iter() {
107✔
810
            let kmer = match dir {
107✔
811
                Dir::Left => next_kmer.rc(),
42✔
812
                Dir::Right => next_kmer,
65✔
813
            };
814

815
            edge_seq.push_back(kmer.get(K::k() - 1));
107✔
816

107✔
817
            let (_, kmer_data) = self.get_kmer_data(&next_kmer);
107✔
818
            node_data = self.spec.reduce(node_data, kmer_data)
107✔
819
        }
820

821
        let right_extend = match path.last() {
12✔
822
            None => r_ext,
7✔
823
            Some(&(_, Dir::Left)) => r_ext.complement(),
2✔
824
            Some(&(_, Dir::Right)) => r_ext,
3✔
825
        };
826
        
827
        (Exts::from_single_dirs(left_extend, right_extend), node_data)
12✔
828
    }
12✔
829

830
    /// Compress a set of kmers and their extensions and metadata into a base DeBruijn graph.
831
    #[inline(never)]
832
    pub fn compress_kmers(
3,960✔
833
        stranded: bool,
3,960✔
834
        spec: &S,
3,960✔
835
        index: &BoomHashMap2<K, Exts, D>,
3,960✔
836
        progress: bool,
3,960✔
837
    ) -> BaseGraph<K, D> {
3,960✔
838
        
3,960✔
839
        let n_kmers = index.len();
3,960✔
840
        let mut available_kmers = BitSet::with_capacity(n_kmers);
3,960✔
841
        let progress = if n_kmers < 128 { false } else { progress };
3,960✔
842

843
        for i in 0..n_kmers {
1,711,197✔
844
            available_kmers.insert(i);
1,711,197✔
845
        }
1,711,197✔
846

847
        let mut comp = CompressFromHash {
3,960✔
848
            stranded,
3,960✔
849
            spec,
3,960✔
850
            k: PhantomData,
3,960✔
851
            d: PhantomData,
3,960✔
852
            available_kmers,
3,960✔
853
            index,
3,960✔
854
        };
3,960✔
855

3,960✔
856
        // Path-compressed De Bruijn graph will be created here
3,960✔
857
        let mut graph = BaseGraph::new(stranded);
3,960✔
858

3,960✔
859
        // Paths will be get assembled here
3,960✔
860
        let mut path_buf = Vec::new();
3,960✔
861

3,960✔
862
        // Node sequences will get assembled here
3,960✔
863
        let mut edge_seq_buf = VecDeque::new();
3,960✔
864

3,960✔
865
        debug!("n of kmers: {}", n_kmers);
3,960✔
866

867
        let steps = n_kmers as f32 / 128.;
3,960✔
868

3,960✔
869
        if progress {
3,960✔
870
            println!("Compressing kmers");
547✔
871
            for _i in 0..127 {
70,016✔
872
                print!("-");
69,469✔
873
            }
69,469✔
874
            println!("|");
547✔
875
        }
3,413✔
876

877
        let pb = ProgressBar::new(n_kmers as u64);
3,960✔
878
        pb.set_style(ProgressStyle::with_template("{msg} [{elapsed_precise}] {bar:60.cyan/blue} ({pos}/{len})").unwrap().progress_chars("#/-"));
3,960✔
879
        pb.set_message(format!("{:<32}", "compressing graph"));
3,960✔
880

881

882
        for kmer_counter in (0..n_kmers).progress_with(pb) {
1,711,197✔
883
            if progress && (kmer_counter as f32 % steps >= 0.) & (kmer_counter as f32 % steps < 1.) { print!("|")}
1,711,197✔
884

885
            if (kmer_counter as f32 % steps >= 0.) & (kmer_counter as f32 % steps < 1.) {
1,711,197✔
886
                debug!("another 1/128 done: {}, data graph size: {}", (kmer_counter as f32 / steps) as i32, mem::size_of_val(&*graph.data));
162,902✔
887
            }
1,548,295✔
888

889
            if comp.available_kmers.contains(kmer_counter) {
1,711,197✔
890
                let (node_exts, node_data) =
56,393✔
891
                    comp.build_node(kmer_counter, &mut path_buf, &mut edge_seq_buf);
56,393✔
892
                graph.add(&edge_seq_buf, node_exts, node_data);
56,393✔
893
            }
1,654,804✔
894
        }
895

896
        if progress { println!() };
3,960✔
897

898
        graph
3,960✔
899
    }
3,960✔
900

901
    /// Compress a set of kmers and their extensions and metadata into a base DeBruijn graph, utilizing multithreading
902
    #[inline(never)]
903
    pub fn compress_kmers_parallel(
2✔
904
        stranded: bool,
2✔
905
        spec: &S,
2✔
906
        index: &BoomHashMap2<K, Exts, D>,
2✔
907
        progress: bool,
2✔
908
    ) -> BaseGraph<K, D> {
2✔
909
        
2✔
910
        let n_kmers = index.len();
2✔
911
        let progress = if n_kmers < 128 { false } else { progress };
2✔
912

913
        // calculate the slices of the workload according to the available threads
914
        debug!("current num threads: {}", current_num_threads());
2✔
915
        debug!("n of kmers: {}", n_kmers);
2✔
916

917
        let slices = current_num_threads();
2✔
918
        let sz = n_kmers / slices + 1;
2✔
919

2✔
920
        debug!("n_kmers: {}", n_kmers);
2✔
921
        debug!("sz: {}", sz);
2✔
922

923
        let mut parallel_ranges = Vec::with_capacity(slices);
2✔
924
        let mut start = 0;
2✔
925
        while start < n_kmers {
10✔
926
            parallel_ranges.push(start..start + sz);
8✔
927
            start += sz;
8✔
928
        }
8✔
929

930
        let last_start = parallel_ranges.pop().expect("no kmers in parallel ranges").start;
2✔
931
        parallel_ranges.push(last_start..n_kmers);
2✔
932
        debug!("parallel ranges: {:?}", parallel_ranges);
2✔
933

934
        let all_start_end_kmers: Arc<Mutex<HashMap<K, K>>> = Arc::new(Mutex::new(HashMap::new()));
2✔
935

2✔
936
        let steps = n_kmers as f32 / 128.;
2✔
937
        if progress {
2✔
938
            println!("Compressing kmers\nStep 1 of 2");
2✔
939
            for _i in 0..127 {
256✔
940
                print!("-");
254✔
941
            }
254✔
942
            println!("|");
2✔
943
        }
×
944
        
945
        // go through all kmers and find the start and end kmer of each compressable sequence node
946
        parallel_ranges.into_par_iter().for_each(|range| {
8✔
947

8✔
948
            let mut path_buf = Vec::new();
8✔
949
            let mut edge_seq_buf = VecDeque::new();
8✔
950
            //let mut start_end_kmers: Vec<(K, K)> = Vec::new();
8✔
951

8✔
952
            let range2 = range.clone();
8✔
953

954
            for kmer_counter in range {
302✔
955

956
                if progress && (kmer_counter as f32 % steps >= 0.) & (kmer_counter as f32 % steps < 1.) { print!("|")}
294✔
957

958
                let mut comp = CompressFromHash {
294✔
959
                    stranded,
294✔
960
                    spec,
294✔
961
                    k: PhantomData,
294✔
962
                    d: PhantomData,
294✔
963
                    available_kmers: BitSet::new(),
294✔
964
                    index,
294✔
965
                };               
294✔
966

294✔
967
                let kmers = comp.build_node_start(kmer_counter, &mut path_buf, &mut edge_seq_buf);
294✔
968

294✔
969
                let all_clone = Arc::clone(&all_start_end_kmers);
294✔
970
                let mut all_lock = all_clone.lock().expect("lock all_start_end_kmers");
294✔
971
                all_lock.entry(kmers.0).or_insert(kmers.1);
294✔
972
            }
973

974
            debug!("finished range: {:?}", range2);
8✔
975
        });
8✔
976

2✔
977
        if progress { println!() }
2✔
978

979
        // all the kmers which occurr at the beginning or end of a node are soerted and deduped
980
        // resulting in one (K, K) per node
981
        let all_start_end_kmers: Vec<(K, K)> = all_start_end_kmers.lock().expect("final lock all_start_end_kmers").clone().into_iter().collect();
2✔
982

2✔
983
        //all_start_end_kmers.sort();
2✔
984
        //all_start_end_kmers.dedup();
2✔
985

2✔
986
        //debug!("all start and end kmers: {:?}", all_start_end_kmers);
2✔
987

2✔
988
        // all graphs constructed by the respective threads are pushed into this and later combined
2✔
989
        let graphs: Arc<Mutex<Vec<BaseGraph<K, D>>>> = Arc::new(Mutex::new(Vec::with_capacity(current_num_threads())));
2✔
990
        
2✔
991
        // calculate the workload has to be sliced depending on the threads
2✔
992
        let n_starts = all_start_end_kmers.len();
2✔
993
        let slices = current_num_threads();
2✔
994
        let sz = n_starts / slices + 1;
2✔
995

2✔
996
        debug!("n start kmers: {}", all_start_end_kmers.len());
2✔
997
        debug!("sz: {}", sz);
2✔
998

999
        let mut parallel_ranges = Vec::with_capacity(slices);
2✔
1000
        let mut start = 0;
2✔
1001
        while start < n_starts {
8✔
1002
            parallel_ranges.push(start..start + sz);
6✔
1003
            start += sz;
6✔
1004
        }
6✔
1005

1006
        let last_start = parallel_ranges.pop().expect("no kmers in parallel ranges").start;
2✔
1007
        parallel_ranges.push(last_start..n_starts);
2✔
1008

2✔
1009
        debug!("parallel ranges 2: {:?}", parallel_ranges);
2✔
1010

1011
        let steps = n_starts as f32 / 128.;
2✔
1012
        if progress {
2✔
1013
            println!("Step 2 of 2");
2✔
1014
            for _i in 0..127 {
256✔
1015
                print!("-");
254✔
1016
            }
254✔
1017
            println!("|");
2✔
1018
        }
×
1019
        let short_progress = n_starts < 100;
2✔
1020

2✔
1021
        // go trough all start kmers and find the corresponding sequence, add them to the partial graph
2✔
1022
        parallel_ranges.into_par_iter().for_each(|range| {
6✔
1023

6✔
1024
            let mut graph: BaseGraph<K, D> = BaseGraph::new(stranded);
6✔
1025

6✔
1026
            let range2 = range.clone();
6✔
1027

1028
            for (i, (start, _)) in all_start_end_kmers[range].iter().enumerate() {   
12✔
1029

1030
                if progress & !short_progress && (i as f32 % steps >= 0.) & (i as f32 % steps < 1.) { print!("|")}
12✔
1031

1032
                let mut path_buf = Vec::new();
12✔
1033
                let mut edge_seq_buf = VecDeque::new();
12✔
1034
                let mut comp = CompressFromHash {
12✔
1035
                    stranded,
12✔
1036
                    spec,
12✔
1037
                    k: PhantomData,
12✔
1038
                    d: PhantomData,
12✔
1039
                    available_kmers: BitSet::new(),
12✔
1040
                    index,
12✔
1041
                };      
12✔
1042
                let (node_exts, node_data) = comp.build_node_par(comp.index.get_key_id(start).expect("get kmer id from index, should exist"), &mut path_buf, &mut edge_seq_buf);
12✔
1043
                graph.add(&edge_seq_buf, node_exts, node_data);
12✔
1044
            }
1045

1046
            debug!("finished range: {:?}, subgraph data size: {}", range2, mem::size_of_val(&*graph.data));
6✔
1047

1048
            let graphs_clone = Arc::clone(&graphs);
6✔
1049
            let mut graphs_lock = graphs_clone.lock().expect("lock graphs to push new graph");
6✔
1050
            graphs_lock.push(graph);
6✔
1051

6✔
1052
        });
6✔
1053

2✔
1054
        if progress & short_progress {
2✔
1055
            for _i in 0..128 {
258✔
1056
                print!("|");
256✔
1057
            }
256✔
1058
        }
×
1059
        
1060
        if progress { println!() }
2✔
1061

1062
        // combine the graphs and return the resulting graph
1063
        let graph = BaseGraph::combine(graphs.lock().expect("final graph lock").clone().into_iter());
2✔
1064
        graph
2✔
1065
    }
2✔
1066
}
1067

1068

1069
/// Take a BoomHash Object and build a compressed DeBruijn graph.
1070
#[inline(never)]
1071
pub fn compress_kmers_with_hash<K: Kmer + Send + Sync, D: Clone + Debug + Send + Sync, S: CompressionSpec<D> + Send + Sync>(
3,962✔
1072
    stranded: bool,
3,962✔
1073
    spec: &S,
3,962✔
1074
    index: &BoomHashMap2<K, Exts, D>,
3,962✔
1075
    time: bool,
3,962✔
1076
    parallel: bool,
3,962✔
1077
    progress: bool,
3,962✔
1078
) -> BaseGraph<K, D> {
3,962✔
1079
    let before_compression = Instant::now();
3,962✔
1080
    let graph = if !parallel {CompressFromHash::<K, D, S>::compress_kmers(stranded, spec, index, progress) } else {
3,962✔
1081
        CompressFromHash::<K, D, S>::compress_kmers_parallel(stranded, spec, index, progress)};
2✔
1082
    if time { println!("time compression (s): {}", before_compression.elapsed().as_secs_f32()) }
3,962✔
1083
    graph
3,962✔
1084
}
3,962✔
1085

1086
/// Take (make) a BoomHash Object and build a compressed DeBruijn graph.
1087
#[inline(never)]
1088
pub fn compress_kmers<K: Kmer + Send + Sync, D: Clone + Debug  + Send + Sync, S: CompressionSpec<D> + Send + Sync>(
×
1089
    stranded: bool,
×
1090
    spec: &S,
×
1091
    kmer_exts: &[(K, (Exts, D))],
×
1092
) -> BaseGraph<K, D> {
×
1093
    let mut keys = Vec::with_capacity(kmer_exts.len());
×
1094
    let mut exts = Vec::with_capacity(kmer_exts.len());
×
1095
    let mut data = Vec::with_capacity(kmer_exts.len());
×
1096

1097
    for (k, (e, d)) in kmer_exts {
×
1098
        keys.push(*k);
×
1099
        data.push(d.clone());
×
1100
        exts.push(*e);
×
1101
    }
×
1102

1103
    let index = BoomHashMap2::new(keys, exts, data);
×
1104
    CompressFromHash::<K, D, S>::compress_kmers(stranded, spec, &index, false)
×
1105
}
×
1106

1107
/// Build graph from a set of kmers with unknown extensions by finding the extensions on the fly.
1108
#[inline(never)]
1109
pub fn compress_kmers_no_exts<K: Kmer + Send + Sync, D: Clone + Debug + Send + Sync, S: CompressionSpec<D> + Send + Sync>(
×
1110
    stranded: bool,
×
1111
    spec: &S,
×
1112
    kmer_exts: &[(K, D)],
×
1113
) -> BaseGraph<K, D> {
×
1114
    let kmer_set: std::collections::HashSet<_> = kmer_exts.iter().map(|(k, _)| k).collect();
×
1115

×
1116
    let can = |k: K| k.min_rc();
×
1117

1118
    let mut keys = Vec::with_capacity(kmer_exts.len());
×
1119
    let mut exts = Vec::with_capacity(kmer_exts.len());
×
1120
    let mut data = Vec::with_capacity(kmer_exts.len());
×
1121
    for (k, d) in kmer_exts {
×
1122
        let mut e = Exts::empty();
×
1123

1124
        for l in 0..4 {
×
1125
            let new = can(k.extend_left(l));
×
1126

×
1127
            if kmer_set.contains(&new) {
×
1128
                e = e.set(Dir::Left, l);
×
1129
            }
×
1130
        }
1131

1132
        for r in 0..4 {
×
1133
            let new = can(k.extend_right(r));
×
1134

×
1135
            if kmer_set.contains(&new) {
×
1136
                e = e.set(Dir::Right, r);
×
1137
            }
×
1138
        }
1139

1140
        keys.push(*k);
×
1141
        data.push(d.clone());
×
1142
        exts.push(e);
×
1143
    }
1144

1145
    assert_eq!(kmer_set.len(), keys.len());
×
1146

1147
    let index = BoomHashMap2::new(keys, exts, data);
×
1148
    CompressFromHash::<K, D, S>::compress_kmers(stranded, spec, &index,false)
×
1149
}
×
1150

1151
/// assumes stranded = false
1152
pub fn uncompressed_graph<K: Kmer, D: Clone + Debug>(
×
1153
    index: &BoomHashMap2<K, Exts, D>,
×
1154
) -> BaseGraph<K, D> {
×
1155

×
1156
    let mut graph: BaseGraph<K, D> = BaseGraph::new(false);
×
1157
    let mut kmer_seq: VecDeque<u8> = VecDeque::with_capacity(K::k());
×
1158

UNCOV
1159
    for (kmer, exts, data) in index.into_iter() {
×
UNCOV
1160
        kmer_seq.clear();
×
1161
        for i in 0..K::k() {
×
1162
            kmer_seq.push_back(kmer.get(i));
×
1163
        }
×
NEW
1164
        graph.add(&kmer_seq, *exts, data.clone());
×
1165
    }
1166
    graph
×
1167
}
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc