vigna / webgraph-rs, build 14999125721

13 May 2025 02:22PM UTC. Coverage: 49.654% (-5.7% from 55.382%)

Commit: "wip hyperball cli" (zommiommy), via GitHub push

4 of 147 new or added lines in 4 files covered (2.72%).
477 existing lines in 46 files are now uncovered.
3663 of 7377 relevant lines covered (49.65%).
25,650,535.86 hits per line.

Source File: /webgraph/src/graphs/bvgraph/comp/bvcomp.rs (80.1% covered)
/*
 * SPDX-FileCopyrightText: 2023 Inria
 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
 *
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
 */

use crate::prelude::*;
use core::cmp::Ordering;
use dsi_bitstream::codes::ToNat;
use lender::prelude::*;

/// A BvGraph compressor, used to compress a graph into a BvGraph
#[derive(Debug, Clone)]
pub struct BvComp<E> {
    /// The ring-buffer that stores the successors of the last
    /// `compression_window` nodes
    backrefs: CircularBuffer<Vec<usize>>,
    /// The ring-buffer that stores how many recursion steps are needed to
    /// decode the last `compression_window` nodes; this is checked against
    /// `max_ref_count`, which modulates the compression / decoding
    /// speed tradeoff
    ref_counts: CircularBuffer<usize>,
    /// The bitstream writer; it also provides an estimator so we can
    /// do multiple tentative compressions and use the real writer once we have
    /// figured out how to compress the graph best
    encoder: E,
    /// When compressing we need to store metadata, so we keep the compressors
    /// around to reuse their allocations for performance reasons.
    compressors: Vec<Compressor>,
    /// The number of previous nodes that will be considered during the compression
    compression_window: usize,
    /// The maximum recursion depth that will be used to decompress a node
    max_ref_count: usize,
    /// The minimum length of sequences that will be compressed as a (start, len) pair
    min_interval_length: usize,
    /// The current node we are compressing
    curr_node: usize,
    /// The first node we are compressing; this is needed because during
    /// parallel compression we need to work on different chunks
    start_node: usize,
    /// The number of arcs compressed so far
    pub arcs: u64,
}

#[derive(Debug, Clone, PartialEq, Eq)]
/// Compute how to encode the successors of a node, given a reference node.
/// This could be a function, but we made it a struct so we can reuse the
/// allocations for performance reasons
struct Compressor {
    /// The outdegree of the node we are compressing
    outdegree: usize,
    /// The blocks of nodes we are copying from the reference node
    blocks: Vec<usize>,
    /// The non-copied nodes
    extra_nodes: Vec<usize>,
    /// The starts of the intervals
    left_interval: Vec<usize>,
    /// The lengths of the intervals
    len_interval: Vec<usize>,
    /// The nodes left to encode as gaps
    residuals: Vec<usize>,
}

impl Compressor {
    /// Constant used only to make the code more readable.
    /// When min_interval_length is 0, we don't use intervals, which might be
    /// counter-intuitive
    const NO_INTERVALS: usize = 0;

    /// Creates a new empty compressor
    fn new() -> Self {
        Compressor {
            outdegree: 0,
            blocks: Vec::with_capacity(1024),
            extra_nodes: Vec::with_capacity(1024),
            left_interval: Vec::with_capacity(1024),
            len_interval: Vec::with_capacity(1024),
            residuals: Vec::with_capacity(1024),
        }
    }

    /// Writes the current node to the bitstream. This dumps the internal
    /// buffers, which are initialized by calling `compress`, so it has to be
    /// called only after `compress`.
    ///
    /// This returns the number of bits written.
    fn write<E: Encode>(
        &self,
        writer: &mut E,
        curr_node: usize,
        reference_offset: Option<usize>,
        min_interval_length: usize,
    ) -> Result<u64, E::Error> {
        let mut written_bits: u64 = 0;
        written_bits += writer.start_node(curr_node)? as u64;
        // write the outdegree
        written_bits += writer.write_outdegree(self.outdegree as u64)? as u64;
        // write the references
        if self.outdegree != 0 {
            if let Some(reference_offset) = reference_offset {
                written_bits += writer.write_reference_offset(reference_offset as u64)? as u64;
                if reference_offset != 0 {
                    written_bits += writer.write_block_count(self.blocks.len() as _)? as u64;
                    if !self.blocks.is_empty() {
                        for i in 0..self.blocks.len() {
                            written_bits += writer.write_block((self.blocks[i] - 1) as u64)? as u64;
                        }
                    }
                }
            }
        }
        // write the intervals
        if !self.extra_nodes.is_empty() && min_interval_length != Self::NO_INTERVALS {
            written_bits += writer.write_interval_count(self.left_interval.len() as _)? as u64;

            if !self.left_interval.is_empty() {
                written_bits += writer.write_interval_start(
                    (self.left_interval[0] as i64 - curr_node as i64).to_nat(),
                )? as u64;
                written_bits += writer
                    .write_interval_len((self.len_interval[0] - min_interval_length) as u64)?
                    as u64;
                let mut prev = self.left_interval[0] + self.len_interval[0];

                for i in 1..self.left_interval.len() {
                    written_bits += writer
                        .write_interval_start((self.left_interval[i] - prev - 1) as u64)?
                        as u64;
                    written_bits += writer
                        .write_interval_len((self.len_interval[i] - min_interval_length) as u64)?
                        as u64;
                    prev = self.left_interval[i] + self.len_interval[i];
                }
            }
        }
        // write the residuals
        if !self.residuals.is_empty() {
            written_bits += writer
                .write_first_residual((self.residuals[0] as i64 - curr_node as i64).to_nat())?
                as u64;

            for i in 1..self.residuals.len() {
                written_bits += writer
                    .write_residual((self.residuals[i] - self.residuals[i - 1] - 1) as u64)?
                    as u64;
            }
        }

        written_bits += writer.end_node(curr_node)? as u64;
        Ok(written_bits)
    }

    #[inline(always)]
    /// Reset the compressor for a new compression
    fn clear(&mut self) {
        self.outdegree = 0;
        self.blocks.clear();
        self.extra_nodes.clear();
        self.left_interval.clear();
        self.len_interval.clear();
        self.residuals.clear();
    }

    /// Set up the internal buffers for the compression of the given values
    fn compress(
        &mut self,
        curr_list: &[usize],
        ref_list: Option<&[usize]>,
        min_interval_length: usize,
    ) -> anyhow::Result<()> {
        self.clear();
        self.outdegree = curr_list.len();

        if self.outdegree != 0 {
            if let Some(ref_list) = ref_list {
                self.diff_comp(curr_list, ref_list);
            } else {
                self.extra_nodes.extend(curr_list)
            }

            if !self.extra_nodes.is_empty() {
                if min_interval_length != Self::NO_INTERVALS {
                    self.intervalize(min_interval_length);
                } else {
                    self.residuals.extend(&self.extra_nodes);
                }
            }
        }
        debug_assert_eq!(self.left_interval.len(), self.len_interval.len());
        Ok(())
    }

    /// Get the extra nodes, compute all the intervals of consecutive nodes
    /// at least min_interval_length long, and put the rest in the residuals
    fn intervalize(&mut self, min_interval_length: usize) {
        let vl = self.extra_nodes.len();
        let mut i = 0;

        while i < vl {
            let mut j = 0;
            if i < vl - 1 && self.extra_nodes[i] + 1 == self.extra_nodes[i + 1] {
                j += 1;
                while i + j < vl - 1 && self.extra_nodes[i + j] + 1 == self.extra_nodes[i + j + 1] {
                    j += 1;
                }
                j += 1;

                // Now j is the number of integers in the interval.
                if j >= min_interval_length {
                    self.left_interval.push(self.extra_nodes[i]);
                    self.len_interval.push(j);
                    i += j - 1;
                }
            }
            if j < min_interval_length {
                self.residuals.push(self.extra_nodes[i]);
            }

            i += 1;
        }
    }
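    // Worked example (cf. `test_compressor_no_ref` below): with
    // `min_interval_length = 2`, the extra nodes [0, 1, 2, 5, 7, 8, 9] contain
    // the runs 0..=2 and 7..=9, so `left_interval = [0, 7]` and
    // `len_interval = [3, 3]`, while the isolated 5 goes to `residuals`.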

    /// Compute the copy blocks and the ignore blocks.
    /// The copy blocks are blocks of nodes we will copy from the reference node.
    fn diff_comp(&mut self, curr_list: &[usize], ref_list: &[usize]) {
        // j is the index of the next successor of the current node we must examine
        let mut j = 0;
        // k is the index of the next successor of the reference node we must examine
        let mut k = 0;
        // curr_block_len is the number of entries (in the reference list) we have already copied/ignored (in the current block)
        let mut curr_block_len = 0;
        // copying is true iff we are producing a copy block (instead of an ignore block)
        let mut copying = true;

        while j < curr_list.len() && k < ref_list.len() {
            // First case: we are currently copying entries from the reference list
            if copying {
                match curr_list[j].cmp(&ref_list[k]) {
                    Ordering::Greater => {
                        /* If while copying we trespass the current element of the reference list,
                        we must stop copying. */
                        self.blocks.push(curr_block_len);
                        copying = false;
                        curr_block_len = 0;
                    }
                    Ordering::Less => {
                        /* If while copying we find a non-matching element of the reference list which
                        is larger than us, we can just add the current element to the extra list
                        and move on. j gets increased. */
                        self.extra_nodes.push(curr_list[j]);
                        j += 1;
                    }
                    Ordering::Equal => {
                        // currList[j] == refList[k]
                        /* If the current elements of the two lists are equal, we just increase the block length.
                        both j and k get increased. */
                        j += 1;
                        k += 1;
                        curr_block_len += 1;
                        // if (forReal) copiedArcs++;
                    }
                }
            } else {
                match curr_list[j].cmp(&ref_list[k]) {
                    Ordering::Greater => {
                        /* If we trespassed the current element of the reference list, we
                        increase the block length. k gets increased. */
                        k += 1;
                        curr_block_len += 1;
                    }
                    Ordering::Less => {
                        /* If we did not trespass the current element of the reference list, we just
                        add the current element to the extra list and move on. j gets increased. */
                        self.extra_nodes.push(curr_list[j]);
                        j += 1;
                    }
                    Ordering::Equal => {
                        // currList[j] == refList[k]
                        /* If we found a match we flush the current block and start a new copying phase. */
                        self.blocks.push(curr_block_len);
                        copying = true;
                        curr_block_len = 0;
                    }
                }
            }
        }
        /* We do not record the last block. The only case when we have to enqueue the last block's length
         * is when we were copying and we did not copy up to the end of the reference list.
         */
        if copying && k < ref_list.len() {
            self.blocks.push(curr_block_len);
        }

        // If there are still missing elements, we add them to the extra list.
        while j < curr_list.len() {
            self.extra_nodes.push(curr_list[j]);
            j += 1;
        }
        // add a 1 to the first block so we can uniformly write them later
        if !self.blocks.is_empty() {
            self.blocks[0] += 1;
        }
    }
}
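// Worked example (cf. `test_compressor2` below): compressing the successor
// list [0, 1, 2, 5, 7, 8, 9] against the reference list [0, 1, 2, 100] with
// `min_interval_length = 2` produces `blocks = [4]` (one copy block of length
// 3, stored with the +1 added to the first block), `extra_nodes = [5, 7, 8, 9]`,
// the single interval (start 7, length 3) and `residuals = [5]`.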

impl<E: EncodeAndEstimate> BvComp<E> {
    /// This value for `min_interval_length` implies that no intervalization will be performed.
    pub const NO_INTERVALS: usize = Compressor::NO_INTERVALS;

    /// Creates a new BvGraph compressor.
    pub fn new(
        encoder: E,
        compression_window: usize,
        max_ref_count: usize,
        min_interval_length: usize,
        start_node: usize,
    ) -> Self {
        BvComp {
            backrefs: CircularBuffer::new(compression_window + 1),
            ref_counts: CircularBuffer::new(compression_window + 1),
            encoder,
            min_interval_length,
            compression_window,
            max_ref_count,
            start_node,
            curr_node: start_node,
            compressors: (0..compression_window + 1)
                .map(|_| Compressor::new())
                .collect(),
            arcs: 0,
        }
    }

    /// Push a new node to the compressor.
    /// The iterator must yield the successors of the node, and the nodes HAVE
    /// TO BE CONTIGUOUS (i.e. if a node has no neighbors you have to pass an
    /// empty iterator)
    pub fn push<I: IntoIterator<Item = usize>>(&mut self, succ_iter: I) -> anyhow::Result<u64> {
        // collect the iterator inside the backrefs, to reuse the capacity already
        // allocated
        {
            let mut succ_vec = self.backrefs.take(self.curr_node);
            succ_vec.clear();
            succ_vec.extend(succ_iter);
            self.backrefs.replace(self.curr_node, succ_vec);
        }
        // get the ref
        let curr_list = &self.backrefs[self.curr_node];
        self.arcs += curr_list.len() as u64;
        // first try to compress the current node without references
        let compressor = &mut self.compressors[0];
        // Compute how we would compress this
        compressor.compress(curr_list, None, self.min_interval_length)?;
        // avoid the mock writing
        if self.compression_window == 0 {
            let written_bits = compressor.write(
                &mut self.encoder,
                self.curr_node,
                None,
                self.min_interval_length,
            )?;
            // update the current node
            self.curr_node += 1;
            return Ok(written_bits);
        }
        // The delta of the best reference, by default 0 which means no reference compression
        let mut ref_delta = 0;
        let mut min_bits = {
            let mut estimator = self.encoder.estimator();
            // Write the compressed data
            compressor.write(
                &mut estimator,
                self.curr_node,
                Some(0),
                self.min_interval_length,
            )?
        };

        let mut ref_count = 0;

        let deltas = 1 + self
            .compression_window
            .min(self.curr_node - self.start_node);
        // the compression window is not zero, so compress the current node
        for delta in 1..deltas {
            let ref_node = self.curr_node - delta;
            // If the reference chain of this candidate is already too long, we don't consider it
            let count = self.ref_counts[ref_node];
            if count >= self.max_ref_count {
                continue;
            }
            // Get the neighbors of this previous ref_node
            let ref_list = &self.backrefs[ref_node];
            // No neighbors, no compression
            if ref_list.is_empty() {
                continue;
            }
            // Get its compressor
            let compressor = &mut self.compressors[delta];
            // Compute how we would compress this
            compressor.compress(curr_list, Some(ref_list), self.min_interval_length)?;
            // Compute how many bits it would use, using the mock writer
            let bits = {
                let mut estimator = self.encoder.estimator();
                compressor.write(
                    &mut estimator,
                    self.curr_node,
                    Some(delta),
                    self.min_interval_length,
                )?
            };
            // keep track of the best; the comparison is strict so we keep the
            // nearest one in the case of multiple equal ones
            if bits < min_bits {
                min_bits = bits;
                ref_delta = delta;
                ref_count = count + 1;
            }
        }
        // write the best result reusing the precomputed compression
        let compressor = &mut self.compressors[ref_delta];
        let written_bits = compressor.write(
            &mut self.encoder,
            self.curr_node,
            Some(ref_delta),
            self.min_interval_length,
        )?;
        self.ref_counts[self.curr_node] = ref_count;
        // consistency check
        debug_assert_eq!(written_bits, min_bits);
        // update the current node
        self.curr_node += 1;
        Ok(written_bits)
    }

    /// Given an iterator over the nodes' successors iterators, push them all.
    /// The iterator must yield the successors of the node, and the nodes HAVE
    /// TO BE CONTIGUOUS (i.e. if a node has no neighbors you have to pass an
    /// empty iterator).
    ///
    /// This is most commonly called with a reference to a graph.
    pub fn extend<L>(&mut self, iter_nodes: L) -> anyhow::Result<u64>
    where
        L: IntoLender,
        L::Lender: for<'next> NodeLabelsLender<'next, Label = usize>,
    {
        let mut count = 0;
        for_! ( (_, succ) in iter_nodes {
            count += self.push(succ.into_iter())?;
        });
        // WAS
        // iter_nodes.for_each(|(_, succ)| self.push(succ)).sum()
        Ok(count)
    }

    /// Consume the compressor and return the number of bits written by
    /// flushing the encoder (0 for instantaneous codes)
    pub fn flush(mut self) -> Result<usize, E::Error> {
        self.encoder.flush()
    }
}

#[cfg(test)]
mod test {

    use self::sequential::Iter;

    use super::*;
    use dsi_bitstream::prelude::*;
    use itertools::Itertools;
    use std::fs::File;
    use std::io::{BufReader, BufWriter};

    #[test]
    fn test_compressor_no_ref() -> anyhow::Result<()> {
        let mut compressor = Compressor::new();
        compressor.compress(&[0, 1, 2, 5, 7, 8, 9], None, 2)?;
        assert_eq!(
            compressor,
            Compressor {
                outdegree: 7,
                blocks: vec![],
                extra_nodes: vec![0, 1, 2, 5, 7, 8, 9],
                left_interval: vec![0, 7],
                len_interval: vec![3, 3],
                residuals: vec![5],
            }
        );
        Ok(())
    }

    #[test]
    fn test_compressor1() -> anyhow::Result<()> {
        let mut compressor = Compressor::new();
        compressor.compress(&[0, 1, 2, 5, 7, 8, 9], Some(&[0, 1, 2]), 2)?;
        assert_eq!(
            compressor,
            Compressor {
                outdegree: 7,
                blocks: vec![],
                extra_nodes: vec![5, 7, 8, 9],
                left_interval: vec![7],
                len_interval: vec![3],
                residuals: vec![5],
            }
        );
        Ok(())
    }

    #[test]
    fn test_compressor2() -> anyhow::Result<()> {
        let mut compressor = Compressor::new();
        compressor.compress(&[0, 1, 2, 5, 7, 8, 9], Some(&[0, 1, 2, 100]), 2)?;
        assert_eq!(
            compressor,
            Compressor {
                outdegree: 7,
                blocks: vec![4],
                extra_nodes: vec![5, 7, 8, 9],
                left_interval: vec![7],
                len_interval: vec![3],
                residuals: vec![5],
            }
        );
        Ok(())
    }

    #[test]
    fn test_compressor3() -> anyhow::Result<()> {
        let mut compressor = Compressor::new();
        compressor.compress(
            &[0, 1, 2, 5, 7, 8, 9, 100],
            Some(&[0, 1, 2, 4, 7, 8, 9, 101]),
            2,
        )?;
        assert_eq!(
            compressor,
            Compressor {
                outdegree: 8,
                blocks: vec![4, 1, 3],
                extra_nodes: vec![5, 100],
                left_interval: vec![],
                len_interval: vec![],
                residuals: vec![5, 100],
            }
        );
        Ok(())
    }

    #[test]
    fn test_writer_window_zero() -> anyhow::Result<()> {
        test_compression(0, 0)?;
        test_compression(0, 1)?;
        test_compression(0, 2)?;
        Ok(())
    }

    #[test]
    fn test_writer_window_one() -> anyhow::Result<()> {
        test_compression(1, 0)?;
        test_compression(1, 1)?;
        test_compression(1, 2)?;
        Ok(())
    }

    #[test]
    fn test_writer_window_two() -> anyhow::Result<()> {
        test_compression(2, 0)?;
        test_compression(2, 1)?;
        test_compression(2, 2)?;
        Ok(())
    }

    #[test]
    fn test_writer_cnr() -> anyhow::Result<()> {
        let compression_window = 7;
        let min_interval_length = 4;

        let seq_graph = BvGraphSeq::with_basename("../data/cnr-2000")
            .endianness::<BE>()
            .load()?;

        // Compress the graph
        let file_path = "../data/cnr-2000.bvcomp";
        let bit_write = <BufBitWriter<BE, _>>::new(<WordAdapter<usize, _>>::new(BufWriter::new(
            File::create(file_path)?,
        )));

        let comp_flags = CompFlags {
            ..Default::default()
        };

        //let codes_writer = DynamicCodesWriter::new(
        //    bit_write,
        //    &comp_flags,
        //);
        let codes_writer = <ConstCodesEncoder<BE, _>>::new(bit_write);

        let mut bvcomp = BvComp::new(codes_writer, compression_window, 3, min_interval_length, 0);

        bvcomp.extend(&seq_graph).unwrap();
        bvcomp.flush()?;

        // Read it back

        let bit_read = <BufBitReader<BE, _>>::new(<WordAdapter<u32, _>>::new(BufReader::new(
            File::open(file_path)?,
        )));

        //let codes_reader = <DynamicCodesReader<LE, _>>::new(bit_read, &comp_flags)?;
        let codes_reader = <ConstCodesDecoder<BE, _>>::new(bit_read, &comp_flags)?;

        let mut seq_iter = Iter::new(
            codes_reader,
            seq_graph.num_nodes(),
            compression_window,
            min_interval_length,
        );
        // Check that the graph is the same
        let mut iter = seq_graph.iter().enumerate();
        while let Some((i, (true_node_id, true_succ))) = iter.next() {
            let (seq_node_id, seq_succ) = seq_iter.next().unwrap();

            assert_eq!(true_node_id, i);
            assert_eq!(true_node_id, seq_node_id);
            assert_eq!(
                true_succ.collect_vec(),
                seq_succ.into_iter().collect_vec(),
                "node_id: {}",
                i
            );
        }
        std::fs::remove_file(file_path).unwrap();

        Ok(())
    }

    fn test_compression(
        compression_window: usize,
        min_interval_length: usize,
    ) -> anyhow::Result<()> {
        let seq_graph = BvGraphSeq::with_basename("../data/cnr-2000")
            .endianness::<BE>()
            .load()?;

        // Compress the graph
        let mut buffer: Vec<u64> = Vec::new();
        let bit_write = <BufBitWriter<LE, _>>::new(MemWordWriterVec::new(&mut buffer));

        let comp_flags = CompFlags {
            ..Default::default()
        };

        let codes_writer = <ConstCodesEncoder<LE, _>>::new(bit_write);

        let mut bvcomp = BvComp::new(codes_writer, compression_window, 3, min_interval_length, 0);

        bvcomp.extend(&seq_graph).unwrap();
        bvcomp.flush()?;

        // Read it back
        let buffer_32: &[u32] = unsafe { buffer.align_to().1 };
        let bit_read = <BufBitReader<LE, _>>::new(MemWordReader::new(buffer_32));

        //let codes_reader = <DynamicCodesReader<LE, _>>::new(bit_read, &comp_flags)?;
        let codes_reader = <ConstCodesDecoder<LE, _>>::new(bit_read, &comp_flags)?;

        let mut seq_iter = Iter::new(
            codes_reader,
            seq_graph.num_nodes(),
            compression_window,
            min_interval_length,
        );
        // Check that the graph is the same
        let mut iter = seq_graph.iter().enumerate();
        while let Some((i, (true_node_id, true_succ))) = iter.next() {
            let (seq_node_id, seq_succ) = seq_iter.next().unwrap();

            assert_eq!(true_node_id, i);
            assert_eq!(true_node_id, seq_node_id);
            assert_eq!(
                true_succ.collect_vec(),
                seq_succ.collect_vec(),
                "node_id: {}",
                i
            );
        }

        Ok(())
    }
}
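Taken together, the tests above exercise the intended workflow: wrap a bit stream in an encoder, feed every successor list to a BvComp, and flush. A condensed sketch of the compression side, assuming (as in test_compression and test_writer_cnr) a BvGraphSeq loaded from ../data/cnr-2000 and an in-memory little-endian buffer:

    // Sketch condensed from `test_compression` above; the `?` operators assume
    // an enclosing function returning `anyhow::Result<()>`.
    let seq_graph = BvGraphSeq::with_basename("../data/cnr-2000")
        .endianness::<BE>()
        .load()?;

    // Encode into an in-memory buffer of u64 words.
    let mut buffer: Vec<u64> = Vec::new();
    let bit_write = <BufBitWriter<LE, _>>::new(MemWordWriterVec::new(&mut buffer));
    let codes_writer = <ConstCodesEncoder<LE, _>>::new(bit_write);

    // Window and interval parameters as in `test_writer_cnr`:
    // compression_window = 7, max_ref_count = 3, min_interval_length = 4, start_node = 0.
    let mut bvcomp = BvComp::new(codes_writer, 7, 3, 4, 0);
    bvcomp.extend(&seq_graph)?;
    bvcomp.flush()?;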