• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vigna / webgraph-rs / 19278972970

11 Nov 2025 09:25PM UTC coverage: 62.316% (+14.3%) from 48.052%
19278972970

push

github

zommiommy
BatchCodec print stats and encoding time

40 of 53 new or added lines in 4 files covered. (75.47%)

814 existing lines in 46 files now uncovered.

5252 of 8428 relevant lines covered (62.32%)

29580067.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

86.71
/webgraph/src/graphs/bvgraph/random_access.rs
1
/*
2
 * SPDX-FileCopyrightText: 2023 Inria
3
 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
4
 *
5
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6
 */
7

8
use crate::prelude::*;
9
use bitflags::Flags;
10
use dsi_bitstream::codes::ToInt;
11
use dsi_bitstream::dispatch::factory::CodesReaderFactoryHelper;
12
use dsi_bitstream::traits::{Endianness, BE};
13
use epserde::deser::Owned;
14
use lender::IntoLender;
15
use std::path::PathBuf;
16

17
use self::sequential::Iter;
18

19
/// A BV-format graph supporting random access to the successor lists of its
/// nodes, parameterized by the decoder factory `F`.
#[derive(Debug, Clone)]
pub struct BvGraph<F> {
    // Factory creating decoders positioned at the bitstream of a given node.
    factory: F,
    // Total number of nodes in the graph.
    number_of_nodes: usize,
    // Total number of arcs (directed edges) in the graph.
    number_of_arcs: u64,
    // Maximum distance between two nodes that reference each other.
    compression_window: usize,
    // Minimum length of the intervals encoded in successor lists.
    min_interval_length: usize,
}
27

28
impl BvGraph<()> {
29
    /// Returns a load configuration that can be customized.
30
    pub fn with_basename(
38✔
31
        basename: impl AsRef<std::path::Path>,
32
    ) -> LoadConfig<BE, Random, Dynamic, Mmap, Mmap> {
33
        LoadConfig {
34
            basename: PathBuf::from(basename.as_ref()),
152✔
35
            graph_load_flags: Flags::empty(),
76✔
36
            offsets_load_flags: Flags::empty(),
38✔
37
            _marker: std::marker::PhantomData,
38
        }
39
    }
40
}
41

42
impl<E: Endianness, F: CodesReaderFactoryHelper<E>, OFF: Offsets>
    BvGraph<DynCodesDecoderFactory<E, F, OFF>>
where
    for<'a> &'a OFF::DeserType<'a>: IntoIterator<Item = usize>,
{
    /// Remaps the offsets in a slice of `usize`.
    ///
    /// This method is mainly useful for benchmarking and testing purposes, as
    /// representing the offsets as a slice increases significantly the
    /// memory footprint. It just replaces the current decoder factory with
    /// the result of [`DynCodesDecoderFactory::offsets_to_slice`].
    pub fn offsets_to_slice(self) -> BvGraph<DynCodesDecoderFactory<E, F, Owned<Box<[usize]>>>> {
        BvGraph {
            factory: self.factory.offsets_to_slice(),
            // All counts and compression parameters carry over unchanged.
            number_of_nodes: self.number_of_nodes,
            number_of_arcs: self.number_of_arcs,
            compression_window: self.compression_window,
            min_interval_length: self.min_interval_length,
        }
    }
}
63

64
impl<E: Endianness, F: CodesReaderFactoryHelper<E>, OFF: Offsets>
    BvGraph<ConstCodesDecoderFactory<E, F, OFF>>
{
    /// Remaps the offsets in a slice of `usize`.
    ///
    /// This method is mainly useful for benchmarking and testing purposes, as
    /// representing the offsets as a slice increases significantly the
    /// memory footprint. It just replaces the current decoder factory with
    /// the result of [`ConstCodesDecoderFactory::offsets_to_slice`].
    pub fn offsets_to_slice(self) -> BvGraph<ConstCodesDecoderFactory<E, F, Owned<Box<[usize]>>>> {
        BvGraph {
            factory: self.factory.offsets_to_slice(),
            // All counts and compression parameters carry over unchanged.
            number_of_nodes: self.number_of_nodes,
            number_of_arcs: self.number_of_arcs,
            compression_window: self.compression_window,
            min_interval_length: self.min_interval_length,
        }
    }
}
83

84
// Splitting is available whenever the per-node decoders can be shared across
// threads (`Send + Sync`), since each split holds its own decoder.
impl<F: RandomAccessDecoderFactory> SplitLabeling for BvGraph<F>
where
    for<'a> <F as RandomAccessDecoderFactory>::Decoder<'a>: Send + Sync,
{
    type SplitLender<'a>
        = split::ra::Lender<'a, BvGraph<F>>
    where
        Self: 'a;
    type IntoIterator<'a>
        = split::ra::IntoIterator<'a, BvGraph<F>>
    where
        Self: 'a;

    /// Returns `how_many` independent lenders over disjoint parts of the
    /// graph, suitable for parallel iteration.
    fn split_iter(&self, how_many: usize) -> Self::IntoIterator<'_> {
        split::ra::Iter::new(self, how_many)
    }
}
101

102
impl<F> BvGraph<F>
where
    F: RandomAccessDecoderFactory,
{
    /// Creates a new BvGraph from the given parameters.
    ///
    /// # Arguments
    /// - `factory`: backend that can create objects that allow
    ///   us to read the bitstream of the graph to decode the edges.
    ///   Since this graph is random-access, the factory can position a
    ///   decoder at the start of any node (this is what [`BvGraphSeq`]
    ///   does not need).
    /// - `number_of_nodes`: the number of nodes in the graph.
    /// - `number_of_arcs`: the number of arcs in the graph.
    /// - `compression_window`: the maximum distance between two nodes that
    ///   reference each other.
    /// - `min_interval_length`: the minimum size of the intervals we are going
    ///   to decode.
    pub fn new(
        factory: F,
        number_of_nodes: usize,
        number_of_arcs: u64,
        compression_window: usize,
        min_interval_length: usize,
    ) -> Self {
        Self {
            factory,
            number_of_nodes,
            number_of_arcs,
            compression_window,
            min_interval_length,
        }
    }

    #[inline(always)]
    /// Consumes self and returns the factory.
    pub fn into_inner(self) -> F {
        self.factory
    }
}
143

144
impl<F> SequentialLabeling for BvGraph<F>
145
where
146
    F: RandomAccessDecoderFactory,
147
{
148
    type Label = usize;
149
    type Lender<'b>
150
        = Iter<F::Decoder<'b>>
151
    where
152
        Self: 'b,
153
        F: 'b;
154

155
    #[inline(always)]
156
    fn num_nodes(&self) -> usize {
7,748✔
157
        self.number_of_nodes
7,748✔
158
    }
159

160
    #[inline(always)]
UNCOV
161
    fn num_arcs_hint(&self) -> Option<u64> {
×
UNCOV
162
        Some(self.number_of_arcs)
×
163
    }
164

165
    /// Returns a fast sequential iterator over the nodes of the graph and their successors.
166
    fn iter_from(&self, start_node: usize) -> Self::Lender<'_> {
14,045✔
167
        let codes_reader = self.factory.new_decoder(start_node).unwrap();
70,225✔
168
        // we have to pre-fill the buffer
169
        let mut backrefs = CircularBuffer::new(self.compression_window + 1);
42,135✔
170

171
        for node_id in start_node.saturating_sub(self.compression_window)..start_node {
251,776✔
172
            backrefs.replace(node_id, self.successors(node_id).collect());
586,788✔
173
        }
174

175
        Iter {
176
            decoder: codes_reader,
177
            backrefs,
178
            compression_window: self.compression_window,
28,090✔
179
            min_interval_length: self.min_interval_length,
28,090✔
180
            number_of_nodes: self.number_of_nodes,
14,045✔
181
            current_node: start_node,
182
        }
183
    }
184
}
185

186
// Marker implementation: any random-access `BvGraph` can also be iterated
// sequentially, so it satisfies the sequential-graph contract.
impl<F> SequentialGraph for BvGraph<F> where F: RandomAccessDecoderFactory {}
187

188
impl<F> RandomAccessLabeling for BvGraph<F>
where
    F: RandomAccessDecoderFactory,
{
    type Labels<'a>
        = Succ<F::Decoder<'a>>
    where
        Self: 'a,
        F: 'a;

    /// Returns the total number of arcs in the graph.
    fn num_arcs(&self) -> u64 {
        self.number_of_arcs
    }

    /// Returns the outdegree of a node.
    fn outdegree(&self, node_id: usize) -> usize {
        // The outdegree is the first value encoded for a node, so a fresh
        // decoder positioned at the node only needs one read.
        let mut codes_reader = self
            .factory
            .new_decoder(node_id)
            .expect("Cannot create reader");
        codes_reader.read_outdegree() as usize
    }

    #[inline(always)]
    /// Returns a random access iterator over the successors of a node.
    ///
    /// The returned [`Succ`] lazily merges three sorted streams decoded from
    /// the bitstream: nodes copied from a reference node, interval-coded
    /// nodes, and residual nodes.
    fn labels(&self, node_id: usize) -> Succ<F::Decoder<'_>> {
        let codes_reader = self
            .factory
            .new_decoder(node_id)
            .expect("Cannot create reader");

        let mut result = Succ::new(codes_reader);
        let degree = result.reader.read_outdegree() as usize;
        // no edges, we are done!
        if degree == 0 {
            return result;
        }
        result.size = degree;
        let mut nodes_left_to_decode = degree;
        // read the reference offset (only present when a compression window
        // is in use)
        let ref_delta = if self.compression_window != 0 {
            result.reader.read_reference_offset() as usize
        } else {
            0
        };
        // if we copy nodes from a previous one
        if ref_delta != 0 {
            // compute the node id of the reference
            let reference_node_id = node_id - ref_delta;
            // retrieve the data (recursively decodes the reference node)
            let neighbors = self.successors(reference_node_id);
            debug_assert!(neighbors.len() != 0);
            // get the info on which destinations to copy
            let number_of_blocks = result.reader.read_block_count() as usize;
            // add +1 if the number of blocks is even, so we have capacity for
            // the block that will be added in the masked iterator
            let alloc_len = 1 + number_of_blocks - (number_of_blocks & 1);
            let mut blocks = Vec::with_capacity(alloc_len);
            if number_of_blocks != 0 {
                // the first block could be zero
                blocks.push(result.reader.read_block() as usize);
                // while the other can't (block lengths after the first are
                // stored off by one)
                for _ in 1..number_of_blocks {
                    blocks.push(result.reader.read_block() as usize + 1);
                }
            }
            // create the masked iterator over the copied destinations
            let res = MaskedIterator::new(neighbors, blocks);
            nodes_left_to_decode -= res.len();

            result.copied_nodes_iter = Some(res);
        };

        // if we still have to read nodes
        if nodes_left_to_decode != 0 && self.min_interval_length != 0 {
            // read the number of intervals
            let number_of_intervals = result.reader.read_interval_count() as usize;
            if number_of_intervals != 0 {
                // pre-allocate with capacity for efficiency (+1 for the fake
                // final interval pushed below)
                result.intervals = Vec::with_capacity(number_of_intervals + 1);
                // the first interval start is a signed offset from node_id
                let node_id_offset = (result.reader.read_interval_start()).to_int();

                debug_assert!((node_id as i64 + node_id_offset) >= 0);
                let mut start = (node_id as i64 + node_id_offset) as usize;
                // interval lengths are stored minus min_interval_length
                let mut delta = result.reader.read_interval_len() as usize;
                delta += self.min_interval_length;
                // save the first interval
                result.intervals.push((start, delta));
                start += delta;
                nodes_left_to_decode -= delta;
                // decode the remaining intervals (starts are gaps from the
                // previous interval's end, off by one)
                for _ in 1..number_of_intervals {
                    start += 1 + result.reader.read_interval_start() as usize;
                    delta = result.reader.read_interval_len() as usize;
                    delta += self.min_interval_length;

                    result.intervals.push((start, delta));
                    start += delta;
                    nodes_left_to_decode -= delta;
                }
                // fake final interval to avoid checks in the implementation of
                // `next`
                result.intervals.push((usize::MAX - 1, 1));
            }
        }

        // decode just the first extra, if present (the others will be decoded on demand)
        if nodes_left_to_decode != 0 {
            // the first residual is a signed offset from node_id
            let node_id_offset = result.reader.read_first_residual().to_int();
            result.next_residual_node = (node_id as i64 + node_id_offset) as usize;
            result.residuals_to_go = nodes_left_to_decode - 1;
        }

        // setup the first interval node so we can decode without branches
        if !result.intervals.is_empty() {
            let (start, len) = &mut result.intervals[0];
            *len -= 1;
            result.next_interval_node = *start;
            *start += 1;
            // advance the interval index if the first interval is exhausted
            result.intervals_idx += (*len == 0) as usize;
        };

        // cache the first copied node so we don't have to check if the iter
        // ended at every call of `next` (usize::MAX means "no copied nodes")
        result.next_copied_node = result
            .copied_nodes_iter
            .as_mut()
            .and_then(|iter| iter.next())
            .unwrap_or(usize::MAX);

        result
    }
}
321

322
impl<F: SequentialDecoderFactory> BvGraph<F>
323
where
324
    for<'a> F::Decoder<'a>: Decode,
325
{
326
    #[inline(always)]
327
    /// Creates an iterator specialized in the degrees of the nodes.
328
    /// This is slightly faster because it can avoid decoding some of the nodes
329
    /// and completely skip the merging step.
330
    pub fn offset_deg_iter(&self) -> OffsetDegIter<F::Decoder<'_>> {
1✔
331
        OffsetDegIter::new(
332
            self.factory.new_decoder().unwrap(),
3✔
333
            self.number_of_nodes,
1✔
334
            self.compression_window,
1✔
335
            self.min_interval_length,
1✔
336
        )
337
    }
338
}
339

340
impl<F: RandomAccessDecoderFactory> BvGraph<F>
341
where
342
    for<'a> F::Decoder<'a>: Decode,
343
{
344
    #[inline(always)]
345
    /// Creates an iterator specialized in the degrees of the nodes starting
346
    /// from a given node.
347
    pub fn offset_deg_iter_from(&self, node: usize) -> OffsetDegIter<F::Decoder<'_>> {
100✔
348
        let mut backrefs = vec![0; self.compression_window];
300✔
349
        for node_id in node.saturating_sub(self.compression_window)..node {
1,744✔
350
            backrefs[node_id % self.compression_window] = self.outdegree(node_id);
2,688✔
351
        }
352
        OffsetDegIter::new_from(
353
            self.factory.new_decoder(node).unwrap(),
400✔
354
            self.number_of_nodes,
100✔
355
            self.compression_window,
100✔
356
            self.min_interval_length,
100✔
357
            node,
100✔
358
            backrefs,
100✔
359
        )
360
    }
361
}
362
// Marker implementation: with a random-access decoder factory, `BvGraph`
// provides the full random-access graph contract.
impl<F> RandomAccessGraph for BvGraph<F> where F: RandomAccessDecoderFactory {}
363

364
/// The iterator returned from [`BvGraph`] that returns the successors of a
/// node in sorted order.
///
/// Internally it merges three sorted streams: nodes copied from a reference
/// node, interval-coded nodes, and residual nodes; `usize::MAX` is used as
/// the "stream exhausted" sentinel for each stream.
#[derive(Debug, Clone)]
pub struct Succ<D: Decode> {
    // Decoder positioned in the bitstream of the node being iterated.
    reader: D,
    /// The number of values left
    size: usize,
    /// Iterator over the destinations that we are going to copy
    /// from another node
    copied_nodes_iter: Option<MaskedIterator<Succ<D>>>,

    /// Intervals of extra nodes
    intervals: Vec<(usize, usize)>,
    /// The index of interval to return
    intervals_idx: usize,
    /// Remaining residual nodes
    residuals_to_go: usize,
    /// The next residual node
    next_residual_node: usize,
    /// The next copied node
    next_copied_node: usize,
    /// The next interval node
    next_interval_node: usize,
}
388

389
impl<D: Decode> ExactSizeIterator for Succ<D> {
    #[inline(always)]
    /// Returns the exact number of successors still to be yielded.
    fn len(&self) -> usize {
        self.size
    }
}
395

396
// SAFETY: `Succ` yields successors in sorted order — `next` always returns
// the minimum of its three sorted component streams (see the struct docs).
unsafe impl<D: Decode> SortedIterator for Succ<D> {}
397

398
impl<D: Decode> Succ<D> {
    /// Creates an iterator over zero successors; the caller is expected to
    /// fill in the decoded state afterwards.
    fn new(reader: D) -> Self {
        Self {
            reader,
            size: 0,
            copied_nodes_iter: None,
            intervals: Vec::new(),
            intervals_idx: 0,
            residuals_to_go: 0,
            // `usize::MAX` is the sentinel for "this stream is exhausted".
            next_residual_node: usize::MAX,
            next_interval_node: usize::MAX,
            next_copied_node: usize::MAX,
        }
    }
}
414

415
impl<D: Decode> Iterator for Succ<D> {
    type Item = usize;

    /// Returns the next successor: the smallest value among the three sorted
    /// streams (copied, interval, residual), advancing the stream it came from.
    fn next(&mut self) -> Option<Self::Item> {
        // check if we should stop iterating
        if self.size == 0 {
            return None;
        }

        self.size -= 1;
        // At least one stream must be non-exhausted (sentinel is usize::MAX).
        debug_assert!(
            self.next_copied_node != usize::MAX
                || self.next_residual_node != usize::MAX
                || self.next_interval_node != usize::MAX,
            "At least one of the nodes must present, this should be a problem with the degree.",
        );

        // find the smallest of the values
        let min = self.next_residual_node.min(self.next_interval_node);

        // depending on from where the node was, forward it
        if min >= self.next_copied_node {
            // the copied stream wins: return its head and pull the next one,
            // falling back to the sentinel when the iterator is exhausted
            let res = self.next_copied_node;
            self.next_copied_node = self
                .copied_nodes_iter
                .as_mut()
                .and_then(|iter| iter.next())
                .unwrap_or(usize::MAX);
            return Some(res);
        } else if min == self.next_residual_node {
            if self.residuals_to_go == 0 {
                // residual stream exhausted: park it at the sentinel
                self.next_residual_node = usize::MAX;
            } else {
                self.residuals_to_go -= 1;
                // NOTE: here we cannot propagate the error
                // residuals are gap-coded, off by one
                self.next_residual_node += 1 + self.reader.read_residual() as usize;
            }
        } else {
            let (start, len) = &mut self.intervals[self.intervals_idx];
            debug_assert_ne!(
                *len, 0,
                "there should never be an interval with length zero here"
            );
            // if the interval has other values, just reduce the interval
            *len -= 1;
            self.next_interval_node = *start;
            *start += 1;
            // move to the next interval once this one is exhausted
            self.intervals_idx += (*len == 0) as usize;
        }

        Some(min)
    }

    /// Exact size is known: `size` counts the remaining successors.
    fn size_hint(&self) -> (usize, Option<usize>) {
        (self.size, Some(self.size))
    }
}
472

473
/// Convenience implementation that makes it possible to iterate over a
/// [`BvGraph`] using the [`for_`](lender::for_) macro (see the
/// [crate documentation](crate)).
impl<'a, F: RandomAccessDecoderFactory> IntoLender for &'a BvGraph<F> {
    type Lender = <BvGraph<F> as SequentialLabeling>::Lender<'a>;

    #[inline(always)]
    fn into_lender(self) -> Self::Lender {
        // Delegates to the sequential iterator over the whole graph.
        self.iter()
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc