• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vigna / webgraph-rs / 19076069272

04 Nov 2025 04:42PM UTC coverage: 61.785% (-0.2%) from 61.976%
19076069272

push

github

vigna
Fixed doctests

5143 of 8324 relevant lines covered (61.79%)

30278823.63 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

86.71
/webgraph/src/graphs/bvgraph/random_access.rs
1
/*
2
 * SPDX-FileCopyrightText: 2023 Inria
3
 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
4
 *
5
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6
 */
7

8
use crate::prelude::*;
9
use bitflags::Flags;
10
use dsi_bitstream::codes::ToInt;
11
use dsi_bitstream::dispatch::factory::CodesReaderFactoryHelper;
12
use dsi_bitstream::traits::{Endianness, BE};
13
use epserde::deser::Owned;
14
use lender::IntoLender;
15
use std::path::PathBuf;
16

17
use self::sequential::Iter;
18

19
#[derive(Debug, Clone)]
20
pub struct BvGraph<F> {
21
    factory: F,
22
    number_of_nodes: usize,
23
    number_of_arcs: u64,
24
    compression_window: usize,
25
    min_interval_length: usize,
26
}
27

28
impl BvGraph<()> {
29
    /// Returns a load configuration that can be customized.
30
    pub fn with_basename(
38✔
31
        basename: impl AsRef<std::path::Path>,
32
    ) -> LoadConfig<BE, Random, Dynamic, Mmap, Mmap> {
33
        LoadConfig {
34
            basename: PathBuf::from(basename.as_ref()),
152✔
35
            graph_load_flags: Flags::empty(),
76✔
36
            offsets_load_flags: Flags::empty(),
38✔
37
            _marker: std::marker::PhantomData,
38
        }
39
    }
40
}
41

42
impl<E: Endianness, F: CodesReaderFactoryHelper<E>, OFF: Offsets>
43
    BvGraph<DynCodesDecoderFactory<E, F, OFF>>
44
where
45
    for<'a> &'a OFF::DeserType<'a>: IntoIterator<Item = usize>,
46
{
47
    /// Remaps the offsets in a slice of `usize`.
48
    ///
49
    /// This method is mainly useful for benchmarking and testing purposes, as
50
    /// representing the offsets as a slice increasing significantly the
51
    /// memory footprint. It just replaces current decoder factory with
52
    /// the result of [`DynCodesDecoderFactory::offsets_to_slice`].
53
    pub fn offsets_to_slice(self) -> BvGraph<DynCodesDecoderFactory<E, F, Owned<Box<[usize]>>>> {
×
54
        BvGraph {
55
            factory: self.factory.offsets_to_slice(),
×
56
            number_of_nodes: self.number_of_nodes,
×
57
            number_of_arcs: self.number_of_arcs,
×
58
            compression_window: self.compression_window,
×
59
            min_interval_length: self.min_interval_length,
×
60
        }
61
    }
62
}
63

64
impl<E: Endianness, F: CodesReaderFactoryHelper<E>, OFF: Offsets>
65
    BvGraph<ConstCodesDecoderFactory<E, F, OFF>>
66
{
67
    /// Remaps the offsets in a slice of `usize`.
68
    ///
69
    /// This method is mainly useful for benchmarking and testing purposes, as
70
    /// representing the offsets as a slice increasing significantly the
71
    /// memory footprint. It just replaces current decoder factory with
72
    /// the result of [`ConstCodesDecoderFactory::offsets_to_slice`].
73
    pub fn offsets_to_slice(self) -> BvGraph<ConstCodesDecoderFactory<E, F, Owned<Box<[usize]>>>> {
×
74
        BvGraph {
75
            factory: self.factory.offsets_to_slice(),
×
76
            number_of_nodes: self.number_of_nodes,
×
77
            number_of_arcs: self.number_of_arcs,
×
78
            compression_window: self.compression_window,
×
79
            min_interval_length: self.min_interval_length,
×
80
        }
81
    }
82
}
83

84
impl<F: RandomAccessDecoderFactory> SplitLabeling for BvGraph<F>
85
where
86
    for<'a> <F as RandomAccessDecoderFactory>::Decoder<'a>: Send + Sync,
87
{
88
    type SplitLender<'a>
89
        = split::ra::Lender<'a, BvGraph<F>>
90
    where
91
        Self: 'a;
92
    type IntoIterator<'a>
93
        = split::ra::IntoIterator<'a, BvGraph<F>>
94
    where
95
        Self: 'a;
96

97
    fn split_iter(&self, how_many: usize) -> Self::IntoIterator<'_> {
12✔
98
        split::ra::Iter::new(self, how_many)
36✔
99
    }
100
}
101

102
impl<F> BvGraph<F>
103
where
104
    F: RandomAccessDecoderFactory,
105
{
106
    /// Creates a new BvGraph from the given parameters.
107
    ///
108
    /// # Arguments
109
    /// - `reader_factory`: backend that can create objects that allows
110
    ///   us to read the bitstream of the graph to decode the edges.
111
    /// - `offsets`: the bit offset at which we will have to start for decoding
112
    ///   the edges of each node. (This is needed for the random accesses,
113
    ///   [`BvGraphSeq`] does not need them)
114
    /// - `min_interval_length`: the minimum size of the intervals we are going
115
    ///   to decode.
116
    /// - `compression_window`: the maximum distance between two nodes that
117
    ///   reference each other.
118
    /// - `number_of_nodes`: the number of nodes in the graph.
119
    /// - `number_of_arcs`: the number of arcs in the graph.
120
    ///
121
    pub fn new(
6,996✔
122
        factory: F,
123
        number_of_nodes: usize,
124
        number_of_arcs: u64,
125
        compression_window: usize,
126
        min_interval_length: usize,
127
    ) -> Self {
128
        Self {
129
            factory,
130
            number_of_nodes,
131
            number_of_arcs,
132
            compression_window,
133
            min_interval_length,
134
        }
135
    }
136

137
    #[inline(always)]
138
    /// Consume self and return the factory
139
    pub fn into_inner(self) -> F {
×
140
        self.factory
×
141
    }
142
}
143

144
impl<F> SequentialLabeling for BvGraph<F>
145
where
146
    F: RandomAccessDecoderFactory,
147
{
148
    type Label = usize;
149
    type Lender<'b>
150
        = Iter<F::Decoder<'b>>
151
    where
152
        Self: 'b,
153
        F: 'b;
154

155
    #[inline(always)]
156
    fn num_nodes(&self) -> usize {
984,711✔
157
        self.number_of_nodes
984,711✔
158
    }
159

160
    #[inline(always)]
161
    fn num_arcs_hint(&self) -> Option<u64> {
×
162
        Some(self.number_of_arcs)
×
163
    }
164

165
    /// Returns a fast sequential iterator over the nodes of the graph and their successors.
166
    fn iter_from(&self, start_node: usize) -> Self::Lender<'_> {
14,045✔
167
        let codes_reader = self.factory.new_decoder(start_node).unwrap();
70,225✔
168
        // we have to pre-fill the buffer
169
        let mut backrefs = CircularBuffer::new(self.compression_window + 1);
42,135✔
170

171
        for node_id in start_node.saturating_sub(self.compression_window)..start_node {
251,776✔
172
            backrefs.replace(node_id, self.successors(node_id).collect());
586,788✔
173
        }
174

175
        Iter {
176
            decoder: codes_reader,
177
            backrefs,
178
            compression_window: self.compression_window,
28,090✔
179
            min_interval_length: self.min_interval_length,
28,090✔
180
            number_of_nodes: self.number_of_nodes,
14,045✔
181
            current_node: start_node,
182
        }
183
    }
184
}
185

186
impl<F> SequentialGraph for BvGraph<F> where F: RandomAccessDecoderFactory {}
187

188
impl<F> RandomAccessLabeling for BvGraph<F>
189
where
190
    F: RandomAccessDecoderFactory,
191
{
192
    type Labels<'a>
193
        = Succ<F::Decoder<'a>>
194
    where
195
        Self: 'a,
196
        F: 'a;
197

198
    fn num_arcs(&self) -> u64 {
8,032✔
199
        self.number_of_arcs
8,032✔
200
    }
201

202
    /// Returns the outdegree of a node.
203
    fn outdegree(&self, node_id: usize) -> usize {
72,946,427✔
204
        let mut codes_reader = self
145,892,854✔
205
            .factory
72,946,427✔
206
            .new_decoder(node_id)
145,892,854✔
207
            .expect("Cannot create reader");
208
        codes_reader.read_outdegree() as usize
72,946,427✔
209
    }
210

211
    #[inline(always)]
212
    /// Returns a random access iterator over the successors of a node.
213
    fn labels(&self, node_id: usize) -> Succ<F::Decoder<'_>> {
188,511,071✔
214
        let codes_reader = self
377,022,142✔
215
            .factory
188,511,071✔
216
            .new_decoder(node_id)
377,022,142✔
217
            .expect("Cannot create reader");
218

219
        let mut result = Succ::new(codes_reader);
565,533,213✔
220
        let degree = result.reader.read_outdegree() as usize;
377,022,142✔
221
        // no edges, we are done!
222
        if degree == 0 {
188,511,071✔
223
            return result;
3,076,972✔
224
        }
225
        result.size = degree;
185,434,099✔
226
        let mut nodes_left_to_decode = degree;
370,868,198✔
227
        // read the reference offset
228
        let ref_delta = if self.compression_window != 0 {
370,868,198✔
229
            result.reader.read_reference_offset() as usize
185,433,803✔
230
        } else {
231
            0
296✔
232
        };
233
        // if we copy nodes from a previous one
234
        if ref_delta != 0 {
185,434,099✔
235
            // compute the node id of the reference
236
            let reference_node_id = node_id - ref_delta;
248,643,928✔
237
            // retrieve the data
238
            let neighbors = self.successors(reference_node_id);
497,287,856✔
239
            debug_assert!(neighbors.len() != 0);
248,643,928✔
240
            // get the info on which destinations to copy
241
            let number_of_blocks = result.reader.read_block_count() as usize;
248,643,928✔
242
            // add +1 if the number of blocks is even, so we have capacity for
243
            // the block that will be added in the masked iterator
244
            let alloc_len = 1 + number_of_blocks - (number_of_blocks & 1);
248,643,928✔
245
            let mut blocks = Vec::with_capacity(alloc_len);
372,965,892✔
246
            if number_of_blocks != 0 {
124,321,964✔
247
                // the first block could be zero
248
                blocks.push(result.reader.read_block() as usize);
248,362,398✔
249
                // while the other can't
250
                for _ in 1..number_of_blocks {
213,048,229✔
251
                    blocks.push(result.reader.read_block() as usize + 1);
390,782,289✔
252
                }
253
            }
254
            // create the masked iterator
255
            let res = MaskedIterator::new(neighbors, blocks);
497,287,856✔
256
            nodes_left_to_decode -= res.len();
124,321,964✔
257

258
            result.copied_nodes_iter = Some(res);
248,643,928✔
259
        };
260

261
        // if we still have to read nodes
262
        if nodes_left_to_decode != 0 && self.min_interval_length != 0 {
329,640,419✔
263
            // read the number of intervals
264
            let number_of_intervals = result.reader.read_interval_count() as usize;
288,411,696✔
265
            if number_of_intervals != 0 {
144,205,848✔
266
                // pre-allocate with capacity for efficiency
267
                result.intervals = Vec::with_capacity(number_of_intervals + 1);
127,110,885✔
268
                let node_id_offset = (result.reader.read_interval_start()).to_int();
169,481,180✔
269

270
                debug_assert!((node_id as i64 + node_id_offset) >= 0);
84,740,590✔
271
                let mut start = (node_id as i64 + node_id_offset) as usize;
84,740,590✔
272
                let mut delta = result.reader.read_interval_len() as usize;
84,740,590✔
273
                delta += self.min_interval_length;
42,370,295✔
274
                // save the first interval
275
                result.intervals.push((start, delta));
127,110,885✔
276
                start += delta;
42,370,295✔
277
                nodes_left_to_decode -= delta;
42,370,295✔
278
                // decode the intervals
279
                for _ in 1..number_of_intervals {
86,883,051✔
280
                    start += 1 + result.reader.read_interval_start() as usize;
89,025,512✔
281
                    delta = result.reader.read_interval_len() as usize;
89,025,512✔
282
                    delta += self.min_interval_length;
89,025,512✔
283

284
                    result.intervals.push((start, delta));
178,051,024✔
285
                    start += delta;
44,512,756✔
286
                    nodes_left_to_decode -= delta;
44,512,756✔
287
                }
288
                // fake final interval to avoid checks in the implementation of
289
                // `next`
290
                result.intervals.push((usize::MAX - 1, 1));
127,110,885✔
291
            }
292
        }
293

294
        // decode just the first extra, if present (the others will be decoded on demand)
295
        if nodes_left_to_decode != 0 {
327,008,335✔
296
            let node_id_offset = result.reader.read_first_residual().to_int();
707,871,180✔
297
            result.next_residual_node = (node_id as i64 + node_id_offset) as usize;
141,574,236✔
298
            result.residuals_to_go = nodes_left_to_decode - 1;
141,574,236✔
299
        }
300

301
        // setup the first interval node so we can decode without branches
302
        if !result.intervals.is_empty() {
227,804,394✔
303
            let (start, len) = &mut result.intervals[0];
169,481,180✔
304
            *len -= 1;
84,740,590✔
305
            result.next_interval_node = *start;
84,740,590✔
306
            *start += 1;
42,370,295✔
307
            result.intervals_idx += (*len == 0) as usize;
42,370,295✔
308
        };
309

310
        // cache the first copied node so we don't have to check if the iter
311
        // ended at every call of `next`
312
        result.next_copied_node = result
185,434,099✔
313
            .copied_nodes_iter
185,434,099✔
314
            .as_mut()
185,434,099✔
315
            .and_then(|iter| iter.next())
516,455,223✔
316
            .unwrap_or(usize::MAX);
185,434,099✔
317

318
        result
185,434,099✔
319
    }
320
}
321

322
impl<F: SequentialDecoderFactory> BvGraph<F>
323
where
324
    for<'a> F::Decoder<'a>: Decode,
325
{
326
    #[inline(always)]
327
    /// Creates an iterator specialized in the degrees of the nodes.
328
    /// This is slightly faster because it can avoid decoding some of the nodes
329
    /// and completely skip the merging step.
330
    pub fn offset_deg_iter(&self) -> OffsetDegIter<F::Decoder<'_>> {
1✔
331
        OffsetDegIter::new(
332
            self.factory.new_decoder().unwrap(),
3✔
333
            self.number_of_nodes,
1✔
334
            self.compression_window,
1✔
335
            self.min_interval_length,
1✔
336
        )
337
    }
338
}
339

340
impl<F: RandomAccessDecoderFactory> BvGraph<F>
341
where
342
    for<'a> F::Decoder<'a>: Decode,
343
{
344
    #[inline(always)]
345
    /// Creates an iterator specialized in the degrees of the nodes starting
346
    /// from a given node.
347
    pub fn offset_deg_iter_from(&self, node: usize) -> OffsetDegIter<F::Decoder<'_>> {
100✔
348
        let mut backrefs = vec![0; self.compression_window];
300✔
349
        for node_id in node.saturating_sub(self.compression_window)..node {
1,744✔
350
            backrefs[node_id % self.compression_window] = self.outdegree(node_id);
2,688✔
351
        }
352
        OffsetDegIter::new_from(
353
            self.factory.new_decoder(node).unwrap(),
400✔
354
            self.number_of_nodes,
100✔
355
            self.compression_window,
100✔
356
            self.min_interval_length,
100✔
357
            node,
100✔
358
            backrefs,
100✔
359
        )
360
    }
361
}
362
impl<F> RandomAccessGraph for BvGraph<F> where F: RandomAccessDecoderFactory {}
363

364
/// The iterator returned from [`BvGraph`] that returns the successors of a
365
/// node in sorted order.
366
#[derive(Debug, Clone)]
367
pub struct Succ<D: Decode> {
368
    reader: D,
369
    /// The number of values left
370
    size: usize,
371
    /// Iterator over the destinations that we are going to copy
372
    /// from another node
373
    copied_nodes_iter: Option<MaskedIterator<Succ<D>>>,
374

375
    /// Intervals of extra nodes
376
    intervals: Vec<(usize, usize)>,
377
    /// The index of interval to return
378
    intervals_idx: usize,
379
    /// Remaining residual nodes
380
    residuals_to_go: usize,
381
    /// The next residual node
382
    next_residual_node: usize,
383
    /// The next residual node
384
    next_copied_node: usize,
385
    /// The next interval node
386
    next_interval_node: usize,
387
}
388

389
impl<D: Decode> ExactSizeIterator for Succ<D> {
390
    #[inline(always)]
391
    fn len(&self) -> usize {
443,313,060✔
392
        self.size
443,313,060✔
393
    }
394
}
395

396
unsafe impl<D: Decode> SortedIterator for Succ<D> {}
397

398
impl<D: Decode> Succ<D> {
399
    /// Creates an empty iterator
400
    fn new(reader: D) -> Self {
530,935,780✔
401
        Self {
402
            reader,
403
            size: 0,
404
            copied_nodes_iter: None,
405
            intervals: vec![],
530,935,780✔
406
            intervals_idx: 0,
407
            residuals_to_go: 0,
408
            next_residual_node: usize::MAX,
409
            next_copied_node: usize::MAX,
410
            next_interval_node: usize::MAX,
411
        }
412
    }
413
}
414

415
impl<D: Decode> Iterator for Succ<D> {
416
    type Item = usize;
417

418
    fn next(&mut self) -> Option<Self::Item> {
2,147,483,647✔
419
        // check if we should stop iterating
420
        if self.size == 0 {
2,147,483,647✔
421
            return None;
211,944,588✔
422
        }
423

424
        self.size -= 1;
2,147,483,647✔
425
        debug_assert!(
2,147,483,647✔
426
            self.next_copied_node != usize::MAX
2,147,483,647✔
427
                || self.next_residual_node != usize::MAX
2,147,483,647✔
428
                || self.next_interval_node != usize::MAX,
1,030,630,416✔
429
            "At least one of the nodes must present, this should be a problem with the degree.",
×
430
        );
431

432
        // find the smallest of the values
433
        let min = self.next_residual_node.min(self.next_interval_node);
2,147,483,647✔
434

435
        // depending on from where the node was, forward it
436
        if min >= self.next_copied_node {
2,147,483,647✔
437
            let res = self.next_copied_node;
2,147,483,647✔
438
            self.next_copied_node = self
2,147,483,647✔
439
                .copied_nodes_iter
2,147,483,647✔
440
                .as_mut()
2,147,483,647✔
441
                .and_then(|iter| iter.next())
2,147,483,647✔
442
                .unwrap_or(usize::MAX);
2,147,483,647✔
443
            return Some(res);
2,147,483,647✔
444
        } else if min == self.next_residual_node {
2,147,483,647✔
445
            if self.residuals_to_go == 0 {
1,543,229,284✔
446
                self.next_residual_node = usize::MAX;
290,745,480✔
447
            } else {
448
                self.residuals_to_go -= 1;
1,923,476,648✔
449
                // NOTE: here we cannot propagate the error
450
                self.next_residual_node += 1 + self.reader.read_residual() as usize;
961,738,324✔
451
            }
452
        } else {
453
            let (start, len) = &mut self.intervals[self.intervals_idx];
2,147,483,647✔
454
            debug_assert_ne!(
2,147,483,647✔
455
                *len, 0,
×
456
                "there should never be an interval with length zero here"
×
457
            );
458
            // if the interval has other values, just reduce the interval
459
            *len -= 1;
2,147,483,647✔
460
            self.next_interval_node = *start;
2,147,483,647✔
461
            *start += 1;
2,147,483,647✔
462
            self.intervals_idx += (*len == 0) as usize;
2,147,483,647✔
463
        }
464

465
        Some(min)
2,147,483,647✔
466
    }
467

468
    fn size_hint(&self) -> (usize, Option<usize>) {
4,064,316✔
469
        (self.size, Some(self.size))
4,064,316✔
470
    }
471
}
472

473
/// Convenience implementation that makes it possible to iterate over a
474
/// [`BvGraphSeq`] using the [`for_`](lender::for_) macro (see the
475
/// [crate documentation](crate)).
476
impl<'a, F: RandomAccessDecoderFactory> IntoLender for &'a BvGraph<F> {
477
    type Lender = <BvGraph<F> as SequentialLabeling>::Lender<'a>;
478

479
    #[inline(always)]
480
    fn into_lender(self) -> Self::Lender {
×
481
        self.iter()
×
482
    }
483
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc