• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vigna / webgraph-rs / 22046400135

16 Feb 2026 12:49AM UTC coverage: 72.401% (+11.3%) from 61.096%
22046400135

push

github

vigna
fmt

6060 of 8370 relevant lines covered (72.4%)

48832055.95 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.68
/webgraph/src/graphs/bvgraph/sequential.rs
1
/*
2
 * SPDX-FileCopyrightText: 2023 Inria
3
 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
4
 *
5
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6
 */
7

8
use std::path::PathBuf;
9

10
use super::*;
11
use crate::utils::CircularBuffer;
12
use anyhow::Result;
13
use bitflags::Flags;
14
use dsi_bitstream::codes::ToInt;
15
use dsi_bitstream::traits::BE;
16
use dsi_bitstream::traits::BitSeek;
17
use lender::*;
18

19
/// A sequential graph in the BV format.
20
///
21
/// The graph depends on a [`SequentialDecoderFactory`] that can be used to
22
/// create decoders for the graph. This allows to decouple the graph from the
23
/// underlying storage format, and to use different storage formats for the same
24
/// graph. For example, one can use a memory-mapped file for the graph, or load
25
/// it in memory, or even generate it on the fly.
26
///
27
/// Note that the knowledge of the codes used by the graph is in the factory,
28
/// which provides methods to read each component of the BV format (outdegree,
29
/// reference offset, block count, etc.), whereas the knowledge of the
30
/// compression parameters (compression window and minimum interval length) is
31
/// in this structure.
32
#[derive(Debug, Clone)]
33
pub struct BvGraphSeq<F> {
34
    factory: F,
35
    number_of_nodes: usize,
36
    number_of_arcs: Option<u64>,
37
    compression_window: usize,
38
    min_interval_length: usize,
39
}
40

41
impl BvGraphSeq<()> {
42
    pub fn with_basename(
249,060✔
43
        basename: impl AsRef<std::path::Path>,
44
    ) -> LoadConfig<BE, Sequential, Dynamic, Mmap, Mmap> {
45
        LoadConfig {
46
            basename: PathBuf::from(basename.as_ref()),
996,240✔
47
            graph_load_flags: Flags::empty(),
498,120✔
48
            offsets_load_flags: Flags::empty(),
249,060✔
49
            _marker: std::marker::PhantomData,
50
        }
51
    }
52
}
53

54
impl<F: SequentialDecoderFactory> SplitLabeling for BvGraphSeq<F>
55
where
56
    for<'a> <F as SequentialDecoderFactory>::Decoder<'a>: Clone + Send + Sync,
57
{
58
    type SplitLender<'a>
59
        = split::seq::Lender<'a, BvGraphSeq<F>>
60
    where
61
        Self: 'a;
62
    type IntoIterator<'a>
63
        = split::seq::IntoIterator<'a, BvGraphSeq<F>>
64
    where
65
        Self: 'a;
66

67
    fn split_iter(&self, how_many: usize) -> Self::IntoIterator<'_> {
15✔
68
        split::seq::Iter::new(self.iter(), self.num_nodes(), how_many)
90✔
69
    }
70
}
71

72
impl<F: SequentialDecoderFactory> SequentialLabeling for BvGraphSeq<F> {
73
    type Label = usize;
74
    type Lender<'a>
75
        = Iter<F::Decoder<'a>>
76
    where
77
        Self: 'a;
78

79
    /// Returns the number of nodes in the graph.
80
    #[inline(always)]
81
    fn num_nodes(&self) -> usize {
622,122✔
82
        self.number_of_nodes
622,122✔
83
    }
84

85
    #[inline(always)]
86
    fn num_arcs_hint(&self) -> Option<u64> {
16✔
87
        self.number_of_arcs
16✔
88
    }
89

90
    #[inline(always)]
91
    fn iter_from(&self, from: usize) -> Self::Lender<'_> {
629,258✔
92
        let mut iter = Iter::new(
93
            self.factory.new_decoder().unwrap(),
1,887,774✔
94
            self.number_of_nodes,
629,258✔
95
            self.compression_window,
629,258✔
96
            self.min_interval_length,
629,258✔
97
        );
98

99
        let _ = iter.advance_by(from);
1,258,516✔
100

101
        iter
629,258✔
102
    }
103
}
104

105
impl<F: SequentialDecoderFactory> SequentialGraph for BvGraphSeq<F> {}
106

107
/// Convenience implementation that makes it possible to iterate
108
/// over the graph using the [`for_`] macro
109
/// (see the [crate documentation](crate)).
110
impl<'a, F: SequentialDecoderFactory> IntoLender for &'a BvGraphSeq<F> {
111
    type Lender = <BvGraphSeq<F> as SequentialLabeling>::Lender<'a>;
112

113
    #[inline(always)]
114
    fn into_lender(self) -> Self::Lender {
×
115
        self.iter()
×
116
    }
117
}
118

119
impl<F: SequentialDecoderFactory> BvGraphSeq<F> {
120
    /// Creates a new sequential graph from a codes reader builder
121
    /// and the number of nodes.
122
    pub fn new(
629,296✔
123
        codes_reader_builder: F,
124
        number_of_nodes: usize,
125
        number_of_arcs: Option<u64>,
126
        compression_window: usize,
127
        min_interval_length: usize,
128
    ) -> Self {
129
        Self {
130
            factory: codes_reader_builder,
131
            number_of_nodes,
132
            number_of_arcs,
133
            compression_window,
134
            min_interval_length,
135
        }
136
    }
137

138
    #[inline(always)]
139
    pub fn map_factory<F1, F0>(self, map_func: F0) -> BvGraphSeq<F1>
×
140
    where
141
        F0: FnOnce(F) -> F1,
142
        F1: SequentialDecoderFactory,
143
    {
144
        BvGraphSeq {
145
            factory: map_func(self.factory),
×
146
            number_of_nodes: self.number_of_nodes,
×
147
            number_of_arcs: self.number_of_arcs,
×
148
            compression_window: self.compression_window,
×
149
            min_interval_length: self.min_interval_length,
×
150
        }
151
    }
152

153
    /// Consumes self and returns the factory.
154
    #[inline(always)]
155
    pub fn into_inner(self) -> F {
×
156
        self.factory
×
157
    }
158
}
159

160
impl<F: SequentialDecoderFactory> BvGraphSeq<F> {
161
    /// Creates an iterator specialized in the degrees of the nodes.
162
    /// This is slightly faster because it can avoid decoding some of
163
    /// the nodes and completely skip the merging step.
164
    #[inline(always)]
165
    pub fn offset_deg_iter(&self) -> OffsetDegIter<F::Decoder<'_>> {
255,798✔
166
        OffsetDegIter::new(
167
            self.factory.new_decoder().unwrap(),
767,394✔
168
            self.number_of_nodes,
255,798✔
169
            self.compression_window,
255,798✔
170
            self.min_interval_length,
255,798✔
171
        )
172
    }
173
}
174

175
/// A fast sequential iterator over the nodes of the graph and their successors.
176
/// This iterator does not require to know the offsets of each node in the graph.
177
#[derive(Debug, Clone)]
178
pub struct Iter<D: Decode> {
179
    pub(crate) number_of_nodes: usize,
180
    pub(crate) compression_window: usize,
181
    pub(crate) min_interval_length: usize,
182
    pub(crate) decoder: D,
183
    pub(crate) backrefs: CircularBuffer<Vec<usize>>,
184
    pub(crate) current_node: usize,
185
}
186

187
impl<D: Decode + BitSeek> Iter<D> {
188
    /// Forwards the call of `get_pos` to the inner `codes_reader`.
189
    /// This returns the current bits offset in the bitstream.
190
    #[inline(always)]
191
    pub fn bit_pos(&mut self) -> Result<u64, <D as BitSeek>::Error> {
1,175,880✔
192
        self.decoder.bit_pos()
2,351,760✔
193
    }
194
}
195

196
impl<D: Decode> Iter<D> {
197
    /// Creates a new iterator from a codes reader
198
    pub fn new(
1,002,524✔
199
        decoder: D,
200
        number_of_nodes: usize,
201
        compression_window: usize,
202
        min_interval_length: usize,
203
    ) -> Self {
204
        Self {
205
            number_of_nodes,
206
            compression_window,
207
            min_interval_length,
208
            decoder,
209
            backrefs: CircularBuffer::new(compression_window + 1),
1,002,524✔
210
            current_node: 0,
211
        }
212
    }
213

214
    /// Get the successors of the next node in the stream
215
    pub fn next_successors(&mut self) -> Result<&[usize]> {
×
216
        let mut res = self.backrefs.take(self.current_node);
×
217
        res.clear();
×
218
        self.get_successors_iter_priv(self.current_node, &mut res)?;
×
219
        let res = self.backrefs.replace(self.current_node, res);
×
220
        self.current_node += 1;
×
221
        Ok(res)
×
222
    }
223

224
    /// Inner method called by `next_successors` and the iterator `next` method
225
    fn get_successors_iter_priv(&mut self, node_id: usize, results: &mut Vec<usize>) -> Result<()> {
233,372,221✔
226
        let degree = self.decoder.read_outdegree() as usize;
466,744,442✔
227
        // no edges, we are done!
228
        if degree == 0 {
233,372,221✔
229
            return Ok(());
29,155,453✔
230
        }
231

232
        // ensure that we have enough capacity in the vector for not reallocating
233
        results.reserve(degree.saturating_sub(results.capacity()));
1,225,300,608✔
234
        // read the reference offset
235
        let ref_delta = if self.compression_window != 0 {
408,433,536✔
236
            self.decoder.read_reference_offset() as usize
177,848,266✔
237
        } else {
238
            0
26,368,502✔
239
        };
240
        // if we copy nodes from a previous one
241
        if ref_delta != 0 {
204,216,768✔
242
            // compute the node id of the reference
243
            let reference_node_id = node_id - ref_delta;
157,688,210✔
244
            // retrieve the data
245
            let successors = &self.backrefs[reference_node_id];
157,688,210✔
246
            //debug_assert!(!neighbours.is_empty());
247
            // get the info on which destinations to copy
248
            let number_of_blocks = self.decoder.read_block_count() as usize;
157,688,210✔
249
            // no blocks, we copy everything
250
            if number_of_blocks == 0 {
105,902,400✔
251
                results.extend_from_slice(successors);
54,116,590✔
252
            } else {
253
                // otherwise we copy only the blocks of even index
254
                // the first block could be zero
255
                let mut idx = self.decoder.read_block() as usize;
103,571,620✔
256
                results.extend_from_slice(&successors[..idx]);
155,357,430✔
257

258
                // while the other can't
259
                for block_id in 1..number_of_blocks {
135,390,407✔
260
                    let block = self.decoder.read_block() as usize;
167,209,194✔
261
                    let end = idx + block + 1;
167,209,194✔
262
                    if block_id % 2 == 0 {
110,128,341✔
263
                        results.extend_from_slice(&successors[idx..end]);
106,094,976✔
264
                    }
265
                    idx = end;
83,604,597✔
266
                }
267
                if number_of_blocks & 1 == 0 {
82,342,919✔
268
                    results.extend_from_slice(&successors[idx..]);
91,671,327✔
269
                }
270
            }
271
        };
272

273
        // if we still have to read nodes
274
        let nodes_left_to_decode = degree - results.len();
612,650,304✔
275
        if nodes_left_to_decode != 0 && self.min_interval_length != 0 {
383,336,433✔
276
            // read the number of intervals
277
            let number_of_intervals = self.decoder.read_interval_count() as usize;
289,281,262✔
278
            if number_of_intervals != 0 {
144,640,631✔
279
                // pre-allocate with capacity for efficiency
280
                let node_id_offset = self.decoder.read_interval_start().to_int();
143,111,132✔
281
                let mut start = (node_id as i64 + node_id_offset) as usize;
71,555,566✔
282
                let mut delta = self.decoder.read_interval_len() as usize;
71,555,566✔
283
                delta += self.min_interval_length;
35,777,783✔
284
                // save the first interval
285
                results.extend(start..(start + delta));
143,111,132✔
286
                start += delta;
35,777,783✔
287
                // decode the intervals
288
                for _ in 1..number_of_intervals {
57,187,608✔
289
                    start += 1 + self.decoder.read_interval_start() as usize;
42,819,650✔
290
                    delta = self.decoder.read_interval_len() as usize;
42,819,650✔
291
                    delta += self.min_interval_length;
42,819,650✔
292

293
                    results.extend(start..(start + delta));
85,639,300✔
294

295
                    start += delta;
21,409,825✔
296
                }
297
            }
298
        }
299

300
        // decode the extra nodes if needed
301
        let nodes_left_to_decode = degree - results.len();
612,650,304✔
302
        self.decoder.num_of_residuals(nodes_left_to_decode);
612,650,304✔
303
        if nodes_left_to_decode != 0 {
204,216,768✔
304
            // pre-allocate with capacity for efficiency
305
            let node_id_offset = self.decoder.read_first_residual().to_int();
708,869,232✔
306
            let mut extra = (node_id as i64 + node_id_offset) as usize;
354,434,616✔
307
            results.push(extra);
531,651,924✔
308
            // decode the successive extra nodes
309
            for _ in 1..nodes_left_to_decode {
1,170,848,664✔
310
                extra += 1 + self.decoder.read_residual() as usize;
1,987,262,712✔
311
                results.push(extra);
1,987,262,712✔
312
            }
313
        }
314

315
        results.sort();
204,216,768✔
316
        Ok(())
204,216,768✔
317
    }
318
}
319

320
impl<'succ, D: Decode> NodeLabelsLender<'succ> for Iter<D> {
321
    type Label = usize;
322
    type IntoIterator = crate::traits::labels::AssumeSortedIterator<
323
        std::iter::Copied<std::slice::Iter<'succ, Self::Label>>,
324
    >;
325
}
326

327
impl<'succ, D: Decode> Lending<'succ> for Iter<D> {
328
    type Lend = (usize, <Self as NodeLabelsLender<'succ>>::IntoIterator);
329
}
330

331
impl<D: Decode> Lender for Iter<D> {
332
    check_covariance!();
333

334
    fn next(&mut self) -> Option<Lend<'_, Self>> {
233,372,643✔
335
        if self.current_node >= self.number_of_nodes {
233,372,643✔
336
            return None;
422✔
337
        }
338
        let mut res = self.backrefs.take(self.current_node);
933,488,884✔
339
        res.clear();
466,744,442✔
340
        self.get_successors_iter_priv(self.current_node, &mut res)
933,488,884✔
341
            .unwrap();
342

343
        let res = self.backrefs.replace(self.current_node, res);
1,166,861,105✔
344
        let node_id = self.current_node;
466,744,442✔
345
        self.current_node += 1;
233,372,221✔
346
        Some((node_id, unsafe {
466,744,442✔
347
            crate::traits::labels::AssumeSortedIterator::new(res.iter().copied())
466,744,442✔
348
        }))
349
    }
350

351
    fn size_hint(&self) -> (usize, Option<usize>) {
3,581,266✔
352
        let len = self.len();
10,743,798✔
353
        (len, Some(len))
3,581,266✔
354
    }
355
}
356

357
unsafe impl<D: Decode> SortedLender for Iter<D> {}
358

359
impl<D: Decode> ExactSizeLender for Iter<D> {
360
    #[inline(always)]
361
    fn len(&self) -> usize {
3,581,266✔
362
        self.number_of_nodes - self.current_node
3,581,266✔
363
    }
364
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc