• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vigna / webgraph-rs / 23446544835

23 Mar 2026 03:52PM UTC coverage: 68.05% (+0.05%) from 68.004%
23446544835

push

github

vigna
Fixed for new sux

6675 of 9809 relevant lines covered (68.05%)

47930325.82 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.05
/webgraph/src/graphs/bvgraph/sequential.rs
1
/*
2
 * SPDX-FileCopyrightText: 2023 Inria
3
 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
4
 *
5
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6
 */
7

8
use std::path::PathBuf;
9

10
use super::*;
11
use crate::utils::CircularBuffer;
12
use anyhow::Result;
13
use bitflags::Flags;
14
use dsi_bitstream::codes::ToInt;
15
use dsi_bitstream::traits::BE;
16
use dsi_bitstream::traits::BitSeek;
17
use lender::*;
18

19
/// A sequential graph in the BV format.
20
///
21
/// The graph depends on a [`SequentialDecoderFactory`] that can be used to
22
/// create decoders for the graph. This makes it possible to decouple the graph from the
23
/// underlying storage format, and to use different storage formats for the same
24
/// graph. For example, one can use a memory-mapped file for the graph, or load
25
/// it in memory, or even generate it on the fly.
26
///
27
/// Note that the knowledge of the codes used by the graph is in the factory,
28
/// which provides methods to read each component of the BV format (outdegree,
29
/// reference offset, block count, etc.), whereas the knowledge of the
30
/// compression parameters (compression window and minimum interval length) is
31
/// in this structure.
32
#[derive(Debug, Clone)]
33
pub struct BvGraphSeq<F> {
34
    factory: F,
35
    number_of_nodes: usize,
36
    number_of_arcs: Option<u64>,
37
    compression_window: usize,
38
    min_interval_length: usize,
39
}
40

41
impl BvGraphSeq<()> {
42
    /// Returns a load configuration that can be customized.
43
    pub fn with_basename(
249,112✔
44
        basename: impl AsRef<std::path::Path>,
45
    ) -> LoadConfig<BE, Sequential, Dynamic, Mmap, Mmap> {
46
        LoadConfig {
47
            basename: PathBuf::from(basename.as_ref()),
996,448✔
48
            graph_load_flags: Flags::empty(),
498,224✔
49
            offsets_load_flags: Flags::empty(),
249,112✔
50
            _marker: std::marker::PhantomData,
51
        }
52
    }
53
}
54

55
impl<F: SequentialDecoderFactory> SplitLabeling for BvGraphSeq<F>
56
where
57
    for<'a> <F as SequentialDecoderFactory>::Decoder<'a>: Clone + Send + Sync,
58
{
59
    type SplitLender<'a>
60
        = split::seq::Lender<'a, BvGraphSeq<F>>
61
    where
62
        Self: 'a;
63
    type IntoIterator<'a>
64
        = split::seq::IntoIterator<'a, BvGraphSeq<F>>
65
    where
66
        Self: 'a;
67

68
    fn split_iter_at(&self, cutpoints: impl IntoIterator<Item = usize>) -> Self::IntoIterator<'_> {
25✔
69
        split::seq::Iter::new(self.iter(), cutpoints)
100✔
70
    }
71
}
72

73
impl<F: SequentialDecoderFactory> SequentialLabeling for BvGraphSeq<F> {
74
    type Label = usize;
75
    type Lender<'a>
76
        = NodeLabels<F::Decoder<'a>>
77
    where
78
        Self: 'a;
79

80
    /// Returns the number of nodes in the graph.
81
    #[inline(always)]
82
    fn num_nodes(&self) -> usize {
746,554✔
83
        self.number_of_nodes
746,554✔
84
    }
85

86
    #[inline(always)]
87
    fn num_arcs_hint(&self) -> Option<u64> {
21✔
88
        self.number_of_arcs
21✔
89
    }
90

91
    #[inline(always)]
92
    fn iter_from(&self, from: usize) -> Self::Lender<'_> {
755,646✔
93
        let mut iter = NodeLabels::new(
94
            self.factory.new_decoder().unwrap(),
2,266,938✔
95
            self.number_of_nodes,
755,646✔
96
            self.compression_window,
755,646✔
97
            self.min_interval_length,
755,646✔
98
        );
99

100
        debug_assert!(
755,646✔
101
            from <= self.number_of_nodes,
755,646✔
102
            "from ({from}) is greater than the number of nodes ({})",
×
103
            self.number_of_nodes
×
104
        );
105
        // advance_by returns Err only if the lender is exhausted before
106
        // reaching `from`, which is expected when `from == num_nodes`.
107
        let _ = iter.advance_by(from);
1,511,292✔
108

109
        iter
755,646✔
110
    }
111

112
    fn build_dcf(&self) -> DCF {
3✔
113
        let n = self.num_nodes();
9✔
114
        let num_arcs = self
6✔
115
            .num_arcs_hint()
116
            .expect("build_dcf requires num_arcs_hint()");
117
        let mut efb = sux::dict::EliasFanoBuilder::new(n + 1, num_arcs);
12✔
118
        efb.push(0);
6✔
119
        let mut cumul_deg = 0u64;
6✔
120
        let mut iter = OffsetDegIter::new(
121
            self.factory.new_decoder().unwrap(),
9✔
122
            n,
3✔
123
            self.compression_window,
3✔
124
            self.min_interval_length,
3✔
125
        );
126
        for _ in 0..n {
325,576✔
127
            cumul_deg += iter.next_degree().unwrap() as u64;
976,719✔
128
            efb.push(cumul_deg);
651,146✔
129
        }
130
        // SAFETY: the cumulative degrees are pushed in non-decreasing order.
131
        unsafe {
132
            efb.build().map_high_bits(|high_bits| {
12✔
133
                sux::rank_sel::SelectZeroAdaptConst::<
3✔
134
                    _,
3✔
135
                    _,
3✔
136
                    LOG2_ONES_PER_INVENTORY,
3✔
137
                    LOG2_WORDS_PER_SUBINVENTORY,
3✔
138
                >::new(sux::rank_sel::SelectAdaptConst::<
6✔
139
                    _,
3✔
140
                    _,
3✔
141
                    LOG2_ONES_PER_INVENTORY,
3✔
142
                    LOG2_WORDS_PER_SUBINVENTORY,
3✔
143
                >::new(high_bits))
6✔
144
            })
145
        }
146
    }
147
}
148

149
impl<F: SequentialDecoderFactory> SequentialGraph for BvGraphSeq<F> {}
150

151
/// Convenience implementation that makes it possible to iterate
152
/// over the graph using the [`for_`] macro
153
/// (see the [crate documentation](crate)).
154
impl<'a, F: SequentialDecoderFactory> IntoLender for &'a BvGraphSeq<F> {
155
    type Lender = <BvGraphSeq<F> as SequentialLabeling>::Lender<'a>;
156

157
    #[inline(always)]
158
    fn into_lender(self) -> Self::Lender {
×
159
        self.iter()
×
160
    }
161
}
162

163
impl<F: SequentialDecoderFactory> BvGraphSeq<F> {
164
    /// Creates a new sequential graph from a codes reader builder
165
    /// and the number of nodes.
166
    pub const fn new(
755,542✔
167
        codes_reader_builder: F,
168
        number_of_nodes: usize,
169
        number_of_arcs: Option<u64>,
170
        compression_window: usize,
171
        min_interval_length: usize,
172
    ) -> Self {
173
        Self {
174
            factory: codes_reader_builder,
175
            number_of_nodes,
176
            number_of_arcs,
177
            compression_window,
178
            min_interval_length,
179
        }
180
    }
181

182
    #[inline(always)]
183
    pub fn map_factory<F1, F0>(self, map_func: F0) -> BvGraphSeq<F1>
×
184
    where
185
        F0: FnOnce(F) -> F1,
186
        F1: SequentialDecoderFactory,
187
    {
188
        BvGraphSeq {
189
            factory: map_func(self.factory),
×
190
            number_of_nodes: self.number_of_nodes,
×
191
            number_of_arcs: self.number_of_arcs,
×
192
            compression_window: self.compression_window,
×
193
            min_interval_length: self.min_interval_length,
×
194
        }
195
    }
196

197
    /// Consumes self and returns the factory.
198
    #[inline(always)]
199
    pub fn into_inner(self) -> F {
×
200
        self.factory
×
201
    }
202
}
203

204
impl<F: SequentialDecoderFactory> BvGraphSeq<F> {
205
    /// Returns a specialized iterator that iterates over the offsets and
206
    /// degrees of the nodes.
207
    ///
208
    /// This iterator is slightly faster than the lender returned by
209
    /// [`iter`](Self::iter) because it does not require to rebuild the
210
    /// successors of each node.
211
    #[inline(always)]
212
    pub fn offset_deg_iter(&self) -> OffsetDegIter<F::Decoder<'_>> {
257,536✔
213
        OffsetDegIter::new(
214
            self.factory.new_decoder().unwrap(),
772,608✔
215
            self.number_of_nodes,
257,536✔
216
            self.compression_window,
257,536✔
217
            self.min_interval_length,
257,536✔
218
        )
219
    }
220
}
221

222
/// A fast sequential iterator over the nodes of the graph and their successors.
223
/// This iterator does not require to know the offsets of each node in the graph.
224
#[derive(Debug, Clone)]
225
pub struct NodeLabels<D: Decode> {
226
    pub(crate) number_of_nodes: usize,
227
    pub(crate) compression_window: usize,
228
    pub(crate) min_interval_length: usize,
229
    pub(crate) decoder: D,
230
    pub(crate) backrefs: CircularBuffer<Vec<usize>>,
231
    pub(crate) current_node: usize,
232
}
233

234
impl<D: Decode + BitSeek> NodeLabels<D> {
235
    /// Forwards the call of `get_pos` to the inner `codes_reader`.
236
    /// This returns the current bits offset in the bitstream.
237
    #[inline(always)]
238
    pub fn bit_pos(&mut self) -> Result<u64, <D as BitSeek>::Error> {
1,469,850✔
239
        self.decoder.bit_pos()
2,939,700✔
240
    }
241
}
242

243
impl<D: Decode> NodeLabels<D> {
244
    /// Creates a new iterator from a codes reader
245
    pub fn new(
1,253,388✔
246
        decoder: D,
247
        number_of_nodes: usize,
248
        compression_window: usize,
249
        min_interval_length: usize,
250
    ) -> Self {
251
        Self {
252
            number_of_nodes,
253
            compression_window,
254
            min_interval_length,
255
            decoder,
256
            backrefs: CircularBuffer::new(compression_window + 1),
1,253,388✔
257
            current_node: 0,
258
        }
259
    }
260

261
    /// Gets the successors of the next node in the stream.
262
    pub fn next_successors(&mut self) -> Result<&[usize]> {
×
263
        let mut res = self.backrefs.take(self.current_node);
×
264
        self.get_successors_iter_priv(self.current_node, &mut res)?;
×
265
        let res = self.backrefs.replace(self.current_node, res);
×
266
        self.current_node += 1;
×
267
        Ok(res)
×
268
    }
269

270
    /// Inner method called by `next_successors` and the iterator `next` method
271
    fn get_successors_iter_priv(&mut self, node_id: usize, results: &mut Vec<usize>) -> Result<()> {
288,942,433✔
272
        let degree = self.decoder.read_outdegree() as usize;
577,884,866✔
273
        // no edges, we are done!
274
        if degree == 0 {
288,942,433✔
275
            return Ok(());
35,757,138✔
276
        }
277

278
        results.clear();
506,370,590✔
279
        // ensure that we have enough capacity in the vector for not reallocating
280
        results.reserve(degree);
759,555,885✔
281
        // read the reference offset
282
        let ref_delta = if self.compression_window != 0 {
506,370,590✔
283
            self.decoder.read_reference_offset() as usize
220,595,854✔
284
        } else {
285
            0
32,589,441✔
286
        };
287
        // if we copy nodes from a previous one
288
        if ref_delta != 0 {
253,185,295✔
289
            // compute the node id of the reference
290
            let reference_node_id = node_id - ref_delta;
194,870,926✔
291
            // retrieve the data
292
            let successors = &self.backrefs[reference_node_id];
194,870,926✔
293
            // get the info on which destinations to copy
294
            let number_of_blocks = self.decoder.read_block_count() as usize;
194,870,926✔
295
            // no blocks, we copy everything
296
            if number_of_blocks == 0 {
130,801,401✔
297
                results.extend_from_slice(successors);
66,731,876✔
298
            } else {
299
                // otherwise we copy only the blocks of even index
300
                // the first block could be zero
301
                let mut idx = self.decoder.read_block() as usize;
128,139,050✔
302
                results.extend_from_slice(&successors[..idx]);
192,208,575✔
303

304
                // while the other can't
305
                for block_id in 1..number_of_blocks {
167,439,303✔
306
                    let block = self.decoder.read_block() as usize;
206,739,556✔
307
                    let end = idx + block + 1;
206,739,556✔
308
                    if block_id % 2 == 0 {
136,193,016✔
309
                        results.extend_from_slice(&successors[idx..end]);
131,292,952✔
310
                    }
311
                    idx = end;
103,369,778✔
312
                }
313
                if number_of_blocks & 1 == 0 {
101,792,827✔
314
                    results.extend_from_slice(&successors[idx..]);
113,169,906✔
315
                }
316
            }
317
        };
318

319
        // if we still have to read nodes
320
        let nodes_left_to_decode = degree - results.len();
759,555,885✔
321
        if nodes_left_to_decode != 0 && self.min_interval_length != 0 {
475,429,370✔
322
            // read the number of intervals
323
            let number_of_intervals = self.decoder.read_interval_count() as usize;
358,940,956✔
324
            if number_of_intervals != 0 {
179,470,478✔
325
                // pre-allocate with capacity for efficiency
326
                let node_id_offset = self.decoder.read_interval_start().to_int();
177,185,500✔
327
                let mut start = (node_id as i64 + node_id_offset) as usize;
88,592,750✔
328
                let mut delta = self.decoder.read_interval_len() as usize;
88,592,750✔
329
                delta += self.min_interval_length;
44,296,375✔
330
                // save the first interval
331
                results.extend(start..(start + delta));
177,185,500✔
332
                start += delta;
44,296,375✔
333
                // decode the intervals
334
                for _ in 1..number_of_intervals {
70,726,457✔
335
                    start += 1 + self.decoder.read_interval_start() as usize;
52,860,164✔
336
                    delta = self.decoder.read_interval_len() as usize;
52,860,164✔
337
                    delta += self.min_interval_length;
52,860,164✔
338

339
                    results.extend(start..(start + delta));
105,720,328✔
340

341
                    start += delta;
26,430,082✔
342
                }
343
            }
344
        }
345

346
        // decode the extra nodes if needed
347
        let nodes_left_to_decode = degree - results.len();
759,555,885✔
348
        if nodes_left_to_decode != 0 {
253,185,295✔
349
            // pre-allocate with capacity for efficiency
350
            let node_id_offset = self.decoder.read_first_residual().to_int();
879,725,300✔
351
            let mut extra = (node_id as i64 + node_id_offset) as usize;
439,862,650✔
352
            results.push(extra);
659,793,975✔
353
            // decode the successive extra nodes
354
            for _ in 1..nodes_left_to_decode {
1,456,955,504✔
355
                extra += 1 + self.decoder.read_residual() as usize;
2,147,483,647✔
356
                results.push(extra);
2,147,483,647✔
357
            }
358
        }
359

360
        results.sort();
253,185,295✔
361
        Ok(())
253,185,295✔
362
    }
363
}
364

365
impl<'succ, D: Decode> NodeLabelsLender<'succ> for NodeLabels<D> {
366
    type Label = usize;
367
    type IntoIterator = crate::traits::labels::AssumeSortedIterator<
368
        std::iter::Copied<std::slice::Iter<'succ, Self::Label>>,
369
    >;
370
}
371

372
impl<'succ, D: Decode> Lending<'succ> for NodeLabels<D> {
373
    type Lend = (usize, <Self as NodeLabelsLender<'succ>>::IntoIterator);
374
}
375

376
impl<D: Decode> Lender for NodeLabels<D> {
377
    check_covariance!();
378

379
    fn next(&mut self) -> Option<Lend<'_, Self>> {
288,942,676✔
380
        if self.current_node >= self.number_of_nodes {
288,942,676✔
381
            return None;
243✔
382
        }
383
        let mut res = self.backrefs.take(self.current_node);
1,155,769,732✔
384
        res.clear();
577,884,866✔
385
        self.get_successors_iter_priv(self.current_node, &mut res)
1,155,769,732✔
386
            .unwrap();
387

388
        let res = self.backrefs.replace(self.current_node, res);
1,444,712,165✔
389
        let node_id = self.current_node;
577,884,866✔
390
        self.current_node += 1;
288,942,433✔
391
        Some((node_id, unsafe {
577,884,866✔
392
            crate::traits::labels::AssumeSortedIterator::new(res.iter().copied())
577,884,866✔
393
        }))
394
    }
395

396
    fn size_hint(&self) -> (usize, Option<usize>) {
3,581,241✔
397
        let len = self.len();
10,743,723✔
398
        (len, Some(len))
3,581,241✔
399
    }
400
}
401

402
// SAFETY: BvGraph successors are always returned in sorted order.
403
unsafe impl<D: Decode> SortedLender for NodeLabels<D> {}
404

405
impl<D: Decode> ExactSizeLender for NodeLabels<D> {
406
    #[inline(always)]
407
    fn len(&self) -> usize {
325,616✔
408
        self.number_of_nodes - self.current_node
325,616✔
409
    }
410
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc