• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vigna / webgraph-rs / 22074206784

16 Feb 2026 06:46PM UTC coverage: 72.192% (-0.1%) from 72.324%
22074206784

push

github

vigna
Better visit tests

6049 of 8379 relevant lines covered (72.19%)

49396607.95 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.4
/webgraph/src/utils/mod.rs
1
/*
2
 * SPDX-FileCopyrightText: 2023 Inria
3
 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
4
 *
5
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6
 */
7

8
//! Miscellaneous utilities.
9

10
use rand::RngExt;
11
use std::path::PathBuf;
12

13
/// Creates a new random dir inside the given folder
14
pub fn temp_dir<P: AsRef<std::path::Path>>(base: P) -> anyhow::Result<PathBuf> {
1✔
15
    let mut base = base.as_ref().to_owned();
3✔
16
    const ALPHABET: &[u8] = b"0123456789abcdef";
17
    let mut rnd = rand::rng();
2✔
18
    let mut random_str = String::new();
2✔
19
    loop {
×
20
        random_str.clear();
2✔
21
        for _ in 0..16 {
17✔
22
            let idx = rnd.random_range(0..ALPHABET.len());
80✔
23
            random_str.push(ALPHABET[idx] as char);
48✔
24
        }
25
        base.push(&random_str);
3✔
26

27
        if !base.exists() {
1✔
28
            std::fs::create_dir(&base)?;
2✔
29
            return Ok(base);
1✔
30
        }
31
        base.pop();
×
32
    }
33
}
34

35
mod batch_codec;
36
pub use batch_codec::*;
37

38
mod circular_buffer;
39
pub(crate) use circular_buffer::*;
40

41
mod ragged_array;
42
pub use ragged_array::RaggedArray;
43

44
mod mmap_helper;
45
pub use mmap_helper::*;
46

47
mod java_perm;
48
pub use java_perm::*;
49

50
mod granularity;
51
pub use granularity::*;
52

53
pub mod matrix;
54
pub use matrix::Matrix;
55

56
pub mod sort_pairs;
57
pub use sort_pairs::SortPairs;
58

59
pub mod par_sort_pairs;
60
pub use par_sort_pairs::ParSortPairs;
61

62
pub mod par_sort_iters;
63
pub use par_sort_iters::ParSortIters;
64

65
use crate::graphs::{
66
    arc_list_graph::NodeLabels,
67
    bvgraph::{Decode, Encode},
68
};
69

70
/// A decoder that encodes the read values using the given encoder.
71
/// This is commonly used to change the codes of a graph without decoding and
72
/// re-encoding it but by changing the codes.
73
pub struct Converter<D: Decode, E: Encode> {
74
    pub decoder: D,
75
    pub encoder: E,
76
    pub offset: usize,
77
}
78

79
impl<D: Decode, E: Encode> Decode for Converter<D, E> {
80
    // TODO: implement correctly start_node/end_node
81
    #[inline(always)]
82
    fn read_outdegree(&mut self) -> u64 {
1✔
83
        let res = self.decoder.read_outdegree();
3✔
84
        self.offset += self.encoder.write_outdegree(res).unwrap();
3✔
85
        res
1✔
86
    }
87
    #[inline(always)]
88
    fn read_reference_offset(&mut self) -> u64 {
1✔
89
        let res = self.decoder.read_reference_offset();
3✔
90
        self.offset += self.encoder.write_reference_offset(res).unwrap();
3✔
91
        res
1✔
92
    }
93
    #[inline(always)]
94
    fn read_block_count(&mut self) -> u64 {
1✔
95
        let res = self.decoder.read_block_count();
3✔
96
        self.offset += self.encoder.write_block_count(res).unwrap();
3✔
97
        res
1✔
98
    }
99
    #[inline(always)]
100
    fn read_block(&mut self) -> u64 {
1✔
101
        let res = self.decoder.read_block();
3✔
102
        self.offset += self.encoder.write_block(res).unwrap();
3✔
103
        res
1✔
104
    }
105
    #[inline(always)]
106
    fn read_interval_count(&mut self) -> u64 {
1✔
107
        let res = self.decoder.read_interval_count();
3✔
108
        self.offset += self.encoder.write_interval_count(res).unwrap();
3✔
109
        res
1✔
110
    }
111
    #[inline(always)]
112
    fn read_interval_start(&mut self) -> u64 {
1✔
113
        let res = self.decoder.read_interval_start();
3✔
114
        self.offset += self.encoder.write_interval_start(res).unwrap();
3✔
115
        res
1✔
116
    }
117
    #[inline(always)]
118
    fn read_interval_len(&mut self) -> u64 {
1✔
119
        let res = self.decoder.read_interval_len();
3✔
120
        self.offset += self.encoder.write_interval_len(res).unwrap();
3✔
121
        res
1✔
122
    }
123
    #[inline(always)]
124
    fn read_first_residual(&mut self) -> u64 {
1✔
125
        let res = self.decoder.read_first_residual();
3✔
126
        self.offset += self.encoder.write_first_residual(res).unwrap();
3✔
127
        res
1✔
128
    }
129
    #[inline(always)]
130
    fn read_residual(&mut self) -> u64 {
1✔
131
        let res = self.decoder.read_residual();
3✔
132
        self.offset += self.encoder.write_residual(res).unwrap();
3✔
133
        res
1✔
134
    }
135
    #[inline(always)]
136
    fn num_of_residuals(&mut self, num_of_residuals: usize) {
1✔
137
        self.encoder.num_of_residuals(num_of_residuals);
3✔
138
    }
139
}
140

141
/// An enum expressing the memory requirements for batched algorithms
142
/// such as [`SortPairs`], [`ParSortPairs`], and [`ParSortIters`].
143
///
144
/// This type implements [`Mul`](core::ops::Mul) and [`Div`](core::ops::Div) to
145
/// scale the memory usage requirements by a given factor, independently of the
146
/// variant.
147
#[derive(Clone, Copy, Debug)]
148
pub enum MemoryUsage {
149
    /// The target overall memory usage in bytes.
150
    ///
151
    /// This is the more user-friendly option. The actual number of elements
152
    /// can be computed using [`batch_size`](MemoryUsage::batch_size).
153
    MemorySize(usize),
154
    /// The number of elements used in all batches.
155
    ///
156
    /// This is a more low-level option that gives more control to the user, but
157
    /// the actual memory usage will depend on the size of labels (if any).
158
    BatchSize(usize),
159
}
160

161
/// Default implementation, returning half of the physical RAM.
162
impl Default for MemoryUsage {
163
    fn default() -> Self {
76✔
164
        Self::from_perc(50.0)
76✔
165
    }
166
}
167

168
impl core::fmt::Display for MemoryUsage {
169
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
4✔
170
        match self {
4✔
171
            MemoryUsage::MemorySize(size) => write!(f, "{} bytes", size),
12✔
172
            MemoryUsage::BatchSize(size) => write!(f, "{} elements", size),
×
173
        }
174
    }
175
}
176

177
impl core::ops::Mul<usize> for MemoryUsage {
178
    type Output = MemoryUsage;
179

180
    fn mul(self, rhs: usize) -> Self::Output {
8✔
181
        match self {
8✔
182
            MemoryUsage::MemorySize(size) => MemoryUsage::MemorySize(size * rhs),
8✔
183
            MemoryUsage::BatchSize(size) => MemoryUsage::BatchSize(size * rhs),
8✔
184
        }
185
    }
186
}
187

188
impl core::ops::Div<usize> for MemoryUsage {
189
    type Output = MemoryUsage;
190

191
    fn div(self, rhs: usize) -> Self::Output {
40✔
192
        match self {
40✔
193
            MemoryUsage::MemorySize(size) => MemoryUsage::MemorySize(size / rhs),
40✔
194
            MemoryUsage::BatchSize(size) => MemoryUsage::BatchSize(size / rhs),
40✔
195
        }
196
    }
197
}
198

199
impl MemoryUsage {
200
    /// Creates a new memory usage expressed as a percentage of the
201
    /// physical RAM.
202
    pub fn from_perc(perc: f64) -> Self {
88✔
203
        let system = sysinfo::System::new_with_specifics(
204
            sysinfo::RefreshKind::nothing()
88✔
205
                .with_memory(sysinfo::MemoryRefreshKind::nothing().with_ram()),
264✔
206
        );
207
        MemoryUsage::MemorySize(
208
            usize::try_from((system.total_memory() as f64 * perc / 100.0) as u64)
264✔
209
                .expect("System memory overflows usize"),
88✔
210
        )
211
    }
212

213
    /// Returns the batch size for elements of type `T`.
214
    ///
215
    /// If the [memory usage is expressed as a number of
216
    /// bytes](MemoryUsage::MemorySize), this method divides the number of bytes
217
    /// by the size of `T` to obtain the number of elements. Otherwise, [it just
218
    /// returns specified batch size](MemoryUsage::BatchSize).
219
    pub fn batch_size<T>(&self) -> usize {
188✔
220
        match &self {
188✔
221
            MemoryUsage::MemorySize(memory_size) => {
46✔
222
                let elem_size = std::mem::size_of::<T>();
92✔
223
                assert!(elem_size > 0, "Element size cannot be zero");
92✔
224
                memory_size / elem_size
46✔
225
            }
226
            MemoryUsage::BatchSize(batch_size) => *batch_size,
284✔
227
        }
228
    }
229
}
230

231
/// Writes a human-readable representation of a large number using SI prefixes units.
232
pub fn humanize(value: f64) -> String {
188✔
233
    const UNITS: &[&str] = &["", "K", "M", "G", "T", "P", "E"];
234
    let mut v = value;
376✔
235
    let mut unit: usize = 0;
564✔
236
    while v >= 1000.0 && unit + 1 < UNITS.len() {
796✔
237
        v /= 1000.0;
152✔
238
        unit += 1;
152✔
239
    }
240
    if unit == 0 {
188✔
241
        format!("{:.0}{}", v, UNITS[unit])
144✔
242
    } else {
243
        format!("{:.3}{}", v, UNITS[unit])
232✔
244
    }
245
}
246

247
/// A structure holding partition iterators and their boundaries.
248
///
249
/// This type holds a list of iterators and a list of boundaries, one more
250
/// than the number of iterators. The implied semantics is that each iterator
251
/// returns (labelled) pairs of nodes, and that the first element of
252
/// each pair sits between the boundaries associated with the iterator.
253
///
254
/// This structures is returned by [`ParSortPairs`] and [`ParSortIters`] and can
255
/// easily be converted into lenders for use with
256
/// [`BvCompConfig::par_comp_lenders`](crate::graphs::bvgraph::BvCompConfig::par_comp_lenders)
257
/// using a convenient implementation of the [`From`] trait.
258
///
259
/// Note that it sufficient to write `let lenders: Vec<_> = split_iters.into()`
260
/// to perform the conversion. Type inference might not work properly if the
261
/// call is embedded in a larger expression, in which case an explicit type
262
/// annotation might be necessary.
263
pub struct SplitIters<I> {
264
    pub boundaries: Box<[usize]>,
265
    pub iters: Box<[I]>,
266
}
267

268
impl<I> SplitIters<I> {
269
    pub fn new(boundaries: Box<[usize]>, iters: Box<[I]>) -> Self {
23✔
270
        Self { boundaries, iters }
271
    }
272
}
273

274
impl<I> From<(Box<[usize]>, Box<[I]>)> for SplitIters<I> {
275
    fn from((boundaries, iters): (Box<[usize]>, Box<[I]>)) -> Self {
1✔
276
        Self::new(boundaries, iters)
3✔
277
    }
278
}
279

280
/// Conversion of a [`SplitIters`] of iterators on unlabeled pairs into a
281
/// sequence of pairs of starting points and associated lenders.
282
///
283
/// This is useful for converting the output of sorting utilities like
284
/// [`ParSortPairs`] or [`ParSortIters`] into a form suitable for
285
/// [`BvCompConfig::par_comp_lenders`](crate::graphs::bvgraph::BvCompConfig::par_comp_lenders)
286
/// when working with unlabeled graphs.
287
///
288
/// The pairs `(src, dst)` are automatically converted to labeled form with unit
289
/// labels, and the resulting lenders are wrapped with
290
/// [`LeftIterator`](crate::labels::proj::LeftIterator) to project out just the
291
/// successor nodes.
292
///
293
/// Note that it sufficient to write `let lenders: Vec<_> = split_iters.into()`
294
/// to perform the conversion. Type inference might not work properly if the
295
/// call is embedded in a larger expression, in which case an explicit type
296
/// annotation might be necessary.
297
impl<
298
    I: Iterator<Item = (usize, usize)> + Send + Sync,
299
    IT: IntoIterator<Item = (usize, usize), IntoIter = I>,
300
> From<SplitIters<IT>>
301
    for Vec<
302
        crate::labels::proj::LeftIterator<
303
            NodeLabels<(), std::iter::Map<I, fn((usize, usize)) -> ((usize, usize), ())>>,
304
        >,
305
    >
306
{
307
    fn from(split: SplitIters<IT>) -> Self {
3✔
308
        Box::into_iter(split.iters)
6✔
309
            .enumerate()
310
            .map(|(i, iter)| {
13✔
311
                let start_node = split.boundaries[i];
20✔
312
                let end_node = split.boundaries[i + 1];
20✔
313
                let num_partition_nodes = end_node - start_node;
20✔
314
                // Map pairs to labeled pairs with unit labels
315
                #[allow(clippy::type_complexity)]
×
316
                let map_fn: fn((usize, usize)) -> ((usize, usize), ()) = |pair| (pair, ());
6,432,346✔
317
                let labeled_iter = iter.into_iter().map(map_fn);
50✔
318
                let lender =
10✔
319
                    NodeLabels::try_new_from(num_partition_nodes, labeled_iter, start_node)
40✔
320
                        .expect("Iterator should start from the expected first node");
20✔
321
                // Wrap with LeftIterator to project out just the successor
322
                crate::labels::proj::LeftIterator(lender)
10✔
323
            })
324
            .collect()
325
    }
326
}
327

328
/// Conversion of a [`SplitIters`] of iterators on labelled pairs into a
329
/// sequences of pairs of starting points and associated lenders.
330
///
331
/// This is useful for converting the output of sorting utilities like
332
/// [`ParSortPairs`] or [`ParSortIters`] into a form suitable for
333
/// [`BvCompConfig::par_comp_lenders`](crate::graphs::bvgraph::BvCompConfig::par_comp_lenders).
334
///
335
/// Note that it sufficient to write `let lenders: Vec<_> = split_iters.into()`
336
/// to perform the conversion. Type inference might not work properly if the
337
/// call is embedded in a larger expression, in which case an explicit type
338
/// annotation might be necessary.
339
impl<
340
    L: Clone + Copy + 'static,
341
    I: Iterator<Item = ((usize, usize), L)> + Send + Sync,
342
    IT: IntoIterator<Item = ((usize, usize), L), IntoIter = I>,
343
> From<SplitIters<IT>> for Vec<NodeLabels<L, I>>
344
{
345
    fn from(split: SplitIters<IT>) -> Self {
2✔
346
        Box::into_iter(split.iters)
4✔
347
            .enumerate()
348
            .map(|(i, iter)| {
7✔
349
                let start_node = split.boundaries[i];
10✔
350
                let end_node = split.boundaries[i + 1];
10✔
351
                let num_partition_nodes = end_node - start_node;
10✔
352

353
                NodeLabels::try_new_from(num_partition_nodes, iter.into_iter(), start_node)
25✔
354
                    .expect("Iterator should start from the expected first node")
10✔
355
            })
356
            .collect()
357
    }
358
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc