vigna / webgraph-rs / 18573498435

16 Oct 2025 08:11PM UTC coverage: 48.064% (+0.04%) from 48.029%

push · github · vigna
Docs consistency fix; minimal README.md for CLI and algo

8 of 12 new or added lines in 3 files covered. (66.67%)

433 existing lines in 14 files now uncovered.

3972 of 8264 relevant lines covered (48.06%)

22128303.67 hits per line

Source File: /webgraph/src/utils/mod.rs (file coverage: 39.42%)

/*
 * SPDX-FileCopyrightText: 2023 Inria
 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
 *
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
 */

//! Miscellaneous utilities.

use rand::Rng;
use std::path::PathBuf;

/// Creates a new random directory inside the given base directory.
pub fn temp_dir<P: AsRef<std::path::Path>>(base: P) -> anyhow::Result<PathBuf> {
    let mut base = base.as_ref().to_owned();
    const ALPHABET: &[u8] = b"0123456789abcdef";
    let mut rnd = rand::rng();
    let mut random_str = String::new();
    loop {
        random_str.clear();
        for _ in 0..16 {
            let idx = rnd.random_range(0..ALPHABET.len());
            random_str.push(ALPHABET[idx] as char);
        }
        base.push(&random_str);

        if !base.exists() {
            std::fs::create_dir(&base)?;
            return Ok(base);
        }
        base.pop();
    }
}

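// A minimal usage sketch (editor's addition, not part of the source): create a
// scratch directory with `temp_dir` under the system temporary directory. The
// helper only creates the directory; removing it is up to the caller. The
// function name below is hypothetical.
fn temp_dir_example() -> anyhow::Result<()> {
    let scratch = temp_dir(std::env::temp_dir())?;
    assert!(scratch.is_dir());
    std::fs::remove_dir(&scratch)?;
    Ok(())
}
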
mod circular_buffer;
pub(crate) use circular_buffer::*;

mod mmap_helper;
pub use mmap_helper::*;

mod java_perm;
pub use java_perm::*;

mod granularity;
pub use granularity::*;

pub mod sort_pairs;
pub use sort_pairs::SortPairs;

pub mod par_sort_pairs;
pub use par_sort_pairs::ParSortPairs;

pub mod par_sort_graph;
pub use par_sort_graph::ParSortGraph;

use crate::graphs::{
    arc_list_graph::Iter,
    bvgraph::{Decode, Encode},
};

/// A decoder that re-encodes every value it reads using the given encoder.
/// This is commonly used to change the codes of a graph on the fly, without
/// fully decoding and re-encoding it.
pub struct Converter<D: Decode, E: Encode> {
    pub decoder: D,
    pub encoder: E,
    pub offset: usize,
}

impl<D: Decode, E: Encode> Decode for Converter<D, E> {
    // TODO: implement start_node/end_node correctly
    #[inline(always)]
    fn read_outdegree(&mut self) -> u64 {
        let res = self.decoder.read_outdegree();
        self.offset += self.encoder.write_outdegree(res).unwrap();
        res
    }
    #[inline(always)]
    fn read_reference_offset(&mut self) -> u64 {
        let res = self.decoder.read_reference_offset();
        self.offset += self.encoder.write_reference_offset(res).unwrap();
        res
    }
    #[inline(always)]
    fn read_block_count(&mut self) -> u64 {
        let res = self.decoder.read_block_count();
        self.offset += self.encoder.write_block_count(res).unwrap();
        res
    }
    #[inline(always)]
    fn read_block(&mut self) -> u64 {
        let res = self.decoder.read_block();
        self.offset += self.encoder.write_block(res).unwrap();
        res
    }
    #[inline(always)]
    fn read_interval_count(&mut self) -> u64 {
        let res = self.decoder.read_interval_count();
        self.offset += self.encoder.write_interval_count(res).unwrap();
        res
    }
    #[inline(always)]
    fn read_interval_start(&mut self) -> u64 {
        let res = self.decoder.read_interval_start();
        self.offset += self.encoder.write_interval_start(res).unwrap();
        res
    }
    #[inline(always)]
    fn read_interval_len(&mut self) -> u64 {
        let res = self.decoder.read_interval_len();
        self.offset += self.encoder.write_interval_len(res).unwrap();
        res
    }
    #[inline(always)]
    fn read_first_residual(&mut self) -> u64 {
        let res = self.decoder.read_first_residual();
        self.offset += self.encoder.write_first_residual(res).unwrap();
        res
    }
    #[inline(always)]
    fn read_residual(&mut self) -> u64 {
        let res = self.decoder.read_residual();
        self.offset += self.encoder.write_residual(res).unwrap();
        res
    }
}

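// A hypothetical sketch (editor's addition, not part of the source): assuming
// `dec` is some `Decode` implementation reading a graph with its current codes
// and `enc` is an `Encode` implementation using the desired new codes, the
// converter re-emits every value as it is read, accumulating in `offset` the
// number of bits written so far. `dec` and `enc` are placeholders, not crate
// items.
//
//     let mut conv = Converter { decoder: dec, encoder: enc, offset: 0 };
//     let d = conv.read_outdegree(); // read with the old codes...
//     // ...and simultaneously written with the new ones; `conv.offset` grows.
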
/// Utility macro to create [`thread_pools`](`rayon::ThreadPool`).
///
/// There are two forms of this macro:
/// * Create a [`ThreadPool`](rayon::ThreadPool) with the default settings:
/// ```
/// # use webgraph::thread_pool;
/// let t: rayon::ThreadPool = thread_pool![];
/// ```
/// * Create a [`ThreadPool`](rayon::ThreadPool) with a given number of threads:
/// ```
/// # use webgraph::thread_pool;
/// let t: rayon::ThreadPool = thread_pool![7];
/// assert_eq!(t.current_num_threads(), 7);
/// ```
#[macro_export]
macro_rules! thread_pool {
    () => {
        rayon::ThreadPoolBuilder::new()
            .build()
            .expect("Cannot build a ThreadPool with default parameters")
    };
    ($num_threads:expr) => {
        rayon::ThreadPoolBuilder::new()
            .num_threads($num_threads)
            .build()
            .unwrap_or_else(|_| {
                panic!(
                    "Cannot build a ThreadPool with default parameters and {} threads",
                    $num_threads,
                )
            })
    };
}

/// An enum expressing the memory requirements for batched algorithms
/// such as [`SortPairs`] and [`ParSortPairs`].
///
/// This type implements [`Mul`](core::ops::Mul) and [`Div`](core::ops::Div) to
/// scale the memory usage requirements by a given factor, independently of the
/// variant.
#[derive(Clone, Copy, Debug)]
pub enum MemoryUsage {
    /// The target overall memory usage in bytes.
    ///
    /// This is the more user-friendly option. The actual number of elements
    /// can be computed using [`batch_size`](MemoryUsage::batch_size).
    MemorySize(usize),
    /// The number of elements used in all batches.
    ///
    /// This is a lower-level option that gives more control to the user, but
    /// the actual memory usage will depend on the size of labels (if any).
    BatchSize(usize),
}

/// Default implementation, returning half of the physical RAM.
impl Default for MemoryUsage {
    fn default() -> Self {
        Self::from_perc(50.0)
    }
}

impl core::fmt::Display for MemoryUsage {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            MemoryUsage::MemorySize(size) => write!(f, "{} bytes", size),
            MemoryUsage::BatchSize(size) => write!(f, "{} elements", size),
        }
    }
}

impl core::ops::Mul<usize> for MemoryUsage {
    type Output = MemoryUsage;

    fn mul(self, rhs: usize) -> Self::Output {
        match self {
            MemoryUsage::MemorySize(size) => MemoryUsage::MemorySize(size * rhs),
            MemoryUsage::BatchSize(size) => MemoryUsage::BatchSize(size * rhs),
        }
    }
}

impl core::ops::Div<usize> for MemoryUsage {
    type Output = MemoryUsage;

    fn div(self, rhs: usize) -> Self::Output {
        match self {
            MemoryUsage::MemorySize(size) => MemoryUsage::MemorySize(size / rhs),
            MemoryUsage::BatchSize(size) => MemoryUsage::BatchSize(size / rhs),
        }
    }
}

impl MemoryUsage {
    /// Creates a new memory usage expressed as a percentage of the
    /// physical RAM.
    pub fn from_perc(perc: f64) -> Self {
        let system = sysinfo::System::new_with_specifics(
            sysinfo::RefreshKind::nothing()
                .with_memory(sysinfo::MemoryRefreshKind::nothing().with_ram()),
        );
        MemoryUsage::MemorySize(
            usize::try_from((system.total_memory() as f64 * perc / 100.0) as u64)
                .expect("System memory overflows usize"),
        )
    }

    /// Returns the batch size for elements of type `T`.
    ///
    /// If the [memory usage is expressed as a number of
    /// bytes](MemoryUsage::MemorySize), this method divides the number of bytes
    /// by the size of `T` to obtain the number of elements. Otherwise, [it just
    /// returns the specified batch size](MemoryUsage::BatchSize).
    pub fn batch_size<T>(&self) -> usize {
        match &self {
            MemoryUsage::MemorySize(memory_size) => {
                let elem_size = std::mem::size_of::<T>();
                assert!(elem_size > 0, "Element size cannot be zero");
                memory_size / elem_size
            }
            MemoryUsage::BatchSize(batch_size) => *batch_size,
        }
    }
}

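// A minimal sketch (editor's addition, not part of the source): turning a
// memory budget into a batch size. The figures assume a 64-bit target, where
// `(usize, usize)` occupies 16 bytes; the function name is hypothetical.
fn memory_usage_example() {
    let budget = MemoryUsage::MemorySize(1 << 30); // 1 GiB overall
    assert_eq!(budget.batch_size::<(usize, usize)>(), (1 << 30) / 16);

    // `Div` scales the budget, e.g. to split it evenly among four threads.
    let per_thread = budget / 4;
    assert_eq!(per_thread.batch_size::<(usize, usize)>(), (1 << 28) / 16);

    // A `BatchSize` is returned verbatim, whatever the element type.
    assert_eq!(MemoryUsage::BatchSize(1_000).batch_size::<u64>(), 1_000);
}
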
/// A structure holding partition iterators and their boundaries.
///
/// This type holds a list of iterators and a list of boundaries, one more
/// than the number of iterators. The implied semantics is that each iterator
/// returns (labelled) pairs of nodes, and that the first element of
/// each pair sits between the boundaries associated with the iterator.
///
/// This structure is returned by [`ParSortPairs`] and [`ParSortGraph`] and can
/// easily be converted into lenders for use with
/// [`BvComp::parallel_iter`](crate::graphs::bvgraph::BvComp::parallel_iter)
/// using a convenient implementation of the [`From`] trait.
pub struct SplitIters<I> {
    pub boundaries: Box<[usize]>,
    pub iters: Box<[I]>,
}

impl<I> SplitIters<I> {
    pub fn new(boundaries: Box<[usize]>, iters: Box<[I]>) -> Self {
        Self { boundaries, iters }
    }
}

impl<I> From<(Box<[usize]>, Box<[I]>)> for SplitIters<I> {
    fn from((boundaries, iters): (Box<[usize]>, Box<[I]>)) -> Self {
        Self::new(boundaries, iters)
    }
}

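// A minimal sketch (editor's addition, not part of the source): two partitions
// of a four-node graph. Boundaries [0, 2, 4] mean the first iterator yields
// pairs whose source lies in 0..2 and the second pairs whose source lies in
// 2..4; the arc data and function name are made up for illustration.
fn split_iters_example() {
    let boundaries: Box<[usize]> = vec![0, 2, 4].into_boxed_slice();
    let iters: Box<[std::vec::IntoIter<(usize, usize)>]> = vec![
        vec![(0, 1), (1, 0)].into_iter(),
        vec![(2, 3), (3, 2)].into_iter(),
    ]
    .into_boxed_slice();

    let split = SplitIters::new(boundaries, iters);
    // One boundary more than iterators, delimiting each partition.
    assert_eq!(split.boundaries.len(), split.iters.len() + 1);
}
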
/// Conversion of a [`SplitIters`] of iterators on unlabeled pairs into a
/// sequence of pairs of starting points and associated lenders.
///
/// This is useful for converting the output of sorting utilities like
/// [`ParSortPairs`] or [`ParSortGraph`] into a form suitable for
/// [`BvComp::parallel_iter`](crate::graphs::bvgraph::BvComp::parallel_iter)
/// when working with unlabeled graphs.
///
/// The pairs `(src, dst)` are automatically converted to labeled form with unit
/// labels, and the resulting lenders are wrapped with
/// [`LeftIterator`](crate::labels::proj::LeftIterator) to project out just the
/// successor nodes.
impl<
        I: Iterator<Item = (usize, usize)> + Send + Sync,
        IT: IntoIterator<Item = (usize, usize), IntoIter = I>,
    > From<SplitIters<IT>>
    for Vec<(
        usize,
        crate::labels::proj::LeftIterator<
            Iter<(), std::iter::Map<I, fn((usize, usize)) -> (usize, usize, ())>>,
        >,
    )>
{
    fn from(split: SplitIters<IT>) -> Self {
        split
            .iters
            .into_vec()
            .into_iter()
            .enumerate()
            .map(|(i, iter)| {
                let start_node = split.boundaries[i];
                let end_node = split.boundaries[i + 1];
                let num_partition_nodes = end_node - start_node;
                // Map pairs to triples with unit labels
                let map_fn: fn((usize, usize)) -> (usize, usize, ()) = |(src, dst)| (src, dst, ());
                let labeled_iter = iter.into_iter().map(map_fn);
                let lender = Iter::try_new_from(num_partition_nodes, labeled_iter, start_node)
                    .expect("Iterator should start from the expected first node");
                // Wrap with LeftIterator to project out just the successor
                (start_node, crate::labels::proj::LeftIterator(lender))
            })
            .collect()
    }
}

/// Conversion of a [`SplitIters`] of iterators on labelled pairs into a
/// sequence of pairs of starting points and associated lenders.
///
/// This is useful for converting the output of sorting utilities like
/// [`ParSortPairs`] or [`ParSortGraph`] into a form suitable for
/// [`BvComp::parallel_iter`](crate::graphs::bvgraph::BvComp::parallel_iter).
impl<
        L: Clone + Copy + 'static,
        I: Iterator<Item = (usize, usize, L)> + Send + Sync,
        IT: IntoIterator<Item = (usize, usize, L), IntoIter = I>,
    > From<SplitIters<IT>> for Vec<(usize, Iter<L, I>)>
{
    fn from(split: SplitIters<IT>) -> Self {
        split
            .iters
            .into_vec()
            .into_iter()
            .enumerate()
            .map(|(i, iter)| {
                let start_node = split.boundaries[i];
                let end_node = split.boundaries[i + 1];
                let num_partition_nodes = end_node - start_node;
                let lender = Iter::try_new_from(num_partition_nodes, iter.into_iter(), start_node)
                    .expect("Iterator should start from the expected first node");
                (start_node, lender)
            })
            .collect()
    }
}
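
// A hypothetical sketch (editor's addition, not part of the source): using the
// labelled `From` impl above to turn a `SplitIters` into per-partition lenders.
// The boundaries, arcs and `f64` labels are made up; real inputs would come
// from `ParSortPairs` or `ParSortGraph`, already sorted by source within each
// partition so that every iterator starts at its partition's first node.
fn split_iters_into_lenders_example() {
    let arcs0: Vec<(usize, usize, f64)> = vec![(0, 1, 1.0), (1, 0, 2.0)];
    let arcs1: Vec<(usize, usize, f64)> = vec![(2, 3, 3.0), (3, 2, 4.0)];
    let split = SplitIters::new(
        vec![0, 2, 4].into_boxed_slice(),
        vec![arcs0.into_iter(), arcs1.into_iter()].into_boxed_slice(),
    );
    // Each entry pairs a partition's starting node with a lender over its arcs.
    let lenders: Vec<(usize, Iter<f64, _>)> = split.into();
    assert_eq!(lenders.len(), 2);
    assert_eq!(lenders[0].0, 0);
    assert_eq!(lenders[1].0, 2);
}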