vigna / webgraph-rs, build 19278972970 (push, via GitHub)
11 Nov 2025 09:25PM UTC coverage: 62.316% (+14.3%) from 48.052%
zommiommy: BatchCodec print stats and encoding time

40 of 53 new or added lines in 4 files covered (75.47%).
814 existing lines in 46 files now uncovered.
5252 of 8428 relevant lines covered (62.32%).
29580067.92 hits per line.

Source file: /webgraph/src/utils/mod.rs (57.01% covered)

/*
 * SPDX-FileCopyrightText: 2023 Inria
 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
 *
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
 */

//! Miscellaneous utilities.

use rand::Rng;
use std::path::PathBuf;

/// Creates a new directory with a random 16-character hexadecimal name inside
/// the given base directory.
pub fn temp_dir<P: AsRef<std::path::Path>>(base: P) -> anyhow::Result<PathBuf> {
    let mut base = base.as_ref().to_owned();
    const ALPHABET: &[u8] = b"0123456789abcdef";
    let mut rnd = rand::rng();
    let mut random_str = String::new();
    loop {
        random_str.clear();
        for _ in 0..16 {
            let idx = rnd.random_range(0..ALPHABET.len());
            random_str.push(ALPHABET[idx] as char);
        }
        base.push(&random_str);

        if !base.exists() {
            std::fs::create_dir(&base)?;
            return Ok(base);
        }
        base.pop();
    }
}
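
// Hedged usage sketch (added for illustration, not in the original source):
// creates a fresh directory under the system temporary directory and removes
// it again; the base path is arbitrary.
#[cfg(test)]
#[test]
fn temp_dir_usage_sketch() -> anyhow::Result<()> {
    let dir = temp_dir(std::env::temp_dir())?;
    assert!(dir.exists());
    std::fs::remove_dir(&dir)?;
    Ok(())
}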

mod batch_codec;
pub use batch_codec::*;

mod circular_buffer;
pub(crate) use circular_buffer::*;

mod mmap_helper;
pub use mmap_helper::*;

mod java_perm;
pub use java_perm::*;

mod granularity;
pub use granularity::*;

pub mod sort_pairs;
pub use sort_pairs::SortPairs;

pub mod par_sort_pairs;
pub use par_sort_pairs::ParSortPairs;

pub mod par_sort_iters;
pub use par_sort_iters::ParSortIters;

use crate::graphs::{
    arc_list_graph::Iter,
    bvgraph::{Decode, Encode},
};

/// A decoder that re-encodes every value it reads using the given encoder.
/// This is commonly used to change the codes of a graph on the fly, without
/// fully decoding and re-encoding it.
pub struct Converter<D: Decode, E: Encode> {
    pub decoder: D,
    pub encoder: E,
    pub offset: usize,
}

impl<D: Decode, E: Encode> Decode for Converter<D, E> {
    // TODO: implement start_node/end_node correctly
    #[inline(always)]
    fn read_outdegree(&mut self) -> u64 {
        let res = self.decoder.read_outdegree();
        self.offset += self.encoder.write_outdegree(res).unwrap();
        res
    }
    #[inline(always)]
    fn read_reference_offset(&mut self) -> u64 {
        let res = self.decoder.read_reference_offset();
        self.offset += self.encoder.write_reference_offset(res).unwrap();
        res
    }
    #[inline(always)]
    fn read_block_count(&mut self) -> u64 {
        let res = self.decoder.read_block_count();
        self.offset += self.encoder.write_block_count(res).unwrap();
        res
    }
    #[inline(always)]
    fn read_block(&mut self) -> u64 {
        let res = self.decoder.read_block();
        self.offset += self.encoder.write_block(res).unwrap();
        res
    }
    #[inline(always)]
    fn read_interval_count(&mut self) -> u64 {
        let res = self.decoder.read_interval_count();
        self.offset += self.encoder.write_interval_count(res).unwrap();
        res
    }
    #[inline(always)]
    fn read_interval_start(&mut self) -> u64 {
        let res = self.decoder.read_interval_start();
        self.offset += self.encoder.write_interval_start(res).unwrap();
        res
    }
    #[inline(always)]
    fn read_interval_len(&mut self) -> u64 {
        let res = self.decoder.read_interval_len();
        self.offset += self.encoder.write_interval_len(res).unwrap();
        res
    }
    #[inline(always)]
    fn read_first_residual(&mut self) -> u64 {
        let res = self.decoder.read_first_residual();
        self.offset += self.encoder.write_first_residual(res).unwrap();
        res
    }
    #[inline(always)]
    fn read_residual(&mut self) -> u64 {
        let res = self.decoder.read_residual();
        self.offset += self.encoder.write_residual(res).unwrap();
        res
    }
}
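
// Hedged sketch (added for illustration, not in the original source): reading a
// value through a `Converter` forwards the read to the decoder, immediately
// re-writes the value through the encoder, and accumulates the written length
// in `offset`. `D` and `E` can be any `Decode`/`Encode` implementations.
#[allow(dead_code)]
fn transcode_outdegree_sketch<D: Decode, E: Encode>(decoder: D, encoder: E) -> (u64, usize) {
    let mut converter = Converter {
        decoder,
        encoder,
        offset: 0,
    };
    let outdegree = converter.read_outdegree();
    (outdegree, converter.offset)
}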

/// An enum expressing the memory requirements for batched algorithms
/// such as [`SortPairs`], [`ParSortPairs`], and [`ParSortIters`].
///
/// This type implements [`Mul`](core::ops::Mul) and [`Div`](core::ops::Div) to
/// scale the memory usage requirements by a given factor, independently of the
/// variant.
#[derive(Clone, Copy, Debug)]
pub enum MemoryUsage {
    /// The target overall memory usage in bytes.
    ///
    /// This is the more user-friendly option. The actual number of elements
    /// can be computed using [`batch_size`](MemoryUsage::batch_size).
    MemorySize(usize),
    /// The number of elements used in all batches.
    ///
    /// This is a more low-level option that gives more control to the user, but
    /// the actual memory usage will depend on the size of labels (if any).
    BatchSize(usize),
}

/// Default implementation, returning half of the physical RAM.
impl Default for MemoryUsage {
    fn default() -> Self {
        Self::from_perc(50.0)
    }
}

impl core::fmt::Display for MemoryUsage {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            MemoryUsage::MemorySize(size) => write!(f, "{} bytes", size),
            MemoryUsage::BatchSize(size) => write!(f, "{} elements", size),
        }
    }
}

impl core::ops::Mul<usize> for MemoryUsage {
    type Output = MemoryUsage;

    fn mul(self, rhs: usize) -> Self::Output {
        match self {
            MemoryUsage::MemorySize(size) => MemoryUsage::MemorySize(size * rhs),
            MemoryUsage::BatchSize(size) => MemoryUsage::BatchSize(size * rhs),
        }
    }
}

impl core::ops::Div<usize> for MemoryUsage {
    type Output = MemoryUsage;

    fn div(self, rhs: usize) -> Self::Output {
        match self {
            MemoryUsage::MemorySize(size) => MemoryUsage::MemorySize(size / rhs),
            MemoryUsage::BatchSize(size) => MemoryUsage::BatchSize(size / rhs),
        }
    }
}

impl MemoryUsage {
    /// Creates a new memory usage expressed as a percentage of the
    /// physical RAM.
    pub fn from_perc(perc: f64) -> Self {
        let system = sysinfo::System::new_with_specifics(
            sysinfo::RefreshKind::nothing()
                .with_memory(sysinfo::MemoryRefreshKind::nothing().with_ram()),
        );
        MemoryUsage::MemorySize(
            usize::try_from((system.total_memory() as f64 * perc / 100.0) as u64)
                .expect("System memory overflows usize"),
        )
    }

    /// Returns the batch size for elements of type `T`.
    ///
    /// If the [memory usage is expressed as a number of
    /// bytes](MemoryUsage::MemorySize), this method divides the number of bytes
    /// by the size of `T` to obtain the number of elements. Otherwise, [it just
    /// returns the specified batch size](MemoryUsage::BatchSize).
    pub fn batch_size<T>(&self) -> usize {
        match &self {
            MemoryUsage::MemorySize(memory_size) => {
                let elem_size = std::mem::size_of::<T>();
                assert!(elem_size > 0, "Element size cannot be zero");
                memory_size / elem_size
            }
            MemoryUsage::BatchSize(batch_size) => *batch_size,
        }
    }
}
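
// Hedged example (added for illustration, not in the original source): with
// `MemorySize` the batch size is the byte budget divided by the element size,
// while `BatchSize` is passed through unchanged; `Div` scales the budget, e.g.
// to split it among worker threads.
#[cfg(test)]
#[test]
fn memory_usage_sketch() {
    let budget = MemoryUsage::MemorySize(1 << 30); // 1 GiB
    assert_eq!(
        budget.batch_size::<(usize, usize)>(),
        (1 << 30) / std::mem::size_of::<(usize, usize)>()
    );
    // Splitting the budget among 4 threads divides the batch size accordingly.
    assert_eq!(
        (budget / 4).batch_size::<(usize, usize)>(),
        (1 << 30) / 4 / std::mem::size_of::<(usize, usize)>()
    );
    assert_eq!(MemoryUsage::BatchSize(1000).batch_size::<u64>(), 1000);
}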

/// Returns a human-readable representation of a large number using SI prefixes.
pub fn humanize(value: f64) -> String {
    const UNITS: &[&str] = &["", "K", "M", "G", "T", "P", "E"];
    let mut v = value;
    let mut unit: usize = 0;
    while v >= 1000.0 && unit + 1 < UNITS.len() {
        v /= 1000.0;
        unit += 1;
    }
    if unit == 0 {
        format!("{:.0}{}", v, UNITS[unit])
    } else {
        format!("{:.3}{}", v, UNITS[unit])
    }
}
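
// Hedged example (added for illustration, not in the original source): values
// below 1000 are printed without decimals, larger values are scaled by powers
// of 1000 and printed with three decimals and the matching SI prefix.
#[cfg(test)]
#[test]
fn humanize_sketch() {
    assert_eq!(humanize(999.0), "999");
    assert_eq!(humanize(1234.0), "1.234K");
    assert_eq!(humanize(1_500_000.0), "1.500M");
}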

/// A structure holding partition iterators and their boundaries.
///
/// This type holds a list of iterators and a list of boundaries, one more
/// than the number of iterators. The implied semantics is that each iterator
/// returns (labelled) pairs of nodes, and that the first element of
/// each pair sits between the boundaries associated with the iterator.
///
/// This structure is returned by [`ParSortPairs`] and [`ParSortIters`] and can
/// easily be converted into lenders for use with
/// [`BvComp::parallel_iter`](crate::graphs::bvgraph::BvComp::parallel_iter)
/// using a convenient implementation of the [`From`] trait.
///
/// Note that it is sufficient to write `let lenders: Vec<_> = split_iters.into()`
/// to perform the conversion. Type inference might not work properly if the
/// call is embedded in a larger expression, in which case an explicit type
/// annotation might be necessary.
pub struct SplitIters<I> {
    pub boundaries: Box<[usize]>,
    pub iters: Box<[I]>,
}

impl<I> SplitIters<I> {
    pub fn new(boundaries: Box<[usize]>, iters: Box<[I]>) -> Self {
        Self { boundaries, iters }
    }
}

impl<I> From<(Box<[usize]>, Box<[I]>)> for SplitIters<I> {
    fn from((boundaries, iters): (Box<[usize]>, Box<[I]>)) -> Self {
        Self::new(boundaries, iters)
    }
}

/// Conversion of a [`SplitIters`] of iterators on unlabeled pairs into a
/// sequence of pairs of starting points and associated lenders.
///
/// This is useful for converting the output of sorting utilities like
/// [`ParSortPairs`] or [`ParSortIters`] into a form suitable for
/// [`BvComp::parallel_iter`](crate::graphs::bvgraph::BvComp::parallel_iter)
/// when working with unlabeled graphs.
///
/// The pairs `(src, dst)` are automatically converted to labeled form with unit
/// labels, and the resulting lenders are wrapped with
/// [`LeftIterator`](crate::labels::proj::LeftIterator) to project out just the
/// successor nodes.
///
/// Note that it is sufficient to write `let lenders: Vec<_> = split_iters.into()`
/// to perform the conversion. Type inference might not work properly if the
/// call is embedded in a larger expression, in which case an explicit type
/// annotation might be necessary.
impl<
        I: Iterator<Item = (usize, usize)> + Send + Sync,
        IT: IntoIterator<Item = (usize, usize), IntoIter = I>,
    > From<SplitIters<IT>>
    for Vec<
        crate::labels::proj::LeftIterator<
            Iter<(), std::iter::Map<I, fn((usize, usize)) -> ((usize, usize), ())>>,
        >,
    >
{
    fn from(split: SplitIters<IT>) -> Self {
        Box::into_iter(split.iters)
            .enumerate()
            .map(|(i, iter)| {
                let start_node = split.boundaries[i];
                let end_node = split.boundaries[i + 1];
                let num_partition_nodes = end_node - start_node;
                // Map pairs to labeled pairs with unit labels
                let map_fn: fn((usize, usize)) -> ((usize, usize), ()) = |pair| (pair, ());
                let labeled_iter = iter.into_iter().map(map_fn);
                let lender = Iter::try_new_from(num_partition_nodes, labeled_iter, start_node)
                    .expect("Iterator should start from the expected first node");
                // Wrap with LeftIterator to project out just the successor
                crate::labels::proj::LeftIterator(lender)
            })
            .collect()
    }
}
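
// Hedged sketch (added for illustration, not in the original source): converts a
// `SplitIters` over unlabeled pairs into lenders. The boundaries and pairs are
// illustrative; each iterator is assumed to start at its partition's first node,
// as the conversion expects.
#[cfg(test)]
#[test]
fn split_iters_unlabeled_sketch() {
    // Two partitions: nodes [0, 2) and [2, 4).
    let boundaries: Box<[usize]> = Box::new([0, 2, 4]);
    let iters: Box<[Vec<(usize, usize)>]> = Box::new([
        vec![(0, 1), (1, 0)],
        vec![(2, 3), (3, 2)],
    ]);
    let split = SplitIters::new(boundaries, iters);
    // One lender per partition, suitable for BvComp::parallel_iter.
    let lenders: Vec<_> = split.into();
    assert_eq!(lenders.len(), 2);
}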

/// Conversion of a [`SplitIters`] of iterators on labelled pairs into a
/// sequence of pairs of starting points and associated lenders.
///
/// This is useful for converting the output of sorting utilities like
/// [`ParSortPairs`] or [`ParSortIters`] into a form suitable for
/// [`BvComp::parallel_iter`](crate::graphs::bvgraph::BvComp::parallel_iter).
///
/// Note that it is sufficient to write `let lenders: Vec<_> = split_iters.into()`
/// to perform the conversion. Type inference might not work properly if the
/// call is embedded in a larger expression, in which case an explicit type
/// annotation might be necessary.
impl<
        L: Clone + Copy + 'static,
        I: Iterator<Item = ((usize, usize), L)> + Send + Sync,
        IT: IntoIterator<Item = ((usize, usize), L), IntoIter = I>,
    > From<SplitIters<IT>> for Vec<Iter<L, I>>
{
    fn from(split: SplitIters<IT>) -> Self {
        Box::into_iter(split.iters)
            .enumerate()
            .map(|(i, iter)| {
                let start_node = split.boundaries[i];
                let end_node = split.boundaries[i + 1];
                let num_partition_nodes = end_node - start_node;

                Iter::try_new_from(num_partition_nodes, iter.into_iter(), start_node)
                    .expect("Iterator should start from the expected first node")
            })
            .collect()
    }
}
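
// Hedged sketch (added for illustration, not in the original source): the same
// conversion for labelled pairs, here with `u32` labels. The boundaries and
// pairs are illustrative; each iterator is assumed to start at its partition's
// first node, as the conversion expects.
#[cfg(test)]
#[test]
fn split_iters_labelled_sketch() {
    let boundaries: Box<[usize]> = Box::new([0, 2, 4]);
    let iters: Box<[Vec<((usize, usize), u32)>]> = Box::new([
        vec![((0, 1), 7), ((1, 0), 8)],
        vec![((2, 3), 9), ((3, 2), 10)],
    ]);
    let split = SplitIters::new(boundaries, iters);
    // One `Iter` lender per partition, keeping the labels.
    let lenders: Vec<_> = split.into();
    assert_eq!(lenders.len(), 2);
}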