• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vigna / webgraph-rs / 19278972970

11 Nov 2025 09:25PM UTC coverage: 62.316% (+14.3%) from 48.052%
19278972970

push

github

zommiommy
BatchCodec print stats and encoding time

40 of 53 new or added lines in 4 files covered. (75.47%)

814 existing lines in 46 files now uncovered.

5252 of 8428 relevant lines covered (62.32%)

29580067.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

80.0
/webgraph/src/utils/batch_codec/mod.rs
1
/*
2
 * SPDX-FileCopyrightText: 2025 Tommaso Fontana
3
 *
4
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
5
 */
6

7
//! Traits and implementations to encode and decode batches of sorted triples
8
//! to/from disk.
9
//!
10
//! The traits and implementations in this module are used to customize the
11
//! encoding of batches of sorted triples to/from disk. They are used by
12
//! [`SortPairs`](crate::utils::sort_pairs::SortPairs) and other utilities built
13
//! on that (e.g.,
14
//! [`ParSortPairs`](crate::utils::par_sort_pairs::ParSortPairs)).
15
//!
16
//! They usually do not need to be accessed or modified by the end users, albeit
17
//! in some specific cases where performance or on-disk occupation is critical
18
//! they can be customized.
19

20
use anyhow::Result;
21

22
use super::ArcMmapHelper;
23
use core::fmt::Display;
24
use dsi_bitstream::prelude::*;
25
use rdst::*;
26
use std::fs::File;
27
use std::io::BufWriter;
28
use std::path::Path;
29

30
pub mod gaps;
31
pub mod grouped_gaps;
32

33
/// The recommended default batch codec for unlabelled batches.
34
pub type DefaultBatchCodec = grouped_gaps::GroupedGapsCodec;
35

36
pub type BitWriter<E> = BufBitWriter<E, WordAdapter<usize, BufWriter<File>>>;
37
pub type BitReader<E> = BufBitReader<E, MemWordReader<u32, ArcMmapHelper<u32>>>;
38

39
/// A trait for encoding and decoding batches of sorted triples.
40
pub trait BatchCodec: Send + Sync {
41
    /// The label type of the triples to encode and decode.
42
    /// While the bounds are not really necessary, in all the practical cases
43
    /// we need them.
44
    type Label: Copy + Send + Sync + 'static;
45
    //// The type returned by `decode_batch`, the iterator of which yields the
46
    //// decoded triples in sorted order.
47
    ///
48
    /// The type `IntoIter` has to be `Send + Sync + Clone` because most often we want
49
    /// to use them in [`SortPairs`](crate::utils::sort_pairs::SortPairs) and
50
    /// then in [`ArcListGraph`](crate::graphs::arc_list_graph::ArcListGraph)
51
    /// which require them.
52
    type DecodedBatch: IntoIterator<
53
        Item = ((usize, usize), Self::Label),
54
        IntoIter: Send + Sync + Clone,
55
    >;
56

57
    /// A type representing statistics about the encoded batch.
58
    /// This type has to implement `Display` so that we can log it.
59
    type EncodedBatchStats: Display;
60

61
    /// Given a batch of sorted triples, encodes them to disk and returns the
62
    /// number of bits written.
63
    ///
64
    /// Note that the input batch must be already sorted. Use
65
    /// [`encode_batch`](Self::encode_batch) otherwise.
66
    fn encode_sorted_batch(
67
        &self,
68
        path: impl AsRef<Path>,
69
        batch: &[((usize, usize), Self::Label)],
70
    ) -> Result<(usize, Self::EncodedBatchStats)>;
71

72
    /// Given a batch of triples, sort them, encodes them to disk, and returns
73
    /// the number of bits written.
74
    fn encode_batch(
75
        &self,
76
        path: impl AsRef<Path>,
77
        batch: &mut [((usize, usize), Self::Label)],
78
    ) -> Result<(usize, Self::EncodedBatchStats)>;
79

80
    /// Decodes a batch of triples from disk.
81
    ///
82
    /// The returned type's iterator yields the serialized triples in sorted order.
83
    fn decode_batch(&self, path: impl AsRef<Path>) -> Result<Self::DecodedBatch>;
84
}
85

86
/// Convenience alias to extract the iterator type of the decoded batch from a
87
/// [`BatchCodec`].
88
pub type CodecIter<C> = <<C as BatchCodec>::DecodedBatch as IntoIterator>::IntoIter;
89

90
/// An arc expressed as a pair of nodes and the associated label.
91
///
92
/// Equality and order are defined only (lexicographically) on the pair of
93
/// nodes.
94
///
95
/// Since we use this to sort a batch of `(usize, usize, L)` triples, in order
96
/// to safely transmute between the two types, Triple has to be
97
/// `repr(transparent)` of the same tuple type.
98
///
99
/// We use this to implement `RadixKey` for sorting batches of triples
100
/// using the [`rdst`](https://crates.io/crates/rdst) crate.
101
#[derive(Clone, Copy, Debug)]
102
#[repr(transparent)]
103
pub struct Triple<L>(((usize, usize), L));
104

105
impl<L> Triple<L> {
106
    /// slice of `Triple<L>`.
107
    ///
108
    /// The conversion is safe because `Triple` is `repr(transparent)` of the
109
    /// same tuple type.
110
    pub fn cast_batch(batch: &[((usize, usize), L)]) -> &[Triple<L>] {
524✔
111
        // SAFETY: `Triple` is `repr(transparent)` of the same tuple type.
112
        unsafe { std::mem::transmute(batch) }
1,048✔
113
    }
114

115
    /// Converts a mutable reference to a slice of `((usize, usize), L)` triples
116
    /// into a mutable reference to a slice of `Triple<L>`.
117
    ///
118
    /// The conversion is safe because `Triple` is `repr(transparent)` of the
119
    /// same tuple type.
120
    pub fn cast_batch_mut(batch: &mut [((usize, usize), L)]) -> &mut [Triple<L>] {
524✔
121
        // SAFETY: `Triple` is `repr(transparent)` of the same tuple type.
122
        unsafe { std::mem::transmute(batch) }
524✔
123
    }
124
}
125

126
impl<L> RadixKey for Triple<L> {
127
    const LEVELS: usize = 16;
128

129
    fn get_level(&self, level: usize) -> u8 {
843,975,504✔
130
        (if level < 8 {
843,975,504✔
131
            self.0 .0 .1 >> ((level % 8) * 8)
419,373,716✔
132
        } else {
133
            self.0 .0 .0 >> ((level % 8) * 8)
424,601,788✔
UNCOV
134
        }) as u8
×
135
    }
136
}
137

138
impl<L> PartialEq for Triple<L> {
UNCOV
139
    fn eq(&self, other: &Self) -> bool {
×
140
        self.0 .0 == other.0 .0
×
141
    }
142
}
143

144
impl<L> Eq for Triple<L> {}
145

146
impl<L> PartialOrd for Triple<L> {
147
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
38,585,551✔
148
        Some(self.cmp(other))
77,171,102✔
149
    }
150
}
151

152
impl<L> Ord for Triple<L> {
153
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
38,585,551✔
154
        self.0 .0.cmp(&other.0 .0)
115,756,653✔
155
    }
156
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc