• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vigna / webgraph-rs / 24635666654

19 Apr 2026 06:07PM UTC coverage: 66.542% (-0.4%) from 66.986%
24635666654

push

github

vigna
Refined methods

7 of 25 new or added lines in 5 files covered. (28.0%)

670 existing lines in 36 files now uncovered.

6752 of 10147 relevant lines covered (66.54%)

46688126.28 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

80.0
/webgraph/src/utils/batch_codec/mod.rs
1
/*
2
 * SPDX-FileCopyrightText: 2025 Tommaso Fontana
3
 * SPDX-FileCopyrightText: 2025 Sebastiano Vigna
4
 *
5
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6
 */
7

8
//! Traits and implementations to encode and decode batches of sorted triples
9
//! to/from disk.
10
//!
11
//! The traits and implementations in this module are used to customize the
12
//! encoding of batches of sorted triples to/from disk. They are used by
13
//! [`ParSortPairs`], [`ParSortIters`], and the transform functions.
14
//!
15
//! [`ParSortPairs`]: crate::utils::par_sort_pairs::ParSortPairs
16
//! [`ParSortIters`]: crate::utils::par_sort_iters::ParSortIters
17
//!
18
//! They usually do not need to be accessed or modified by the end users, albeit
19
//! in some specific cases where performance or on-disk occupation is critical
20
//! they can be customized.
21

22
use anyhow::Result;
23

24
use super::ArcMmapHelper;
25
use core::fmt::Display;
26
use dsi_bitstream::prelude::*;
27
use rdst::*;
28
use std::fs::File;
29
use std::io::BufWriter;
30
use std::path::Path;
31

32
pub mod gaps;
33
pub mod grouped_gaps;
34

35
/// The recommended default batch codec for unlabeled batches.
36
///
37
/// When `DEDUP` is `true`, duplicates are eliminated during batch
38
/// serialization, reducing I/O and disk usage.
39
pub type DefaultBatchCodec<const DEDUP: bool = false> = grouped_gaps::GroupedGapsCodec<
40
    NE,
41
    (),
42
    { dsi_bitstream::dispatch::code_consts::GAMMA },
43
    { dsi_bitstream::dispatch::code_consts::GAMMA },
44
    { dsi_bitstream::dispatch::code_consts::DELTA },
45
    DEDUP,
46
>;
47

48
pub type BitWriter<E> = BufBitWriter<E, WordAdapter<usize, BufWriter<File>>>;
49
pub type BitReader<E> = BufBitReader<E, MemWordReader<u32, ArcMmapHelper<u32>>>;
50

51
/// Statistics about a batch encoded by a [`BatchCodec`].
52
///
53
/// Implementations also implement [`Display`] so they can be logged.
54
pub trait BatchStats: Display {
55
    /// The number of triples actually written to disk.
56
    ///
57
    /// When the codec is deduplicating, this is the number of unique triples
58
    /// that were encoded; it may be smaller than the number of arcs originally
59
    /// pushed into the batch.
60
    fn total_triples(&self) -> usize;
61
}
62

63
/// A trait for encoding and decoding batches of sorted triples.
64
pub trait BatchCodec: Send + Sync {
65
    /// The label type of the triples to encode and decode.
66
    /// While the bounds are not really necessary, in all the practical cases
67
    /// we need them.
68
    type Label: Copy + Send + Sync + 'static;
69
    /// The type returned by `decode_batch`, the iterator of which yields the
70
    /// decoded triples in sorted order.
71
    ///
72
    /// The type `IntoIter` has to be `Send + Sync + Clone` because they are
73
    /// used in [`ParSortedGraph`]/[`SortedLabeledGraph`] which require them.
74
    ///
75
    /// [`ParSortedGraph`]: crate::graphs::par_sorted_graph::ParSortedGraph
76
    /// [`SortedLabeledGraph`]: crate::graphs::par_sorted_graph::ParSortedLabeledGraph
77
    type DecodedBatch: IntoIterator<Item = ((usize, usize), Self::Label), IntoIter: Send + Sync + Clone>;
78

79
    /// A type representing statistics about the encoded batch.
80
    ///
81
    /// It has to implement [`Display`] so that we can log it, and
82
    /// [`BatchStats`] so that we can query the number of triples actually
83
    /// encoded (which may differ from the number of arcs pushed when
84
    /// deduplication is enabled).
85
    type EncodedBatchStats: BatchStats;
86

87
    /// Given a batch of sorted triples, encodes them to disk and returns the
88
    /// number of bits written.
89
    ///
90
    /// Note that the input batch must be already sorted. Use
91
    /// [`encode_batch`] otherwise.
92
    ///
93
    /// [`encode_batch`]: Self::encode_batch
94
    fn encode_sorted_batch(
95
        &self,
96
        path: impl AsRef<Path>,
97
        batch: &[((usize, usize), Self::Label)],
98
    ) -> Result<(usize, Self::EncodedBatchStats)>;
99

100
    /// Given a batch of triples, sorts them, encodes them to disk, and returns
101
    /// the number of bits written.
102
    fn encode_batch(
103
        &self,
104
        path: impl AsRef<Path>,
105
        batch: &mut [((usize, usize), Self::Label)],
106
    ) -> Result<(usize, Self::EncodedBatchStats)>;
107

108
    /// Decodes a batch of triples from disk.
109
    ///
110
    /// The returned type's iterator yields the serialized triples in sorted order.
111
    fn decode_batch(&self, path: impl AsRef<Path>) -> Result<Self::DecodedBatch>;
112
}
113

114
/// Convenience alias to extract the iterator type of the decoded batch from a
115
/// [`BatchCodec`].
116
pub type CodecIter<C> = <<C as BatchCodec>::DecodedBatch as IntoIterator>::IntoIter;
117

118
/// An arc expressed as a pair of nodes and the associated label.
119
///
120
/// Equality and order are defined only (lexicographically) on the pair of
121
/// nodes.
122
///
123
/// Since we use this to sort a batch of `(usize, usize, L)` triples, in order
124
/// to safely transmute between the two types, Triple has to be
125
/// `repr(transparent)` of the same tuple type.
126
///
127
/// We use this to implement `RadixKey` for sorting batches of triples
128
/// using the [`rdst`] crate.
129
///
130
/// [`rdst`]: https://crates.io/crates/rdst
131
#[derive(Clone, Copy, Debug)]
132
#[repr(transparent)]
133
pub struct Triple<L>(((usize, usize), L));
134

135
impl<L> Triple<L> {
136
    /// slice of `Triple<L>`.
137
    ///
138
    /// The conversion is safe because `Triple` is `repr(transparent)` of the
139
    /// same tuple type.
140
    pub const fn cast_batch(batch: &[((usize, usize), L)]) -> &[Triple<L>] {
1,004✔
141
        // SAFETY: `Triple` is `repr(transparent)` of the same tuple type.
142
        unsafe { std::mem::transmute(batch) }
2,008✔
143
    }
144

145
    /// Converts a mutable reference to a slice of `((usize, usize), L)` triples
146
    /// into a mutable reference to a slice of `Triple<L>`.
147
    ///
148
    /// The conversion is safe because `Triple` is `repr(transparent)` of the
149
    /// same tuple type.
150
    pub const fn cast_batch_mut(batch: &mut [((usize, usize), L)]) -> &mut [Triple<L>] {
1,003✔
151
        // SAFETY: `Triple` is `repr(transparent)` of the same tuple type.
152
        unsafe { std::mem::transmute(batch) }
1,003✔
153
    }
154
}
155

156
const USIZE_BYTES: usize = usize::BITS as usize / 8;
157

158
impl<L> RadixKey for Triple<L> {
159
    const LEVELS: usize = USIZE_BYTES * 2;
160

161
    fn get_level(&self, level: usize) -> u8 {
1,122,027,712✔
162
        (if level < USIZE_BYTES {
1,122,027,712✔
163
            self.0.0.1 >> ((level % USIZE_BYTES) * 8)
551,549,338✔
164
        } else {
165
            self.0.0.0 >> ((level % USIZE_BYTES) * 8)
570,478,374✔
UNCOV
166
        }) as u8
×
167
    }
168
}
169

170
impl<L> PartialEq for Triple<L> {
UNCOV
171
    fn eq(&self, other: &Self) -> bool {
×
UNCOV
172
        self.0.0 == other.0.0
×
173
    }
174
}
175

176
impl<L> Eq for Triple<L> {}
177

178
impl<L> PartialOrd for Triple<L> {
179
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
50,758,155✔
180
        Some(self.cmp(other))
101,516,310✔
181
    }
182
}
183

184
impl<L> Ord for Triple<L> {
185
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
50,758,155✔
186
        self.0.0.cmp(&other.0.0)
152,274,465✔
187
    }
188
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc