• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vigna / webgraph-rs / 24296210049

12 Apr 2026 01:49AM UTC coverage: 66.827% (-0.05%) from 66.877%
24296210049

push

github

vigna
Clippy hapy

6674 of 9987 relevant lines covered (66.83%)

47792805.58 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

80.0
/webgraph/src/utils/batch_codec/mod.rs
1
/*
2
 * SPDX-FileCopyrightText: 2025 Tommaso Fontana
3
 * SPDX-FileCopyrightText: 2025 Sebastiano Vigna
4
 *
5
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6
 */
7

8
//! Traits and implementations to encode and decode batches of sorted triples
9
//! to/from disk.
10
//!
11
//! The traits and implementations in this module are used to customize the
12
//! encoding of batches of sorted triples to/from disk. They are used by
13
//! [`SortPairs`] and other utilities built on that (e.g., [`ParSortPairs`]).
14
//!
15
//! [`SortPairs`]: crate::utils::sort_pairs::SortPairs
16
//! [`ParSortPairs`]: crate::utils::par_sort_pairs::ParSortPairs
17
//!
18
//! They usually do not need to be accessed or modified by the end users, albeit
19
//! in some specific cases where performance or on-disk occupation is critical
20
//! they can be customized.
21

22
use anyhow::Result;
23

24
use super::ArcMmapHelper;
25
use core::fmt::Display;
26
use dsi_bitstream::prelude::*;
27
use rdst::*;
28
use std::fs::File;
29
use std::io::BufWriter;
30
use std::path::Path;
31

32
pub mod gaps;
33
pub mod grouped_gaps;
34

35
/// The recommended default batch codec for unlabeled batches.
36
///
37
/// When `DEDUP` is `true`, duplicates are eliminated during batch
38
/// serialization, reducing I/O and disk usage.
39
pub type DefaultBatchCodec<const DEDUP: bool = false> = grouped_gaps::GroupedGapsCodec<
40
    NE,
41
    (),
42
    (),
43
    { dsi_bitstream::dispatch::code_consts::GAMMA },
44
    { dsi_bitstream::dispatch::code_consts::GAMMA },
45
    { dsi_bitstream::dispatch::code_consts::DELTA },
46
    DEDUP,
47
>;
48

49
pub type BitWriter<E> = BufBitWriter<E, WordAdapter<usize, BufWriter<File>>>;
50
pub type BitReader<E> = BufBitReader<E, MemWordReader<u32, ArcMmapHelper<u32>>>;
51

52
/// A trait for encoding and decoding batches of sorted triples.
53
pub trait BatchCodec: Send + Sync {
54
    /// The label type of the triples to encode and decode.
55
    /// While the bounds are not really necessary, in all the practical cases
56
    /// we need them.
57
    type Label: Copy + Send + Sync + 'static;
58
    /// The type returned by `decode_batch`, the iterator of which yields the
59
    /// decoded triples in sorted order.
60
    ///
61
    /// The type `IntoIter` has to be `Send + Sync + Clone` because most often we want
62
    /// to use them in [`SortPairs`] and then in [`ArcListGraph`] which require
63
    /// them.
64
    ///
65
    /// [`SortPairs`]: crate::utils::sort_pairs::SortPairs
66
    /// [`ArcListGraph`]: crate::graphs::arc_list_graph::ArcListGraph
67
    type DecodedBatch: IntoIterator<Item = ((usize, usize), Self::Label), IntoIter: Send + Sync + Clone>;
68

69
    /// A type representing statistics about the encoded batch.
70
    /// This type has to implement `Display` so that we can log it.
71
    type EncodedBatchStats: Display;
72

73
    /// Given a batch of sorted triples, encodes them to disk and returns the
74
    /// number of bits written.
75
    ///
76
    /// Note that the input batch must be already sorted. Use
77
    /// [`encode_batch`] otherwise.
78
    ///
79
    /// [`encode_batch`]: Self::encode_batch
80
    fn encode_sorted_batch(
81
        &self,
82
        path: impl AsRef<Path>,
83
        batch: &[((usize, usize), Self::Label)],
84
    ) -> Result<(usize, Self::EncodedBatchStats)>;
85

86
    /// Given a batch of triples, sorts them, encodes them to disk, and returns
87
    /// the number of bits written.
88
    fn encode_batch(
89
        &self,
90
        path: impl AsRef<Path>,
91
        batch: &mut [((usize, usize), Self::Label)],
92
    ) -> Result<(usize, Self::EncodedBatchStats)>;
93

94
    /// Decodes a batch of triples from disk.
95
    ///
96
    /// The returned type's iterator yields the serialized triples in sorted order.
97
    fn decode_batch(&self, path: impl AsRef<Path>) -> Result<Self::DecodedBatch>;
98
}
99

100
/// Convenience alias to extract the iterator type of the decoded batch from a
101
/// [`BatchCodec`].
102
pub type CodecIter<C> = <<C as BatchCodec>::DecodedBatch as IntoIterator>::IntoIter;
103

104
/// An arc expressed as a pair of nodes and the associated label.
105
///
106
/// Equality and order are defined only (lexicographically) on the pair of
107
/// nodes.
108
///
109
/// Since we use this to sort a batch of `(usize, usize, L)` triples, in order
110
/// to safely transmute between the two types, Triple has to be
111
/// `repr(transparent)` of the same tuple type.
112
///
113
/// We use this to implement `RadixKey` for sorting batches of triples
114
/// using the [`rdst`] crate.
115
///
116
/// [`rdst`]: https://crates.io/crates/rdst
117
#[derive(Clone, Copy, Debug)]
118
#[repr(transparent)]
119
pub struct Triple<L>(((usize, usize), L));
120

121
impl<L> Triple<L> {
122
    /// slice of `Triple<L>`.
123
    ///
124
    /// The conversion is safe because `Triple` is `repr(transparent)` of the
125
    /// same tuple type.
126
    pub const fn cast_batch(batch: &[((usize, usize), L)]) -> &[Triple<L>] {
564✔
127
        // SAFETY: `Triple` is `repr(transparent)` of the same tuple type.
128
        unsafe { std::mem::transmute(batch) }
1,128✔
129
    }
130

131
    /// Converts a mutable reference to a slice of `((usize, usize), L)` triples
132
    /// into a mutable reference to a slice of `Triple<L>`.
133
    ///
134
    /// The conversion is safe because `Triple` is `repr(transparent)` of the
135
    /// same tuple type.
136
    pub const fn cast_batch_mut(batch: &mut [((usize, usize), L)]) -> &mut [Triple<L>] {
563✔
137
        // SAFETY: `Triple` is `repr(transparent)` of the same tuple type.
138
        unsafe { std::mem::transmute(batch) }
563✔
139
    }
140
}
141

142
const USIZE_BYTES: usize = usize::BITS as usize / 8;
143

144
impl<L> RadixKey for Triple<L> {
145
    const LEVELS: usize = USIZE_BYTES * 2;
146

147
    fn get_level(&self, level: usize) -> u8 {
1,122,623,009✔
148
        (if level < USIZE_BYTES {
1,122,623,009✔
149
            self.0.0.1 >> ((level % USIZE_BYTES) * 8)
552,444,244✔
150
        } else {
151
            self.0.0.0 >> ((level % USIZE_BYTES) * 8)
570,178,765✔
152
        }) as u8
×
153
    }
154
}
155

156
impl<L> PartialEq for Triple<L> {
157
    fn eq(&self, other: &Self) -> bool {
×
158
        self.0.0 == other.0.0
×
159
    }
160
}
161

162
impl<L> Eq for Triple<L> {}
163

164
impl<L> PartialOrd for Triple<L> {
165
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
50,757,558✔
166
        Some(self.cmp(other))
101,515,116✔
167
    }
168
}
169

170
impl<L> Ord for Triple<L> {
171
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
50,757,558✔
172
        self.0.0.cmp(&other.0.0)
152,272,674✔
173
    }
174
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc