• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vigna / webgraph-rs / 23365769388

20 Mar 2026 10:52PM UTC coverage: 68.228% (-3.0%) from 71.245%
23365769388

push

github

vigna
No le_bins,be_bins for webgraph

6655 of 9754 relevant lines covered (68.23%)

46582760.24 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

80.0
/webgraph/src/utils/batch_codec/mod.rs
1
/*
2
 * SPDX-FileCopyrightText: 2025 Tommaso Fontana
3
 * SPDX-FileCopyrightText: 2025 Sebastiano Vigna
4
 *
5
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6
 */
7

8
//! Traits and implementations to encode and decode batches of sorted triples
9
//! to/from disk.
10
//!
11
//! The traits and implementations in this module are used to customize the
12
//! encoding of batches of sorted triples to/from disk. They are used by
13
//! [`SortPairs`](crate::utils::sort_pairs::SortPairs) and other utilities built
14
//! on that (e.g.,
15
//! [`ParSortPairs`](crate::utils::par_sort_pairs::ParSortPairs)).
16
//!
17
//! They usually do not need to be accessed or modified by the end users, albeit
18
//! in some specific cases where performance or on-disk occupation is critical
19
//! they can be customized.
20

21
use anyhow::Result;
22

23
use super::ArcMmapHelper;
24
use core::fmt::Display;
25
use dsi_bitstream::prelude::*;
26
use rdst::*;
27
use std::fs::File;
28
use std::io::BufWriter;
29
use std::path::Path;
30

31
pub mod gaps;
32
pub mod grouped_gaps;
33

34
/// The recommended default batch codec for unlabeled batches.
35
///
36
/// When `DEDUP` is `true`, duplicates are eliminated during batch
37
/// serialization, reducing I/O and disk usage.
38
pub type DefaultBatchCodec<const DEDUP: bool = false> = grouped_gaps::GroupedGapsCodec<
39
    NE,
40
    (),
41
    (),
42
    { dsi_bitstream::dispatch::code_consts::GAMMA },
43
    { dsi_bitstream::dispatch::code_consts::GAMMA },
44
    { dsi_bitstream::dispatch::code_consts::DELTA },
45
    DEDUP,
46
>;
47

48
pub type BitWriter<E> = BufBitWriter<E, WordAdapter<usize, BufWriter<File>>>;
49
pub type BitReader<E> = BufBitReader<E, MemWordReader<u32, ArcMmapHelper<u32>>>;
50

51
/// A trait for encoding and decoding batches of sorted triples.
52
pub trait BatchCodec: Send + Sync {
53
    /// The label type of the triples to encode and decode.
54
    /// While the bounds are not really necessary, in all the practical cases
55
    /// we need them.
56
    type Label: Copy + Send + Sync + 'static;
57
    /// The type returned by `decode_batch`, the iterator of which yields the
58
    /// decoded triples in sorted order.
59
    ///
60
    /// The type `IntoIter` has to be `Send + Sync + Clone` because most often we want
61
    /// to use them in [`SortPairs`](crate::utils::sort_pairs::SortPairs) and
62
    /// then in [`ArcListGraph`](crate::graphs::arc_list_graph::ArcListGraph)
63
    /// which require them.
64
    type DecodedBatch: IntoIterator<Item = ((usize, usize), Self::Label), IntoIter: Send + Sync + Clone>;
65

66
    /// A type representing statistics about the encoded batch.
67
    /// This type has to implement `Display` so that we can log it.
68
    type EncodedBatchStats: Display;
69

70
    /// Given a batch of sorted triples, encodes them to disk and returns the
71
    /// number of bits written.
72
    ///
73
    /// Note that the input batch must be already sorted. Use
74
    /// [`encode_batch`](Self::encode_batch) otherwise.
75
    fn encode_sorted_batch(
76
        &self,
77
        path: impl AsRef<Path>,
78
        batch: &[((usize, usize), Self::Label)],
79
    ) -> Result<(usize, Self::EncodedBatchStats)>;
80

81
    /// Given a batch of triples, sort them, encodes them to disk, and returns
82
    /// the number of bits written.
83
    fn encode_batch(
84
        &self,
85
        path: impl AsRef<Path>,
86
        batch: &mut [((usize, usize), Self::Label)],
87
    ) -> Result<(usize, Self::EncodedBatchStats)>;
88

89
    /// Decodes a batch of triples from disk.
90
    ///
91
    /// The returned type's iterator yields the serialized triples in sorted order.
92
    fn decode_batch(&self, path: impl AsRef<Path>) -> Result<Self::DecodedBatch>;
93
}
94

95
/// Convenience alias to extract the iterator type of the decoded batch from a
96
/// [`BatchCodec`].
97
pub type CodecIter<C> = <<C as BatchCodec>::DecodedBatch as IntoIterator>::IntoIter;
98

99
/// An arc expressed as a pair of nodes and the associated label.
100
///
101
/// Equality and order are defined only (lexicographically) on the pair of
102
/// nodes.
103
///
104
/// Since we use this to sort a batch of `(usize, usize, L)` triples, in order
105
/// to safely transmute between the two types, Triple has to be
106
/// `repr(transparent)` of the same tuple type.
107
///
108
/// We use this to implement `RadixKey` for sorting batches of triples
109
/// using the [`rdst`](https://crates.io/crates/rdst) crate.
110
#[derive(Clone, Copy, Debug)]
111
#[repr(transparent)]
112
pub struct Triple<L>(((usize, usize), L));
113

114
impl<L> Triple<L> {
115
    /// slice of `Triple<L>`.
116
    ///
117
    /// The conversion is safe because `Triple` is `repr(transparent)` of the
118
    /// same tuple type.
119
    pub const fn cast_batch(batch: &[((usize, usize), L)]) -> &[Triple<L>] {
556✔
120
        // SAFETY: `Triple` is `repr(transparent)` of the same tuple type.
121
        unsafe { std::mem::transmute(batch) }
1,112✔
122
    }
123

124
    /// Converts a mutable reference to a slice of `((usize, usize), L)` triples
125
    /// into a mutable reference to a slice of `Triple<L>`.
126
    ///
127
    /// The conversion is safe because `Triple` is `repr(transparent)` of the
128
    /// same tuple type.
129
    pub const fn cast_batch_mut(batch: &mut [((usize, usize), L)]) -> &mut [Triple<L>] {
555✔
130
        // SAFETY: `Triple` is `repr(transparent)` of the same tuple type.
131
        unsafe { std::mem::transmute(batch) }
555✔
132
    }
133
}
134

135
const USIZE_BYTES: usize = usize::BITS as usize / 8;
136

137
impl<L> RadixKey for Triple<L> {
138
    const LEVELS: usize = USIZE_BYTES * 2;
139

140
    fn get_level(&self, level: usize) -> u8 {
1,124,833,235✔
141
        (if level < USIZE_BYTES {
1,124,833,235✔
142
            self.0.0.1 >> ((level % USIZE_BYTES) * 8)
553,995,546✔
143
        } else {
144
            self.0.0.0 >> ((level % USIZE_BYTES) * 8)
570,837,689✔
145
        }) as u8
×
146
    }
147
}
148

149
impl<L> PartialEq for Triple<L> {
150
    fn eq(&self, other: &Self) -> bool {
×
151
        self.0.0 == other.0.0
×
152
    }
153
}
154

155
impl<L> Eq for Triple<L> {}
156

157
impl<L> PartialOrd for Triple<L> {
158
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
50,757,566✔
159
        Some(self.cmp(other))
101,515,132✔
160
    }
161
}
162

163
impl<L> Ord for Triple<L> {
164
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
50,757,566✔
165
        self.0.0.cmp(&other.0.0)
152,272,698✔
166
    }
167
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc