• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vigna / webgraph-rs / 18458664278

13 Oct 2025 07:39AM UTC coverage: 48.052% (+0.2%) from 47.835%
18458664278

Pull #152

github

vigna
Docs review
Pull Request #152: Introduce BatchCodec to substitute BatchIterator

112 of 233 new or added lines in 9 files covered. (48.07%)

7 existing lines in 3 files now uncovered.

4009 of 8343 relevant lines covered (48.05%)

24464313.31 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

80.0
/webgraph/src/utils/batch_codec/mod.rs
1
/*
2
 * SPDX-FileCopyrightText: 2025 Tommaso Fontana
3
 *
4
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
5
 */
6

7
//! Traits and implementations to encode and decode batches of sorted triples
8
//! to/from disk.
9
//!
10
//! The traits and implementations in this module are used to customize the
11
//! encoding of batches of sorted triples to/from disk. They are used by
12
//! [`SortPairs`](crate::utils::sort_pairs::SortPairs) and other utilities built
13
//! on that (e.g.,
14
//! [`ParSortPairs`](crate::utils::par_sort_pairs::ParSortPairs)).
15
//!
16
//! They usually do not need to be accessed or modified by the end users, albeit
17
//! in some specific cases where performance or on-disk occupation is critical
18
//! they can be customized.
19

20
use anyhow::Result;
21

22
use super::ArcMmapHelper;
23
use dsi_bitstream::prelude::*;
24
use rdst::*;
25
use std::fs::File;
26
use std::io::BufWriter;
27
use std::path::Path;
28

29
pub mod gaps;
30
pub mod grouped_gaps;
31

32
/// The recommended default batch codec for unlabelled batches.
33
pub type DefaultBatchCodec = grouped_gaps::GroupedGapsCodec;
34

35
pub type BitWriter<E> = BufBitWriter<E, WordAdapter<usize, BufWriter<File>>>;
36
pub type BitReader<E> = BufBitReader<E, MemWordReader<u32, ArcMmapHelper<u32>>>;
37

38
/// A trait for encoding and decoding batches of sorted triples.
39
pub trait BatchCodec: Send + Sync {
40
    /// The label type of the triples to encode and decode.
41
    /// While the bounds are not really necessary, in all the practical cases
42
    /// we need them.
43
    type Label: Copy + Send + Sync + 'static;
44
    //// The type returned by `decode_batch`, the iterator of which yields the
45
    //// decoded triples in sorted order.
46
    ///
47
    /// The type `IntoIter` has to be `Send + Sync + Clone` because most often we want
48
    /// to use them in [`SortPairs`](crate::utils::sort_pairs::SortPairs) and
49
    /// then in [`ArcListGraph`](crate::graphs::arc_list_graph::ArcListGraph)
50
    /// which require them.
51
    type DecodedBatch: IntoIterator<
52
        Item = (usize, usize, Self::Label),
53
        IntoIter: Send + Sync + Clone,
54
    >;
55

56
    /// Given a batch of sorted triples, encodes them to disk and returns the
57
    /// number of bits written.
58
    ///
59
    /// Note that the input batch must be already sorted. Use
60
    /// [`encode_batch`](Self::encode_batch) otherwise.
61
    fn encode_sorted_batch(
62
        &self,
63
        path: impl AsRef<Path>,
64
        batch: &[(usize, usize, Self::Label)],
65
    ) -> Result<usize>;
66

67
    /// Given a batch of triples, sort them, encodes them to disk, and returns
68
    /// the number of bits written.
69
    fn encode_batch(
70
        &self,
71
        path: impl AsRef<Path>,
72
        batch: &mut [(usize, usize, Self::Label)],
73
    ) -> Result<usize>;
74

75
    /// Decodes a batch of triples from disk.
76
    ///
77
    /// The returned type's iterator yields the serialized triples in sorted order.
78
    fn decode_batch(&self, path: impl AsRef<Path>) -> Result<Self::DecodedBatch>;
79
}
80

81
/// Convenience alias to extract the iterator type of the decoded batch from a
82
/// [`BatchCodec`].
83
pub type CodecIter<C> = <<C as BatchCodec>::DecodedBatch as IntoIterator>::IntoIter;
84

85
/// An arc expressed as a pair of nodes and the associated label.
86
///
87
/// Equality and order are defined only (lexicographically) on the pair of
88
/// nodes.
89
///
90
/// Since we use this to sort a batch of `(usize, usize, L)` triples, in order
91
/// to safely transmute between the two types, Triple has to be
92
/// `repr(transparent)` of the same tuple type.
93
///
94
/// We use this to implement `RadixKey` for sorting batches of triples
95
/// using the [`rdst`](https://crates.io/crates/rdst) crate.
96
#[derive(Clone, Copy, Debug)]
97
#[repr(transparent)]
98
pub struct Triple<L>((usize, usize, L));
99

100
impl<L> Triple<L> {
101
    /// Converts a reference to a slice of `(usize, usize, L)` into a reference to a
102
    /// slice of `Triple<L>`.
103
    ///
104
    /// The conversion is safe because `Triple` is `repr(transparent)` of the
105
    /// same tuple type.
106
    pub fn cast_batch(batch: &[(usize, usize, L)]) -> &[Triple<L>] {
440✔
107
        // SAFETY: `Triple` is `repr(transparent)` of the same tuple type.
108
        unsafe { std::mem::transmute(batch) }
880✔
109
    }
110

111
    /// Converts a mutable reference to a slice of `(usize, usize, L)` triples
112
    /// into a mutable reference to a slice of `Triple<L>`.
113
    ///
114
    /// The conversion is safe because `Triple` is `repr(transparent)` of the
115
    /// same tuple type.
116
    pub fn cast_batch_mut(batch: &mut [(usize, usize, L)]) -> &mut [Triple<L>] {
440✔
117
        // SAFETY: `Triple` is `repr(transparent)` of the same tuple type.
118
        unsafe { std::mem::transmute(batch) }
440✔
119
    }
120
}
121

122
impl<L> RadixKey for Triple<L> {
123
    const LEVELS: usize = 16;
124

125
    fn get_level(&self, level: usize) -> u8 {
846,231,760✔
126
        (if level < 8 {
846,231,760✔
127
            self.0 .1 >> ((level % 8) * 8)
421,461,072✔
128
        } else {
129
            self.0 .0 >> ((level % 8) * 8)
424,770,688✔
NEW
130
        }) as u8
×
131
    }
132
}
133

134
impl<L> PartialEq for Triple<L> {
NEW
135
    fn eq(&self, other: &Self) -> bool {
×
NEW
136
        self.0 .0 == other.0 .0 && self.0 .1 == other.0 .1
×
137
    }
138
}
139

140
impl<L> Eq for Triple<L> {}
141

142
impl<L> PartialOrd for Triple<L> {
143
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
38,585,535✔
144
        Some(self.cmp(other))
77,171,070✔
145
    }
146
}
147

148
impl<L> Ord for Triple<L> {
149
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
38,585,535✔
150
        self.0 .0.cmp(&other.0 .0).then(self.0 .1.cmp(&other.0 .1))
270,098,745✔
151
    }
152
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc