• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vigna / webgraph-rs / 19383593169

15 Nov 2025 03:16AM UTC coverage: 62.139% (-0.06%) from 62.201%
19383593169

push

github

web-flow
Merge pull request #156 from progval/edition2024

Switch webgraph to 2024 edition

9 of 19 new or added lines in 6 files covered. (47.37%)

7 existing lines in 3 files now uncovered.

5247 of 8444 relevant lines covered (62.14%)

29479360.02 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

80.0
/webgraph/src/utils/batch_codec/mod.rs
1
/*
2
 * SPDX-FileCopyrightText: 2025 Tommaso Fontana
3
 *
4
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
5
 */
6

7
//! Traits and implementations to encode and decode batches of sorted triples
8
//! to/from disk.
9
//!
10
//! The traits and implementations in this module are used to customize the
11
//! encoding of batches of sorted triples to/from disk. They are used by
12
//! [`SortPairs`](crate::utils::sort_pairs::SortPairs) and other utilities built
13
//! on that (e.g.,
14
//! [`ParSortPairs`](crate::utils::par_sort_pairs::ParSortPairs)).
15
//!
16
//! They usually do not need to be accessed or modified by the end users, albeit
17
//! in some specific cases where performance or on-disk occupation is critical
18
//! they can be customized.
19

20
use anyhow::Result;
21

22
use super::ArcMmapHelper;
23
use core::fmt::Display;
24
use dsi_bitstream::prelude::*;
25
use rdst::*;
26
use std::fs::File;
27
use std::io::BufWriter;
28
use std::path::Path;
29

30
pub mod gaps;
31
pub mod grouped_gaps;
32

33
/// The recommended default batch codec for unlabelled batches.
34
pub type DefaultBatchCodec = grouped_gaps::GroupedGapsCodec;
35

36
pub type BitWriter<E> = BufBitWriter<E, WordAdapter<usize, BufWriter<File>>>;
37
pub type BitReader<E> = BufBitReader<E, MemWordReader<u32, ArcMmapHelper<u32>>>;
38

39
/// A trait for encoding and decoding batches of sorted triples.
40
pub trait BatchCodec: Send + Sync {
41
    /// The label type of the triples to encode and decode.
42
    /// While the bounds are not really necessary, in all the practical cases
43
    /// we need them.
44
    type Label: Copy + Send + Sync + 'static;
45
    //// The type returned by `decode_batch`, the iterator of which yields the
46
    //// decoded triples in sorted order.
47
    ///
48
    /// The type `IntoIter` has to be `Send + Sync + Clone` because most often we want
49
    /// to use them in [`SortPairs`](crate::utils::sort_pairs::SortPairs) and
50
    /// then in [`ArcListGraph`](crate::graphs::arc_list_graph::ArcListGraph)
51
    /// which require them.
52
    type DecodedBatch: IntoIterator<Item = ((usize, usize), Self::Label), IntoIter: Send + Sync + Clone>;
53

54
    /// A type representing statistics about the encoded batch.
55
    /// This type has to implement `Display` so that we can log it.
56
    type EncodedBatchStats: Display;
57

58
    /// Given a batch of sorted triples, encodes them to disk and returns the
59
    /// number of bits written.
60
    ///
61
    /// Note that the input batch must be already sorted. Use
62
    /// [`encode_batch`](Self::encode_batch) otherwise.
63
    fn encode_sorted_batch(
64
        &self,
65
        path: impl AsRef<Path>,
66
        batch: &[((usize, usize), Self::Label)],
67
    ) -> Result<(usize, Self::EncodedBatchStats)>;
68

69
    /// Given a batch of triples, sort them, encodes them to disk, and returns
70
    /// the number of bits written.
71
    fn encode_batch(
72
        &self,
73
        path: impl AsRef<Path>,
74
        batch: &mut [((usize, usize), Self::Label)],
75
    ) -> Result<(usize, Self::EncodedBatchStats)>;
76

77
    /// Decodes a batch of triples from disk.
78
    ///
79
    /// The returned type's iterator yields the serialized triples in sorted order.
80
    fn decode_batch(&self, path: impl AsRef<Path>) -> Result<Self::DecodedBatch>;
81
}
82

83
/// Convenience alias to extract the iterator type of the decoded batch from a
84
/// [`BatchCodec`].
85
pub type CodecIter<C> = <<C as BatchCodec>::DecodedBatch as IntoIterator>::IntoIter;
86

87
/// An arc expressed as a pair of nodes and the associated label.
88
///
89
/// Equality and order are defined only (lexicographically) on the pair of
90
/// nodes.
91
///
92
/// Since we use this to sort a batch of `(usize, usize, L)` triples, in order
93
/// to safely transmute between the two types, Triple has to be
94
/// `repr(transparent)` of the same tuple type.
95
///
96
/// We use this to implement `RadixKey` for sorting batches of triples
97
/// using the [`rdst`](https://crates.io/crates/rdst) crate.
98
#[derive(Clone, Copy, Debug)]
99
#[repr(transparent)]
100
pub struct Triple<L>(((usize, usize), L));
101

102
impl<L> Triple<L> {
103
    /// slice of `Triple<L>`.
104
    ///
105
    /// The conversion is safe because `Triple` is `repr(transparent)` of the
106
    /// same tuple type.
107
    pub fn cast_batch(batch: &[((usize, usize), L)]) -> &[Triple<L>] {
524✔
108
        // SAFETY: `Triple` is `repr(transparent)` of the same tuple type.
109
        unsafe { std::mem::transmute(batch) }
1,048✔
110
    }
111

112
    /// Converts a mutable reference to a slice of `((usize, usize), L)` triples
113
    /// into a mutable reference to a slice of `Triple<L>`.
114
    ///
115
    /// The conversion is safe because `Triple` is `repr(transparent)` of the
116
    /// same tuple type.
117
    pub fn cast_batch_mut(batch: &mut [((usize, usize), L)]) -> &mut [Triple<L>] {
524✔
118
        // SAFETY: `Triple` is `repr(transparent)` of the same tuple type.
119
        unsafe { std::mem::transmute(batch) }
524✔
120
    }
121
}
122

123
impl<L> RadixKey for Triple<L> {
124
    const LEVELS: usize = 16;
125

126
    fn get_level(&self, level: usize) -> u8 {
846,362,856✔
127
        (if level < 8 {
846,362,856✔
128
            self.0.0.1 >> ((level % 8) * 8)
421,454,412✔
129
        } else {
130
            self.0.0.0 >> ((level % 8) * 8)
424,908,444✔
131
        }) as u8
×
132
    }
133
}
134

135
impl<L> PartialEq for Triple<L> {
136
    fn eq(&self, other: &Self) -> bool {
×
NEW
137
        self.0.0 == other.0.0
×
138
    }
139
}
140

141
impl<L> Eq for Triple<L> {}
142

143
impl<L> PartialOrd for Triple<L> {
144
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
38,585,551✔
145
        Some(self.cmp(other))
77,171,102✔
146
    }
147
}
148

149
impl<L> Ord for Triple<L> {
150
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
38,585,551✔
151
        self.0.0.cmp(&other.0.0)
115,756,653✔
152
    }
153
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc