• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vigna / webgraph-rs / 20017908129

08 Dec 2025 05:36AM UTC coverage: 62.065% (+0.4%) from 61.641%
20017908129

push

github

zommiommy
Fix doctests

5435 of 8757 relevant lines covered (62.06%)

46674689.93 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

80.12
/webgraph/src/graphs/bvgraph/comp/flags.rs
1
/*
2
 * SPDX-FileCopyrightText: 2023 Inria
3
 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
4
 *
5
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6
 */
7

8
use anyhow::{Result, bail, ensure};
9
use dsi_bitstream::dispatch::Codes;
10
use dsi_bitstream::traits::{BigEndian, Endianness, LittleEndian};
11
use std::collections::HashMap;
12

13
#[derive(Clone, Copy, Debug)]
14
#[cfg_attr(feature = "fuzz", derive(arbitrary::Arbitrary))]
15
/// The compression flags for reading or compressing a graph.
16
///
17
/// As documented, one code might sets multiple values. This is done for
18
/// compatibility with the previous Java version of the library.
19
/// But the codes optimizers will return the optimal codes for each of them,
20
/// so if it identify some big save from using different codes, we can consider
21
/// splitting them.
22
pub struct CompFlags {
23
    /// The instantaneous code to use to encode the `outdegrees`
24
    pub outdegrees: Codes,
25
    /// The instantaneous code to use to encode the `reference_offset`
26
    pub references: Codes,
27
    /// The instantaneous code to use to encode the `block_count` and `blocks`
28
    pub blocks: Codes,
29
    /// The instantaneous code to use to encode the `interval_count`, `interval_start`, and `interval_len`.
30
    pub intervals: Codes,
31
    /// The instantaneous code to use to encode the `first_residual` and `residual`
32
    pub residuals: Codes,
33
    /// The minimum length of an interval to be compressed as (start, len)
34
    pub min_interval_length: usize,
35
    /// The number of previous nodes to use for reference compression
36
    pub compression_window: usize,
37
    /// The maximum recursion depth during decoding, this modulates the tradeoff
38
    /// between compression ratio and decoding speed
39
    pub max_ref_count: usize,
40
}
41

42
impl core::default::Default for CompFlags {
43
    fn default() -> Self {
1,866,553✔
44
        CompFlags {
45
            outdegrees: Codes::Gamma,
46
            references: Codes::Unary,
47
            blocks: Codes::Gamma,
48
            intervals: Codes::Gamma,
49
            residuals: Codes::Zeta(3),
1,866,553✔
50
            min_interval_length: 4,
51
            compression_window: 7,
52
            max_ref_count: 3,
53
        }
54
    }
55
}
56

57
const OLD_CODES: [Codes; 10] = [
58
    Codes::Unary,
59
    Codes::Gamma,
60
    Codes::Delta,
61
    Codes::Zeta(1),
62
    Codes::Zeta(2),
63
    Codes::Zeta(3),
64
    Codes::Zeta(4),
65
    Codes::Zeta(5),
66
    Codes::Zeta(6),
67
    Codes::Zeta(7),
68
];
69

70
impl CompFlags {
71
    /// Convert a string from the `compflags` field from the `.properties` file
72
    /// into which code to use.
73
    ///
74
    /// Returns `None` if the string is not recognized.
75
    pub fn code_from_str(s: &str, k: usize) -> Option<Codes> {
3,400,704✔
76
        match s.to_uppercase().as_str() {
3,400,704✔
77
            "UNARY" => Some(Codes::Unary),
4,396,032✔
78
            "GAMMA" => Some(Codes::Gamma),
2,985,984✔
79
            "DELTA" => Some(Codes::Delta),
3,400,704✔
80
            "ZETA" => Some(Codes::Zeta(k)),
373,248✔
81
            "PI1" => Some(Codes::Pi(1)),
124,416✔
82
            "PI2" => Some(Codes::Pi(2)),
124,416✔
83
            "PI3" => Some(Codes::Pi(3)),
124,416✔
84
            "PI4" => Some(Codes::Pi(4)),
124,416✔
85
            "ZETA1" => Some(Codes::Zeta(1)),
124,416✔
86
            "ZETA2" => Some(Codes::Zeta(2)),
248,832✔
87
            "ZETA3" => Some(Codes::Zeta(3)),
×
88
            "ZETA4" => Some(Codes::Zeta(4)),
×
89
            "ZETA5" => Some(Codes::Zeta(5)),
×
90
            "ZETA6" => Some(Codes::Zeta(6)),
×
91
            "ZETA7" => Some(Codes::Zeta(7)),
×
92
            _ => None,
×
93
        }
94
    }
95

96
    pub fn code_to_str(c: Codes, version: usize) -> Option<&'static str> {
3,400,704✔
97
        if version == 0 {
3,400,704✔
98
            match c {
1,700,352✔
99
                Codes::Unary => Some("UNARY"),
497,664✔
100
                Codes::Gamma => Some("GAMMA"),
290,304✔
101
                Codes::Delta => Some("DELTA"),
787,968✔
102
                Codes::Zeta(_) => Some("ZETA"),
124,416✔
103
                _ => unimplemented!("Code {:?} not supported", c),
104
            }
105
        } else {
106
            match c {
1,700,352✔
107
                Codes::Unary => Some("UNARY"),
497,664✔
108
                Codes::Gamma => Some("GAMMA"),
290,304✔
109
                Codes::Delta => Some("DELTA"),
787,968✔
110
                Codes::Zeta(1) => Some("ZETA1"),
×
111
                Codes::Zeta(2) => Some("ZETA2"),
124,416✔
112
                Codes::Zeta(3) => Some("ZETA3"),
×
113
                Codes::Zeta(4) => Some("ZETA4"),
×
114
                Codes::Zeta(5) => Some("ZETA5"),
×
115
                Codes::Zeta(6) => Some("ZETA6"),
×
116
                Codes::Zeta(7) => Some("ZETA7"),
×
117
                Codes::Pi(1) => Some("PI1"),
×
118
                Codes::Pi(2) => Some("PI2"),
×
119
                Codes::Pi(3) => Some("PI3"),
×
120
                Codes::Pi(4) => Some("PI4"),
×
121
                _ => unimplemented!("Code {:?} not supported", c),
122
            }
123
        }
124
    }
125

126
    fn contains_new_codes(&self) -> bool {
497,730✔
127
        !OLD_CODES.contains(&self.outdegrees)
995,460✔
128
            || !OLD_CODES.contains(&self.references)
995,460✔
129
            || !OLD_CODES.contains(&self.blocks)
995,460✔
130
            || !OLD_CODES.contains(&self.intervals)
995,460✔
131
            || !OLD_CODES.contains(&self.residuals)
995,460✔
132
    }
133

134
    pub fn to_properties<E: Endianness>(
622,146✔
135
        &self,
136
        num_nodes: usize,
137
        num_arcs: u64,
138
        bitstream_len: u64,
139
    ) -> Result<String> {
140
        let mut s = String::new();
1,244,292✔
141
        s.push_str("#BVGraph properties\n");
1,866,438✔
142
        s.push_str("graphclass=it.unimi.dsi.webgraph.BVGraph\n");
1,866,438✔
143

144
        // Version 1 if we have big-endian or new codes
145
        let version = (core::any::TypeId::of::<E>() != core::any::TypeId::of::<BigEndian>()
1,244,292✔
146
            || self.contains_new_codes()) as usize;
995,460✔
147

148
        s.push_str(&format!("version={version}\n"));
1,866,438✔
149
        s.push_str(&format!("endianness={}\n", E::NAME));
1,866,438✔
150

151
        s.push_str(&format!("nodes={num_nodes}\n"));
1,866,438✔
152
        s.push_str(&format!("arcs={num_arcs}\n"));
1,866,438✔
153
        s.push_str(&format!("minintervallength={}\n", self.min_interval_length));
1,866,438✔
154
        s.push_str(&format!("maxrefcount={}\n", self.max_ref_count));
1,866,438✔
155
        s.push_str(&format!("windowsize={}\n", self.compression_window));
1,866,438✔
156
        s.push_str(&format!(
2,488,584✔
157
            "bitsperlink={}\n",
622,146✔
158
            bitstream_len as f64 / num_arcs as f64
622,146✔
159
        ));
160
        s.push_str(&format!(
2,488,584✔
161
            "bitspernode={}\n",
622,146✔
162
            bitstream_len as f64 / num_nodes as f64
622,146✔
163
        ));
164
        s.push_str(&format!("length={bitstream_len}\n"));
1,866,438✔
165

166
        fn stirling(n: u64) -> f64 {
2,986,182✔
167
            let n = n as f64;
5,972,364✔
168
            n * (n.ln() - 1.0) + 0.5 * (2.0 * std::f64::consts::PI * n).ln()
8,958,546✔
169
        }
170

171
        let n_squared = (num_nodes * num_nodes) as u64;
1,244,292✔
172
        let theoretical_bound =
622,146✔
173
            (stirling(n_squared) - stirling(num_arcs) - stirling(n_squared - num_arcs))
3,110,730✔
174
                / 2.0_f64.ln();
622,146✔
175
        s.push_str(&format!(
2,488,584✔
176
            "compratio={:.3}\n",
622,146✔
177
            bitstream_len as f64 / theoretical_bound
622,146✔
178
        ));
179

180
        s.push_str("compressionflags=");
1,866,438✔
181
        let mut comp_flags = false;
1,244,292✔
182
        if self.outdegrees != Codes::Gamma {
1,036,866✔
183
            s.push_str(&format!(
1,658,880✔
184
                "OUTDEGREES_{}|",
414,720✔
185
                Self::code_to_str(self.outdegrees, version).unwrap()
1,244,160✔
186
            ));
187
            comp_flags = true;
414,720✔
188
        }
189
        if self.references != Codes::Unary {
1,036,866✔
190
            s.push_str(&format!(
1,658,880✔
191
                "REFERENCES_{}|",
414,720✔
192
                Self::code_to_str(self.references, version).unwrap()
1,244,160✔
193
            ));
194
            comp_flags = true;
414,720✔
195
        }
196
        if self.blocks != Codes::Gamma {
1,036,866✔
197
            s.push_str(&format!(
1,658,880✔
198
                "BLOCKS_{}|",
414,720✔
199
                Self::code_to_str(self.blocks, version).unwrap()
1,244,160✔
200
            ));
201
            comp_flags = true;
414,720✔
202
        }
203
        if self.intervals != Codes::Gamma {
1,036,866✔
204
            s.push_str(&format!(
1,658,880✔
205
                "INTERVALS_{}|",
414,720✔
206
                Self::code_to_str(self.intervals, version).unwrap()
1,244,160✔
207
            ));
208
            comp_flags = true;
414,720✔
209
        }
210
        if (version == 0 && !matches!(self.residuals, Codes::Zeta(_)))
1,368,708✔
211
            || self.residuals != (Codes::Zeta(3))
373,314✔
212
        {
213
            s.push_str(&format!(
1,866,240✔
214
                "RESIDUALS_{}|",
466,560✔
215
                Self::code_to_str(self.residuals, version).unwrap()
1,399,680✔
216
            ));
217
            comp_flags = true;
466,560✔
218
        }
219
        if comp_flags {
1,242,306✔
220
            s.pop();
620,160✔
221
        }
222
        s.push('\n');
1,244,292✔
223
        if version == 0 {
622,146✔
224
            // check that if a k is specified, it is the same for all codes
225
            let mut k = None;
995,460✔
226
            macro_rules! check_and_set_k {
×
227
                ($code:expr) => {
×
228
                    match $code {
×
229
                        Codes::Zeta(new_k) => {
×
230
                            if let Some(old_k) = k {
×
231
                                ensure!(old_k == new_k, "Only one value of k is supported")
×
232
                            }
233
                            k = Some(new_k)
×
234
                        }
235
                        _ => {}
×
236
                    }
237
                };
238
            }
239
            check_and_set_k!(self.outdegrees);
995,460✔
240
            check_and_set_k!(self.references);
995,460✔
241
            check_and_set_k!(self.blocks);
995,460✔
242
            check_and_set_k!(self.intervals);
995,460✔
243
            check_and_set_k!(self.residuals);
746,562✔
244
            // if no k was specified, use the default one (3)
245
            s.push_str(&format!("zetak={}\n", k.unwrap_or(3)));
1,990,920✔
246
        }
247
        Ok(s)
622,146✔
248
    }
249

250
    /// Convert the decoded `.properties` file into a `CompFlags` struct.
251
    /// Also check that the endianness is correct.
252
    pub fn from_properties<E: Endianness>(map: &HashMap<String, String>) -> Result<Self> {
622,294✔
253
        // Default values, same as the Java class
254
        let endianness = map
1,244,588✔
255
            .get("endianness")
256
            .map(|x| x.to_string())
1,866,610✔
257
            .unwrap_or_else(|| BigEndian::NAME.to_string());
622,566✔
258

259
        anyhow::ensure!(
622,294✔
260
            endianness == E::NAME,
622,294✔
261
            "Wrong endianness, got {} while expected {}",
×
262
            endianness,
×
263
            E::NAME
×
264
        );
265
        // check that the version was properly set for LE
266
        if core::any::TypeId::of::<E>() == core::any::TypeId::of::<LittleEndian>() {
622,294✔
267
            anyhow::ensure!(
124,416✔
268
                map.get("version").map(|x| x.parse::<u32>().unwrap()) == Some(1),
870,912✔
269
                "Wrong version, got {} while expected 1",
×
270
                map.get("version").unwrap_or(&"None".to_string())
×
271
            );
272
        }
273

274
        let mut cf = CompFlags::default();
1,244,588✔
275
        let mut k = 3;
1,244,588✔
276
        if let Some(spec_k) = map.get("zetak") {
1,742,466✔
277
            let spec_k = spec_k.parse::<usize>()?;
995,756✔
278
            if !(1..=7).contains(&spec_k) {
995,756✔
279
                bail!("Only ζ₁-ζ₇ are supported");
×
280
            }
281
            k = spec_k;
497,878✔
282
        }
283
        if let Some(comp_flags) = map.get("compressionflags") {
1,866,882✔
284
            if !comp_flags.is_empty() {
622,294✔
285
                for flag in comp_flags.split('|') {
2,745,600✔
286
                    let s: Vec<_> = flag.split('_').collect();
10,627,200✔
287
                    // FIXME: this is a hack to avoid having to implement
288
                    // FromStr for Code
289
                    let code = CompFlags::code_from_str(s[1], k).unwrap();
10,627,200✔
290
                    match s[0] {
2,125,440✔
291
                        "OUTDEGREES" => cf.outdegrees = code,
2,540,160✔
292
                        "REFERENCES" => cf.references = code,
2,125,440✔
293
                        "BLOCKS" => cf.blocks = code,
1,710,720✔
294
                        "INTERVALS" => cf.intervals = code,
1,296,000✔
295
                        "RESIDUALS" => cf.residuals = code,
933,120✔
296
                        "OFFSETS" => {
×
297
                            ensure!(code == Codes::Gamma, "Only γ code is supported for offsets")
×
298
                        }
299
                        _ => bail!("Unknown compression flag {}", flag),
×
300
                    }
301
                }
302
            }
303
        }
304
        if let Some(compression_window) = map.get("windowsize") {
1,866,882✔
305
            cf.compression_window = compression_window.parse()?;
622,294✔
306
        }
307
        if let Some(min_interval_length) = map.get("minintervallength") {
1,866,882✔
308
            cf.min_interval_length = min_interval_length.parse()?;
622,294✔
309
        }
310
        Ok(cf)
622,294✔
311
    }
312
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc