• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vigna / webgraph-rs / 22357185907

24 Feb 2026 03:17PM UTC coverage: 71.43% (-0.03%) from 71.455%
22357185907

push

github

vigna
Support for Omega codes in graphs; cosmetic changes

6 of 12 new or added lines in 5 files covered. (50.0%)

3 existing lines in 3 files now uncovered.

6283 of 8796 relevant lines covered (71.43%)

52384663.01 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

80.54
/webgraph/src/graphs/bvgraph/comp/flags.rs
1
/*
2
 * SPDX-FileCopyrightText: 2023 Inria
3
 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
4
 *
5
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6
 */
7

8
use anyhow::{Context, Result, bail, ensure};
9
use dsi_bitstream::dispatch::Codes;
10
use dsi_bitstream::traits::{BigEndian, Endianness, LittleEndian};
11
use std::collections::HashMap;
12

13
#[derive(Clone, Copy, Debug)]
14
#[cfg_attr(feature = "fuzz", derive(arbitrary::Arbitrary))]
15
/// The compression flags for reading or compressing a graph.
16
///
17
/// As documented, one code might set multiple values. This is done for
18
/// compatibility with the previous Java version of the library.
19
/// But the code optimizer will return the optimal codes for each of them,
20
/// so if it identifies some big saving from using different codes, we can consider
21
/// splitting them.
22
pub struct CompFlags {
23
    /// The instantaneous code to use to encode the `outdegrees`
24
    pub outdegrees: Codes,
25
    /// The instantaneous code to use to encode the `reference_offset`
26
    pub references: Codes,
27
    /// The instantaneous code to use to encode the `block_count` and `blocks`
28
    pub blocks: Codes,
29
    /// The instantaneous code to use to encode the `interval_count`, `interval_start`, and `interval_len`.
30
    pub intervals: Codes,
31
    /// The instantaneous code to use to encode the `first_residual` and `residual`
32
    pub residuals: Codes,
33
    /// The minimum length of an interval to be compressed as (start, len)
34
    pub min_interval_length: usize,
35
    /// The number of previous nodes to use for reference compression
36
    pub compression_window: usize,
37
    /// The maximum recursion depth during decoding, this modulates the tradeoff
38
    /// between compression ratio and decoding speed
39
    pub max_ref_count: usize,
40
}
41

42
impl core::default::Default for CompFlags {
43
    fn default() -> Self {
2,333,968✔
44
        CompFlags {
45
            outdegrees: Codes::Gamma,
46
            references: Codes::Unary,
47
            blocks: Codes::Gamma,
48
            intervals: Codes::Gamma,
49
            residuals: Codes::Zeta(3),
2,333,968✔
50
            min_interval_length: 4,
51
            compression_window: 7,
52
            max_ref_count: 3,
53
        }
54
    }
55
}
56

57
const OLD_CODES: [Codes; 10] = [
58
    Codes::Unary,
59
    Codes::Gamma,
60
    Codes::Delta,
61
    Codes::Zeta(1),
62
    Codes::Zeta(2),
63
    Codes::Zeta(3),
64
    Codes::Zeta(4),
65
    Codes::Zeta(5),
66
    Codes::Zeta(6),
67
    Codes::Zeta(7),
68
];
69

70
impl CompFlags {
71
    /// Convert a string from the `compflags` field from the `.properties` file
72
    /// into which code to use.
73
    ///
74
    /// Note that ζ codes are supported both with parameterless names (e.g.,
75
    /// "ZETA") and with the parameter in the name (e.g., "ZETA3"), for
76
    /// compatibility with previous versions of the library.
77
    ///
78
    /// For CLI ergonomics and compatibility, this codes must be the same as
79
    /// those appearing in the `PrivCode` enum of the `webgraph-cli` crate.
80
    ///
81
    /// Returns `None` if the string is not recognized.
82
    pub fn code_from_str(s: &str, k: usize) -> Option<Codes> {
4,251,150✔
83
        match s.to_uppercase().as_str() {
4,251,150✔
84
            "UNARY" => Some(Codes::Unary),
5,495,335✔
85
            "GAMMA" => Some(Codes::Gamma),
3,732,740✔
86
            "DELTA" => Some(Codes::Delta),
4,251,210✔
87
            "ZETA" => Some(Codes::Zeta(k)),
466,735✔
88
            "OMEGA" => Some(Codes::Omega),
155,605✔
89
            "PI1" => Some(Codes::Pi(1)),
155,615✔
90
            "PI2" => Some(Codes::Pi(2)),
155,610✔
91
            "PI3" => Some(Codes::Pi(3)),
155,590✔
92
            "PI4" => Some(Codes::Pi(4)),
155,580✔
93
            "ZETA1" => Some(Codes::Zeta(1)),
155,565✔
94
            "ZETA2" => Some(Codes::Zeta(2)),
311,080✔
95
            "ZETA3" => Some(Codes::Zeta(3)),
35✔
96
            "ZETA4" => Some(Codes::Zeta(4)),
30✔
97
            "ZETA5" => Some(Codes::Zeta(5)),
25✔
98
            "ZETA6" => Some(Codes::Zeta(6)),
20✔
99
            "ZETA7" => Some(Codes::Zeta(7)),
15✔
100
            _ => None,
5✔
101
        }
102
    }
103

104
    pub fn code_to_str(c: Codes, version: usize) -> Option<&'static str> {
4,251,175✔
105
        if version == 0 {
4,251,175✔
106
            match c {
2,125,630✔
107
                Codes::Unary => Some("UNARY"),
622,105✔
108
                Codes::Gamma => Some("GAMMA"),
362,895✔
109
                Codes::Delta => Some("DELTA"),
985,055✔
NEW
110
                Codes::Omega => Some("OMEGA"),
×
111
                Codes::Zeta(_) => Some("ZETA"),
155,570✔
112
                _ => None,
5✔
113
            }
114
        } else {
115
            match c {
2,125,545✔
116
                Codes::Unary => Some("UNARY"),
622,085✔
117
                Codes::Gamma => Some("GAMMA"),
362,885✔
118
                Codes::Delta => Some("DELTA"),
984,965✔
119
                Codes::Zeta(1) => Some("ZETA1"),
5✔
120
                Codes::Zeta(2) => Some("ZETA2"),
155,525✔
121
                Codes::Zeta(3) => Some("ZETA3"),
5✔
122
                Codes::Zeta(4) => Some("ZETA4"),
5✔
123
                Codes::Zeta(5) => Some("ZETA5"),
5✔
124
                Codes::Zeta(6) => Some("ZETA6"),
5✔
125
                Codes::Zeta(7) => Some("ZETA7"),
5✔
NEW
126
                Codes::Omega => Some("OMEGA"),
×
127
                Codes::Pi(1) => Some("PI1"),
10✔
128
                Codes::Pi(2) => Some("PI2"),
15✔
129
                Codes::Pi(3) => Some("PI3"),
10✔
130
                Codes::Pi(4) => Some("PI4"),
10✔
131
                _ => None,
10✔
132
            }
133
        }
134
    }
135

136
    fn contains_new_codes(&self) -> bool {
622,457✔
137
        !OLD_CODES.contains(&self.outdegrees)
1,244,914✔
138
            || !OLD_CODES.contains(&self.references)
1,244,914✔
139
            || !OLD_CODES.contains(&self.blocks)
1,244,914✔
140
            || !OLD_CODES.contains(&self.intervals)
1,244,914✔
141
            || !OLD_CODES.contains(&self.residuals)
1,244,914✔
142
    }
143

144
    pub fn to_properties<E: Endianness>(
746,881✔
145
        &self,
146
        num_nodes: usize,
147
        num_arcs: u64,
148
        bitstream_len: u64,
149
    ) -> Result<String> {
150
        let mut s = String::new();
1,493,762✔
151
        s.push_str("#BVGraph properties\n");
2,240,643✔
152
        s.push_str("graphclass=it.unimi.dsi.webgraph.BVGraph\n");
2,240,643✔
153

154
        // Version 1 if we have big-endian or new codes
155
        let version = (core::any::TypeId::of::<E>() != core::any::TypeId::of::<BigEndian>()
1,493,762✔
156
            || self.contains_new_codes()) as usize;
1,244,914✔
157

158
        s.push_str(&format!("version={version}\n"));
2,240,643✔
159
        s.push_str(&format!("endianness={}\n", E::NAME));
2,240,643✔
160

161
        s.push_str(&format!("nodes={num_nodes}\n"));
2,240,643✔
162
        s.push_str(&format!("arcs={num_arcs}\n"));
2,240,643✔
163
        s.push_str(&format!("minintervallength={}\n", self.min_interval_length));
2,240,643✔
164
        s.push_str(&format!("maxrefcount={}\n", self.max_ref_count));
2,240,643✔
165
        s.push_str(&format!("windowsize={}\n", self.compression_window));
2,240,643✔
166
        s.push_str(&format!(
2,987,524✔
167
            "bitsperlink={}\n",
746,881✔
168
            bitstream_len as f64 / num_arcs as f64
746,881✔
169
        ));
170
        s.push_str(&format!(
2,987,524✔
171
            "bitspernode={}\n",
746,881✔
172
            bitstream_len as f64 / num_nodes as f64
746,881✔
173
        ));
174
        s.push_str(&format!("length={bitstream_len}\n"));
2,240,643✔
175

176
        fn stirling(n: u64) -> f64 {
3,733,731✔
177
            let n = n as f64;
7,467,462✔
178
            n * (n.ln() - 1.0) + 0.5 * (2.0 * std::f64::consts::PI * n).ln()
11,201,193✔
179
        }
180

181
        let n_squared = (num_nodes * num_nodes) as u64;
1,493,762✔
182
        let theoretical_bound =
746,881✔
183
            (stirling(n_squared) - stirling(num_arcs) - stirling(n_squared - num_arcs))
3,734,405✔
184
                / 2.0_f64.ln();
746,881✔
185
        s.push_str(&format!(
2,987,524✔
186
            "compratio={:.3}\n",
746,881✔
187
            bitstream_len as f64 / theoretical_bound
746,881✔
188
        ));
189

190
        s.push_str("compressionflags=");
2,240,643✔
191
        let mut comp_flags = false;
1,493,762✔
192
        if self.outdegrees != Codes::Gamma {
746,881✔
193
            s.push_str(&format!(
1,493,115✔
194
                "OUTDEGREES_{}|",
×
195
                Self::code_to_str(self.outdegrees, version).with_context(|| format!(
1,990,820✔
196
                    "Code {:?} is not supported for outdegrees in version {version}",
×
197
                    self.outdegrees
×
198
                ))?
199
            ));
200
            comp_flags = true;
497,705✔
201
        }
202
        if self.references != Codes::Unary {
746,881✔
203
            s.push_str(&format!(
1,493,070✔
204
                "REFERENCES_{}|",
×
205
                Self::code_to_str(self.references, version).with_context(|| format!(
1,990,760✔
206
                    "Code {:?} is not supported for references in version {version}",
×
207
                    self.references
×
208
                ))?
209
            ));
210
            comp_flags = true;
497,690✔
211
        }
212
        if self.blocks != Codes::Gamma {
746,881✔
213
            s.push_str(&format!(
1,493,085✔
214
                "BLOCKS_{}|",
×
215
                Self::code_to_str(self.blocks, version).with_context(|| format!(
1,990,780✔
216
                    "Code {:?} is not supported for blocks in version {version}",
×
217
                    self.blocks
×
218
                ))?
219
            ));
220
            comp_flags = true;
497,695✔
221
        }
222
        if self.intervals != Codes::Gamma {
746,881✔
223
            s.push_str(&format!(
1,493,085✔
224
                "INTERVALS_{}|",
×
225
                Self::code_to_str(self.intervals, version).with_context(|| format!(
1,990,780✔
226
                    "Code {:?} is not supported for intervals in version {version}",
×
227
                    self.intervals
×
228
                ))?
229
            ));
230
            comp_flags = true;
497,695✔
231
        }
232
        if (version == 0 && !matches!(self.residuals, Codes::Zeta(_)))
1,680,403✔
233
            || self.residuals != (Codes::Zeta(3))
435,816✔
234
        {
235
            s.push_str(&format!(
1,679,724✔
236
                "RESIDUALS_{}|",
×
237
                Self::code_to_str(self.residuals, version).with_context(|| format!(
2,239,632✔
238
                    "Code {:?} is not supported for residuals in version {version}",
×
239
                    self.residuals
×
240
                ))?
241
            ));
242
            comp_flags = true;
559,908✔
243
        }
244
        if comp_flags {
1,491,114✔
245
            s.pop();
744,233✔
246
        }
247
        s.push('\n');
1,493,762✔
248
        if version == 0 {
746,881✔
249
            // check that if a k is specified, it is the same for all codes
250
            let mut k = None;
1,244,914✔
251
            macro_rules! check_and_set_k {
×
252
                ($code:expr) => {
×
253
                    match $code {
×
254
                        Codes::Zeta(new_k) => {
×
255
                            if let Some(old_k) = k {
×
256
                                ensure!(
×
257
                                    old_k == new_k,
×
258
                                    "Only one value of k is supported in version 0"
×
259
                                )
260
                            }
261
                            k = Some(new_k)
×
262
                        }
263
                        _ => {}
×
264
                    }
265
                };
266
            }
267
            check_and_set_k!(self.outdegrees);
1,244,904✔
268
            check_and_set_k!(self.references);
1,244,909✔
269
            check_and_set_k!(self.blocks);
1,244,909✔
270
            check_and_set_k!(self.intervals);
1,244,904✔
271
            check_and_set_k!(self.residuals);
933,522✔
272
            // if no k was specified, use the default one (3)
273
            s.push_str(&format!("zetak={}\n", k.unwrap_or(3)));
2,489,828✔
274
        }
275
        Ok(s)
746,881✔
276
    }
277

278
    /// Convert the decoded `.properties` file into a `CompFlags` struct.
279
    /// Also check that the endianness is correct.
280
    pub fn from_properties<E: Endianness>(map: &HashMap<String, String>) -> Result<Self> {
747,085✔
281
        // Default values, same as the Java class
282
        let endianness = map
1,494,170✔
283
            .get("endianness")
284
            .map(|x| x.to_string())
2,240,855✔
285
            .unwrap_or_else(|| BigEndian::NAME.to_string());
747,485✔
286

287
        anyhow::ensure!(
747,085✔
288
            endianness == E::NAME,
747,085✔
289
            "Wrong endianness, got {} while expected {}",
×
290
            endianness,
×
291
            E::NAME
×
292
        );
293
        // check that the version was properly set for LE
294
        if core::any::TypeId::of::<E>() == core::any::TypeId::of::<LittleEndian>() {
747,084✔
295
            anyhow::ensure!(
124,422✔
296
                map.get("version").map(|x| x.parse::<u32>().unwrap()) == Some(1),
870,954✔
297
                "Wrong version, got {} while expected 1",
×
298
                map.get("version").unwrap_or(&"None".to_string())
×
299
            );
300
        }
301

302
        let mut cf = CompFlags::default();
1,494,168✔
303
        let mut k = 3;
1,494,168✔
304
        if let Some(spec_k) = map.get("zetak") {
2,116,825✔
305
            let spec_k = spec_k.parse::<usize>()?;
1,245,314✔
306
            if !(1..=7).contains(&spec_k) {
1,245,314✔
307
                bail!("Only ζ₁-ζ₇ are supported");
×
308
            }
309
            k = spec_k;
622,657✔
310
        }
311
        if let Some(comp_flags) = map.get("compressionflags") {
2,241,252✔
312
            if !comp_flags.is_empty() {
747,084✔
313
                for flag in comp_flags.split('|') {
3,294,926✔
314
                    let s: Vec<_> = flag.split('_').collect();
12,753,465✔
315
                    let code = CompFlags::code_from_str(s[1], k).unwrap();
12,753,465✔
316
                    match s[0] {
2,550,693✔
317
                        "OUTDEGREES" => cf.outdegrees = code,
3,048,398✔
318
                        "REFERENCES" => cf.references = code,
2,550,678✔
319
                        "BLOCKS" => cf.blocks = code,
2,052,993✔
320
                        "INTERVALS" => cf.intervals = code,
1,555,298✔
321
                        "RESIDUALS" => cf.residuals = code,
1,119,816✔
322
                        "OFFSETS" => {
×
323
                            ensure!(code == Codes::Gamma, "Only γ code is supported for offsets")
×
324
                        }
325
                        _ => bail!("Unknown compression flag {}", flag),
×
326
                    }
327
                }
328
            }
329
        }
330
        if let Some(compression_window) = map.get("windowsize") {
2,241,247✔
331
            cf.compression_window = compression_window.parse()?;
747,079✔
332
        }
333
        if let Some(min_interval_length) = map.get("minintervallength") {
2,241,247✔
334
            cf.min_interval_length = min_interval_length.parse()?;
747,079✔
335
        }
336
        if let Some(max_ref_count) = map.get("maxrefcount") {
2,241,247✔
337
            cf.max_ref_count = max_ref_count.parse()?;
747,079✔
338
        }
339
        Ok(cf)
747,084✔
340
    }
341
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc