• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vigna / webgraph-rs / 14999125721

13 May 2025 02:22PM UTC coverage: 49.654% (-5.7%) from 55.382%
14999125721

push

github

zommiommy
wip hyperball cli

4 of 147 new or added lines in 4 files covered. (2.72%)

477 existing lines in 46 files now uncovered.

3663 of 7377 relevant lines covered (49.65%)

25650535.86 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

45.04
/webgraph/src/graphs/bvgraph/comp/flags.rs
1
/*
2
 * SPDX-FileCopyrightText: 2023 Inria
3
 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
4
 *
5
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6
 */
7

8
use anyhow::{bail, ensure, Result};
9
use dsi_bitstream::dispatch::Codes;
10
use dsi_bitstream::traits::{BigEndian, Endianness, LittleEndian};
11
use std::collections::HashMap;
12

13
#[derive(Clone, Copy, Debug)]
14
#[cfg_attr(feature = "fuzz", derive(arbitrary::Arbitrary))]
15
/// The compression flags for reading or compressing a graph.
16
///
17
/// As documented, one code might sets multiple values. This is done for
18
/// compatibility with the previous Java version of the library.
19
/// But the codes optimizers will return the optimal codes for each of them,
20
/// so if it identify some big save from using different codes, we can consider
21
/// splitting them.
22
pub struct CompFlags {
23
    /// The instantaneous code to use to encode the `outdegrees`
24
    pub outdegrees: Codes,
25
    /// The instantaneous code to use to encode the `reference_offset`
26
    pub references: Codes,
27
    /// The instantaneous code to use to encode the `block_count` and `blocks`
28
    pub blocks: Codes,
29
    /// The instantaneous code to use to encode the `interval_count`, `interval_start`, and `interval_len`.
30
    pub intervals: Codes,
31
    /// The instantaneous code to use to encode the `first_residual` and `residual`
32
    pub residuals: Codes,
33
    /// The minimum length of an interval to be compressed as (start, len)
34
    pub min_interval_length: usize,
35
    /// The number of previous nodes to use for reference compression
36
    pub compression_window: usize,
37
    /// The maximum recursion depth during decoding, this modulates the tradeoff
38
    /// between compression ratio and decoding speed
39
    pub max_ref_count: usize,
40
}
41

42
impl core::default::Default for CompFlags {
43
    fn default() -> Self {
160✔
44
        CompFlags {
45
            outdegrees: Codes::Gamma,
46
            references: Codes::Unary,
47
            blocks: Codes::Gamma,
48
            intervals: Codes::Gamma,
49
            residuals: Codes::Zeta { k: 3 },
160✔
50
            min_interval_length: 4,
51
            compression_window: 7,
52
            max_ref_count: 3,
53
        }
54
    }
55
}
56

57
impl CompFlags {
58
    /// Convert a string from the `compflags` field from the `.properties` file
59
    /// into which code to use.
60
    ///
61
    /// Returns `None` if the string is not recognized.
62
    pub fn code_from_str(s: &str, k: usize) -> Option<Codes> {
×
63
        match s.to_uppercase().as_str() {
×
64
            "UNARY" => Some(Codes::Unary),
×
65
            "GAMMA" => Some(Codes::Gamma),
×
66
            "DELTA" => Some(Codes::Delta),
×
67
            "ZETA" => Some(Codes::Zeta { k }),
×
68
            _ => None,
×
69
        }
70
    }
71

72
    pub fn code_to_str(c: Codes) -> Option<&'static str> {
×
73
        match c {
×
74
            Codes::Unary => Some("UNARY"),
×
75
            Codes::Gamma => Some("GAMMA"),
×
76
            Codes::Delta => Some("DELTA"),
×
77
            Codes::Zeta { k: _ } => Some("ZETA"),
×
78
            _ => unimplemented!("Code {:?} not supported", c),
79
        }
80
    }
81

82
    pub fn to_properties<E: Endianness>(
17✔
83
        &self,
84
        num_nodes: usize,
85
        num_arcs: u64,
86
        bitstream_len: u64,
87
    ) -> Result<String> {
88
        let mut s = String::new();
34✔
89
        s.push_str("#BVGraph properties\n");
51✔
90
        s.push_str("graphclass=it.unimi.dsi.webgraph.BVGraph\n");
51✔
91

92
        if core::any::TypeId::of::<E>() == core::any::TypeId::of::<BigEndian>() {
34✔
93
            s.push_str("version=0\n");
34✔
94
        } else {
95
            s.push_str("version=1\n");
×
96
        }
97
        s.push_str(&format!("endianness={}\n", E::NAME));
51✔
98

99
        s.push_str(&format!("nodes={}\n", num_nodes));
51✔
100
        s.push_str(&format!("arcs={}\n", num_arcs));
51✔
101
        s.push_str(&format!("minintervallength={}\n", self.min_interval_length));
51✔
102
        s.push_str(&format!("maxrefcount={}\n", self.max_ref_count));
51✔
103
        s.push_str(&format!("windowsize={}\n", self.compression_window));
51✔
104
        s.push_str(&format!(
68✔
105
            "bitsperlink={}\n",
34✔
106
            bitstream_len as f64 / num_arcs as f64
17✔
107
        ));
108
        s.push_str(&format!(
68✔
109
            "bitspernode={}\n",
34✔
110
            bitstream_len as f64 / num_nodes as f64
17✔
111
        ));
112
        s.push_str(&format!("length={}\n", bitstream_len));
51✔
113

114
        fn stirling(n: u64) -> f64 {
132✔
115
            let n = n as f64;
264✔
116
            n * (n.ln() - 1.0) + 0.5 * (2.0 * std::f64::consts::PI * n).ln()
396✔
117
        }
118

119
        let nsquared = (num_nodes * num_nodes) as u64;
34✔
120
        let theoretical_bound =
17✔
121
            (stirling(nsquared) - stirling(num_arcs) - stirling(nsquared - num_arcs))
85✔
122
                / 2.0_f64.ln();
17✔
123
        s.push_str(&format!(
68✔
124
            "compratio={:.3}\n",
34✔
125
            bitstream_len as f64 / theoretical_bound
17✔
126
        ));
127

128
        s.push_str("compressionflags=");
51✔
129
        let mut cflags = false;
34✔
130
        if self.outdegrees != Codes::Gamma {
17✔
131
            s.push_str(&format!(
×
132
                "OUTDEGREES_{}|",
×
133
                Self::code_to_str(self.outdegrees).unwrap()
×
134
            ));
135
            cflags = true;
×
136
        }
137
        if self.references != Codes::Unary {
17✔
138
            s.push_str(&format!(
×
139
                "REFERENCES_{}|",
×
140
                Self::code_to_str(self.references).unwrap()
×
141
            ));
142
            cflags = true;
×
143
        }
144
        if self.blocks != Codes::Gamma {
17✔
145
            s.push_str(&format!(
×
146
                "BLOCKS_{}|",
×
147
                Self::code_to_str(self.blocks).unwrap()
×
148
            ));
149
            cflags = true;
×
150
        }
151
        if self.intervals != Codes::Gamma {
17✔
152
            s.push_str(&format!(
×
153
                "INTERVALS_{}|",
×
154
                Self::code_to_str(self.intervals).unwrap()
×
155
            ));
156
            cflags = true;
×
157
        }
158
        if !matches!(self.residuals, Codes::Zeta { k: _ }) {
17✔
159
            s.push_str(&format!(
×
160
                "RESIDUALS_{}|",
×
161
                Self::code_to_str(self.residuals).unwrap()
×
162
            ));
163
            cflags = true;
×
164
        }
165
        if cflags {
17✔
166
            s.pop();
×
167
        }
168
        s.push('\n');
34✔
169
        // check that if a k is specified, it is the same for all codes
170
        let mut k = None;
34✔
171
        macro_rules! check_and_set_k {
×
172
            ($code:expr) => {
×
173
                match $code {
×
174
                    Codes::Zeta { k: new_k } => {
×
175
                        if let Some(old_k) = k {
×
176
                            ensure!(old_k == new_k, "Only one value of k is supported")
×
177
                        }
178
                        k = Some(new_k)
×
179
                    }
180
                    _ => {}
×
181
                }
182
            };
183
        }
184
        check_and_set_k!(self.outdegrees);
34✔
185
        check_and_set_k!(self.references);
34✔
186
        check_and_set_k!(self.blocks);
34✔
187
        check_and_set_k!(self.intervals);
34✔
188
        check_and_set_k!(self.residuals);
17✔
189
        // if no k was specified, use the default one (3)
190
        s.push_str(&format!("zetak={}\n", k.unwrap_or(3)));
17✔
UNCOV
191
        Ok(s)
×
192
    }
193

194
    /// Convert the decoded `.properties` file into a `CompFlags` struct.
195
    /// Also check that the endianness is correct.
196
    pub fn from_properties<E: Endianness>(map: &HashMap<String, String>) -> Result<Self> {
65✔
197
        // Default values, same as the Java class
198
        let endianness = map
130✔
199
            .get("endianness")
200
            .map(|x| x.to_string())
107✔
201
            .unwrap_or_else(|| BigEndian::NAME.to_string());
153✔
202

203
        anyhow::ensure!(
65✔
204
            endianness == E::NAME,
65✔
205
            "Wrong endianness, got {} while expected {}",
×
206
            endianness,
×
207
            E::NAME
×
208
        );
209
        // check that the version was properly set for LE
210
        if core::any::TypeId::of::<E>() == core::any::TypeId::of::<LittleEndian>() {
65✔
211
            anyhow::ensure!(
×
212
                map.get("version").map(|x| x.parse::<u32>().unwrap()) == Some(1),
×
213
                "Wrong version, got {} while expected 1",
×
214
                map.get("version").unwrap_or(&"None".to_string())
×
215
            );
216
        }
217

218
        let mut cf = CompFlags::default();
65✔
UNCOV
219
        let mut k = 3;
×
220
        if let Some(spec_k) = map.get("zeta_k") {
×
221
            let spec_k = spec_k.parse::<usize>()?;
×
222
            if !(1..=7).contains(&spec_k) {
×
223
                bail!("Only ζ₁-ζ₇ are supported");
×
224
            }
225
            k = spec_k;
×
226
        }
227
        if let Some(comp_flags) = map.get("compressionflags") {
130✔
228
            if !comp_flags.is_empty() {
×
229
                for flag in comp_flags.split('|') {
×
230
                    let s: Vec<_> = flag.split('_').collect();
×
231
                    // FIXME: this is a hack to avoid having to implement
232
                    // FromStr for Code
233
                    let code = CompFlags::code_from_str(s[1], k).unwrap();
×
234
                    match s[0] {
×
235
                        "OUTDEGREES" => cf.outdegrees = code,
×
236
                        "REFERENCES" => cf.references = code,
×
237
                        "BLOCKS" => cf.blocks = code,
×
238
                        "INTERVALS" => cf.intervals = code,
×
239
                        "RESIDUALS" => cf.residuals = code,
×
240
                        "OFFSETS" => {
×
241
                            ensure!(code == Codes::Gamma, "Only γ code is supported for offsets")
×
242
                        }
243
                        _ => bail!("Unknown compression flag {}", flag),
×
244
                    }
245
                }
246
            }
247
        }
248
        if let Some(compression_window) = map.get("windowsize") {
130✔
249
            cf.compression_window = compression_window.parse()?;
×
250
        }
251
        if let Some(min_interval_length) = map.get("minintervallength") {
130✔
252
            cf.min_interval_length = min_interval_length.parse()?;
×
253
        }
254
        Ok(cf)
65✔
255
    }
256
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc