• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vigna / webgraph-rs / 18008720487

25 Sep 2025 01:16PM UTC coverage: 49.589% (-0.4%) from 49.949%
18008720487

push

github

vigna
Fixed fuzzing code for new epserde

0 of 2 new or added lines in 1 file covered. (0.0%)

650 existing lines in 25 files now uncovered.

3862 of 7788 relevant lines covered (49.59%)

25127316.85 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

44.26
/cli/src/build/ef.rs
1
/*
2
 * SPDX-FileCopyrightText: 2023 Inria
3
 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
4
 *
5
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6
 */
7

8
use crate::GlobalArgs;
9
use anyhow::{Context, Result};
10
use clap::Parser;
11
use dsi_bitstream::dispatch::factory::CodesReaderFactoryHelper;
12
use dsi_bitstream::prelude::*;
13
use dsi_progress_logger::prelude::*;
14
use epserde::prelude::*;
15
use log::info;
16
use mmap_rs::MmapFlags;
17
use std::fs::File;
18
use std::io::{BufReader, BufWriter, Seek};
19
use std::path::PathBuf;
20
use sux::prelude::*;
21
use webgraph::prelude::*;
22

23
#[derive(Parser, Debug, Clone)]
24
#[command(name = "ef", about = "Builds the Elias-Fano representation of the offsets of a graph.", long_about = None)]
25
pub struct CliArgs {
26
    /// The basename of the graph (or labels).
27
    pub src: PathBuf,
28
    /// The number of nodes of the graph. When passed, we don't need to load the
29
    /// `.properties` file. This allows to build Elias-Fano from the offsets of
30
    /// something that might not be a graph but that has offsets, like labels.
31
    /// For this reason, if passed, we will also try to read the `.labeloffsets`
32
    /// file and then fallback to the usual `.offsets` file.
33
    pub number_of_nodes: Option<usize>,
34
}
35

36
pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
12✔
37
    let ef_path = args.src.with_extension(EF_EXTENSION);
24✔
38
    // check that ef_path is writable, this is the only portable way I found
39
    // to check that the file is writable.
40
    if ef_path.exists() && ef_path.metadata()?.permissions().readonly() {
12✔
41
        return Err(anyhow::anyhow!(
×
42
            "The file is not writable: {}",
×
43
            ef_path.display()
×
44
        ));
45
    }
46

47
    match get_endianness(&args.src)?.as_str() {
12✔
48
        #[cfg(feature = "be_bins")]
49
        BE::NAME => build_eliasfano::<BE>(global_args, args),
36✔
50
        #[cfg(feature = "le_bins")]
51
        LE::NAME => build_eliasfano::<LE>(global_args, args),
×
52
        e => panic!("Unknown endianness: {}", e),
×
53
    }
54
}
55

56
pub fn build_eliasfano<E: Endianness + 'static>(
12✔
57
    global_args: GlobalArgs,
58
    args: CliArgs,
59
) -> Result<()>
60
where
61
    for<'a> BufBitReader<E, MemWordReader<u32, &'a [u32]>>: CodesRead<E> + BitSeek,
62
{
63
    let mut pl = ProgressLogger::default();
24✔
64
    pl.display_memory(true).item_name("offset");
24✔
65
    if let Some(duration) = &global_args.log_interval {
12✔
66
        pl.log_interval(*duration);
×
67
    }
68

69
    let basename = args.src.clone();
36✔
70
    if let Some(num_nodes) = args.number_of_nodes {
12✔
71
        pl.expected_updates(Some(num_nodes));
×
72
        // Horribly temporary duplicated code for the case of label offsets.
73
        let of_file_path = basename.with_extension(LABELOFFSETS_EXTENSION);
×
74
        if of_file_path.exists() {
×
75
            let labels_path = basename.with_extension(LABELS_EXTENSION);
×
76
            let mut file = File::open(&labels_path)
×
77
                .with_context(|| format!("Could not open {}", labels_path.display()))?;
×
78
            let file_len = 8 * file
×
79
                .seek(std::io::SeekFrom::End(0))
×
80
                .with_context(|| format!("Could not seek to end of {}", labels_path.display()))?;
×
81

82
            let mut efb = EliasFanoBuilder::new(num_nodes + 1, file_len as usize);
×
83

84
            info!("The offsets file exists, reading it to build Elias-Fano");
×
85

86
            let of = <MmapHelper<u32>>::mmap(of_file_path, MmapFlags::SEQUENTIAL)?;
×
87
            build_eliasfano_from_offsets(
88
                &global_args,
×
89
                &args,
×
90
                num_nodes,
×
91
                of.new_reader(),
×
92
                &mut pl,
×
93
                &mut efb,
×
94
            )?;
95
            return serialize_eliasfano(&global_args, &args, efb, &mut pl);
×
96
        }
97
    }
98

99
    // Creates the offsets file
100
    let of_file_path = basename.with_extension(OFFSETS_EXTENSION);
12✔
101

102
    let graph_path = basename.with_extension(GRAPH_EXTENSION);
×
103
    info!("Getting size of graph at '{}'", graph_path.display());
24✔
104
    let mut file = File::open(&graph_path)
12✔
105
        .with_context(|| format!("Could not open {}", graph_path.display()))?;
×
106
    let file_len = 8 * file
12✔
107
        .seek(std::io::SeekFrom::End(0))
×
108
        .with_context(|| format!("Could not seek in {}", graph_path.display()))?;
×
109
    info!("Graph file size: {} bits", file_len);
12✔
110

111
    // if the num_of_nodes is not present, read it from the properties file
112
    // otherwise use the provided value, this is so we can build the Elias-Fano
113
    // for offsets of any custom format that might not use the standard
114
    // properties file
115
    let num_nodes = args
12✔
116
        .number_of_nodes
×
117
        .map(Ok::<_, anyhow::Error>)
×
118
        .unwrap_or_else(|| {
12✔
119
            let properties_path = basename.with_extension(PROPERTIES_EXTENSION);
24✔
120
            info!(
12✔
121
                "Reading num_of_nodes from properties file at '{}'",
12✔
122
                properties_path.display()
12✔
123
            );
124
            let f = File::open(&properties_path).with_context(|| {
48✔
125
                format!(
×
126
                    "Could not open properties file: {}",
×
127
                    properties_path.display()
×
128
                )
129
            })?;
130
            let map = java_properties::read(BufReader::new(f))?;
12✔
131
            Ok(map.get("nodes").unwrap().parse::<usize>()?)
×
132
        })?;
133
    pl.expected_updates(Some(num_nodes));
×
134

135
    let mut efb = EliasFanoBuilder::new(num_nodes + 1, file_len as usize);
×
136

137
    info!("Checking if offsets exists at '{}'", of_file_path.display());
24✔
138
    // if the offset files exists, read it to build elias-fano
139
    if of_file_path.exists() {
×
140
        info!("The offsets file exists, reading it to build Elias-Fano");
24✔
141
        let of = <MmapHelper<u32>>::mmap(of_file_path, MmapFlags::SEQUENTIAL)?;
36✔
142
        build_eliasfano_from_offsets(
143
            &global_args,
×
144
            &args,
×
145
            num_nodes,
×
146
            of.new_reader(),
×
147
            &mut pl,
×
148
            &mut efb,
×
149
        )?;
150
    } else {
151
        build_eliasfano_from_graph(&args, &mut pl, &mut efb)?;
×
152
    }
153

154
    serialize_eliasfano(&global_args, &args, efb, &mut pl)
12✔
155
}
156

157
pub fn build_eliasfano_from_graph(
×
158
    args: &CliArgs,
159
    pl: &mut impl ProgressLog,
160
    efb: &mut EliasFanoBuilder,
161
) -> Result<()> {
162
    info!("The offsets file does not exists, reading the graph to build Elias-Fano");
×
163
    match get_endianness(&args.src)?.as_str() {
×
164
        #[cfg(feature = "be_bins")]
165
        BE::NAME => build_eliasfano_from_graph_with_endianness::<BE>(args, pl, efb),
×
166
        #[cfg(feature = "le_bins")]
167
        LE::NAME => build_eliasfano_from_graph_with_endianness::<LE>(args, pl, efb),
×
168
        e => panic!("Unknown endianness: {}", e),
×
169
    }
170
}
171

172
pub fn build_eliasfano_from_offsets<E: Endianness>(
12✔
173
    _global_args: &GlobalArgs,
174
    _args: &CliArgs,
175
    num_nodes: usize,
176
    mut reader: impl GammaRead<E>,
177
    pl: &mut impl ProgressLog,
178
    efb: &mut EliasFanoBuilder,
179
) -> Result<()> {
180
    info!("Building Elias-Fano from offsets...");
24✔
181

182
    // progress bar
183
    pl.start("Translating offsets to EliasFano...");
24✔
184
    // read the graph a write the offsets
185
    let mut offset = 0;
24✔
186
    for _node_id in 0..num_nodes + 1 {
3,906,708✔
187
        // write where
188
        offset += reader.read_gamma().context("Could not read gamma")?;
11,720,088✔
189
        efb.push(offset as _);
3,906,696✔
190
        // decode the next nodes so we know where the next node_id starts
191
        pl.light_update();
×
192
    }
193
    pl.done();
12✔
194
    Ok(())
×
195
}
196

197
pub fn build_eliasfano_from_graph_with_endianness<E: Endianness>(
×
198
    args: &CliArgs,
199
    pl: &mut impl ProgressLog,
200
    efb: &mut EliasFanoBuilder,
201
) -> Result<()>
202
where
203
    MmapHelper<u32>: CodesReaderFactoryHelper<E>,
204
    for<'a> LoadModeCodesReader<'a, E, Mmap>: BitSeek,
205
{
206
    let seq_graph = webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
×
207
        .endianness::<E>()
208
        .load()
209
        .with_context(|| format!("Could not load graph at {}", args.src.display()))?;
×
210
    // otherwise directly read the graph
211
    // progress bar
212
    pl.start("Building EliasFano...");
×
213
    // read the graph a write the offsets
214
    let mut iter = seq_graph.offset_deg_iter();
×
215
    for (new_offset, _degree) in iter.by_ref() {
×
216
        // write where
217
        efb.push(new_offset as _);
×
218
        // decode the next nodes so we know where the next node_id starts
219
        pl.light_update();
×
220
    }
221
    efb.push(iter.get_pos() as _);
×
222
    Ok(())
×
223
}
224

225
pub fn serialize_eliasfano(
12✔
226
    global_args: &GlobalArgs,
227
    args: &CliArgs,
228
    efb: EliasFanoBuilder,
229
    pl: &mut impl ProgressLog,
230
) -> Result<()> {
231
    let ef = efb.build();
36✔
232
    pl.done();
24✔
233

234
    let mut pl = ProgressLogger::default();
24✔
235
    pl.display_memory(true);
24✔
236
    if let Some(duration) = &global_args.log_interval {
12✔
237
        pl.log_interval(*duration);
×
238
    }
239
    pl.start("Building the Index over the ones in the high-bits...");
24✔
240
    let ef: EF = unsafe { ef.map_high_bits(SelectAdaptConst::<_, _, 12, 4>::new) };
48✔
241
    pl.done();
24✔
242

243
    let mut pl = ProgressLogger::default();
24✔
244
    pl.display_memory(true);
24✔
245
    if let Some(duration) = &global_args.log_interval {
12✔
246
        pl.log_interval(*duration);
×
247
    }
248
    pl.start("Writing to disk...");
24✔
249

250
    let ef_path = args.src.with_extension(EF_EXTENSION);
24✔
251
    info!("Creating Elias-Fano at '{}'", ef_path.display());
36✔
252
    let mut ef_file = BufWriter::new(
253
        File::create(&ef_path)
24✔
254
            .with_context(|| format!("Could not create {}", ef_path.display()))?,
12✔
255
    );
256

257
    // serialize and dump the schema to disk
258
    unsafe {
259
        ef.serialize(&mut ef_file)
×
UNCOV
260
            .with_context(|| format!("Could not serialize EliasFano to {}", ef_path.display()))
×
261
    }?;
262

263
    pl.done();
12✔
UNCOV
264
    Ok(())
×
265
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc