• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vigna / webgraph-rs / 25381911000

05 May 2026 02:17PM UTC coverage: 69.023% (+0.03%) from 68.996%
25381911000

push

github

vigna
WebGraph passes miri

7527 of 10905 relevant lines covered (69.02%)

49639982.83 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

83.85
/webgraph/src/graphs/bvgraph/load.rs
1
/*
2
 * SPDX-FileCopyrightText: 2023 Inria
3
 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
4
 *
5
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6
 */
7

8
use super::*;
9
use crate::prelude::*;
10
use anyhow::{Context, Result};
11
use dsi_bitstream::prelude::*;
12
use dsi_bitstream::{dispatch::code_consts, dispatch::factory::CodesReaderFactoryHelper};
13
use epserde::deser::Owned;
14
use epserde::prelude::*;
15
use sealed::sealed;
16
use std::{
17
    io::BufReader,
18
    path::{Path, PathBuf},
19
};
20

21
/// Marker trait for the access mode: sequential or random.
///
/// The trait is sealed and has no methods; in this file it is implemented
/// by [`Sequential`] and [`Random`].
22
#[doc(hidden)]
23
#[sealed]
24
pub trait Access: 'static {}
25

26
/// Marker type selecting sequential access (see [`Access`]).
#[derive(Debug, Clone)]
27
pub struct Sequential;
28
#[sealed]
29
impl Access for Sequential {}
30

31
/// Marker type selecting random access (see [`Access`]).
#[derive(Debug, Clone)]
32
pub struct Random;
33
#[sealed]
34
impl Access for Random {}
35

36
/// Marker trait for the code-dispatch strategy: [`Static`] or [`Dynamic`].
///
/// The trait is sealed and has no methods.
37
#[sealed]
38
pub trait Dispatch: 'static {}
39

40
/// Static dispatch.
41
///
42
/// You have to specify all the codes used by the graph, one const parameter
43
/// per component. The defaults are the same as the default parameters of the
/// Java version.
44
#[derive(Debug, Clone)]
45
pub struct Static<
46
    const OUTDEGREES: usize = { code_consts::GAMMA },
47
    const REFERENCES: usize = { code_consts::UNARY },
48
    const BLOCKS: usize = { code_consts::GAMMA },
49
    const INTERVALS: usize = { code_consts::GAMMA },
50
    const RESIDUALS: usize = { code_consts::ZETA3 },
51
>;
52

53
#[sealed]
54
impl<
55
    const OUTDEGREES: usize,
56
    const REFERENCES: usize,
57
    const BLOCKS: usize,
58
    const INTERVALS: usize,
59
    const RESIDUALS: usize,
60
> Dispatch for Static<OUTDEGREES, REFERENCES, BLOCKS, INTERVALS, RESIDUALS>
61
{
62
}
63

64
/// Dynamic dispatch.
65
///
66
/// The compression parameters are retrieved from the graph properties when
/// the graph is loaded, so this type has no compile-time parameters.
67
#[derive(Debug, Clone)]
68
pub struct Dynamic;
69

70
#[sealed]
71
impl Dispatch for Dynamic {}
72

73
/// Load mode.
74
///
75
/// The load mode is the way the graph data is accessed. Each load mode has
76
/// a corresponding strategy to access the graph and the offsets.
77
///
78
/// You can set both modes with [`LoadConf::mode`], or set them separately with
79
/// [`LoadConf::graph_mode`] and [`LoadConf::offsets_mode`].
80
#[sealed]
81
pub trait LoadMode: 'static {
82
    /// The factory type returned by [`new_factory`](LoadMode::new_factory)
    /// for a given endianness.
    type Factory<E: Endianness>;
83

84
    /// Creates the factory for the graph file at the path `graph`.
    ///
    /// Implementations that do not use memory flags ignore `flags`.
    fn new_factory<E: Endianness, P: AsRef<Path>>(
85
        graph: P,
86
        flags: codecs::MemoryFlags,
87
    ) -> Result<Self::Factory<E>>;
88

89
    /// The type of the offsets returned by
    /// [`load_offsets`](LoadMode::load_offsets).
    type Offsets: Offsets;
90

91
    /// Loads the offsets from the file at the path `offsets`.
    ///
    /// Implementations that do not use memory flags ignore `flags`.
    fn load_offsets<P: AsRef<Path>>(
92
        offsets: P,
93
        flags: MemoryFlags,
94
    ) -> Result<MemCase<Self::Offsets>>;
95
}
96

97
/// A type alias for a buffered bit reader that reads a `u32` at a time from a memory buffer.
98
pub type MemBufReader<'a, E> = BufBitReader<E, MemWordReader<u32, &'a [u32]>>;
99
/// A type alias for a buffered bit reader that reads a `u32` at a time from a buffered file.
100
pub type FileBufReader<E> = BufBitReader<E, WordAdapter<u32, BufReader<std::fs::File>>>;
101
/// A type alias for the [`CodesReaderFactory`] associated with a [`LoadMode`].
102
///
103
/// This type can be used in client methods that abstract over endianness to
104
/// impose the necessary trait bounds on the factory associated with the load
105
/// mode. For example, for the [`Mmap`] load mode it is sufficient to write:
106
/// ```ignore
107
/// LoadModeFactory<E, Mmap>: CodesReaderFactoryHelper<E>
108
/// ```
109
///
110
/// Additional trait bounds on the [`CodesRead`] associated with the factory
111
/// can be imposed by using the [`LoadModeCodesReader`] type alias.
112
pub type LoadModeFactory<E, LM> = <LM as LoadMode>::Factory<E>;
113
/// A type alias for the code reader returned by the [`CodesReaderFactory`]
114
/// associated with a [`LoadMode`].
115
///
116
/// This type can be used in client methods that abstract over endianness to
117
/// impose bounds on the code reader of the factory associated with the load
118
/// mode, usually in conjunction with [`LoadModeFactory`]. For example,
119
/// for the [`Mmap`] load mode:
120
/// ```ignore
121
/// LoadModeFactory<E, Mmap>: CodesReaderFactoryHelper<E>,
122
/// LoadModeCodesReader<'a, E, Mmap>: BitSeek,
123
/// ```
124
pub type LoadModeCodesReader<'a, E, LM> =
125
    <LoadModeFactory<E, LM> as CodesReaderFactory<E>>::CodesReader<'a>;
126

127
/// The graph is read from a file; offsets are fully deserialized in memory.
128
///
129
/// Note that you must guarantee that the graph file is padded with enough
130
/// zeroes so that it can be read one `u32` at a time.
131
#[derive(Debug, Clone)]
132
pub struct File;
133
#[sealed]
134
impl LoadMode for File {
135
    type Factory<E: Endianness> = FileFactory<E>;
136
    type Offsets = Owned<EF>;
137

138
    fn new_factory<E: Endianness, P: AsRef<Path>>(
1✔
139
        graph: P,
140
        _flags: MemoryFlags,
141
    ) -> Result<Self::Factory<E>> {
142
        FileFactory::<E>::new(graph)
2✔
143
    }
144

145
    fn load_offsets<P: AsRef<Path>>(
1✔
146
        offsets: P,
147
        _flags: MemoryFlags,
148
    ) -> Result<MemCase<Self::Offsets>> {
149
        let path = offsets.as_ref();
3✔
150

151
        unsafe {
152
            EF::load_full(path)
2✔
153
                .with_context(|| format!("Cannot load Elias–Fano pointer list {}", path.display()))
1✔
154
                .map(Into::into)
1✔
155
        }
156
    }
157
}
158

159
/// The graph and offsets are memory mapped.
160
///
161
/// This is the default mode. You can [set memory-mapping flags].
162
///
163
/// [set memory-mapping flags]: LoadConf::flags
164
#[derive(Debug, Clone)]
165
pub struct Mmap;
166
#[sealed]
167
impl LoadMode for Mmap {
168
    type Factory<E: Endianness> = MmapHelper<u32>;
169
    type Offsets = EF;
170

171
    fn new_factory<E: Endianness, P: AsRef<Path>>(
746,833✔
172
        graph: P,
173
        flags: MemoryFlags,
174
    ) -> Result<Self::Factory<E>> {
175
        MmapHelper::mmap(graph, flags.into())
2,987,332✔
176
    }
177

178
    fn load_offsets<P: AsRef<Path>>(
58✔
179
        offsets: P,
180
        flags: MemoryFlags,
181
    ) -> Result<MemCase<Self::Offsets>> {
182
        let path = offsets.as_ref();
174✔
183

184
        unsafe {
185
            EF::mmap(path, flags.into())
232✔
186
                .with_context(|| format!("Cannot map Elias–Fano pointer list {}", path.display()))
58✔
187
        }
188
    }
189
}
190

191
/// The graph and offsets are loaded into allocated memory.
192
#[derive(Debug, Clone)]
193
pub struct LoadMem;
194
#[sealed]
195
impl LoadMode for LoadMem {
196
    type Factory<E: Endianness> = MemoryFactory<E, Box<[u32]>>;
197
    type Offsets = EF;
198

199
    fn new_factory<E: Endianness, P: AsRef<Path>>(
66✔
200
        graph: P,
201
        _flags: MemoryFlags,
202
    ) -> Result<Self::Factory<E>> {
203
        MemoryFactory::<E, _>::new_mem(graph)
132✔
204
    }
205

206
    fn load_offsets<P: AsRef<Path>>(
7✔
207
        offsets: P,
208
        _flags: MemoryFlags,
209
    ) -> Result<MemCase<Self::Offsets>> {
210
        let path = offsets.as_ref();
21✔
211

212
        unsafe {
213
            EF::load_mem(path)
14✔
214
                .with_context(|| format!("Cannot load Elias–Fano pointer list {}", path.display()))
7✔
215
        }
216
    }
217
}
218

219
/// The graph and offsets are loaded into memory obtained via `mmap()`.
220
///
221
/// You can [set memory-mapping flags].
222
///
223
/// [set memory-mapping flags]: LoadConf::flags
224
#[derive(Debug, Clone)]
225
pub struct LoadMmap;
226
#[sealed]
227
impl LoadMode for LoadMmap {
228
    type Factory<E: Endianness> = MemoryFactory<E, MmapHelper<u32>>;
229
    type Offsets = EF;
230

231
    fn new_factory<E: Endianness, P: AsRef<Path>>(
12✔
232
        graph: P,
233
        flags: MemoryFlags,
234
    ) -> Result<Self::Factory<E>> {
235
        MemoryFactory::<E, _>::new_mmap(graph, flags)
36✔
236
    }
237

238
    fn load_offsets<P: AsRef<Path>>(
10✔
239
        offsets: P,
240
        flags: MemoryFlags,
241
    ) -> Result<MemCase<Self::Offsets>> {
242
        let path = offsets.as_ref();
30✔
243

244
        unsafe {
245
            EF::load_mmap(path, flags.into())
40✔
246
                .with_context(|| format!("Cannot load Elias–Fano pointer list {}", path.display()))
10✔
247
        }
248
    }
249
}
250

251
/// A load configuration for a [`BvGraph`]/[`BvGraphSeq`].
252
///
253
/// A basic configuration is returned by
254
/// [`BvGraph::with_basename`]/[`BvGraphSeq::with_basename`]. The configuration
255
/// can then be customized using the setter methods of this struct, chained in
256
/// builder style, and finalized by calling [`load`].
257
///
258
/// # Defaults
259
///
260
/// The default configuration returned by `with_basename` uses:
261
/// - big endianness ([`BE`]);
262
/// - [dynamic dispatch];
263
/// - [memory mapping] for both the graph and the offsets.
264
///
265
/// # Configuration Axes
266
///
267
/// ## Access Mode
268
///
269
/// - [`BvGraph::with_basename`] returns a configuration for **random access**,
270
///   which requires the Elias–Fano offsets file (`.ef`). The resulting graph
271
///   supports both random access and sequential iteration.
272
/// - [`BvGraphSeq::with_basename`] returns a configuration for **sequential
273
///   access**, which only needs the graph file (`.graph`). The resulting graph
274
///   supports only sequential iteration.
275
///
276
/// ## Endianness
277
///
278
/// - [`endianness`]: sets the endianness of the graph file. Use
279
///   `endianness::<BE>()` for big-endian (the default and the Java convention)
280
///   or `endianness::<LE>()` for little-endian.
281
///
282
/// ## Code Dispatch
283
///
284
/// - [`dispatch`]: chooses between:
285
///   - [`Dynamic`] (default): reads the codes from the properties file;
286
///     slightly slower due to indirect dispatch, but works with any graph.
287
///   - [`Static`]: the codes are fixed at compile time via const generics,
288
///     enabling more aggressive optimization. The defaults match the Java
289
///     defaults (γ for outdegrees, unary for references, γ for blocks, γ for
290
///     intervals, ζ₃ for residuals). If your graph uses non-default codes,
291
///     you must specify them explicitly.
292
///
293
/// ## Load Mode
294
///
295
/// Controls how the graph bitstream and the offsets are accessed.
296
///
297
/// - [`mode`]: sets the load mode for **both** the graph and the offsets. You
298
///   can also set them independently:
299
///   - [`graph_mode`]: sets the mode for the graph only;
300
///   - [`offsets_mode`]: sets the mode for the offsets only (random access
301
///     only).
302
///
303
/// The available modes are:
304
///
305
/// - [`Mmap`] (default): memory maps the file. This is the most
306
///   memory-efficient mode, as the OS manages paging. It is the recommended
307
///   mode for large graphs.
308
/// - [`LoadMem`]: reads the file into allocated memory.
309
/// - [`LoadMmap`]: reads the file into memory obtained via `mmap`, rather than
310
///   the standard allocator.
311
/// - [`File`]: reads the graph from a file stream. The offsets are fully
312
///   deserialized in memory using [ε-serde]'s [`load_full`]. Note that the
313
///   graph file must be padded correctly for this mode.
314
///
315
/// ## Memory flags
316
///
317
/// When using [`Mmap`] or [`LoadMmap`], you can set [`MemoryFlags`] to
318
/// request transparent huge pages, etc.:
319
///
320
/// - [`flags`]: sets flags for both the graph and offsets.
321
/// - [`graph_flags`]: sets flags for the graph only.
322
/// - [`offsets_flags`]: sets flags for the offsets only (random access only).
323
///
324
/// # Examples
325
///
326
/// Load with all defaults (big-endian, dynamic dispatch, memory-mapped):
327
/// ```ignore
328
/// let graph = BvGraph::with_basename("BASENAME").load()?;
329
/// ```
330
///
331
/// Load a little-endian graph:
332
/// ```ignore
333
/// let graph = BvGraph::with_basename("BASENAME")
334
///     .endianness::<LE>()
335
///     .load()?;
336
/// ```
337
///
338
/// Load with static dispatch (using default codes):
339
/// ```ignore
340
/// let graph = BvGraph::with_basename("BASENAME")
341
///     .dispatch::<Static>()
342
///     .load()?;
343
/// ```
344
///
345
/// Load into memory rather than memory-mapping:
346
/// ```ignore
347
/// let graph = BvGraph::with_basename("BASENAME")
348
///     .mode::<LoadMem>()
349
///     .load()?;
350
/// ```
351
///
352
/// Load a sequential-access graph (no `.ef` file needed):
353
/// ```ignore
354
/// let graph = BvGraphSeq::with_basename("BASENAME").load()?;
355
/// ```
356
///
357
/// Combine options:
358
/// ```ignore
359
/// let graph = BvGraph::with_basename("BASENAME")
360
///     .endianness::<LE>()
361
///     .dispatch::<Static>()
362
///     .mode::<LoadMem>()
363
///     .load()?;
364
/// ```
365
///
366
/// [`load`]: LoadConf::load
367
/// [dynamic dispatch]: `Dynamic`
368
/// [memory mapping]: `Mmap`
369
/// [`endianness`]: LoadConf::endianness
370
/// [`dispatch`]: LoadConf::dispatch
371
/// [`mode`]: LoadConf::mode
372
/// [`graph_mode`]: LoadConf::graph_mode
373
/// [`offsets_mode`]: LoadConf::offsets_mode
374
/// [`load_full`]: epserde::deser::Deserialize::load_full
375
/// [`flags`]: LoadConf::flags
376
/// [`graph_flags`]: LoadConf::graph_flags
377
/// [`offsets_flags`]: LoadConf::offsets_flags
378
/// [ε-serde]: <https://docs.rs/epserde/latest/epserde/>
379
#[derive(Debug, Clone)]
380
pub struct LoadConf<E: Endianness, A: Access, D: Dispatch, GLM: LoadMode, OLM: LoadMode> {
381
    /// Basename of the graph; the `load` methods derive the paths of the
    /// properties, graph, and offsets files from it by setting the extension.
    pub(crate) basename: PathBuf,
382
    /// Flags passed to [`LoadMode::new_factory`] when loading the graph.
    pub(crate) graph_load_flags: MemoryFlags,
383
    /// Flags passed to [`LoadMode::load_offsets`] when loading the offsets.
    pub(crate) offsets_load_flags: MemoryFlags,
384
    /// Carries the type-level configuration; holds no data.
    pub(crate) _marker: std::marker::PhantomData<(E, A, D, GLM, OLM)>,
385
}
386

387
impl<E: Endianness, A: Access, D: Dispatch, GLM: LoadMode, OLM: LoadMode>
388
    LoadConf<E, A, D, GLM, OLM>
389
{
390
    /// Sets the endianness of the graph and offsets file.
391
    pub fn endianness<E2: Endianness>(self) -> LoadConf<E2, A, D, GLM, OLM>
746,915✔
392
    where
393
        GLM: LoadMode,
394
        OLM: LoadMode,
395
    {
396
        LoadConf {
397
            basename: self.basename,
1,493,830✔
398
            graph_load_flags: self.graph_load_flags,
1,493,830✔
399
            offsets_load_flags: self.offsets_load_flags,
746,915✔
400
            _marker: std::marker::PhantomData,
401
        }
402
    }
403
}
404

405
impl<E: Endianness, A: Access, D: Dispatch, GLM: LoadMode, OLM: LoadMode>
406
    LoadConf<E, A, D, GLM, OLM>
407
{
408
    /// Choose between [`Static`] and [`Dynamic`] dispatch.
409
    pub fn dispatch<D2: Dispatch>(self) -> LoadConf<E, A, D2, GLM, OLM> {
9✔
410
        LoadConf {
411
            basename: self.basename,
18✔
412
            graph_load_flags: self.graph_load_flags,
18✔
413
            offsets_load_flags: self.offsets_load_flags,
9✔
414
            _marker: std::marker::PhantomData,
415
        }
416
    }
417
}
418

419
impl<E: Endianness, A: Access, D: Dispatch, GLM: LoadMode, OLM: LoadMode>
420
    LoadConf<E, A, D, GLM, OLM>
421
{
422
    /// Choose the [`LoadMode`] for the graph and offsets.
423
    pub fn mode<LM: LoadMode>(self) -> LoadConf<E, A, D, LM, LM> {
79✔
424
        LoadConf {
425
            basename: self.basename,
158✔
426
            graph_load_flags: self.graph_load_flags,
158✔
427
            offsets_load_flags: self.offsets_load_flags,
79✔
428
            _marker: std::marker::PhantomData,
429
        }
430
    }
431
}
432

433
impl<E: Endianness, A: Access, D: Dispatch> LoadConf<E, A, D, Mmap, Mmap> {
434
    /// Sets flags for memory-mapping (both graph and offsets).
435
    pub fn flags(self, flags: MemoryFlags) -> LoadConf<E, A, D, Mmap, Mmap> {
×
436
        LoadConf {
437
            basename: self.basename,
×
438
            graph_load_flags: flags,
439
            offsets_load_flags: flags,
440
            _marker: std::marker::PhantomData,
441
        }
442
    }
443
}
444

445
impl<E: Endianness, A: Access, D: Dispatch> LoadConf<E, A, D, LoadMmap, LoadMmap> {
446
    /// Sets flags for memory obtained from `mmap()` (both graph and offsets).
447
    pub fn flags(self, flags: MemoryFlags) -> LoadConf<E, A, D, LoadMmap, LoadMmap> {
10✔
448
        LoadConf {
449
            basename: self.basename,
20✔
450
            graph_load_flags: flags,
451
            offsets_load_flags: flags,
452
            _marker: std::marker::PhantomData,
453
        }
454
    }
455
}
456

457
impl<E: Endianness, A: Access, D: Dispatch, GLM: LoadMode, OLM: LoadMode>
458
    LoadConf<E, A, D, GLM, OLM>
459
{
460
    /// Choose the [`LoadMode`] for the graph only.
461
    pub fn graph_mode<NGLM: LoadMode>(self) -> LoadConf<E, A, D, NGLM, OLM> {
×
462
        LoadConf {
463
            basename: self.basename,
×
464
            graph_load_flags: self.graph_load_flags,
×
465
            offsets_load_flags: self.offsets_load_flags,
×
466
            _marker: std::marker::PhantomData,
467
        }
468
    }
469
}
470

471
impl<E: Endianness, A: Access, D: Dispatch, OLM: LoadMode> LoadConf<E, A, D, Mmap, OLM> {
472
    /// Sets flags for memory-mapping the graph.
473
    pub fn graph_flags(self, flags: MemoryFlags) -> LoadConf<E, A, D, Mmap, OLM> {
×
474
        LoadConf {
475
            basename: self.basename,
×
476
            graph_load_flags: flags,
477
            offsets_load_flags: self.offsets_load_flags,
×
478
            _marker: std::marker::PhantomData,
479
        }
480
    }
481
}
482

483
impl<E: Endianness, A: Access, D: Dispatch, OLM: LoadMode> LoadConf<E, A, D, LoadMmap, OLM> {
484
    /// Sets flags for memory obtained from `mmap()` for the graph.
485
    pub fn graph_flags(self, flags: MemoryFlags) -> LoadConf<E, A, D, LoadMmap, OLM> {
×
486
        LoadConf {
487
            basename: self.basename,
×
488
            graph_load_flags: flags,
489
            offsets_load_flags: self.offsets_load_flags,
×
490
            _marker: std::marker::PhantomData,
491
        }
492
    }
493
}
494

495
impl<E: Endianness, D: Dispatch, GLM: LoadMode, OLM: LoadMode> LoadConf<E, Random, D, GLM, OLM> {
496
    /// Choose the [`LoadMode`] for the offsets only.
497
    pub fn offsets_mode<NOLM: LoadMode>(self) -> LoadConf<E, Random, D, GLM, NOLM> {
×
498
        LoadConf {
499
            basename: self.basename,
×
500
            graph_load_flags: self.graph_load_flags,
×
501
            offsets_load_flags: self.offsets_load_flags,
×
502
            _marker: std::marker::PhantomData,
503
        }
504
    }
505
}
506

507
impl<E: Endianness, D: Dispatch, GLM: LoadMode> LoadConf<E, Random, D, GLM, Mmap> {
508
    /// Sets flags for memory-mapping the offsets.
509
    pub fn offsets_flags(self, flags: MemoryFlags) -> LoadConf<E, Random, D, GLM, Mmap> {
×
510
        LoadConf {
511
            basename: self.basename,
×
512
            graph_load_flags: self.graph_load_flags,
×
513
            offsets_load_flags: flags,
514
            _marker: std::marker::PhantomData,
515
        }
516
    }
517
}
518

519
impl<E: Endianness, D: Dispatch, GLM: LoadMode> LoadConf<E, Random, D, GLM, LoadMmap> {
520
    /// Sets flags for memory obtained from `mmap()` for the offsets.
521
    pub fn offsets_flags(self, flags: MemoryFlags) -> LoadConf<E, Random, D, GLM, LoadMmap> {
×
522
        LoadConf {
523
            basename: self.basename,
×
524
            graph_load_flags: self.graph_load_flags,
×
525
            offsets_load_flags: flags,
526
            _marker: std::marker::PhantomData,
527
        }
528
    }
529
}
530

531
impl<E: Endianness, GLM: LoadMode, OLM: LoadMode> LoadConf<E, Random, Dynamic, GLM, OLM> {
532
    /// Loads a random-access graph with dynamic dispatch.
533
    pub fn load(
75✔
534
        mut self,
535
    ) -> anyhow::Result<BvGraph<DynCodesDecoderFactory<E, GLM::Factory<E>, OLM::Offsets>>>
536
    where
537
        <GLM as LoadMode>::Factory<E>: CodesReaderFactoryHelper<E>,
538
        for<'a> LoadModeCodesReader<'a, E, GLM>: CodesRead<E> + BitSeek,
539
    {
540
        warn_if_ef_stale(&self.basename);
150✔
541
        self.basename.set_extension(PROPERTIES_EXTENSION);
150✔
542
        let (num_nodes, num_arcs, comp_flags) = parse_properties::<E>(&self.basename)
375✔
543
            .with_context(|| {
75✔
544
                format!("Could not load properties file {}", self.basename.display())
×
545
            })?;
546
        self.basename.set_extension(GRAPH_EXTENSION);
150✔
547
        let factory = GLM::new_factory(&self.basename, self.graph_load_flags)
300✔
548
            .with_context(|| format!("Could not load graph file {}", self.basename.display()))?;
75✔
549
        self.basename.set_extension(EF_EXTENSION);
150✔
550
        let offsets = OLM::load_offsets(&self.basename, self.offsets_load_flags)
300✔
551
            .with_context(|| format!("Could not load offsets file {}", self.basename.display()))?;
75✔
552

553
        Ok(BvGraph::new(
75✔
554
            DynCodesDecoderFactory::new(factory, offsets, comp_flags)?,
300✔
555
            num_nodes,
75✔
556
            num_arcs,
75✔
557
            comp_flags.compression_window,
75✔
558
            comp_flags.min_interval_length,
75✔
559
        ))
560
    }
561
}
562

563
impl<E: Endianness, GLM: LoadMode, OLM: LoadMode> LoadConf<E, Sequential, Dynamic, GLM, OLM> {
564
    /// Loads a sequential graph with dynamic dispatch.
565
    pub fn load(
746,688✔
566
        mut self,
567
    ) -> anyhow::Result<
568
        BvGraphSeq<DynCodesDecoderFactory<E, GLM::Factory<E>, Owned<EmptyDict<u64, u64>>>>,
569
    >
570
    where
571
        <GLM as LoadMode>::Factory<E>: CodesReaderFactoryHelper<E>,
572
        for<'a> LoadModeCodesReader<'a, E, GLM>: CodesRead<E>,
573
    {
574
        self.basename.set_extension(PROPERTIES_EXTENSION);
1,493,376✔
575
        let (num_nodes, num_arcs, comp_flags) = parse_properties::<E>(&self.basename)
3,733,440✔
576
            .with_context(|| {
746,688✔
577
                format!("Could not load properties file {}", self.basename.display())
×
578
            })?;
579
        self.basename.set_extension(GRAPH_EXTENSION);
1,493,376✔
580
        let factory = GLM::new_factory(&self.basename, self.graph_load_flags)
2,986,752✔
581
            .with_context(|| format!("Could not load graph file {}", self.basename.display()))?;
746,688✔
582

583
        Ok(BvGraphSeq::new(
746,688✔
584
            DynCodesDecoderFactory::new(factory, EmptyDict::default().into(), comp_flags)?,
3,733,440✔
585
            num_nodes,
746,688✔
586
            Some(num_arcs),
746,688✔
587
            comp_flags.compression_window,
746,688✔
588
            comp_flags.min_interval_length,
746,688✔
589
        ))
590
    }
591
}
592

593
impl<
594
    E: Endianness,
595
    GLM: LoadMode,
596
    OLM: LoadMode,
597
    const OUTDEGREES: usize,
598
    const REFERENCES: usize,
599
    const BLOCKS: usize,
600
    const INTERVALS: usize,
601
    const RESIDUALS: usize,
602
> LoadConf<E, Random, Static<OUTDEGREES, REFERENCES, BLOCKS, INTERVALS, RESIDUALS>, GLM, OLM>
603
{
604
    /// Loads a random-access graph with static dispatch.
605
    pub fn load(
1✔
606
        mut self,
607
    ) -> anyhow::Result<
608
        BvGraph<
609
            ConstCodesDecoderFactory<
610
                E,
611
                GLM::Factory<E>,
612
                OLM::Offsets,
613
                OUTDEGREES,
614
                REFERENCES,
615
                BLOCKS,
616
                INTERVALS,
617
                RESIDUALS,
618
            >,
619
        >,
620
    >
621
    where
622
        <GLM as LoadMode>::Factory<E>: CodesReaderFactoryHelper<E>,
623
        for<'a> LoadModeCodesReader<'a, E, GLM>: CodesRead<E> + BitSeek,
624
    {
625
        warn_if_ef_stale(&self.basename);
2✔
626
        self.basename.set_extension(PROPERTIES_EXTENSION);
2✔
627
        let (num_nodes, num_arcs, comp_flags) = parse_properties::<E>(&self.basename)
5✔
628
            .with_context(|| {
1✔
629
                format!("Could not load properties file {}", self.basename.display())
×
630
            })?;
631
        self.basename.set_extension(GRAPH_EXTENSION);
2✔
632
        let factory = GLM::new_factory(&self.basename, self.graph_load_flags)
4✔
633
            .with_context(|| format!("Could not load graph file {}", self.basename.display()))?;
1✔
634
        self.basename.set_extension(EF_EXTENSION);
2✔
635
        let offsets = OLM::load_offsets(&self.basename, self.offsets_load_flags)
4✔
636
            .with_context(|| format!("Could not load offsets file {}", self.basename.display()))?;
1✔
637

638
        Ok(BvGraph::new(
1✔
639
            ConstCodesDecoderFactory::new(factory, offsets, comp_flags)?,
4✔
640
            num_nodes,
1✔
641
            num_arcs,
1✔
642
            comp_flags.compression_window,
1✔
643
            comp_flags.min_interval_length,
1✔
644
        ))
645
    }
646
}
647

648
impl<
649
    E: Endianness,
650
    GLM: LoadMode,
651
    OLM: LoadMode,
652
    const OUTDEGREES: usize,
653
    const REFERENCES: usize,
654
    const BLOCKS: usize,
655
    const INTERVALS: usize,
656
    const RESIDUALS: usize,
657
> LoadConf<E, Sequential, Static<OUTDEGREES, REFERENCES, BLOCKS, INTERVALS, RESIDUALS>, GLM, OLM>
658
{
659
    /// Loads a sequential graph with static dispatch.
660
    pub fn load(
8✔
661
        mut self,
662
    ) -> anyhow::Result<
663
        BvGraphSeq<
664
            ConstCodesDecoderFactory<
665
                E,
666
                GLM::Factory<E>,
667
                Owned<EmptyDict<u64, u64>>,
668
                OUTDEGREES,
669
                REFERENCES,
670
                BLOCKS,
671
                INTERVALS,
672
                RESIDUALS,
673
            >,
674
        >,
675
    >
676
    where
677
        <GLM as LoadMode>::Factory<E>: CodesReaderFactoryHelper<E>,
678
        for<'a> LoadModeCodesReader<'a, E, GLM>: CodesRead<E>,
679
    {
680
        self.basename.set_extension(PROPERTIES_EXTENSION);
16✔
681
        let (num_nodes, num_arcs, comp_flags) = parse_properties::<E>(&self.basename)
40✔
682
            .with_context(|| {
8✔
683
                format!("Could not load properties file {}", self.basename.display())
×
684
            })?;
685
        self.basename.set_extension(GRAPH_EXTENSION);
16✔
686
        let factory = GLM::new_factory(&self.basename, self.graph_load_flags)
32✔
687
            .with_context(|| format!("Could not load graph file {}", self.basename.display()))?;
8✔
688

689
        Ok(BvGraphSeq::new(
8✔
690
            ConstCodesDecoderFactory::new(factory, EmptyDict::default().into(), comp_flags)?,
40✔
691
            num_nodes,
8✔
692
            Some(num_arcs),
8✔
693
            comp_flags.compression_window,
8✔
694
            comp_flags.min_interval_length,
8✔
695
        ))
696
    }
697
}
698

699
/// Checks if the `.ef` file is older than the .graph file and log a warning if so.
700
///
701
/// This is important because if the graph has been recompressed, the `.ef` file
702
/// will be stale and needs to be rebuilt. This is a very common scenario, in
703
/// particular when testing compression techniques.
704
fn warn_if_ef_stale(basename: &Path) {
240✔
705
    if std::env::var_os("DO_NOT_CHECK_MOD_TIMES").is_some() {
480✔
706
        return;
×
707
    }
708
    let graph_path = basename.with_extension(GRAPH_EXTENSION);
720✔
709
    let ef_path = basename.with_extension(EF_EXTENSION);
720✔
710

711
    let graph_modified = match std::fs::metadata(&graph_path).and_then(|m| m.modified()) {
1,440✔
712
        Ok(t) => t,
480✔
713
        Err(_) => return, // Can't check, skip warning
×
714
    };
715

716
    let ef_modified = match std::fs::metadata(&ef_path).and_then(|m| m.modified()) {
1,440✔
717
        Ok(t) => t,
480✔
718
        Err(_) => return, // Can't check, skip warning
×
719
    };
720

721
    if ef_modified < graph_modified {
240✔
722
        log::warn!(
170✔
723
            "The Elias–Fano file {} is older than the graph file {}; \
724
             this may indicate that the graph has been modified and the .ef file is stale. \
725
             Consider rebuilding it with \"webgraph build ef {}\", just touch it if this warning is spurious, \
726
             or set the environment variable DO_NOT_CHECK_MOD_TIMES to disable this check.",
727
            ef_path.display(),
5✔
728
            graph_path.display(),
5✔
729
            basename.display()
10✔
730
        );
731
    }
732
}
733

734
/// Reads the `.properties` file and returns the endianness.
735
pub fn get_endianness<P: AsRef<Path>>(basename: P) -> Result<String> {
57✔
736
    let path = basename.as_ref().with_extension(PROPERTIES_EXTENSION);
171✔
737
    let f = std::fs::File::open(&path)
171✔
738
        .with_context(|| format!("Cannot open property file {}", path.display()))?;
57✔
739
    let map = java_properties::read(BufReader::new(f))
228✔
740
        .with_context(|| format!("cannot parse {} as a java properties file", path.display()))?;
57✔
741

742
    let endianness = map
114✔
743
        .get("endianness")
744
        .map(|x| x.to_string())
121✔
745
        .unwrap_or_else(|| BigEndian::NAME.to_string());
107✔
746

747
    Ok(endianness)
57✔
748
}
749

750
/// Metadata read from a label `.properties` file, as returned by
/// [`parse_label_properties`].
751
#[derive(Debug, Clone)]
752
pub struct LabelProperties {
753
    /// Number of nodes in the graph (the `nodes` property).
754
    pub num_nodes: usize,
755
    /// Number of arcs (labels) in the graph (the `arcs` property).
756
    pub num_arcs: u64,
757
    /// Stable name of the serializer that produced the labels (the
    /// `serializer` property).
758
    pub serializer: String,
759
}
760

761
/// Reads the label `.properties` file for the given label basename and
762
/// returns the metadata. Checks that the endianness matches `E`.
763
pub fn parse_label_properties<E: Endianness>(
7✔
764
    label_basename: impl AsRef<Path>,
765
) -> Result<LabelProperties> {
766
    let path = label_basename.as_ref().with_extension(PROPERTIES_EXTENSION);
21✔
767
    let name = path.display();
14✔
768
    let f = std::fs::File::open(&path)
21✔
769
        .with_context(|| format!("Cannot open label properties {name}"))?;
7✔
770
    let map = java_properties::read(BufReader::new(f))
28✔
771
        .with_context(|| format!("Cannot parse {name} as a properties file"))?;
7✔
772

773
    let endianness = map
14✔
774
        .get("endianness")
775
        .map(|x| x.to_string())
21✔
776
        .unwrap_or_else(|| BigEndian::NAME.to_string());
7✔
777
    anyhow::ensure!(
7✔
778
        endianness == E::NAME,
7✔
779
        "Label endianness mismatch in {name}: found {endianness}, expected {}",
×
780
        E::NAME
×
781
    );
782

783
    let num_nodes = map
14✔
784
        .get("nodes")
785
        .with_context(|| format!("Missing 'nodes' property in {name}"))?
7✔
786
        .parse::<usize>()
787
        .with_context(|| format!("Cannot parse 'nodes' as usize in {name}"))?;
7✔
788
    let num_arcs = map
14✔
789
        .get("arcs")
790
        .with_context(|| format!("Missing 'arcs' property in {name}"))?
7✔
791
        .parse::<u64>()
792
        .with_context(|| format!("Cannot parse 'arcs' as u64 in {name}"))?;
7✔
793
    let serializer = map
14✔
794
        .get("serializer")
795
        .with_context(|| format!("Missing 'serializer' property in {name}"))?
7✔
796
        .to_string();
797

798
    Ok(LabelProperties {
7✔
799
        num_nodes,
14✔
800
        num_arcs,
7✔
801
        serializer,
7✔
802
    })
803
}
804

805
/// Reads the `.properties` file and returns the number of nodes, number of arcs, and compression
806
/// flags for the graph. The endianness is checked against the expected one.
807
pub fn parse_properties<E: Endianness>(path: impl AsRef<Path>) -> Result<(usize, u64, CompFlags)> {
747,173✔
808
    let name = path.as_ref().display();
2,241,519✔
809
    let f =
747,173✔
810
        std::fs::File::open(&path).with_context(|| format!("Cannot open property file {name}"))?;
2,241,519✔
811
    let map = java_properties::read(BufReader::new(f))
2,988,692✔
812
        .with_context(|| format!("cannot parse {name} as a java properties file"))?;
747,173✔
813

814
    let num_nodes = map
1,494,346✔
815
        .get("nodes")
816
        .with_context(|| format!("Missing 'nodes' property in {name}"))?
747,173✔
817
        .parse::<usize>()
818
        .with_context(|| format!("Cannot parse 'nodes' as usize in {name}"))?;
747,173✔
819
    let num_arcs = map
1,494,346✔
820
        .get("arcs")
821
        .with_context(|| format!("Missing 'arcs' property in {name}"))?
747,173✔
822
        .parse::<u64>()
823
        .with_context(|| format!("Cannot parse arcs as u64 in {name}"))?;
747,173✔
824

825
    let comp_flags = CompFlags::from_properties::<E>(&map)
2,241,519✔
826
        .with_context(|| format!("Cannot parse compression flags from {name}"))?;
747,173✔
827
    Ok((num_nodes, num_arcs, comp_flags))
1,494,346✔
828
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc