• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vigna / webgraph-rs / 18008720487

25 Sep 2025 01:16PM UTC coverage: 49.589% (-0.4%) from 49.949%
18008720487

push

github

vigna
Fixed fuzzing code for new epserde

0 of 2 new or added lines in 1 file covered. (0.0%)

650 existing lines in 25 files now uncovered.

3862 of 7788 relevant lines covered (49.59%)

25127316.85 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

34.83
/cli/src/lib.rs
1
/*
2
 * SPDX-FileCopyrightText: 2023 Inria
3
 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
4
 * SPDX-FileCopyrightText: 2025 Sebastiano Vigna
5
 *
6
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
7
 */
8

9
//! Command-line interface structs, functions, and methods.
10
//!
11
//! Each module corresponds to a group of commands, and each command is
12
//! implemented as a submodule.
13

14
use anyhow::{anyhow, bail, ensure, Context, Result};
15
use clap::{Args, CommandFactory, Parser, Subcommand, ValueEnum};
16
use common_traits::{ToBytes, UnsignedInt};
17
use dsi_bitstream::dispatch::Codes;
18
use epserde::ser::Serialize;
19
use jiff::fmt::friendly::{Designator, Spacing, SpanPrinter};
20
use jiff::SpanRound;
21
use std::io::{BufWriter, Write};
22
use std::path::{Path, PathBuf};
23
use std::time::Duration;
24
use std::time::SystemTime;
25
use sux::bits::BitFieldVec;
26
use sysinfo::System;
27
use webgraph::prelude::CompFlags;
28
use webgraph::utils::Granularity;
29

30
#[cfg(not(any(feature = "le_bins", feature = "be_bins")))]
31
compile_error!("At least one of the features `le_bins` or `be_bins` must be enabled.");
32

33
pub mod build_info {
34
    include!(concat!(env!("OUT_DIR"), "/built.rs"));
35

36
    pub fn version_string() -> String {
76✔
37
        format!(
76✔
38
            "{}
39
git info: {} {} {}
40
build info: built on {} for {} with {}",
41
            PKG_VERSION,
76✔
42
            GIT_VERSION.unwrap_or(""),
228✔
43
            GIT_COMMIT_HASH.unwrap_or(""),
228✔
44
            match GIT_DIRTY {
76✔
45
                None => "",
76✔
46
                Some(true) => "(dirty)",
×
47
                Some(false) => "(clean)",
×
48
            },
49
            BUILD_DATE,
50
            TARGET,
76✔
51
            RUSTC_VERSION
76✔
52
        )
53
    }
54
}
55

56
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
57
/// Enum for instantaneous codes.
58
///
59
/// It is used to implement [`ValueEnum`] here instead of in [`dsi_bitstream`].
60
pub enum PrivCode {
61
    Unary,
62
    Gamma,
63
    Delta,
64
    Zeta1,
65
    Zeta2,
66
    Zeta3,
67
    Zeta4,
68
    Zeta5,
69
    Zeta6,
70
    Zeta7,
71
}
72

73
impl From<PrivCode> for Codes {
74
    fn from(value: PrivCode) -> Self {
40✔
75
        match value {
40✔
76
            PrivCode::Unary => Codes::Unary,
8✔
77
            PrivCode::Gamma => Codes::Gamma,
24✔
78
            PrivCode::Delta => Codes::Delta,
×
79
            PrivCode::Zeta1 => Codes::Zeta { k: 1 },
80
            PrivCode::Zeta2 => Codes::Zeta { k: 2 },
81
            PrivCode::Zeta3 => Codes::Zeta { k: 3 },
82
            PrivCode::Zeta4 => Codes::Zeta { k: 4 },
83
            PrivCode::Zeta5 => Codes::Zeta { k: 5 },
84
            PrivCode::Zeta6 => Codes::Zeta { k: 6 },
85
            PrivCode::Zeta7 => Codes::Zeta { k: 7 },
86
        }
87
    }
88
}
89

90
#[derive(Args, Debug)]
91
/// Shared CLI arguments for reading files containing arcs.
92
pub struct ArcsArgs {
93
    #[arg(long, default_value_t = '#')]
94
    /// Ignore lines that start with this symbol.
95
    pub line_comment_symbol: char,
96

97
    #[arg(long, default_value_t = 0)]
98
    /// How many lines to skip, ignoring comment lines.
99
    pub lines_to_skip: usize,
100

101
    #[arg(long)]
102
    /// How many lines to parse, after skipping the first lines_to_skip and
103
    /// ignoring comment lines.
104
    pub max_arcs: Option<usize>,
105

106
    #[arg(long, default_value_t = '\t')]
107
    /// The column separator.
108
    pub separator: char,
109

110
    #[arg(long, default_value_t = 0)]
111
    /// The index of the column containing the source node of an arc.
112
    pub source_column: usize,
113

114
    #[arg(long, default_value_t = 1)]
115
    /// The index of the column containing the target node of an arc.
116
    pub target_column: usize,
117

118
    #[arg(long, default_value_t = false)]
119
    /// Source and destinations are node identifiers.
120
    pub exact: bool,
121
}
122

123
/// Parses the number of threads from a string.
124
///
125
/// This function is meant to be used with `#[arg(...,  value_parser =
126
/// num_threads_parser)]`.
127
pub fn num_threads_parser(arg: &str) -> Result<usize> {
12✔
128
    let num_threads = arg.parse::<usize>()?;
36✔
129
    ensure!(num_threads > 0, "Number of threads must be greater than 0");
×
130
    Ok(num_threads)
12✔
131
}
132

133
/// Shared CLI arguments for commands that specify a number of threads.
134
#[derive(Args, Debug)]
135
pub struct NumThreadsArg {
136
    #[arg(short = 'j', long, default_value_t = rayon::current_num_threads().max(1), value_parser = num_threads_parser)]
137
    /// The number of threads to use.
138
    pub num_threads: usize,
139
}
140

141
/// Shared CLI arguments for commands that specify a granularity.
142
#[derive(Args, Debug)]
143
pub struct GranularityArgs {
144
    #[arg(long, conflicts_with("node_granularity"))]
145
    /// The tentative number of arcs used to define the size of a parallel job
146
    /// (advanced option).
147
    pub arc_granularity: Option<u64>,
148

149
    #[arg(long, conflicts_with("arc_granularity"))]
150
    /// The tentative number of nodes used to define the size of a parallel job
151
    /// (advanced option).
152
    pub node_granularity: Option<usize>,
153
}
154

155
impl GranularityArgs {
156
    pub fn into_granularity(&self) -> Granularity {
4✔
157
        match (self.arc_granularity, self.node_granularity) {
8✔
158
            (Some(_), Some(_)) => unreachable!(),
159
            (Some(arc_granularity), None) => Granularity::Arcs(arc_granularity),
×
160
            (None, Some(node_granularity)) => Granularity::Nodes(node_granularity),
×
161
            (None, None) => Granularity::default(),
4✔
162
        }
163
    }
164
}
165

166
/// Shared CLI arguments for commands that specify a batch size.
167
#[derive(Args, Debug)]
168
pub struct BatchSizeArg {
169
    #[clap(short = 'b', long, value_parser = batch_size, default_value = "50%")]
170
    /// The number of pairs to be used in batches. Two times this number of
171
    /// `usize` will be allocated to sort pairs. You can use the SI and NIST
172
    /// multipliers k, M, G, T, P, ki, Mi, Gi, Ti, and Pi. You can also use a
173
    /// percentage of the available memory by appending a `%` to the number.
174
    pub batch_size: usize,
175
}
176

177
#[derive(Debug, Clone, Copy, ValueEnum)]
178
/// How to store vectors of floats.
179
pub enum FloatVectorFormat {
180
    /// Java-compatible format: a sequence of big-endian floats (32 or 64 bits).
181
    Java,
182
    /// A slice of floats (32 or 64 bits) serialized using ε-serde.
183
    Epserde,
184
    /// ASCII format, one float per line.
185
    Ascii,
186
    /// A JSON Array.
187
    Json,
188
}
189

190
impl FloatVectorFormat {
191
    /// Stores float values in the specified `path` using the format defined by
192
    /// `self`.
193
    ///
194
    /// If the result is a textual format, i.e., ASCII or JSON, `precision`
195
    /// will be used to truncate the float values to the specified number of
196
    /// decimal digits.
197
    pub fn store<F>(
×
198
        &self,
199
        path: impl AsRef<Path>,
200
        values: &[F],
201
        precision: Option<usize>,
202
    ) -> Result<()>
203
    where
204
        F: ToBytes + core::fmt::Display + epserde::ser::Serialize + Copy,
205
        for<'a> &'a [F]: epserde::ser::Serialize,
206
    {
207
        let precision = precision.unwrap_or(f64::DIGITS as usize);
×
208
        create_parent_dir(&path)?;
×
209
        let path_display = path.as_ref().display();
×
210
        let mut file = std::fs::File::create(&path)
×
211
            .with_context(|| format!("Could not create vector at {}", path_display))?;
×
212

213
        match self {
×
214
            FloatVectorFormat::Epserde => {
×
215
                log::info!("Storing in ε-serde format at {}", path_display);
×
216
                unsafe {
217
                    values
×
218
                        .serialize(&mut file)
×
UNCOV
219
                        .with_context(|| format!("Could not write vector to {}", path_display))
×
220
                }?;
221
            }
222
            FloatVectorFormat::Java => {
×
223
                log::info!("Storing in Java format at {}", path_display);
×
224
                for word in values.iter() {
×
UNCOV
225
                    file.write_all(word.to_be_bytes().as_ref())
×
UNCOV
226
                        .with_context(|| format!("Could not write vector to {}", path_display))?;
×
227
                }
228
            }
229
            FloatVectorFormat::Ascii => {
×
230
                log::info!("Storing in ASCII format at {}", path_display);
×
231
                for word in values.iter() {
×
UNCOV
232
                    writeln!(file, "{word:.precision$}")
×
UNCOV
233
                        .with_context(|| format!("Could not write vector to {}", path_display))?;
×
234
                }
235
            }
236
            FloatVectorFormat::Json => {
×
237
                log::info!("Storing in JSON format at {}", path_display);
×
238
                write!(file, "[")?;
×
239
                for word in values.iter().take(values.len().saturating_sub(2)) {
×
UNCOV
240
                    write!(file, "{word:.precision$}, ")
×
241
                        .with_context(|| format!("Could not write vector to {}", path_display))?;
×
242
                }
243
                if let Some(last) = values.last() {
×
UNCOV
244
                    write!(file, "{last:.precision$}")
×
245
                        .with_context(|| format!("Could not write vector to {}", path_display))?;
×
246
                }
UNCOV
247
                write!(file, "]")?;
×
248
            }
249
        }
250

UNCOV
251
        Ok(())
×
252
    }
253
}
254

255
#[derive(Debug, Clone, Copy, ValueEnum)]
256
/// How to store vectors of integers.
257
pub enum IntVectorFormat {
258
    /// Java-compatible format: a sequence of big-endian longs (64 bits).
259
    Java,
260
    /// A slice of usize serialized using ε-serde.
261
    Epserde,
262
    /// A BitFieldVec stored using ε-serde. It stores each element using
263
    /// ⌊log₂(max)⌋ + 1 bits. It requires to allocate the `BitFieldVec` in RAM
264
    /// before serializing it.
265
    BitFieldVec,
266
    /// ASCII format, one float per line.
267
    Ascii,
268
    /// A JSON Array.
269
    Json,
270
}
271

272
impl IntVectorFormat {
273
    /// Stores a vector of `u64` in the specified `path`` using the format defined by `self`.
274
    ///
275
    /// `max` is the maximum value of the vector. If it is not provided, it will
276
    /// be computed from the data.
277
    pub fn store(&self, path: impl AsRef<Path>, data: &[u64], max: Option<u64>) -> Result<()> {
×
278
        // Ensure the parent directory exists
279
        create_parent_dir(&path)?;
×
280

281
        let mut file = std::fs::File::create(&path)
×
UNCOV
282
            .with_context(|| format!("Could not create vector at {}", path.as_ref().display()))?;
×
283
        let mut buf = BufWriter::new(&mut file);
×
284

285
        debug_assert_eq!(
×
286
            max,
×
UNCOV
287
            max.map(|_| { data.iter().copied().max().unwrap_or(0) }),
×
UNCOV
288
            "The wrong maximum value was provided for the vector"
×
289
        );
290

291
        match self {
×
292
            IntVectorFormat::Epserde => {
×
293
                log::info!("Storing in epserde format at {}", path.as_ref().display());
×
294
                unsafe {
UNCOV
295
                    data.serialize(&mut buf).with_context(|| {
×
296
                        format!("Could not write vector to {}", path.as_ref().display())
×
297
                    })
298
                }?;
299
            }
UNCOV
300
            IntVectorFormat::BitFieldVec => {
×
301
                log::info!(
×
302
                    "Storing in BitFieldVec format at {}",
×
303
                    path.as_ref().display()
×
304
                );
305
                let max = max.unwrap_or_else(|| {
×
UNCOV
306
                    data.iter()
×
307
                        .copied()
×
308
                        .max()
×
309
                        .unwrap_or_else(|| panic!("Empty vector"))
×
310
                });
311
                let bit_width = max.len() as usize;
×
312
                log::info!("Using {} bits per element", bit_width);
×
UNCOV
313
                let mut bit_field_vec = <BitFieldVec<u64, _>>::with_capacity(bit_width, data.len());
×
UNCOV
314
                bit_field_vec.extend(data.iter().copied());
×
315
                unsafe {
316
                    bit_field_vec.store(&path).with_context(|| {
×
317
                        format!("Could not write vector to {}", path.as_ref().display())
×
318
                    })
319
                }?;
320
            }
UNCOV
321
            IntVectorFormat::Java => {
×
UNCOV
322
                log::info!("Storing in Java format at {}", path.as_ref().display());
×
323
                for word in data.iter() {
×
324
                    buf.write_all(&word.to_be_bytes()).with_context(|| {
×
325
                        format!("Could not write vector to {}", path.as_ref().display())
×
326
                    })?;
327
                }
328
            }
UNCOV
329
            IntVectorFormat::Ascii => {
×
UNCOV
330
                log::info!("Storing in ASCII format at {}", path.as_ref().display());
×
331
                for word in data.iter() {
×
332
                    writeln!(buf, "{}", word).with_context(|| {
×
333
                        format!("Could not write vector to {}", path.as_ref().display())
×
334
                    })?;
335
                }
336
            }
UNCOV
337
            IntVectorFormat::Json => {
×
UNCOV
338
                log::info!("Storing in JSON format at {}", path.as_ref().display());
×
339
                write!(buf, "[")?;
×
340
                for word in data.iter().take(data.len().saturating_sub(2)) {
×
341
                    write!(buf, "{}, ", word).with_context(|| {
×
UNCOV
342
                        format!("Could not write vector to {}", path.as_ref().display())
×
343
                    })?;
344
                }
UNCOV
345
                if let Some(last) = data.last() {
×
UNCOV
346
                    write!(buf, "{}", last).with_context(|| {
×
UNCOV
347
                        format!("Could not write vector to {}", path.as_ref().display())
×
348
                    })?;
349
                }
UNCOV
350
                write!(buf, "]")?;
×
351
            }
352
        };
353

UNCOV
354
        Ok(())
×
355
    }
356

357
    #[cfg(target_pointer_width = "64")]
358
    /// Stores a vector of `usize` in the specified `path` using the format defined by `self`.
359
    /// `max` is the maximum value of the vector, if it is not provided, it will
360
    /// be computed from the data.
361
    ///
362
    /// This helper method is available only on 64-bit architectures as Java's format
363
    /// uses of 64-bit integers.
364
    pub fn store_usizes(
×
365
        &self,
366
        path: impl AsRef<Path>,
367
        data: &[usize],
368
        max: Option<usize>,
369
    ) -> Result<()> {
UNCOV
370
        self.store(
×
UNCOV
371
            path,
×
UNCOV
372
            unsafe { core::mem::transmute::<&[usize], &[u64]>(data) },
×
UNCOV
373
            max.map(|x| x as u64),
×
374
        )
375
    }
376
}
377

378
/// Parses a batch size.
379
///
380
/// This function accepts either a number (possibly followed by a
381
/// SI or NIST multiplier k, M, G, T, P, ki, Mi, Gi, Ti, or Pi), or a percentage
382
/// (followed by a `%`) that is interpreted as a percentage of the core
383
/// memory. The function returns the number of pairs to be used for batches.
384
pub fn batch_size(arg: &str) -> anyhow::Result<usize> {
8✔
385
    const PREF_SYMS: [(&str, u64); 10] = [
386
        ("k", 1E3 as u64),
387
        ("m", 1E6 as u64),
388
        ("g", 1E9 as u64),
389
        ("t", 1E12 as u64),
390
        ("p", 1E15 as u64),
391
        ("ki", 1 << 10),
392
        ("mi", 1 << 20),
393
        ("gi", 1 << 30),
394
        ("ti", 1 << 40),
395
        ("pi", 1 << 50),
396
    ];
397
    let arg = arg.trim().to_ascii_lowercase();
24✔
398
    ensure!(!arg.is_empty(), "empty string");
16✔
399

400
    if arg.ends_with('%') {
8✔
401
        let perc = arg[..arg.len() - 1].parse::<f64>()?;
32✔
UNCOV
402
        ensure!(perc >= 0.0 || perc <= 100.0, "percentage out of range");
×
403
        let mut system = System::new();
8✔
404
        system.refresh_memory();
×
405
        let num_pairs: usize = (((system.total_memory() as f64) * (perc / 100.0)
8✔
UNCOV
406
            / (std::mem::size_of::<(usize, usize)>() as f64))
×
407
            as u64)
×
408
            .try_into()?;
409
        // TODO: try_align_to when available
410
        return Ok(num_pairs.align_to(1 << 20)); // Round up to MiBs
×
411
    }
412

413
    arg.chars().position(|c| c.is_alphabetic()).map_or_else(
×
414
        || Ok(arg.parse::<usize>()?),
×
415
        |pos| {
×
UNCOV
416
            let (num, pref_sym) = arg.split_at(pos);
×
417
            let multiplier = PREF_SYMS
×
UNCOV
418
                .iter()
×
UNCOV
419
                .find(|(x, _)| *x == pref_sym)
×
UNCOV
420
                .map(|(_, m)| m)
×
UNCOV
421
                .ok_or(anyhow!("invalid prefix symbol"))?;
×
422

UNCOV
423
            Ok((num.parse::<u64>()? * multiplier).try_into()?)
×
424
        },
425
    )
426
}
427

428
#[derive(Args, Debug)]
429
/// Shared CLI arguments for compression.
430
pub struct CompressArgs {
431
    /// The endianness of the graph to write
432
    #[clap(short = 'E', long)]
433
    pub endianness: Option<String>,
434

435
    /// The compression windows
436
    #[clap(short = 'w', long, default_value_t = 7)]
437
    pub compression_window: usize,
438
    /// The minimum interval length
439
    #[clap(short = 'i', long, default_value_t = 4)]
440
    pub min_interval_length: usize,
441
    /// The maximum recursion depth for references (-1 for infinite recursion depth)
442
    #[clap(short = 'r', long, default_value_t = 3)]
443
    pub max_ref_count: isize,
444

445
    #[arg(value_enum)]
446
    #[clap(long, default_value = "gamma")]
447
    /// The code to use for the outdegree
448
    pub outdegrees: PrivCode,
449

450
    #[arg(value_enum)]
451
    #[clap(long, default_value = "unary")]
452
    /// The code to use for the reference offsets
453
    pub references: PrivCode,
454

455
    #[arg(value_enum)]
456
    #[clap(long, default_value = "gamma")]
457
    /// The code to use for the blocks
458
    pub blocks: PrivCode,
459

460
    #[arg(value_enum)]
461
    #[clap(long, default_value = "zeta3")]
462
    /// The code to use for the residuals
463
    pub residuals: PrivCode,
464
}
465

466
impl From<CompressArgs> for CompFlags {
467
    fn from(value: CompressArgs) -> Self {
8✔
468
        CompFlags {
469
            outdegrees: value.outdegrees.into(),
16✔
470
            references: value.references.into(),
16✔
471
            blocks: value.blocks.into(),
16✔
472
            intervals: PrivCode::Gamma.into(),
16✔
473
            residuals: value.residuals.into(),
16✔
474
            min_interval_length: value.min_interval_length,
8✔
475
            compression_window: value.compression_window,
8✔
476
            max_ref_count: match value.max_ref_count {
8✔
477
                -1 => usize::MAX,
478
                _ => value.max_ref_count as usize,
479
            },
480
        }
481
    }
482
}
483

484
/// Creates a [`ThreadPool`](rayon::ThreadPool) with the given number of threads.
485
pub fn get_thread_pool(num_threads: usize) -> rayon::ThreadPool {
12✔
486
    rayon::ThreadPoolBuilder::new()
12✔
487
        .num_threads(num_threads)
24✔
488
        .build()
489
        .expect("Failed to create thread pool")
490
}
491

492
/// Appends a string to the filename of a path.
///
/// The last component of `path` is replaced by itself followed by `s`; for
/// example, appending `.ef` to `dir/graph` yields `dir/graph.ef`.
///
/// # Panics
/// * Will panic if there is no filename.
/// * Will panic in test mode if the path has an extension.
pub fn append(path: impl AsRef<Path>, s: impl AsRef<str>) -> PathBuf {
    let path = path.as_ref();
    debug_assert!(path.extension().is_none());
    let mut filename = path
        .file_name()
        .expect("the path must have a filename")
        .to_owned();
    filename.push(s.as_ref());
    // Replace the last component; pushing would add a new component instead
    // (`dir/graph` would become `dir/graph/graph.ef`).
    path.with_file_name(filename)
}
505

506
/// Creates all parent directories of the given file path.
507
pub fn create_parent_dir(file_path: impl AsRef<Path>) -> Result<()> {
20✔
508
    // ensure that the dst directory exists
509
    if let Some(parent_dir) = file_path.as_ref().parent() {
40✔
UNCOV
510
        std::fs::create_dir_all(parent_dir).with_context(|| {
×
UNCOV
511
            format!(
×
UNCOV
512
                "Failed to create the directory {:?}",
×
UNCOV
513
                parent_dir.to_string_lossy()
×
514
            )
515
        })?;
516
    }
517
    Ok(())
20✔
518
}
519

520
/// Parse a duration from a string.
521
/// For compatibility with Java, if no suffix is given, it is assumed to be in milliseconds.
522
/// You can use suffixes, the available ones are:
523
/// - `s` for seconds
524
/// - `m` for minutes
525
/// - `h` for hours
526
/// - `d` for days
527
///
528
/// Example: `1d2h3m4s567` this is parsed as: 1 day, 2 hours, 3 minutes, 4 seconds, and 567 milliseconds.
529
fn parse_duration(value: &str) -> Result<Duration> {
×
530
    if value.is_empty() {
×
531
        bail!("Empty duration string, if you want every 0 milliseconds use `0`.");
×
532
    }
533
    let mut duration = Duration::from_secs(0);
×
UNCOV
534
    let mut acc = String::new();
×
535
    for c in value.chars() {
×
536
        if c.is_ascii_digit() {
×
537
            acc.push(c);
×
538
        } else if c.is_whitespace() {
×
539
            continue;
×
540
        } else {
541
            let dur = acc.parse::<u64>()?;
×
UNCOV
542
            match c {
×
543
                's' => duration += Duration::from_secs(dur),
×
UNCOV
544
                'm' => duration += Duration::from_secs(dur * 60),
×
UNCOV
545
                'h' => duration += Duration::from_secs(dur * 60 * 60),
×
546
                'd' => duration += Duration::from_secs(dur * 60 * 60 * 24),
×
547
                _ => return Err(anyhow!("Invalid duration suffix: {}", c)),
×
548
            }
UNCOV
549
            acc.clear();
×
550
        }
551
    }
UNCOV
552
    if !acc.is_empty() {
×
UNCOV
553
        let dur = acc.parse::<u64>()?;
×
UNCOV
554
        duration += Duration::from_millis(dur);
×
555
    }
UNCOV
556
    Ok(duration)
×
557
}
558

559
pub fn init_env_logger() -> Result<()> {
4✔
560
    let mut builder =
4✔
561
        env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"));
12✔
562

563
    let start = std::time::Instant::now();
8✔
564
    let printer = SpanPrinter::new()
8✔
565
        .spacing(Spacing::None)
8✔
566
        .designator(Designator::Compact);
8✔
567
    let span_round = SpanRound::new()
8✔
568
        .largest(jiff::Unit::Day)
8✔
569
        .smallest(jiff::Unit::Millisecond)
8✔
570
        .days_are_24_hours();
571

572
    builder.format(move |buf, record| {
3,824✔
573
        let Ok(ts) = jiff::Timestamp::try_from(SystemTime::now()) else {
7,632✔
574
            return Err(std::io::Error::other("Failed to get timestamp"));
×
575
        };
576
        let style = buf.default_level_style(record.level());
×
577
        let elapsed = start.elapsed();
×
578
        let span = jiff::Span::new()
×
579
            .seconds(elapsed.as_secs() as i64)
×
580
            .milliseconds(elapsed.subsec_millis() as i64);
×
581
        let span = span.round(span_round).expect("Failed to round span");
×
582
        writeln!(
×
583
            buf,
×
584
            "{} {} {style}{}{style:#} [{:?}] {} - {}",
×
UNCOV
585
            ts.strftime("%F %T%.3f"),
×
UNCOV
586
            printer.span_to_string(&span),
×
UNCOV
587
            record.level(),
×
UNCOV
588
            std::thread::current().id(),
×
UNCOV
589
            record.target(),
×
UNCOV
590
            record.args()
×
591
        )
592
    });
593
    builder.init();
8✔
594
    Ok(())
4✔
595
}
596

597
#[derive(Args, Debug)]
598
pub struct GlobalArgs {
599
    #[arg(long, value_parser = parse_duration, global=true, display_order = 1000)]
600
    /// How often to log progress. Default is 10s. You can use the suffixes `s`
601
    /// for seconds, `m` for minutes, `h` for hours, and `d` for days. If no
602
    /// suffix is provided it is assumed to be in milliseconds.
603
    /// Example: `1d2h3m4s567` is parsed as 1 day + 2 hours + 3 minutes + 4
604
    /// seconds + 567 milliseconds = 93784567 milliseconds.
605
    pub log_interval: Option<Duration>,
606
}
607

608
#[derive(Subcommand, Debug)]
609
pub enum SubCommands {
610
    #[command(subcommand)]
611
    Analyze(analyze::SubCommands),
612
    #[command(subcommand)]
613
    Bench(bench::SubCommands),
614
    #[command(subcommand)]
615
    Build(build::SubCommands),
616
    #[command(subcommand)]
617
    Check(check::SubCommands),
618
    #[command(subcommand)]
619
    From(from::SubCommands),
620
    #[command(subcommand)]
621
    Perm(perm::SubCommands),
622
    #[command(subcommand)]
623
    Run(run::SubCommands),
624
    #[command(subcommand)]
625
    To(to::SubCommands),
626
    #[command(subcommand)]
627
    Transform(transform::SubCommands),
628
}
629

630
#[derive(Parser, Debug)]
631
#[command(name = "webgraph", version=build_info::version_string())]
632
/// Webgraph tools to build, convert, modify, and analyze graphs.
633
///
634
/// Noteworthy environment variables:
635
///
636
/// - RUST_MIN_STACK: minimum thread stack size (in bytes); we suggest
637
///   RUST_MIN_STACK=8388608 (8MiB)
638
///
639
/// - TMPDIR: where to store temporary files (potentially very large ones)
640
///
641
/// - RUST_LOG: configuration for env_logger
642
///   <https://docs.rs/env_logger/latest/env_logger/>
643
pub struct Cli {
644
    #[command(subcommand)]
645
    pub command: SubCommands,
646
    #[clap(flatten)]
647
    pub args: GlobalArgs,
648
}
649

650
pub mod dist;
651
pub mod sccs;
652

653
pub mod analyze;
654
pub mod bench;
655
pub mod build;
656
pub mod check;
657
pub mod from;
658
pub mod perm;
659
pub mod run;
660
pub mod to;
661
pub mod transform;
662

663
/// The entry point of the command-line interface.
664
pub fn cli_main<I, T>(args: I) -> Result<()>
12✔
665
where
666
    I: IntoIterator<Item = T>,
667
    T: Into<std::ffi::OsString> + Clone,
668
{
669
    let start = std::time::Instant::now();
24✔
670
    let cli = Cli::parse_from(args);
36✔
671
    match cli.command {
12✔
UNCOV
672
        SubCommands::Analyze(args) => {
×
UNCOV
673
            analyze::main(cli.args, args)?;
×
674
        }
675
        SubCommands::Bench(args) => {
×
676
            bench::main(cli.args, args)?;
×
677
        }
678
        SubCommands::Build(args) => {
7✔
679
            build::main(cli.args, args, Cli::command())?;
28✔
680
        }
UNCOV
681
        SubCommands::Check(args) => {
×
UNCOV
682
            check::main(cli.args, args)?;
×
683
        }
UNCOV
684
        SubCommands::From(args) => {
×
UNCOV
685
            from::main(cli.args, args)?;
×
686
        }
687
        SubCommands::Perm(args) => {
2✔
688
            perm::main(cli.args, args)?;
6✔
689
        }
690
        SubCommands::Run(args) => {
1✔
691
            run::main(cli.args, args)?;
3✔
692
        }
693
        SubCommands::To(args) => {
1✔
694
            to::main(cli.args, args)?;
3✔
695
        }
696
        SubCommands::Transform(args) => {
1✔
697
            transform::main(cli.args, args)?;
3✔
698
        }
699
    }
700

701
    log::info!(
12✔
702
        "The command took {}",
12✔
703
        pretty_print_elapsed(start.elapsed().as_secs_f64())
36✔
704
    );
705

UNCOV
706
    Ok(())
×
707
}
708

709
/// Pretty prints seconds in a humanly readable format.
///
/// The result lists the non-zero numbers of weeks, days, hours, and minutes,
/// followed by the remaining seconds with three decimal digits and, in
/// parentheses, the original value in seconds, e.g.
/// `1 minute 1.000 seconds (61s)`.
fn pretty_print_elapsed(elapsed: f64) -> String {
    const MINUTE: u64 = 60;
    const HOUR: u64 = 60 * MINUTE;
    const DAY: u64 = 24 * HOUR;
    const WEEK: u64 = 7 * DAY;

    let whole_seconds = elapsed as u64;
    // Each unit divides the next larger one, so the remainder modulo the
    // larger unit can be taken directly on the total.
    let units = [
        (whole_seconds / WEEK, "week"),
        (whole_seconds % WEEK / DAY, "day"),
        (whole_seconds % DAY / HOUR, "hour"),
        (whole_seconds % HOUR / MINUTE, "minute"),
    ];

    let mut result = String::new();
    for (count, name) in units {
        match count {
            0 => {}
            1 => result.push_str(&format!("1 {name} ")),
            _ => result.push_str(&format!("{count} {name}s ")),
        }
    }

    result.push_str(&format!("{:.3} seconds ({}s)", elapsed % 60.0, elapsed));
    result
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc