• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vigna / webgraph-rs / 11842754306

14 Nov 2024 05:53PM UTC coverage: 53.619% (+0.1%) from 53.511%
11842754306

push

github

vigna
ErdosRenyi is now sorted

8 of 9 new or added lines in 1 file covered. (88.89%)

1 existing line in 1 file now uncovered.

2378 of 4435 relevant lines covered (53.62%)

23493306.62 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

55.26
/src/cli/mod.rs
1
/*
2
 * SPDX-FileCopyrightText: 2023 Inria
3
 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
4
 *
5
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6
 */
7

8
//! Command-line interface structs, functions, and methods.
9
//!
10
//! Each module corresponds to a group of commands, and each command is
11
//! implemented as a submodule.
12

13
use crate::build_info;
14
use crate::graphs::bvgraph::Code;
15
use crate::prelude::CompFlags;
16
use anyhow::{anyhow, ensure, Context, Result};
17
use clap::{Args, Command, ValueEnum};
18
use common_traits::UnsignedInt;
19
use std::path::{Path, PathBuf};
20
use sysinfo::System;
21

22
pub mod analyze;
23
pub mod bench;
24
pub mod build;
25
pub mod check;
26
pub mod from;
27
pub mod perm;
28
pub mod run;
29
pub mod to;
30
pub mod transform;
31

32
/// Default thread stack size in bytes (64 MiB), used by [`get_thread_pool`]
/// when the `RUST_MIN_STACK` environment variable is not available.
pub const DEFAULT_STACK_SIZE: usize = 64 * 1024 * 1024;
33

34
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
/// Enum for instantaneous codes.
///
/// It is used to implement [`ValueEnum`] here instead of in [`dsi_bitstream`].
// Every variant is converted to a `Code` by the `From<PrivCode>` implementation
// in this module; `ZetaN` becomes `Code::Zeta { k: N }`.
pub enum PrivCode {
    Unary,
    Gamma,
    Delta,
    Zeta1,
    Zeta2,
    Zeta3,
    Zeta4,
    Zeta5,
    Zeta6,
    Zeta7,
}
50

51
impl From<PrivCode> for Code {
52
    fn from(value: PrivCode) -> Self {
20✔
53
        match value {
20✔
54
            PrivCode::Unary => Code::Unary,
4✔
55
            PrivCode::Gamma => Code::Gamma,
12✔
56
            PrivCode::Delta => Code::Delta,
×
57
            PrivCode::Zeta1 => Code::Zeta { k: 1 },
58
            PrivCode::Zeta2 => Code::Zeta { k: 2 },
59
            PrivCode::Zeta3 => Code::Zeta { k: 3 },
60
            PrivCode::Zeta4 => Code::Zeta { k: 4 },
61
            PrivCode::Zeta5 => Code::Zeta { k: 5 },
62
            PrivCode::Zeta6 => Code::Zeta { k: 6 },
63
            PrivCode::Zeta7 => Code::Zeta { k: 7 },
64
        }
65
    }
66
}
67

68
#[derive(Args, Debug)]
/// Shared CLI arguments for reading files containing arcs.
// NOTE: the `///` comments on the fields below are picked up by clap's derive
// macro as the help text of the corresponding options, so they are
// user-facing strings rather than plain documentation.
pub struct ArcsArgs {
    // NOTE(review): "simbol" is a typo for "symbol", but the field name also
    // determines the name of the command-line option, so renaming it would
    // change the CLI.
    #[arg(long, default_value_t = '#')]
    /// Ignore lines that start with this symbol.
    pub line_comment_simbol: char,

    #[arg(long, default_value_t = 0)]
    /// How many lines to skip, ignoring comment lines.
    pub lines_to_skip: usize,

    // `None` (option not given) means that no limit was requested.
    #[arg(long)]
    /// How many lines to parse, after skipping the first lines_to_skip and
    /// ignoring comment lines.
    pub max_lines: Option<usize>,

    #[arg(long, default_value_t = ',')]
    /// The column separator.
    pub separator: char,

    #[arg(long, default_value_t = 0)]
    /// The index of the column containing the source node of an arc.
    pub source_column: usize,

    #[arg(long, default_value_t = 1)]
    /// The index of the column containing the target node of an arc.
    pub target_column: usize,

    #[arg(long, default_value_t = false)]
    /// Source and destinations are node identifiers.
    pub exact: bool,
}
100

101
/// Shared CLI arguments for commands that specify a number of threads.
#[derive(Args, Debug)]
pub struct NumThreadsArg {
    // The default is the number of threads reported by rayon for the current
    // pool, clamped so that it is never less than one.
    #[arg(short = 'j', long, default_value_t = rayon::current_num_threads().max(1))]
    /// The number of threads to use
    pub num_threads: usize,
}
108

109
/// Shared CLI arguments for commands that specify a batch size.
110
#[derive(Args, Debug)]
111
pub struct BatchSizeArg {
112
    #[clap(short = 'b', long, value_parser = batch_size, default_value = "50%")]
113
    /// The number of pairs to be used in batches. Two times this number of
114
    /// `usize` will be allocated to sort pairs. You can use the SI and NIST
115
    /// multipliers k, M, G, T, P, ki, Mi, Gi, Ti, and Pi. You can also use a
116
    /// percentage of the available memory by appending a `%` to the number.
117
    pub batch_size: usize,
118
}
119

120
/// Parses a batch size.
121
///
122
/// This function accepts either a number (possibly followed by a
123
/// SI or NIST multiplier k, M, G, T, P, ki, Mi, Gi, Ti, or Pi), or a percentage
124
/// (followed by a `%`) that is interpreted as a percentage of the core
125
/// memory. The function returns the number of pairs to be used for batches.
126
pub fn batch_size(arg: &str) -> anyhow::Result<usize> {
4✔
127
    const PREF_SYMS: [(&str, u64); 10] = [
128
        ("k", 1E3 as u64),
129
        ("m", 1E6 as u64),
130
        ("g", 1E9 as u64),
131
        ("t", 1E12 as u64),
132
        ("p", 1E15 as u64),
133
        ("ki", 1 << 10),
134
        ("mi", 1 << 20),
135
        ("gi", 1 << 30),
136
        ("ti", 1 << 40),
137
        ("pi", 1 << 50),
138
    ];
139
    let arg = arg.trim().to_ascii_lowercase();
4✔
140
    ensure!(!arg.is_empty(), "empty string");
4✔
141

142
    if arg.ends_with('%') {
4✔
143
        let perc = arg[..arg.len() - 1].parse::<f64>()?;
8✔
144
        ensure!(perc >= 0.0 || perc <= 100.0, "percentage out of range");
×
145
        let mut system = System::new();
4✔
146
        system.refresh_memory();
4✔
147
        let num_pairs: usize = (((system.total_memory() as f64) * (perc / 100.0)
4✔
148
            / (std::mem::size_of::<(usize, usize)>() as f64))
149
            as u64)
150
            .try_into()?;
151
        // TODO: try_align_to when available
152
        return Ok(num_pairs.align_to(1 << 20)); // Round up to MiBs
4✔
153
    }
154

155
    arg.chars().position(|c| c.is_alphabetic()).map_or_else(
×
156
        || Ok(arg.parse::<usize>()?),
×
157
        |pos| {
×
158
            let (num, pref_sym) = arg.split_at(pos);
×
159
            let multiplier = PREF_SYMS
×
160
                .iter()
×
161
                .find(|(x, _)| *x == pref_sym)
×
162
                .map(|(_, m)| m)
×
163
                .ok_or(anyhow!("invalid prefix symbol"))?;
×
164

165
            Ok((num.parse::<u64>()? * multiplier).try_into()?)
×
166
        },
167
    )
168
}
169

170
#[derive(Args, Debug)]
171
/// Shared CLI arguments for compression.
172
pub struct CompressArgs {
173
    /// The endianness of the graph to write
174
    #[clap(short = 'E', long)]
175
    pub endianness: Option<String>,
176

177
    /// The compression windows
178
    #[clap(short = 'w', long, default_value_t = 7)]
179
    pub compression_window: usize,
180
    /// The minimum interval length
181
    #[clap(short = 'i', long, default_value_t = 4)]
182
    pub min_interval_length: usize,
183
    /// The maximum recursion depth for references (-1 for infinite recursion depth)
184
    #[clap(short = 'r', long, default_value_t = 3)]
185
    pub max_ref_count: isize,
186

187
    #[arg(value_enum)]
188
    #[clap(long, default_value = "gamma")]
189
    /// The code to use for the outdegree
190
    pub outdegrees: PrivCode,
191

192
    #[arg(value_enum)]
193
    #[clap(long, default_value = "unary")]
194
    /// The code to use for the reference offsets
195
    pub references: PrivCode,
196

197
    #[arg(value_enum)]
198
    #[clap(long, default_value = "gamma")]
199
    /// The code to use for the blocks
200
    pub blocks: PrivCode,
201

202
    #[arg(value_enum)]
203
    #[clap(long, default_value = "zeta3")]
204
    /// The code to use for the residuals
205
    pub residuals: PrivCode,
206
}
207

208
impl From<CompressArgs> for CompFlags {
209
    fn from(value: CompressArgs) -> Self {
4✔
210
        CompFlags {
211
            outdegrees: value.outdegrees.into(),
4✔
212
            references: value.references.into(),
4✔
213
            blocks: value.blocks.into(),
4✔
214
            intervals: PrivCode::Gamma.into(),
4✔
215
            residuals: value.residuals.into(),
4✔
216
            min_interval_length: value.min_interval_length,
4✔
217
            compression_window: value.compression_window,
4✔
218
            max_ref_count: match value.max_ref_count {
4✔
219
                -1 => usize::MAX,
220
                _ => value.max_ref_count as usize,
221
            },
222
        }
223
    }
224
}
225

226
/// Create a threadpool with the given number of threads and set the stack to either the env var or to
227
/// the default stack size `DEFAULT_STACK_SIZE`.
228
pub fn get_thread_pool(num_threads: usize) -> rayon::ThreadPool {
4✔
229
    rayon::ThreadPoolBuilder::new()
4✔
230
        .num_threads(num_threads)
4✔
231
        .stack_size(
232
            std::env::var("RUST_MIN_STACK")
4✔
233
                .map(|x| dbg!(x.parse::<usize>().unwrap()))
12✔
234
                .unwrap_or(crate::cli::DEFAULT_STACK_SIZE),
4✔
235
        )
236
        .build()
237
        .expect("Failed to create thread pool")
238
}
239

240
/// Appends a string to the filename of a path.
///
/// The string is appended to the last component of the path, so that, for
/// example, appending `.ef` to `dir/graph` yields `dir/graph.ef`.
///
/// # Panics
/// * Will panic if there is no filename.
/// * Will panic in debug builds if the path has an extension.
pub fn append(path: impl AsRef<Path>, s: impl AsRef<str>) -> PathBuf {
    let path = path.as_ref();
    debug_assert!(path.extension().is_none());
    let mut filename = path
        .file_name()
        .expect("the path has no filename")
        .to_owned();
    filename.push(s.as_ref());
    // Replace the last component rather than pushing a new one: `push` would
    // turn `dir/graph` into `dir/graph/graph<s>`.
    path.with_file_name(filename)
}
253

254
/// Creates all parent directories of the given file path.
255
pub fn create_parent_dir(file_path: impl AsRef<Path>) -> Result<()> {
10✔
256
    // ensure that the dst directory exists
257
    if let Some(parent_dir) = file_path.as_ref().parent() {
20✔
258
        std::fs::create_dir_all(parent_dir).with_context(|| {
×
259
            format!(
×
260
                "Failed to create the directory {:?}",
×
261
                parent_dir.to_string_lossy()
×
262
            )
263
        })?;
264
    }
265
    Ok(())
10✔
266
}
267

268
/// The entry point of the command-line interface.
269
pub fn main<I, T>(args: I) -> Result<()>
9✔
270
where
271
    I: IntoIterator<Item = T>,
272
    T: Into<std::ffi::OsString> + Clone,
273
{
274
    let start = std::time::Instant::now();
9✔
275
    // it's ok to fail since this might be called multiple times in tests
276
    let _ = env_logger::builder()
9✔
277
        .filter_level(log::LevelFilter::Debug)
9✔
278
        .try_init();
279

280
    let command = Command::new("webgraph")
9✔
281
        .about("Webgraph tools to build, convert, modify, and analyze webgraph files.")
282
        .version(build_info::version_string())
9✔
283
        .subcommand_required(true)
284
        .arg_required_else_help(true)
285
        .after_help(
286
            "Environment (noteworthy environment variables used):
287
RUST_MIN_STACK: minimum thread stack size (in bytes)
288
TMPDIR: where to store temporary files (potentially very large ones)
289
",
290
        );
291

292
    macro_rules! impl_dispatch {
×
293
        ($command:expr, $($module:ident),*) => {{
×
294
            let command = build::cli($command);
×
295
            $(
×
296
                let command = $module::cli(command);
×
297
            )*
×
298
            let command = command.display_order(0); // sort args alphabetically
299
            let mut completion_command = command.clone();
×
300
            let matches = command.get_matches_from(args);
×
301
            let subcommand = matches.subcommand();
×
302
            // if no command is specified, print the help message
303
            if subcommand.is_none() {
×
304
                completion_command.print_help().unwrap();
×
305
                return Ok(());
×
306
            }
307
            match subcommand.unwrap() {
×
308
                (build::COMMAND_NAME, sub_m) => build::main(sub_m, &mut completion_command),
×
309
                $(
×
310
                    ($module::COMMAND_NAME, sub_m) => $module::main(sub_m),
×
311
                )*
×
312
                (command_name, _) => {
×
313
                    // this shouldn't happen as clap should catch this
314
                    eprintln!("Unknown command: {:?}", command_name);
×
315
                    completion_command.print_help().unwrap();
×
316
                    std::process::exit(1);
×
317
                }
318
            }
319
        }};
320
    }
321

322
    impl_dispatch!(command, analyze, bench, check, from, perm, run, to, transform)?;
9✔
323

324
    log::info!(
9✔
325
        "The command took {}",
9✔
326
        pretty_print_elapsed(start.elapsed().as_secs_f64())
9✔
327
    );
328

329
    Ok(())
9✔
330
}
331

332
/// Formats a duration expressed in seconds in a human-readable way.
///
/// The result lists the nonzero numbers of whole weeks, days, hours, and
/// minutes (with the unit in the singular when the number is one), followed
/// by the remaining seconds with three decimal digits and, in parentheses,
/// the overall number of seconds.
fn pretty_print_elapsed(elapsed: f64) -> String {
    // Units from the largest to the smallest, with their length in seconds.
    const UNITS: [(u64, &str); 4] = [
        (60 * 60 * 24 * 7, "week"),
        (60 * 60 * 24, "day"),
        (60 * 60, "hour"),
        (60, "minute"),
    ];

    let mut text = String::new();
    let mut remaining = elapsed as u64;
    for &(unit_secs, unit_name) in UNITS.iter() {
        let count = remaining / unit_secs;
        remaining %= unit_secs;
        match count {
            0 => {}
            1 => {
                text.push_str("1 ");
                text.push_str(unit_name);
                text.push(' ');
            }
            _ => text.push_str(&format!("{} {}s ", count, unit_name)),
        }
    }

    // The seconds keep their fractional part, so they are computed from the
    // original floating-point value rather than from the integer remainder.
    text.push_str(&format!("{:.3} seconds ({}s)", elapsed % 60.0, elapsed));
    text
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc