14130263389

Committed 28 Mar 2025 01:40PM UTC coverage: 49.654% (-0.1%) from 49.798%

Build # 14130263389

Build Type

push

github

Committed by

zommiommy

Commit Message

fixed llp wrong combine types

Coverage Stats

22 of 41 new or added lines in 4 files covered. (53.66%)

1019 existing lines in 41 files now uncovered.

2437 of 4908 relevant lines covered (49.65%)

18919274.03 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

45.71

/src/cli/mod.rs

/*
 * SPDX-FileCopyrightText: 2023 Inria
 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
 *
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
 */

//! Command-line interface structs, functions, and methods.
//!
//! Each module corresponds to a group of commands, and each command is
//! implemented as a submodule.

use crate::prelude::CompFlags;
use crate::{build_info, utils::Granularity};
use anyhow::{anyhow, bail, ensure, Context, Result};
use clap::{Arg, Args, Command, ValueEnum};
use common_traits::UnsignedInt;
use dsi_bitstream::dispatch::Codes;
use jiff::fmt::friendly::{Designator, Spacing, SpanPrinter};
use jiff::SpanRound;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::time::Duration;
use std::time::SystemTime;
use sysinfo::System;

pub mod analyze;
pub mod bench;
pub mod build;
pub mod check;
pub mod from;
pub mod perm;
pub mod run;
pub mod to;
pub mod transform;

#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
/// Enum for instantaneous codes.
///
/// It is used to implement [`ValueEnum`] here instead of in [`dsi_bitstream`].
// Every variant is translated into a `Codes` value by the
// `From<PrivCode> for Codes` impl in this module. The `ZetaN` variants encode
// the parameter of the code in their name (`ZetaN` becomes `Codes::Zeta { k: N }`).
pub enum PrivCode {
    // Codes without a parameter.
    Unary,
    Gamma,
    Delta,
    // Zeta codes with parameter k = 1..=7.
    Zeta1,
    Zeta2,
    Zeta3,
    Zeta4,
    Zeta5,
    Zeta6,
    Zeta7,
}

impl From<PrivCode> for Codes {
    /// Converts the command-line code selector into the matching [`Codes`]
    /// value; each `ZetaN` variant becomes `Codes::Zeta { k: N }`.
    fn from(value: PrivCode) -> Self {
        // Parameterless codes return immediately; the zeta family only
        // differs by its parameter, which is resolved here.
        let k = match value {
            PrivCode::Unary => return Codes::Unary,
            PrivCode::Gamma => return Codes::Gamma,
            PrivCode::Delta => return Codes::Delta,
            PrivCode::Zeta1 => 1,
            PrivCode::Zeta2 => 2,
            PrivCode::Zeta3 => 3,
            PrivCode::Zeta4 => 4,
            PrivCode::Zeta5 => 5,
            PrivCode::Zeta6 => 6,
            PrivCode::Zeta7 => 7,
        };
        Codes::Zeta { k }
    }
}

#[derive(Args, Debug)]
/// Shared CLI arguments for reading files containing arcs.
// All options below are declared with `long` only (no short aliases).
// Only `max_arcs` has no default; every other option has an explicit one.
pub struct ArcsArgs {
    #[arg(long, default_value_t = '#')]
    /// Ignore lines that start with this symbol.
    pub line_comment_symbol: char,

    #[arg(long, default_value_t = 0)]
    /// How many lines to skip, ignoring comment lines.
    pub lines_to_skip: usize,

    // `None` when the option is not given.
    // NOTE(review): presumably `None` means "no limit" — confirm in the readers
    // that consume this struct.
    #[arg(long)]
    /// How many lines to parse, after skipping the first lines_to_skip and
    /// ignoring comment lines.
    pub max_arcs: Option<usize>,

    // A single `char`, defaulting to a tab.
    #[arg(long, default_value_t = '\t')]
    /// The column separator.
    pub separator: char,

    // Source defaults to column 0 and target to column 1.
    #[arg(long, default_value_t = 0)]
    /// The index of the column containing the source node of an arc.
    pub source_column: usize,

    #[arg(long, default_value_t = 1)]
    /// The index of the column containing the target node of an arc.
    pub target_column: usize,

    // NOTE(review): presumably, when set, the values in the two columns are
    // used directly as node ids instead of being treated as labels to be
    // remapped — confirm against the commands that read this flag.
    #[arg(long, default_value_t = false)]
    /// Source and destinations are node identifiers.
    pub exact: bool,
}

/// Shared CLI arguments for commands that specify a number of threads.
#[derive(Args, Debug)]
pub struct NumThreadsArg {
    // The default is computed from `rayon::current_num_threads()`, clamped
    // so that it is never lower than 1.
    #[arg(short = 'j', long, default_value_t = rayon::current_num_threads().max(1))]
    /// The number of threads to use
    pub num_threads: usize,
}

/// Shared CLI arguments for commands that specify a granularity.
// The two options are declared as conflicting with each other, so at most one
// of the fields is expected to be `Some` after parsing; `into_granularity`
// relies on this and treats the "both set" case as unreachable.
#[derive(Args, Debug)]
pub struct GranularityArgs {
    #[arg(long, conflicts_with("node_granularity"))]
    /// The tentative number of arcs used define the size of a parallel job
    /// (advanced option).
    pub arc_granularity: Option<u64>,

    #[arg(long, conflicts_with("arc_granularity"))]
    /// The tentative number of nodes used define the size of a parallel job
    /// (advanced option).
    pub node_granularity: Option<usize>,
}

impl GranularityArgs {
    /// Turns the parsed options into a [`Granularity`].
    ///
    /// An arc granularity yields `Granularity::Arcs`, a node granularity
    /// yields `Granularity::Nodes`, and `Granularity::default()` is returned
    /// when neither option was given.
    ///
    /// # Panics
    ///
    /// Panics if both options are set; the two arguments are declared as
    /// conflicting, so this is treated as unreachable.
    pub fn into_granularity(&self) -> Granularity {
        let arcs = self.arc_granularity;
        let nodes = self.node_granularity;

        if arcs.is_some() && nodes.is_some() {
            unreachable!()
        }
        if let Some(arc_granularity) = arcs {
            return Granularity::Arcs(arc_granularity);
        }
        if let Some(node_granularity) = nodes {
            return Granularity::Nodes(node_granularity);
        }
        Granularity::default()
    }
}

/// Shared CLI arguments for commands that specify a batch size.
#[derive(Args, Debug)]
pub struct BatchSizeArg {
    // The raw string is converted by the `batch_size` function in this module;
    // the default string "50%" goes through the same parser.
    #[clap(short = 'b', long, value_parser = batch_size, default_value = "50%")]
    /// The number of pairs to be used in batches. Two times this number of
    /// `usize` will be allocated to sort pairs. You can use the SI and NIST
    /// multipliers k, M, G, T, P, ki, Mi, Gi, Ti, and Pi. You can also use a
    /// percentage of the available memory by appending a `%` to the number.
    pub batch_size: usize,
}

/// Parses a batch size.
///
/// This function accepts either a number (possibly followed by a
/// SI or NIST multiplier k, M, G, T, P, ki, Mi, Gi, Ti, or Pi), or a percentage
/// (followed by a `%`) that is interpreted as a percentage of the total
/// memory reported by the system. The function returns the number of pairs to
/// be used for batches.
///
/// The argument is trimmed and matched case-insensitively.
///
/// # Errors
///
/// Returns an error if the argument is empty, if the numeric part cannot be
/// parsed, if the percentage is not in the range `0..=100` (NaN and infinities
/// included), if the multiplier is unknown, or if the result does not fit the
/// integer types involved.
pub fn batch_size(arg: &str) -> anyhow::Result<usize> {
    const PREF_SYMS: [(&str, u64); 10] = [
        ("k", 1E3 as u64),
        ("m", 1E6 as u64),
        ("g", 1E9 as u64),
        ("t", 1E12 as u64),
        ("p", 1E15 as u64),
        ("ki", 1 << 10),
        ("mi", 1 << 20),
        ("gi", 1 << 30),
        ("ti", 1 << 40),
        ("pi", 1 << 50),
    ];
    let arg = arg.trim().to_ascii_lowercase();
    ensure!(!arg.is_empty(), "empty string");

    if let Some(perc) = arg.strip_suffix('%') {
        let perc = perc.parse::<f64>()?;
        // Both bounds must hold; `contains` is also false for NaN.
        ensure!((0.0..=100.0).contains(&perc), "percentage out of range");
        let mut system = System::new();
        system.refresh_memory();
        // Number of `(usize, usize)` pairs fitting in the requested fraction
        // of the total memory.
        let num_pairs: usize = (((system.total_memory() as f64) * (perc / 100.0)
            / (std::mem::size_of::<(usize, usize)>() as f64))
            as u64)
            .try_into()?;
        // TODO: try_align_to when available
        return Ok(num_pairs.align_to(1 << 20)); // Round up to MiBs
    }

    // `find` returns a byte offset, which is what `split_at` requires
    // (a char index would be wrong, and could panic, on non-ASCII input).
    match arg.find(|c: char| c.is_alphabetic()) {
        None => Ok(arg.parse::<usize>()?),
        Some(pos) => {
            let (num, pref_sym) = arg.split_at(pos);
            let multiplier = PREF_SYMS
                .iter()
                .find(|(sym, _)| *sym == pref_sym)
                .map(|&(_, m)| m)
                .ok_or_else(|| anyhow!("invalid prefix symbol"))?;

            // Report an error instead of overflowing on huge inputs.
            let value = num
                .parse::<u64>()?
                .checked_mul(multiplier)
                .ok_or_else(|| anyhow!("batch size too large"))?;
            Ok(value.try_into()?)
        }
    }
}

#[derive(Args, Debug)]
/// Shared CLI arguments for compression.
// The `From<CompressArgs> for CompFlags` impl in this module converts these
// options into compression flags. There is no option for the code used for
// intervals: that impl fixes it to `PrivCode::Gamma`. The `endianness` field
// is not read by that impl.
pub struct CompressArgs {
    /// The endianness of the graph to write
    #[clap(short = 'E', long)]
    pub endianness: Option<String>,

    /// The compression windows
    #[clap(short = 'w', long, default_value_t = 7)]
    pub compression_window: usize,
    /// The minimum interval length
    #[clap(short = 'i', long, default_value_t = 4)]
    pub min_interval_length: usize,
    // Signed so that the `-1` sentinel mentioned in the help can be supplied;
    // the conversion to `CompFlags` turns `-1` into `usize::MAX`.
    /// The maximum recursion depth for references (-1 for infinite recursion depth)
    #[clap(short = 'r', long, default_value_t = 3)]
    pub max_ref_count: isize,

    // The four code selectors below default to gamma, unary, gamma and zeta3
    // respectively.
    #[arg(value_enum)]
    #[clap(long, default_value = "gamma")]
    /// The code to use for the outdegree
    pub outdegrees: PrivCode,

    #[arg(value_enum)]
    #[clap(long, default_value = "unary")]
    /// The code to use for the reference offsets
    pub references: PrivCode,

    #[arg(value_enum)]
    #[clap(long, default_value = "gamma")]
    /// The code to use for the blocks
    pub blocks: PrivCode,

    #[arg(value_enum)]
    #[clap(long, default_value = "zeta3")]
    /// The code to use for the residuals
    pub residuals: PrivCode,
}

impl From<CompressArgs> for CompFlags {
    /// Builds the compression flags from the command-line arguments.
    ///
    /// The code for intervals is not configurable from the command line and
    /// is always gamma. A negative `max_ref_count` (the documented value is
    /// `-1`) means "no limit" and is mapped to `usize::MAX`.
    fn from(value: CompressArgs) -> Self {
        // Any negative value is normalized to "unlimited" rather than being
        // wrapped by an `as` cast into an arbitrary huge number.
        let max_ref_count = if value.max_ref_count < 0 {
            usize::MAX
        } else {
            value.max_ref_count as usize
        };

        CompFlags {
            outdegrees: value.outdegrees.into(),
            references: value.references.into(),
            blocks: value.blocks.into(),
            intervals: PrivCode::Gamma.into(),
            residuals: value.residuals.into(),
            min_interval_length: value.min_interval_length,
            compression_window: value.compression_window,
            max_ref_count,
        }
    }
}

/// Builds a rayon thread pool with `num_threads` threads.
///
/// # Panics
///
/// Panics with the message "Failed to create thread pool" if the pool cannot
/// be built.
pub fn get_thread_pool(num_threads: usize) -> rayon::ThreadPool {
    let builder = rayon::ThreadPoolBuilder::new().num_threads(num_threads);
    builder.build().expect("Failed to create thread pool")
}

/// Appends a string to the filename of a path.
///
/// Only the last component of the path is changed: `append("dir/base", ".graph")`
/// returns `dir/base.graph`.
///
/// # Panics
/// * Will panic if there is no filename.
/// * Will panic in test mode if the path has an extension.
pub fn append(path: impl AsRef<Path>, s: impl AsRef<str>) -> PathBuf {
    let path = path.as_ref();
    debug_assert!(path.extension().is_none());
    let mut filename = path
        .file_name()
        .expect("the path has no filename")
        .to_owned();
    filename.push(s.as_ref());
    // Replace the last component instead of pushing a new one: pushing would
    // turn `dir/base` into `dir/base/base<s>`.
    path.with_file_name(filename)
}

/// Makes sure that the directory that will contain `file_path` exists,
/// creating it and all of its missing ancestors.
///
/// Nothing is done when the path has no parent.
///
/// # Errors
///
/// Returns an error naming the directory if it cannot be created.
pub fn create_parent_dir(file_path: impl AsRef<Path>) -> Result<()> {
    let Some(parent_dir) = file_path.as_ref().parent() else {
        return Ok(());
    };
    std::fs::create_dir_all(parent_dir).with_context(|| {
        let shown = parent_dir.to_string_lossy();
        format!("Failed to create the directory {:?}", shown)
    })
}

/// Parse a duration from a string.
/// For compatibility with Java, if no suffix is given, it is assumed to be in milliseconds.
/// You can use suffixes, the available ones are:
/// - `s` for seconds
/// - `m` for minutes
/// - `h` for hours
/// - `d` for days
///
/// Example: `1d2h3m4s567` this is parsed as: 1 day, 2 hours, 3 minutes, 4 seconds, and 567 milliseconds.
fn parse_duration(value: &str) -> Result<Duration> {
    if value.is_empty() {
        bail!("Empty duration string, if you want every 0 milliseconds use `0`.");
    }
    let mut duration = Duration::from_secs(0);
    let mut acc = String::new();
    for c in value.chars() {
        if c.is_ascii_digit() {
            acc.push(c);
        } else if c.is_whitespace() {
            continue;
        } else {
            let dur = acc.parse::<u64>()?;
            match c {
                's' => duration += Duration::from_secs(dur),
                'm' => duration += Duration::from_secs(dur * 60),
                'h' => duration += Duration::from_secs(dur * 60 * 60),
                'd' => duration += Duration::from_secs(dur * 60 * 60 * 24),
                _ => return Err(anyhow!("Invalid duration suffix: {}", c)),
            }
            acc.clear();
        }
    }
    if !acc.is_empty() {
        let dur = acc.parse::<u64>()?;
        duration += Duration::from_millis(dur);
    }
    Ok(duration)
}

/// Installs an `env_logger` logger with a custom line format.
///
/// The builder is created from the default `env_logger` environment with
/// `info` as the default filter. Each record is written as: wall-clock
/// timestamp, time elapsed since this function was called, the styled level,
/// the id of the logging thread in brackets, the record target, and the
/// message.
///
/// As written, this function always returns `Ok(())`.
///
/// # Panics
///
/// The formatter panics with "Failed to round span" if the elapsed span
/// cannot be rounded.
/// NOTE(review): `builder.init()` presumably panics if a global logger has
/// already been installed — confirm against the `env_logger` documentation.
pub fn init_envlogger() -> Result<()> {
    let mut builder =
        env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"));

    // Reference instant for the elapsed-time column; moved into the closure.
    let start = std::time::Instant::now();
    // Printer for the elapsed span: compact designators, no spacing.
    let printer = SpanPrinter::new()
        .spacing(Spacing::None)
        .designator(Designator::Compact);
    // The elapsed span is rounded with units between days and milliseconds.
    let span_round = SpanRound::new()
        .largest(jiff::Unit::Day)
        .smallest(jiff::Unit::Millisecond)
        .days_are_24_hours();

    builder.format(move |buf, record| {
        // A failed timestamp conversion is reported as an I/O error of the
        // formatter rather than as a panic.
        let Ok(ts) = jiff::Timestamp::try_from(SystemTime::now()) else {
            return Err(std::io::Error::other("Failed to get timestamp"));
        };
        let style = buf.default_level_style(record.level());
        let elapsed = start.elapsed();
        // Build the span from whole seconds plus the sub-second milliseconds.
        let span = jiff::Span::new()
            .seconds(elapsed.as_secs() as i64)
            .milliseconds(elapsed.subsec_millis() as i64);
        let span = span.round(span_round).expect("Failed to round span");
        // `{style}` / `{style:#}` wrap the level only.
        writeln!(
            buf,
            "{} {} {style}{}{style:#} [{:?}] {} - {}",
            ts.strftime("%F %T%.3f"),
            printer.span_to_string(&span),
            record.level(),
            std::thread::current().id(),
            record.target(),
            record.args()
        )
    });
    builder.init();
    Ok(())
}

/// The entry point of the command-line interface.
///
/// Builds the top-level `webgraph` command with a global `--log-interval`
/// option (parsed by `parse_duration`), registers the subcommands of the
/// `build` module and of every module passed to `impl_dispatch!`, parses
/// `args`, and runs the `main` function of the selected subcommand.
///
/// On success, the time taken by the command is logged at `info` level. If no
/// subcommand is found the help is printed and `Ok(())` is returned without
/// logging the elapsed time.
///
/// # Errors
///
/// Propagates the error returned by the selected subcommand.
pub fn main<I, T>(args: I) -> Result<()>
where
    I: IntoIterator<Item = T>,
    T: Into<std::ffi::OsString> + Clone,
{
    // Used at the end to report how long the command took.
    let start = std::time::Instant::now();

    let command = Command::new("webgraph")
        .about("Webgraph tools to build, convert, modify, and analyze webgraph files.")
        .version(build_info::version_string())
        .subcommand_required(true)
        .arg_required_else_help(true)
        .arg(
            Arg::new("log-interval")
                .short('l')
                .long("log-interval")
                .value_parser(parse_duration)
                .help(
                    "How often to log progress. Default is 10s. You can use the suffixes `s` for seconds, `m` for minutes, `h` for hours, and `d` for days. If no suffix is provided it is assumed to be in milliseconds. Example: `1d2h3m4s567` is parsed as 1 day + 2 hours + 3 minutes + 4 seconds + 567 milliseconds = 93784567 milliseconds.",)
                .global(true),
        )
        .after_help(
            "Environment (noteworthy environment variables used):
RUST_MIN_STACK: minimum thread stack size (in bytes)
    we suggest RUST_MIN_STACK=8388608 which is the maximum allowed by linux.
TMPDIR: where to store temporary files (potentially very large ones)
RUST_LOG: configuration for env_logger, pass `info` to see the progress of the
  compression, `debug` to see the progress of the decompression, and `trace` to see all the
  details. You can also use `RUST_LOG=webgraph=debug` to see only the webgraph logs.
",
        );

    // Registers the subcommands of `build` and of each listed module on the
    // command, parses the arguments, and dispatches on the subcommand name.
    // The macro evaluates to the `Result` returned by the selected `main`.
    macro_rules! impl_dispatch {
        ($command:expr, $($module:ident),*) => {{
            let command = build::cli($command);
            $(
                let command = $module::cli(command);
            )*
            let command = command.display_order(0); // sort args alphabetically
            // `get_matches_from` consumes the command, so a clone is kept for
            // printing the help and for `build::main`.
            // NOTE(review): `build::main` presumably needs the full command to
            // generate shell completions — confirm in the `build` module.
            let mut completion_command = command.clone();
            let matches = command.get_matches_from(args);

            let subcommand = matches.subcommand();
            // if no command is specified, print the help message
            if subcommand.is_none() {
                completion_command.print_help().unwrap();
                // Returns from the enclosing `main`, not just from the macro block.
                return Ok(());
            }
            match subcommand.unwrap() {
                (build::COMMAND_NAME, sub_m) => build::main(sub_m, &mut completion_command),
                $(
                    ($module::COMMAND_NAME, sub_m) => $module::main(sub_m),
                )*
                (command_name, _) => {
                    // this shouldn't happen as clap should catch this
                    eprintln!("Unknown command: {:?}", command_name);
                    completion_command.print_help().unwrap();
                    std::process::exit(1);
                }
            }
        }};
    }

    // `?` propagates the error of the selected subcommand.
    impl_dispatch!(command, analyze, bench, check, from, perm, run, to, transform)?;

    log::info!(
        "The command took {}",
        pretty_print_elapsed(start.elapsed().as_secs_f64())
    );

    Ok(())
}

/// Formats a number of seconds in a human-readable way.
///
/// Weeks, days, hours and minutes are listed (largest first) only when they
/// are nonzero, using the singular form when the count is one. The seconds
/// are always printed with three decimal digits, followed by the raw value in
/// parentheses, e.g. `1 minute 1.500 seconds (61.5s)`.
fn pretty_print_elapsed(elapsed: f64) -> String {
    // (seconds in the unit, text for exactly one, suffix for several)
    const UNITS: [(u64, &str, &str); 4] = [
        (60 * 60 * 24 * 7, "1 week ", " weeks "),
        (60 * 60 * 24, "1 day ", " days "),
        (60 * 60, "1 hour ", " hours "),
        (60, "1 minute ", " minutes "),
    ];

    let mut remaining = elapsed as u64;
    let mut out = String::new();
    for &(unit_secs, singular, plural_suffix) in UNITS.iter() {
        let count = remaining / unit_secs;
        remaining %= unit_secs;
        if count == 1 {
            out.push_str(singular);
        } else if count > 1 {
            out.push_str(&count.to_string());
            out.push_str(plural_suffix);
        }
    }

    // The fractional seconds come from the original floating-point value.
    out.push_str(&format!("{:.3} seconds ({}s)", elapsed % 60.0, elapsed));
    out
}

1	/*
2	* SPDX-FileCopyrightText: 2023 Inria
3	* SPDX-FileCopyrightText: 2023 Tommaso Fontana
4	*
5	* SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6	*/
7
8	//! Command-line interface structs, functions, and methods.
9	//!
10	//! Each module correspond to a group of commands, and each command is
11	//! implemented as a submodule.
12
13	use crate::prelude::CompFlags;
14	use crate::{build_info, utils::Granularity};
15	use anyhow::{anyhow, bail, ensure, Context, Result};
16	use clap::{Arg, Args, Command, ValueEnum};
17	use common_traits::UnsignedInt;
18	use dsi_bitstream::dispatch::Codes;
19	use jiff::fmt::friendly::{Designator, Spacing, SpanPrinter};
20	use jiff::SpanRound;
21	use std::io::Write;
22	use std::path::{Path, PathBuf};
23	use std::time::Duration;
24	use std::time::SystemTime;
25	use sysinfo::System;
26
27	pub mod analyze;
28	pub mod bench;
29	pub mod build;
30	pub mod check;
31	pub mod from;
32	pub mod perm;
33	pub mod run;
34	pub mod to;
35	pub mod transform;
36
37	#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
38	/// Enum for instantaneous codes.
39	///
40	/// It is used to implement [`ValueEnum`] here instead of in [`dsi_bitstream`].
41	pub enum PrivCode {
42	Unary,
43	Gamma,
44	Delta,
45	Zeta1,
46	Zeta2,
47	Zeta3,
48	Zeta4,
49	Zeta5,
50	Zeta6,
51	Zeta7,
52	}
53
54	impl From<PrivCode> for Codes {
55	fn from(value: PrivCode) -> Self {	20✔
56	match value {	20✔
57	PrivCode::Unary => Codes::Unary,	4✔
58	PrivCode::Gamma => Codes::Gamma,	12✔
UNCOV 59	PrivCode::Delta => Codes::Delta,	×
60	PrivCode::Zeta1 => Codes::Zeta { k: 1 },
61	PrivCode::Zeta2 => Codes::Zeta { k: 2 },
62	PrivCode::Zeta3 => Codes::Zeta { k: 3 },
63	PrivCode::Zeta4 => Codes::Zeta { k: 4 },
64	PrivCode::Zeta5 => Codes::Zeta { k: 5 },
65	PrivCode::Zeta6 => Codes::Zeta { k: 6 },
66	PrivCode::Zeta7 => Codes::Zeta { k: 7 },
67	}
68	}
69	}
70
71	#[derive(Args, Debug)]
72	/// Shared CLI arguments for reading files containing arcs.
73	pub struct ArcsArgs {
74	#[arg(long, default_value_t = '#')]
75	/// Ignore lines that start with this symbol.
76	pub line_comment_symbol: char,
77
78	#[arg(long, default_value_t = 0)]
79	/// How many lines to skip, ignoring comment lines.
80	pub lines_to_skip: usize,
81
82	#[arg(long)]
83	/// How many lines to parse, after skipping the first lines_to_skip and
84	/// ignoring comment lines.
85	pub max_arcs: Option<usize>,
86
87	#[arg(long, default_value_t = '\t')]
88	/// The column separator.
89	pub separator: char,
90
91	#[arg(long, default_value_t = 0)]
92	/// The index of the column containing the source node of an arc.
93	pub source_column: usize,
94
95	#[arg(long, default_value_t = 1)]
96	/// The index of the column containing the target node of an arc.
97	pub target_column: usize,
98
99	#[arg(long, default_value_t = false)]
100	/// Source and destinations are node identifiers.
101	pub exact: bool,
102	}
103
104	/// Shared CLI arguments for commands that specify a number of threads.
105	#[derive(Args, Debug)]
106	pub struct NumThreadsArg {
107	#[arg(short = 'j', long, default_value_t = rayon::current_num_threads().max(1))]
108	/// The number of threads to use
109	pub num_threads: usize,
110	}
111
112	/// Shared CLI arguments for commands that specify a granularity.
113	#[derive(Args, Debug)]
114	pub struct GranularityArgs {
115	#[arg(long, conflicts_with("node_granularity"))]
116	/// The tentative number of arcs used define the size of a parallel job
117	/// (advanced option).
118	pub arc_granularity: Option<u64>,
119
120	#[arg(long, conflicts_with("arc_granularity"))]
121	/// The tentative number of nodes used define the size of a parallel job
122	/// (advanced option).
123	pub node_granularity: Option<usize>,
124	}
125
126	impl GranularityArgs {
127	pub fn into_granularity(&self) -> Granularity {	2✔
128	match (self.arc_granularity, self.node_granularity) {	2✔
129	(Some(_), Some(_)) => unreachable!(),
UNCOV 130	(Some(arc_granularity), None) => Granularity::Arcs(arc_granularity),	×
UNCOV 131	(None, Some(node_granularity)) => Granularity::Nodes(node_granularity),	×
132	(None, None) => Granularity::default(),	2✔
133	}
134	}
135	}
136
137	/// Shared CLI arguments for commands that specify a batch size.
138	#[derive(Args, Debug)]
139	pub struct BatchSizeArg {
140	#[clap(short = 'b', long, value_parser = batch_size, default_value = "50%")]
141	/// The number of pairs to be used in batches. Two times this number of
142	/// `usize` will be allocated to sort pairs. You can use the SI and NIST
143	/// multipliers k, M, G, T, P, ki, Mi, Gi, Ti, and Pi. You can also use a
144	/// percentage of the available memory by appending a `%` to the number.
145	pub batch_size: usize,
146	}
147
148	/// Parses a batch size.
149	///
150	/// This function accepts either a number (possibly followed by a
151	/// SI or NIST multiplier k, M, G, T, P, ki, Mi, Gi, Ti, or Pi), or a percentage
152	/// (followed by a `%`) that is interpreted as a percentage of the core
153	/// memory. The function returns the number of pairs to be used for batches.
154	pub fn batch_size(arg: &str) -> anyhow::Result<usize> {	4✔
155	const PREF_SYMS: [(&str, u64); 10] = [
156	("k", 1E3 as u64),
157	("m", 1E6 as u64),
158	("g", 1E9 as u64),
159	("t", 1E12 as u64),
160	("p", 1E15 as u64),
161	("ki", 1 << 10),
162	("mi", 1 << 20),
163	("gi", 1 << 30),
164	("ti", 1 << 40),
165	("pi", 1 << 50),
166	];
167	let arg = arg.trim().to_ascii_lowercase();	4✔
168	ensure!(!arg.is_empty(), "empty string");	4✔
169
170	if arg.ends_with('%') {	4✔
171	let perc = arg[..arg.len() - 1].parse::<f64>()?;	8✔
UNCOV 172	ensure!(perc >= 0.0 \|\| perc <= 100.0, "percentage out of range");	×
173	let mut system = System::new();	4✔
174	system.refresh_memory();	4✔
175	let num_pairs: usize = (((system.total_memory() as f64) * (perc / 100.0)	4✔
UNCOV 176	/ (std::mem::size_of::<(usize, usize)>() as f64))	×
UNCOV 177	as u64)	×
178	.try_into()?;
179	// TODO: try_align_to when available
UNCOV 180	return Ok(num_pairs.align_to(1 << 20)); // Round up to MiBs	×
181	}
182
UNCOV 183	arg.chars().position(\|c\| c.is_alphabetic()).map_or_else(	×
UNCOV 184	\|\| Ok(arg.parse::<usize>()?),	×
UNCOV 185	\|pos\| {	×
UNCOV 186	let (num, pref_sym) = arg.split_at(pos);	×
UNCOV 187	let multiplier = PREF_SYMS	×
UNCOV 188	.iter()	×
UNCOV 189	.find(\|(x, _)\| *x == pref_sym)	×
UNCOV 190	.map(\|(_, m)\| m)	×
UNCOV 191	.ok_or(anyhow!("invalid prefix symbol"))?;	×
192
UNCOV 193	Ok((num.parse::<u64>()? * multiplier).try_into()?)	×
194	},
195	)
196	}
197
198	#[derive(Args, Debug)]
199	/// Shared CLI arguments for compression.
200	pub struct CompressArgs {
201	/// The endianness of the graph to write
202	#[clap(short = 'E', long)]
203	pub endianness: Option<String>,
204
205	/// The compression windows
206	#[clap(short = 'w', long, default_value_t = 7)]
207	pub compression_window: usize,
208	/// The minimum interval length
209	#[clap(short = 'i', long, default_value_t = 4)]
210	pub min_interval_length: usize,
211	/// The maximum recursion depth for references (-1 for infinite recursion depth)
212	#[clap(short = 'r', long, default_value_t = 3)]
213	pub max_ref_count: isize,
214
215	#[arg(value_enum)]
216	#[clap(long, default_value = "gamma")]
217	/// The code to use for the outdegree
218	pub outdegrees: PrivCode,
219
220	#[arg(value_enum)]
221	#[clap(long, default_value = "unary")]
222	/// The code to use for the reference offsets
223	pub references: PrivCode,
224
225	#[arg(value_enum)]
226	#[clap(long, default_value = "gamma")]
227	/// The code to use for the blocks
228	pub blocks: PrivCode,
229
230	#[arg(value_enum)]
231	#[clap(long, default_value = "zeta3")]
232	/// The code to use for the residuals
233	pub residuals: PrivCode,
234	}
235
236	impl From<CompressArgs> for CompFlags {
237	fn from(value: CompressArgs) -> Self {	4✔
238	CompFlags {
239	outdegrees: value.outdegrees.into(),	4✔
240	references: value.references.into(),	4✔
241	blocks: value.blocks.into(),	4✔
242	intervals: PrivCode::Gamma.into(),	4✔
243	residuals: value.residuals.into(),	4✔
244	min_interval_length: value.min_interval_length,	4✔
245	compression_window: value.compression_window,	4✔
246	max_ref_count: match value.max_ref_count {	4✔
247	-1 => usize::MAX,
248	_ => value.max_ref_count as usize,
249	},
250	}
251	}
252	}
253
254	/// Creates a threadpool with the given number of threads
255	pub fn get_thread_pool(num_threads: usize) -> rayon::ThreadPool {	6✔
256	rayon::ThreadPoolBuilder::new()	6✔
257	.num_threads(num_threads)	6✔
258	.build()
259	.expect("Failed to create thread pool")
260	}
261
262	/// Appends a string to the filename of a path.
263	///
264	/// # Panics
265	/// * Will panic if there is no filename.
266	/// * Will panic in test mode if the path has an extension.
UNCOV 267	pub fn append(path: impl AsRef<Path>, s: impl AsRef<str>) -> PathBuf {	×
UNCOV 268	debug_assert!(path.as_ref().extension().is_none());	×
UNCOV 269	let mut path_buf = path.as_ref().to_owned();	×
UNCOV 270	let mut filename = path_buf.file_name().unwrap().to_owned();	×
UNCOV 271	filename.push(s.as_ref());	×
UNCOV 272	path_buf.push(filename);	×
UNCOV 273	path_buf	×
274	}
275
276	/// Creates all parent directories of the given file path.
277	pub fn create_parent_dir(file_path: impl AsRef<Path>) -> Result<()> {	10✔
278	// ensure that the dst directory exists
279	if let Some(parent_dir) = file_path.as_ref().parent() {	20✔
UNCOV 280	std::fs::create_dir_all(parent_dir).with_context(\|\| {	×
UNCOV 281	format!(	×
UNCOV 282	"Failed to create the directory {:?}",	×
UNCOV 283	parent_dir.to_string_lossy()	×
284	)
285	})?;
286	}
287	Ok(())	10✔
288	}
289
290	/// Parse a duration from a string.
291	/// For compatibility with Java, if no suffix is given, it is assumed to be in milliseconds.
292	/// You can use suffixes, the available ones are:
293	/// - `s` for seconds
294	/// - `m` for minutes
295	/// - `h` for hours
296	/// - `d` for days
297	///
298	/// Example: `1d2h3m4s567` this is parsed as: 1 day, 2 hours, 3 minutes, 4 seconds, and 567 milliseconds.
299	fn parse_duration(value: &str) -> Result<Duration> {	×
300	if value.is_empty() {	×
301	bail!("Empty duration string, if you want every 0 milliseconds use `0`.");	×
302	}
303	let mut duration = Duration::from_secs(0);	×
304	let mut acc = String::new();	×
305	for c in value.chars() {	×
UNCOV 306	if c.is_ascii_digit() {	×
307	acc.push(c);	×
308	} else if c.is_whitespace() {	×
309	continue;	×
310	} else {
311	let dur = acc.parse::<u64>()?;	×
312	match c {	×
313	's' => duration += Duration::from_secs(dur),	×
314	'm' => duration += Duration::from_secs(dur * 60),	×
315	'h' => duration += Duration::from_secs(dur * 60 * 60),	×
316	'd' => duration += Duration::from_secs(dur * 60 * 60 * 24),	×
UNCOV 317	_ => return Err(anyhow!("Invalid duration suffix: {}", c)),	×
318	}
319	acc.clear();	×
320	}
321	}
UNCOV 322	if !acc.is_empty() {	×
UNCOV 323	let dur = acc.parse::<u64>()?;	×
UNCOV 324	duration += Duration::from_millis(dur);	×
325	}
UNCOV 326	Ok(duration)	×
327	}
328
329	pub fn init_envlogger() -> Result<()> {	2✔
330	let mut builder =	2✔
331	env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"));	2✔
332
333	let start = std::time::Instant::now();	2✔
334	let printer = SpanPrinter::new()	2✔
335	.spacing(Spacing::None)	2✔
336	.designator(Designator::Compact);	2✔
337	let span_round = SpanRound::new()	2✔
338	.largest(jiff::Unit::Day)	2✔
339	.smallest(jiff::Unit::Millisecond)	2✔
340	.days_are_24_hours();
341
342	builder.format(move \|buf, record\| {	1,866✔
343	let Ok(ts) = jiff::Timestamp::try_from(SystemTime::now()) else {	3,728✔
NEW 344	return Err(std::io::Error::other("Failed to get timestamp"));	×
345	};
NEW 346	let style = buf.default_level_style(record.level());	×
NEW 347	let elapsed = start.elapsed();	×
NEW 348	let span = jiff::Span::new()	×
NEW 349	.seconds(elapsed.as_secs() as i64)	×
NEW 350	.milliseconds(elapsed.subsec_millis() as i64);	×
NEW 351	let span = span.round(span_round).expect("Failed to round span");	×
NEW 352	writeln!(	×
NEW 353	buf,	×
NEW 354	"{} {} {style}{}{style:#} [{:?}] {} - {}",	×
NEW 355	ts.strftime("%F %T%.3f"),	×
NEW 356	printer.span_to_string(&span),	×
NEW 357	record.level(),	×
NEW 358	std::thread::current().id(),	×
NEW 359	record.target(),	×
NEW 360	record.args()	×
361	)
362	});
363	builder.init();	2✔
364	Ok(())	2✔
365	}
366
367	/// The entry point of the command-line interface.
368	pub fn main<I, T>(args: I) -> Result<()>	9✔
369	where
370	I: IntoIterator<Item = T>,
371	T: Into<std::ffi::OsString> + Clone,
372	{
373	let start = std::time::Instant::now();	9✔
374
375	let command = Command::new("webgraph")	9✔
376	.about("Webgraph tools to build, convert, modify, and analyze webgraph files.")
377	.version(build_info::version_string())	9✔
378	.subcommand_required(true)
379	.arg_required_else_help(true)
380	.arg(
381	Arg::new("log-interval")	9✔
382	.short('l')	9✔
383	.long("log-interval")	9✔
384	.value_parser(parse_duration)	9✔
385	.help(	9✔
386	"How often to log progress. Default is 10s. You can use the suffixes `s` for seconds, `m` for minutes, `h` for hours, and `d` for days. If no suffix is provided it is assumed to be in milliseconds. Example: `1d2h3m4s567` is parsed as 1 day + 2 hours + 3 minutes + 4 seconds + 567 milliseconds = 93784567 milliseconds.",)	9✔
387	.global(true),	9✔
388	)
389	.after_help(
390	"Environment (noteworthy environment variables used):
391	RUST_MIN_STACK: minimum thread stack size (in bytes)
392	we suggest RUST_MIN_STACK=8388608 which is the maximum allowed by linux.
393	TMPDIR: where to store temporary files (potentially very large ones)
394	RUST_LOG: configuration for env_logger, pass `info` to see the progress of the
395	compression, `debug` to see the progress of the decompression, and `trace` to see all the
396	details. You can also use `RUST_LOG=webgraph=debug` to see only the webgraph logs.
397	",
398	);
399
400	macro_rules! impl_dispatch {	×
UNCOV 401	($command:expr, $($module:ident),*) => {{	×
UNCOV 402	let command = build::cli($command);	×
UNCOV 403	$(	×
UNCOV 404	let command = $module::cli(command);	×
405	)*	×
406	let command = command.display_order(0); // sort args alphabetically
UNCOV 407	let mut completion_command = command.clone();	×
UNCOV 408	let matches = command.get_matches_from(args);	×
409
UNCOV 410	let subcommand = matches.subcommand();	×
411	// if no command is specified, print the help message
UNCOV 412	if subcommand.is_none() {	×
UNCOV 413	completion_command.print_help().unwrap();	×
UNCOV 414	return Ok(());	×
415	}
UNCOV 416	match subcommand.unwrap() {	×
UNCOV 417	(build::COMMAND_NAME, sub_m) => build::main(sub_m, &mut completion_command),	×
UNCOV 418	$(	×
UNCOV 419	($module::COMMAND_NAME, sub_m) => $module::main(sub_m),	×
UNCOV 420	)*	×
UNCOV 421	(command_name, _) => {	×
422	// this shouldn't happen as clap should catch this
UNCOV 423	eprintln!("Unknown command: {:?}", command_name);	×
UNCOV 424	completion_command.print_help().unwrap();	×
UNCOV 425	std::process::exit(1);	×
426	}
427	}
428	}};
429	}
430
431	impl_dispatch!(command, analyze, bench, check, from, perm, run, to, transform)?;	9✔
432
433	log::info!(	9✔
434	"The command took {}",	9✔
435	pretty_print_elapsed(start.elapsed().as_secs_f64())	9✔
436	);
437
UNCOV 438	Ok(())	×
439	}
440
441	/// Pretty prints seconds in a humanly readable format.
442	fn pretty_print_elapsed(elapsed: f64) -> String {	18✔
443	let mut result = String::new();	18✔
444	let mut elapsed_seconds = elapsed as u64;	18✔
445	let weeks = elapsed_seconds / (60 * 60 * 24 * 7);	18✔
446	elapsed_seconds %= 60 * 60 * 24 * 7;	18✔
447	let days = elapsed_seconds / (60 * 60 * 24);	18✔
448	elapsed_seconds %= 60 * 60 * 24;	18✔
449	let hours = elapsed_seconds / (60 * 60);	18✔
450	elapsed_seconds %= 60 * 60;	18✔
451	let minutes = elapsed_seconds / 60;	18✔
452	//elapsed_seconds %= 60;
453
454	match weeks {	18✔
455	0 => {}	18✔
UNCOV 456	1 => result.push_str("1 week "),	×
UNCOV 457	_ => result.push_str(&format!("{} weeks ", weeks)),	×
458	}
459	match days {	18✔
460	0 => {}	18✔
UNCOV 461	1 => result.push_str("1 day "),	×
UNCOV 462	_ => result.push_str(&format!("{} days ", days)),	×
463	}
464	match hours {	18✔
465	0 => {}	18✔
UNCOV 466	1 => result.push_str("1 hour "),	×
UNCOV 467	_ => result.push_str(&format!("{} hours ", hours)),	×
468	}
469	match minutes {	18✔
470	0 => {}	16✔
471	1 => result.push_str("1 minute "),	2✔
UNCOV 472	_ => result.push_str(&format!("{} minutes ", minutes)),	×
473	}
474
475	result.push_str(&format!("{:.3} seconds ({}s)", elapsed % 60.0, elapsed));	18✔
476	result	18✔
477	}

vigna / webgraph-rs / 14130263389

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous