• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vigna / webgraph-rs / 23365769388

20 Mar 2026 10:52PM UTC coverage: 68.228% (-3.0%) from 71.245%
23365769388

push

github

vigna
No le_bins,be_bins for webgraph

6655 of 9754 relevant lines covered (68.23%)

46582760.24 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.86
/cli/src/run/llp.rs
1
/*
2
 * SPDX-FileCopyrightText: 2023 Inria
3
 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
4
 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
5
 *
6
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
7
 */
8

9
use crate::GranularityArgs;
10
use crate::IntSliceFormat;
11
use crate::NumThreadsArg;
12
use crate::create_parent_dir;
13
use crate::get_thread_pool;
14
use anyhow::{Context, Result, bail};
15
use clap::Parser;
16
use dsi_bitstream::dispatch::factory::CodesReaderFactoryHelper;
17
use dsi_bitstream::prelude::*;
18
use epserde::prelude::*;
19
use webgraph::prelude::*;
20
use webgraph_algo::llp::preds::{MaxUpdates, MinAvgImprov, MinGain, MinModified, PercModified};
21
use webgraph_algo::{combine_labels, labels_to_ranks};
22

23
use predicates::prelude::*;
24
use std::path::PathBuf;
25
use tempfile::tempdir;
26

27
#[derive(Parser, Debug)]
28
#[command(name = "llp", about = "Computes a permutation of a graph using Layered Label Propagation.", long_about = None, next_line_help = true)]
29
pub struct CliArgs {
30
    /// The basename of the graph.​
31
    pub basename: PathBuf,
32

33
    /// Output filename for the LLP permutation. If not provided, only labels
34
    /// are computed without combining them into a permutation. In that case,
35
    /// --work-dir should be set to preserve the labels.​
36
    pub perm: Option<PathBuf>,
37

38
    /// The folder where the LLP labels are stored in Java format (big-endian
39
    /// 64-bit integers). If not specified, a temporary directory is used and
40
    /// deleted at the end; the parent folder for temporary directories can be
41
    /// set with the TMPDIR environment variable. A work directory serves to
42
    /// save the labels and to resume the computation of gammas, whose
43
    /// computation on large graphs might take days. Similar nodes will be
44
    /// assigned the same label. To resume computation, compute the remaining
45
    /// gammas without passing "perm", and then run "llp-combine" to combine
46
    /// all labels in the folder into a final permutation.​
47
    #[arg(short, long)]
48
    pub work_dir: Option<PathBuf>,
49

50
    #[arg(long, value_enum, default_value_t)]
51
    /// The format of the permutation file.​
52
    pub fmt: IntSliceFormat,
53

54
    #[arg(short, long, allow_hyphen_values = true, use_value_delimiter = true, value_delimiter = ',', default_values_t = vec!["-0".to_string(), "-1".to_string(), "-2".to_string(), "-3".to_string(), "-4".to_string(), "-5".to_string(), "-6".to_string(), "-7".to_string(), "-8".to_string(), "-9".to_string(), "-10".to_string()])]
55
    /// The ɣ's to use in LLP, separated by commas. The format is given by an
56
    /// integer numerator (if missing, assumed to be one), a dash, and then a
57
    /// power-of-two exponent for the denominator. For example, -2 is 1/4, and
58
    /// 0-0 is 0.​
59
    pub gammas: Vec<String>,
60

61
    #[arg(short = 'u', long, default_value_t = 100)]
62
    /// Maximum number of updates for a given ɣ.​
63
    pub max_updates: usize,
64

65
    #[arg(short = 'M', long)]
66
    /// Stop updates when the number of modified nodes falls below the square
67
    /// root of the number of nodes.​
68
    pub modified: bool,
69

70
    #[arg(short = 'p', long)]
71
    /// Stop updates when the fraction of modified nodes falls below this
72
    /// percentage.​
73
    pub perc_modified: Option<f64>,
74

75
    #[arg(short = 't', long, default_value_t = MinGain::DEFAULT_THRESHOLD)]
76
    /// The gain threshold used to stop the computation (0 to disable).​
77
    pub gain_threshold: f64,
78

79
    #[arg(short = 'i', long, default_value_t = MinAvgImprov::DEFAULT_THRESHOLD)]
80
    /// The threshold on the average (over the last ten updates) gain
81
    /// improvement used to stop the computation (-Inf to disable).​
82
    pub improv_threshold: f64,
83

84
    #[clap(flatten)]
85
    pub num_threads: NumThreadsArg,
86

87
    #[arg(short, long, default_value_t = 0)]
88
    /// The seed to use for the PRNG.​
89
    pub seed: u64,
90

91
    #[clap(flatten)]
92
    pub granularity: GranularityArgs,
93

94
    #[arg(long)]
95
    /// The chunk size used to localize the random permutation
96
    /// (advanced option).​
97
    pub chunk_size: Option<usize>,
98
}
99

100
/// Stores a permutation using the given format.
101
pub fn store_perm(
5✔
102
    data: &[usize],
103
    perm: impl AsRef<std::path::Path>,
104
    fmt: IntSliceFormat,
105
) -> Result<()> {
106
    fmt.store(perm, data, None)
25✔
107
}
108

109
pub fn main(args: CliArgs) -> Result<()> {
5✔
110
    if args.perm.is_none() && args.work_dir.is_none() {
10✔
111
        log::warn!(concat!(
×
112
            "If `perm` is not set the llp will just compute the labels and not produce the final permutation. ",
113
            "But you didn't set `work_dir` so the labels will be stored in a temp dir that will be deleted at the end of computation. ",
114
            "Either set `perm` if you want to compute the permutation, or `work_dir` if you want the labels and combine them later."
115
        ));
116
        return Ok(());
×
117
    }
118

119
    if let Some(perm) = &args.perm {
10✔
120
        create_parent_dir(perm)?;
10✔
121
    }
122

123
    match get_endianness(&args.basename)?.as_str() {
15✔
124
        #[cfg(feature = "be_bins")]
125
        BE::NAME => llp::<BE>(args),
15✔
126
        #[cfg(feature = "le_bins")]
127
        LE::NAME => llp::<LE>(args),
×
128
        e => panic!("Unknown endianness: {}", e),
×
129
    }
130
}
131

132
pub fn llp<E: Endianness + 'static + Send + Sync>(args: CliArgs) -> Result<()>
5✔
133
where
134
    MemoryFactory<E, MmapHelper<u32>>: CodesReaderFactoryHelper<E>,
135
    for<'a> LoadModeCodesReader<'a, E, LoadMmap>: BitSeek,
136
{
137
    let start = std::time::Instant::now();
10✔
138
    // due to ownership problems, we always create the temp dir, but only use it
139
    // if the user didn't provide a work_dir
140
    let temp_dir = tempdir()?;
10✔
141
    let work_dir = args.work_dir.as_deref().unwrap_or(temp_dir.path());
25✔
142
    log::info!("Using workdir: {}", work_dir.display());
15✔
143

144
    // Load the graph in THP memory
145
    log::info!(
5✔
146
        "Loading graph {} in THP memory...",
×
147
        args.basename.to_string_lossy()
5✔
148
    );
149
    let graph = BvGraph::with_basename(&args.basename)
15✔
150
        .mode::<LoadMmap>()
151
        .flags(MemoryFlags::TRANSPARENT_HUGE_PAGES | MemoryFlags::RANDOM_ACCESS)
10✔
152
        .endianness::<E>()
153
        .load()?;
154

155
    // Load degree cumulative function in THP memory
156
    log::info!("Loading DCF in THP memory...");
5✔
157
    let deg_cumul = unsafe {
158
        DCF::load_mmap(
159
            args.basename.with_extension(DEG_CUMUL_EXTENSION),
5✔
160
            Flags::TRANSPARENT_HUGE_PAGES | Flags::RANDOM_ACCESS,
5✔
161
        )
162
        .with_context(|| {
5✔
163
            format!(
×
164
                "Could not load degree cumulative function for basename {}",
×
165
                args.basename.display()
×
166
            )
167
        })
168
    }?;
169

170
    // parse the gamma format
171
    let mut gammas = vec![];
10✔
172
    for gamma in args.gammas {
60✔
173
        let t: Vec<_> = gamma.split('-').collect();
220✔
174
        if t.len() != 2 {
55✔
175
            bail!("Invalid gamma: {}", gamma);
×
176
        }
177
        gammas.push(
110✔
178
            if t[0].is_empty() {
110✔
179
                1.0
55✔
180
            } else {
181
                t[0].parse::<usize>()? as f64
×
182
            } * (0.5_f64).powf(t[1].parse::<usize>()? as f64),
165✔
183
        );
184
    }
185

186
    gammas.sort_by(|a, b| a.total_cmp(b));
835✔
187

188
    let mut predicate = MinGain::try_from(args.gain_threshold)?.boxed();
20✔
189
    predicate = predicate
10✔
190
        .or(MinAvgImprov::try_from(args.improv_threshold)?)
15✔
191
        .boxed();
5✔
192
    predicate = predicate.or(MaxUpdates::from(args.max_updates)).boxed();
30✔
193

194
    if args.modified {
5✔
195
        predicate = predicate.or(MinModified::default()).boxed();
×
196
    }
197

198
    if let Some(perc_modified) = args.perc_modified {
5✔
199
        predicate = predicate.or(PercModified::try_from(perc_modified)?).boxed();
×
200
    }
201

202
    let granularity = args.granularity.into_granularity();
15✔
203

204
    let thread_pool = get_thread_pool(args.num_threads.num_threads);
15✔
205
    thread_pool.install(|| -> Result<()> {
15✔
206
        // compute the LLP
207
        webgraph_algo::llp::layered_label_propagation_labels_only(
5✔
208
            graph,
5✔
209
            deg_cumul.uncase(),
5✔
210
            gammas,
5✔
211
            args.chunk_size,
5✔
212
            granularity,
5✔
213
            args.seed,
5✔
214
            predicate,
5✔
215
            work_dir,
5✔
216
        )
217
        .context("Could not compute LLP")?;
5✔
218

219
        log::info!("Elapsed: {}", start.elapsed().as_secs_f64());
15✔
220

221
        if let Some(perm_path) = args.perm {
10✔
222
            let labels = combine_labels(work_dir)?;
15✔
223
            log::info!("Combined labels...");
5✔
224
            let rank_perm = labels_to_ranks(&labels);
15✔
225
            log::info!("Saving permutation...");
5✔
226
            store_perm(&rank_perm, perm_path, args.fmt)?;
20✔
227
        }
228

229
        Ok(())
5✔
230
    })
231
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc