• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vigna / webgraph-rs / 14130263389

28 Mar 2025 01:40PM UTC coverage: 49.654% (-0.1%) from 49.798%
14130263389

push

github

zommiommy
fixed llp wrong combine types

22 of 41 new or added lines in 4 files covered. (53.66%)

1019 existing lines in 41 files now uncovered.

2437 of 4908 relevant lines covered (49.65%)

18919274.03 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

13.64
/src/cli/transform/simplify.rs
1
/*
2
 * SPDX-FileCopyrightText: 2023 Inria
3
 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
4
 *
5
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6
 */
7

8
use crate::cli::*;
9
use crate::graphs::union_graph::UnionGraph;
10
use crate::prelude::*;
11
use anyhow::Result;
12
use clap::{ArgMatches, Args, Command, FromArgMatches};
13
use dsi_bitstream::{dispatch::factory::CodesReaderFactoryHelper, prelude::*};
14
use mmap_rs::MmapFlags;
15
use std::path::PathBuf;
16
use tempfile::Builder;
17

18
pub const COMMAND_NAME: &str = "simplify";
19

20
#[derive(Args, Debug)]
21
#[command(about = "Makes a BvGraph simple (undirected and loopless) by adding missing arcs and removing loops, optionally applying a permutation.", long_about = None)]
22
pub struct CliArgs {
23
    /// The basename of the graph.
24
    pub src: PathBuf,
25
    /// The basename of the simplified graph.
26
    pub dst: PathBuf,
27

28
    #[arg(long)]
29
    /// The basename of a pre-computed transposed version of the source graph, which
30
    /// will be use to speed up the simplification.
31
    pub transposed: Option<PathBuf>,
32

33
    #[clap(flatten)]
34
    pub num_threads: NumThreadsArg,
35

36
    #[clap(flatten)]
37
    pub batch_size: BatchSizeArg,
38

39
    #[clap(flatten)]
40
    pub ca: CompressArgs,
41

42
    #[arg(long)]
43
    /// The path to an optional permutation in binary big-endian format to apply to the graph.
44
    pub permutation: Option<PathBuf>,
45
}
46

47
pub fn cli(command: Command) -> Command {
18✔
48
    command.subcommand(CliArgs::augment_args(Command::new(COMMAND_NAME)).display_order(0))
18✔
49
}
50

51
pub fn main(submatches: &ArgMatches) -> Result<()> {
2✔
52
    let args = CliArgs::from_arg_matches(submatches)?;
4✔
53

54
    create_parent_dir(&args.dst)?;
×
55

56
    match get_endianness(&args.src)?.as_str() {
4✔
57
        #[cfg(any(
58
            feature = "be_bins",
59
            not(any(feature = "be_bins", feature = "le_bins"))
60
        ))]
61
        BE::NAME => simplify::<BE>(args),
4✔
62
        #[cfg(any(
63
            feature = "le_bins",
64
            not(any(feature = "be_bins", feature = "le_bins"))
65
        ))]
66
        LE::NAME => simplify::<LE>(args),
×
67
        e => panic!("Unknown endianness: {}", e),
×
68
    }
69
}
70

UNCOV
71
fn no_ef_warn(basepath: impl AsRef<std::path::Path>) {
×
UNCOV
72
    log::warn!("The .ef file was not found so the simplification will proceed sequentially. This may be slow. To speed it up, you can use `webgraph build ef {}` which would allow us create batches in parallel", basepath.as_ref().display());
×
73
}
74

75
pub fn simplify<E: Endianness>(args: CliArgs) -> Result<()>
2✔
76
where
77
    MmapHelper<u32>: CodesReaderFactoryHelper<E>,
78
    for<'a> LoadModeCodesReader<'a, E, Mmap>: BitSeek + Clone + Send + Sync,
79
{
80
    // TODO!: speed it up by using random access graph if possible
81
    let thread_pool = crate::cli::get_thread_pool(args.num_threads.num_threads);
2✔
82

83
    let target_endianness = args.ca.endianness.clone().unwrap_or_else(|| E::NAME.into());
6✔
84

85
    let dir = Builder::new().prefix("transform_simplify_").tempdir()?;
4✔
86

UNCOV
87
    match (args.permutation, args.transposed) {
×
88
        // load the transposed graph and use it to directly compress the graph
89
        // without doing any sorting
90
        (None, Some(t_path)) => {
×
UNCOV
91
            log::info!("Transposed graph provided, using it to simplify the graph");
×
92

93
            let has_ef_graph =
×
94
                std::fs::metadata(args.src.with_extension("ef")).is_ok_and(|x| x.is_file());
×
95
            let has_ef_t_graph =
×
UNCOV
96
                std::fs::metadata(t_path.with_extension("ef")).is_ok_and(|x| x.is_file());
×
97

98
            match (has_ef_graph, has_ef_t_graph) {
×
99
                (true, true) => {
×
UNCOV
100
                    log::info!("Both .ef files found, using simplify split");
×
101

102
                    let graph =
×
UNCOV
103
                        crate::graphs::bvgraph::random_access::BvGraph::with_basename(&args.src)
×
104
                            .endianness::<E>()
105
                            .load()?;
106
                    let num_nodes = graph.num_nodes();
×
107
                    let graph_t =
×
UNCOV
108
                        crate::graphs::bvgraph::random_access::BvGraph::with_basename(&t_path)
×
109
                            .endianness::<E>()
110
                            .load()?;
111

112
                    if graph_t.num_nodes() != num_nodes {
×
UNCOV
113
                        anyhow::bail!("The number of nodes in the graph and its transpose do not match! {} != {}", num_nodes, graph_t.num_nodes());
×
114
                    }
115

UNCOV
116
                    let sorted = NoSelfLoopsGraph(UnionGraph(graph, graph_t));
×
117

118
                    BvComp::parallel_endianness(
119
                        &args.dst,
×
120
                        &sorted,
×
121
                        num_nodes,
×
122
                        args.ca.into(),
×
123
                        &thread_pool,
×
124
                        dir,
×
UNCOV
125
                        &target_endianness,
×
126
                    )?;
127

UNCOV
128
                    return Ok(());
×
129
                }
130
                (true, false) => {
×
UNCOV
131
                    no_ef_warn(&args.src);
×
132
                }
133
                (false, true) => {
×
UNCOV
134
                    no_ef_warn(&t_path);
×
135
                }
136
                (false, false) => {
×
137
                    no_ef_warn(&args.src);
×
UNCOV
138
                    no_ef_warn(&t_path);
×
139
                }
140
            }
141

142
            let seq_graph =
×
UNCOV
143
                crate::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
×
144
                    .endianness::<E>()
145
                    .load()?;
146
            let num_nodes = seq_graph.num_nodes();
×
147
            let seq_graph_t =
×
UNCOV
148
                crate::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&t_path)
×
149
                    .endianness::<E>()
150
                    .load()?;
151

152
            if seq_graph_t.num_nodes() != num_nodes {
×
153
                anyhow::bail!(
×
154
                    "The number of nodes in the graph and its transpose do not match! {} != {}",
×
155
                    num_nodes,
×
UNCOV
156
                    seq_graph_t.num_nodes()
×
157
                );
158
            }
159

UNCOV
160
            let sorted = NoSelfLoopsGraph(UnionGraph(seq_graph, seq_graph_t));
×
161

162
            BvComp::parallel_endianness(
163
                &args.dst,
×
164
                &sorted,
×
165
                num_nodes,
×
166
                args.ca.into(),
×
167
                &thread_pool,
×
168
                dir,
×
UNCOV
169
                &target_endianness,
×
170
            )?;
171
        }
172
        // apply the permutation, don't care if the transposed graph is already computed
173
        // as we cannot really exploit it
174
        (Some(perm_path), None | Some(_)) => {
2✔
175
            log::info!("Permutation provided, applying it to the graph");
4✔
176

177
            let perm = JavaPermutation::mmap(perm_path, MmapFlags::RANDOM_ACCESS)?;
4✔
178

179
            // if the .ef file exists, we can use the simplify split
180
            if std::fs::metadata(args.src.with_extension("ef")).is_ok_and(|x| x.is_file()) {
2✔
181
                log::info!(".ef file found, using simplify split");
4✔
182
                let graph =
2✔
183
                    crate::graphs::bvgraph::random_access::BvGraph::with_basename(&args.src)
2✔
184
                        .endianness::<E>()
185
                        .load()?;
186

187
                let perm_graph = PermutedGraph {
188
                    graph: &graph,
×
UNCOV
189
                    perm: &perm,
×
190
                };
191

192
                let sorted = crate::transform::simplify_split(
193
                    &perm_graph,
×
194
                    args.batch_size.batch_size,
×
UNCOV
195
                    &thread_pool,
×
196
                )?;
197

198
                BvComp::parallel_endianness(
199
                    &args.dst,
×
200
                    &sorted,
×
201
                    graph.num_nodes(),
×
202
                    args.ca.into(),
×
203
                    &thread_pool,
×
204
                    dir,
×
UNCOV
205
                    &target_endianness,
×
206
                )?;
207

208
                return Ok(());
2✔
209
            }
210

UNCOV
211
            no_ef_warn(&args.src);
×
212

213
            let seq_graph =
×
UNCOV
214
                crate::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
×
215
                    .endianness::<E>()
216
                    .load()?;
217

218
            let perm_graph = PermutedGraph {
219
                graph: &seq_graph,
×
UNCOV
220
                perm: &perm,
×
221
            };
222

223
            // simplify the graph
224
            let sorted =
×
UNCOV
225
                crate::transform::simplify(&perm_graph, args.batch_size.batch_size).unwrap();
×
226

227
            BvComp::parallel_endianness(
228
                &args.dst,
×
229
                &sorted,
×
230
                sorted.num_nodes(),
×
231
                args.ca.into(),
×
232
                &thread_pool,
×
233
                dir,
×
UNCOV
234
                &target_endianness,
×
235
            )?;
236
        }
237
        // just compute the transpose on the fly
238
        (None, None) => {
×
239
            log::info!(
×
UNCOV
240
                "No permutation or transposed graph provided, computing the transpose on the fly"
×
241
            );
242
            // if the .ef file exists, we can use the simplify split
243
            if std::fs::metadata(args.src.with_extension("ef")).is_ok_and(|x| x.is_file()) {
×
UNCOV
244
                log::info!(".ef file found, using simplify split");
×
245

246
                let graph =
×
UNCOV
247
                    crate::graphs::bvgraph::random_access::BvGraph::with_basename(&args.src)
×
248
                        .endianness::<E>()
249
                        .load()?;
250

251
                let sorted = crate::transform::simplify_split(
252
                    &graph,
×
253
                    args.batch_size.batch_size,
×
UNCOV
254
                    &thread_pool,
×
255
                )?;
256

257
                BvComp::parallel_endianness(
258
                    &args.dst,
×
259
                    &sorted,
×
260
                    graph.num_nodes(),
×
261
                    args.ca.into(),
×
262
                    &thread_pool,
×
263
                    dir,
×
UNCOV
264
                    &target_endianness,
×
265
                )?;
266

UNCOV
267
                return Ok(());
×
268
            }
269

UNCOV
270
            no_ef_warn(&args.src);
×
271

272
            let seq_graph =
×
UNCOV
273
                crate::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
×
274
                    .endianness::<E>()
275
                    .load()?;
276

UNCOV
277
            let num_nodes = seq_graph.num_nodes();
×
278
            // transpose the graph
279
            let sorted =
×
UNCOV
280
                crate::transform::simplify_sorted(seq_graph, args.batch_size.batch_size).unwrap();
×
281

282
            BvComp::parallel_endianness(
283
                &args.dst,
×
284
                &sorted,
×
285
                num_nodes,
×
286
                args.ca.into(),
×
287
                &thread_pool,
×
288
                dir,
×
UNCOV
289
                &target_endianness,
×
290
            )?;
291
        }
292
    }
293

UNCOV
294
    Ok(())
×
295
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc