• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vigna / webgraph-rs / 18761474399

23 Oct 2025 08:43PM UTC coverage: 61.976% (-0.06%) from 62.035%
18761474399

push

github

vigna
Goodbye le_bins/be_bins

5201 of 8392 relevant lines covered (61.98%)

27888380.24 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

21.64
/cli/src/transform/simplify.rs
1
/*
2
 * SPDX-FileCopyrightText: 2023 Inria
3
 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
4
 *
5
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6
 */
7

8
use crate::*;
9
use anyhow::Result;
10
use dsi_bitstream::{dispatch::factory::CodesReaderFactoryHelper, prelude::*};
11
use mmap_rs::MmapFlags;
12
use std::path::PathBuf;
13
use tempfile::Builder;
14
use webgraph::graphs::union_graph::UnionGraph;
15
use webgraph::prelude::*;
16

17
#[derive(Parser, Debug)]
18
#[command(name = "simplify", about = "Makes a BvGraph simple (undirected and loopless) by adding missing arcs and removing loops, optionally applying a permutation.", long_about = None)]
19
pub struct CliArgs {
20
    /// The basename of the graph.
21
    pub src: PathBuf,
22
    /// The basename of the simplified graph.
23
    pub dst: PathBuf,
24

25
    #[arg(long)]
26
    /// The basename of a pre-computed transposed version of the source graph, which
27
    /// will be use to speed up the simplification.
28
    pub transposed: Option<PathBuf>,
29

30
    #[clap(flatten)]
31
    pub num_threads: NumThreadsArg,
32

33
    #[clap(flatten)]
34
    pub memory_usage: MemoryUsageArg,
35

36
    #[clap(flatten)]
37
    pub ca: CompressArgs,
38

39
    #[arg(long)]
40
    /// The path to an optional permutation in binary big-endian format to apply to the graph.
41
    pub permutation: Option<PathBuf>,
42
}
43

44
pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
4✔
45
    create_parent_dir(&args.dst)?;
8✔
46

47
    match get_endianness(&args.src)?.as_str() {
12✔
48
        BE::NAME => simplify::<BE>(global_args, args),
16✔
49
        LE::NAME => simplify::<LE>(global_args, args),
×
50
        e => panic!("Unknown endianness: {}", e),
×
51
    }
52
}
53

54
fn no_ef_warn(basepath: impl AsRef<std::path::Path>) {
×
55
    log::warn!(
×
56
        "The .ef file was not found so the simplification will proceed sequentially. This may be slow. To speed it up, you can use `webgraph build ef {}` which would allow us create batches in parallel",
×
57
        basepath.as_ref().display()
×
58
    );
59
}
60

61
pub fn simplify<E: Endianness>(_global_args: GlobalArgs, args: CliArgs) -> Result<()>
4✔
62
where
63
    MmapHelper<u32>: CodesReaderFactoryHelper<E>,
64
    for<'a> LoadModeCodesReader<'a, E, Mmap>: BitSeek + Clone + Send + Sync,
65
{
66
    // TODO!: speed it up by using random access graph if possible
67
    let thread_pool = crate::get_thread_pool(args.num_threads.num_threads);
12✔
68

69
    let target_endianness = args.ca.endianness.clone().unwrap_or_else(|| E::NAME.into());
24✔
70

71
    let dir = Builder::new().prefix("transform_simplify_").tempdir()?;
16✔
72

73
    match (args.permutation, args.transposed) {
8✔
74
        // load the transposed graph and use it to directly compress the graph
75
        // without doing any sorting
76
        (None, Some(t_path)) => {
×
77
            log::info!("Transposed graph provided, using it to simplify the graph");
×
78

79
            let has_ef_graph =
×
80
                std::fs::metadata(args.src.with_extension("ef")).is_ok_and(|x| x.is_file());
×
81
            let has_ef_t_graph =
×
82
                std::fs::metadata(t_path.with_extension("ef")).is_ok_and(|x| x.is_file());
×
83

84
            match (has_ef_graph, has_ef_t_graph) {
×
85
                (true, true) => {
×
86
                    log::info!("Both .ef files found, using simplify split");
×
87

88
                    let graph =
×
89
                        webgraph::graphs::bvgraph::random_access::BvGraph::with_basename(&args.src)
×
90
                            .endianness::<E>()
91
                            .load()?;
92
                    let num_nodes = graph.num_nodes();
×
93
                    let graph_t =
×
94
                        webgraph::graphs::bvgraph::random_access::BvGraph::with_basename(&t_path)
×
95
                            .endianness::<E>()
96
                            .load()?;
97

98
                    if graph_t.num_nodes() != num_nodes {
×
99
                        anyhow::bail!(
×
100
                            "The number of nodes in the graph and its transpose do not match! {} != {}",
×
101
                            num_nodes,
×
102
                            graph_t.num_nodes()
×
103
                        );
104
                    }
105

106
                    let sorted = NoSelfLoopsGraph(UnionGraph(graph, graph_t));
×
107

108
                    BvComp::parallel_endianness(
109
                        &args.dst,
×
110
                        &sorted,
×
111
                        num_nodes,
×
112
                        args.ca.into(),
×
113
                        &thread_pool,
×
114
                        dir,
×
115
                        &target_endianness,
×
116
                    )?;
117

118
                    return Ok(());
×
119
                }
120
                (true, false) => {
×
121
                    no_ef_warn(&args.src);
×
122
                }
123
                (false, true) => {
×
124
                    no_ef_warn(&t_path);
×
125
                }
126
                (false, false) => {
×
127
                    no_ef_warn(&args.src);
×
128
                    no_ef_warn(&t_path);
×
129
                }
130
            }
131

132
            let seq_graph =
×
133
                webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
×
134
                    .endianness::<E>()
135
                    .load()?;
136
            let num_nodes = seq_graph.num_nodes();
×
137
            let seq_graph_t =
×
138
                webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&t_path)
×
139
                    .endianness::<E>()
140
                    .load()?;
141

142
            if seq_graph_t.num_nodes() != num_nodes {
×
143
                anyhow::bail!(
×
144
                    "The number of nodes in the graph and its transpose do not match! {} != {}",
×
145
                    num_nodes,
×
146
                    seq_graph_t.num_nodes()
×
147
                );
148
            }
149

150
            let sorted = NoSelfLoopsGraph(UnionGraph(seq_graph, seq_graph_t));
×
151

152
            BvComp::parallel_endianness(
153
                &args.dst,
×
154
                &sorted,
×
155
                num_nodes,
×
156
                args.ca.into(),
×
157
                &thread_pool,
×
158
                dir,
×
159
                &target_endianness,
×
160
            )?;
161
        }
162
        // apply the permutation, don't care if the transposed graph is already computed
163
        // as we cannot really exploit it
164
        (Some(perm_path), None | Some(_)) => {
4✔
165
            log::info!("Permutation provided, applying it to the graph");
8✔
166

167
            let perm = JavaPermutation::mmap(perm_path, MmapFlags::RANDOM_ACCESS)?;
12✔
168

169
            // if the .ef file exists, we can use the simplify split
170
            if std::fs::metadata(args.src.with_extension("ef")).is_ok_and(|x| x.is_file()) {
20✔
171
                log::info!(".ef file found, using simplify split");
8✔
172
                let graph =
4✔
173
                    webgraph::graphs::bvgraph::random_access::BvGraph::with_basename(&args.src)
8✔
174
                        .endianness::<E>()
175
                        .load()?;
176

177
                let perm_graph = PermutedGraph {
178
                    graph: &graph,
4✔
179
                    perm: &perm,
4✔
180
                };
181

182
                let sorted = webgraph::transform::simplify_split(
183
                    &perm_graph,
4✔
184
                    args.memory_usage.memory_usage,
4✔
185
                    &thread_pool,
4✔
186
                )?;
187

188
                BvComp::parallel_endianness(
189
                    &args.dst,
4✔
190
                    &sorted,
4✔
191
                    graph.num_nodes(),
8✔
192
                    args.ca.into(),
8✔
193
                    &thread_pool,
4✔
194
                    dir,
4✔
195
                    &target_endianness,
4✔
196
                )?;
197

198
                return Ok(());
4✔
199
            }
200

201
            no_ef_warn(&args.src);
×
202

203
            let seq_graph =
×
204
                webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
×
205
                    .endianness::<E>()
206
                    .load()?;
207

208
            let perm_graph = PermutedGraph {
209
                graph: &seq_graph,
×
210
                perm: &perm,
×
211
            };
212

213
            // simplify the graph
214
            let sorted =
×
215
                webgraph::transform::simplify(&perm_graph, args.memory_usage.memory_usage).unwrap();
×
216

217
            BvComp::parallel_endianness(
218
                &args.dst,
×
219
                &sorted,
×
220
                sorted.num_nodes(),
×
221
                args.ca.into(),
×
222
                &thread_pool,
×
223
                dir,
×
224
                &target_endianness,
×
225
            )?;
226
        }
227
        // just compute the transpose on the fly
228
        (None, None) => {
×
229
            log::info!(
×
230
                "No permutation or transposed graph provided, computing the transpose on the fly"
×
231
            );
232
            // if the .ef file exists, we can use the simplify split
233
            if std::fs::metadata(args.src.with_extension("ef")).is_ok_and(|x| x.is_file()) {
×
234
                log::info!(".ef file found, using simplify split");
×
235

236
                let graph =
×
237
                    webgraph::graphs::bvgraph::random_access::BvGraph::with_basename(&args.src)
×
238
                        .endianness::<E>()
239
                        .load()?;
240

241
                let sorted = webgraph::transform::simplify_split(
242
                    &graph,
×
243
                    args.memory_usage.memory_usage,
×
244
                    &thread_pool,
×
245
                )?;
246

247
                BvComp::parallel_endianness(
248
                    &args.dst,
×
249
                    &sorted,
×
250
                    graph.num_nodes(),
×
251
                    args.ca.into(),
×
252
                    &thread_pool,
×
253
                    dir,
×
254
                    &target_endianness,
×
255
                )?;
256

257
                return Ok(());
×
258
            }
259

260
            no_ef_warn(&args.src);
×
261

262
            let seq_graph =
×
263
                webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
×
264
                    .endianness::<E>()
265
                    .load()?;
266

267
            let num_nodes = seq_graph.num_nodes();
×
268
            // transpose the graph
269
            let sorted =
×
270
                webgraph::transform::simplify_sorted(seq_graph, args.memory_usage.memory_usage)
×
271
                    .unwrap();
272

273
            BvComp::parallel_endianness(
274
                &args.dst,
×
275
                &sorted,
×
276
                num_nodes,
×
277
                args.ca.into(),
×
278
                &thread_pool,
×
279
                dir,
×
280
                &target_endianness,
×
281
            )?;
282
        }
283
    }
284

285
    Ok(())
×
286
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc