• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vigna / webgraph-rs / 14398898444

11 Apr 2025 08:24AM UTC coverage: 56.011% (-0.4%) from 56.361%
14398898444

push

github

vigna
Fixed test

8 of 8 new or added lines in 1 file covered. (100.0%)

477 existing lines in 19 files now uncovered.

3993 of 7129 relevant lines covered (56.01%)

20186811.3 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

11.94
/cli/src/transform/simplify.rs
1
/*
2
 * SPDX-FileCopyrightText: 2023 Inria
3
 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
4
 *
5
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6
 */
7

8
use crate::*;
9
use anyhow::Result;
10
use dsi_bitstream::{dispatch::factory::CodesReaderFactoryHelper, prelude::*};
11
use mmap_rs::MmapFlags;
12
use std::path::PathBuf;
13
use tempfile::Builder;
14
use webgraph::graphs::union_graph::UnionGraph;
15
use webgraph::prelude::*;
16

17
#[derive(Parser, Debug)]
18
#[command(name = "simplify", about = "Makes a BvGraph simple (undirected and loopless) by adding missing arcs and removing loops, optionally applying a permutation.", long_about = None)]
19
pub struct CliArgs {
20
    /// The basename of the graph.
21
    pub src: PathBuf,
22
    /// The basename of the simplified graph.
23
    pub dst: PathBuf,
24

25
    #[arg(long)]
26
    /// The basename of a pre-computed transposed version of the source graph, which
27
    /// will be use to speed up the simplification.
28
    pub transposed: Option<PathBuf>,
29

30
    #[clap(flatten)]
31
    pub num_threads: NumThreadsArg,
32

33
    #[clap(flatten)]
34
    pub batch_size: BatchSizeArg,
35

36
    #[clap(flatten)]
37
    pub ca: CompressArgs,
38

39
    #[arg(long)]
40
    /// The path to an optional permutation in binary big-endian format to apply to the graph.
41
    pub permutation: Option<PathBuf>,
42
}
43

44
pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
3✔
45
    create_parent_dir(&args.dst)?;
3✔
46

47
    match get_endianness(&args.src)?.as_str() {
6✔
48
        #[cfg(feature = "be_bins")]
49
        BE::NAME => simplify::<BE>(global_args, args),
6✔
50
        #[cfg(feature = "le_bins")]
UNCOV
51
        LE::NAME => simplify::<LE>(global_args, args),
×
UNCOV
52
        e => panic!("Unknown endianness: {}", e),
×
53
    }
54
}
55

UNCOV
56
fn no_ef_warn(basepath: impl AsRef<std::path::Path>) {
×
57
    log::warn!(
×
58
        "The .ef file was not found so the simplification will proceed sequentially. This may be slow. To speed it up, you can use `webgraph build ef {}` which would allow us create batches in parallel",
×
UNCOV
59
        basepath.as_ref().display()
×
60
    );
61
}
62

63
pub fn simplify<E: Endianness>(_global_args: GlobalArgs, args: CliArgs) -> Result<()>
3✔
64
where
65
    MmapHelper<u32>: CodesReaderFactoryHelper<E>,
66
    for<'a> LoadModeCodesReader<'a, E, Mmap>: BitSeek + Clone + Send + Sync,
67
{
68
    // TODO!: speed it up by using random access graph if possible
69
    let thread_pool = crate::get_thread_pool(args.num_threads.num_threads);
3✔
70

71
    let target_endianness = args.ca.endianness.clone().unwrap_or_else(|| E::NAME.into());
6✔
72

73
    let dir = Builder::new().prefix("transform_simplify_").tempdir()?;
6✔
74

UNCOV
75
    match (args.permutation, args.transposed) {
×
76
        // load the transposed graph and use it to directly compress the graph
77
        // without doing any sorting
UNCOV
78
        (None, Some(t_path)) => {
×
UNCOV
79
            log::info!("Transposed graph provided, using it to simplify the graph");
×
80

81
            let has_ef_graph =
×
UNCOV
82
                std::fs::metadata(args.src.with_extension("ef")).is_ok_and(|x| x.is_file());
×
UNCOV
83
            let has_ef_t_graph =
×
84
                std::fs::metadata(t_path.with_extension("ef")).is_ok_and(|x| x.is_file());
×
85

UNCOV
86
            match (has_ef_graph, has_ef_t_graph) {
×
87
                (true, true) => {
×
88
                    log::info!("Both .ef files found, using simplify split");
×
89

90
                    let graph =
×
UNCOV
91
                        webgraph::graphs::bvgraph::random_access::BvGraph::with_basename(&args.src)
×
92
                            .endianness::<E>()
93
                            .load()?;
94
                    let num_nodes = graph.num_nodes();
×
UNCOV
95
                    let graph_t =
×
96
                        webgraph::graphs::bvgraph::random_access::BvGraph::with_basename(&t_path)
×
97
                            .endianness::<E>()
98
                            .load()?;
99

100
                    if graph_t.num_nodes() != num_nodes {
×
101
                        anyhow::bail!(
×
102
                            "The number of nodes in the graph and its transpose do not match! {} != {}",
×
UNCOV
103
                            num_nodes,
×
UNCOV
104
                            graph_t.num_nodes()
×
105
                        );
106
                    }
107

108
                    let sorted = NoSelfLoopsGraph(UnionGraph(graph, graph_t));
×
109

110
                    BvComp::parallel_endianness(
UNCOV
111
                        &args.dst,
×
UNCOV
112
                        &sorted,
×
UNCOV
113
                        num_nodes,
×
114
                        args.ca.into(),
×
UNCOV
115
                        &thread_pool,
×
UNCOV
116
                        dir,
×
117
                        &target_endianness,
×
118
                    )?;
119

120
                    return Ok(());
×
121
                }
122
                (true, false) => {
×
123
                    no_ef_warn(&args.src);
×
124
                }
UNCOV
125
                (false, true) => {
×
126
                    no_ef_warn(&t_path);
×
127
                }
128
                (false, false) => {
×
129
                    no_ef_warn(&args.src);
×
UNCOV
130
                    no_ef_warn(&t_path);
×
131
                }
132
            }
133

134
            let seq_graph =
×
135
                webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
×
136
                    .endianness::<E>()
137
                    .load()?;
UNCOV
138
            let num_nodes = seq_graph.num_nodes();
×
UNCOV
139
            let seq_graph_t =
×
140
                webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&t_path)
×
141
                    .endianness::<E>()
142
                    .load()?;
143

144
            if seq_graph_t.num_nodes() != num_nodes {
×
145
                anyhow::bail!(
×
146
                    "The number of nodes in the graph and its transpose do not match! {} != {}",
×
UNCOV
147
                    num_nodes,
×
UNCOV
148
                    seq_graph_t.num_nodes()
×
149
                );
150
            }
151

152
            let sorted = NoSelfLoopsGraph(UnionGraph(seq_graph, seq_graph_t));
×
153

154
            BvComp::parallel_endianness(
UNCOV
155
                &args.dst,
×
UNCOV
156
                &sorted,
×
UNCOV
157
                num_nodes,
×
158
                args.ca.into(),
×
UNCOV
159
                &thread_pool,
×
UNCOV
160
                dir,
×
161
                &target_endianness,
×
162
            )?;
163
        }
164
        // apply the permutation, don't care if the transposed graph is already computed
165
        // as we cannot really exploit it
166
        (Some(perm_path), None | Some(_)) => {
3✔
167
            log::info!("Permutation provided, applying it to the graph");
6✔
168

169
            let perm = JavaPermutation::mmap(perm_path, MmapFlags::RANDOM_ACCESS)?;
6✔
170

171
            // if the .ef file exists, we can use the simplify split
172
            if std::fs::metadata(args.src.with_extension("ef")).is_ok_and(|x| x.is_file()) {
3✔
173
                log::info!(".ef file found, using simplify split");
6✔
174
                let graph =
3✔
175
                    webgraph::graphs::bvgraph::random_access::BvGraph::with_basename(&args.src)
3✔
176
                        .endianness::<E>()
177
                        .load()?;
178

179
                let perm_graph = PermutedGraph {
UNCOV
180
                    graph: &graph,
×
UNCOV
181
                    perm: &perm,
×
182
                };
183

184
                let sorted = webgraph::transform::simplify_split(
UNCOV
185
                    &perm_graph,
×
186
                    args.batch_size.batch_size,
×
187
                    &thread_pool,
×
188
                )?;
189

190
                BvComp::parallel_endianness(
191
                    &args.dst,
×
192
                    &sorted,
×
193
                    graph.num_nodes(),
×
UNCOV
194
                    args.ca.into(),
×
UNCOV
195
                    &thread_pool,
×
UNCOV
196
                    dir,
×
197
                    &target_endianness,
×
198
                )?;
199

200
                return Ok(());
3✔
201
            }
202

203
            no_ef_warn(&args.src);
×
204

UNCOV
205
            let seq_graph =
×
UNCOV
206
                webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
×
207
                    .endianness::<E>()
208
                    .load()?;
209

210
            let perm_graph = PermutedGraph {
211
                graph: &seq_graph,
×
212
                perm: &perm,
×
213
            };
214

215
            // simplify the graph
UNCOV
216
            let sorted =
×
217
                webgraph::transform::simplify(&perm_graph, args.batch_size.batch_size).unwrap();
×
218

219
            BvComp::parallel_endianness(
UNCOV
220
                &args.dst,
×
UNCOV
221
                &sorted,
×
222
                sorted.num_nodes(),
×
223
                args.ca.into(),
×
UNCOV
224
                &thread_pool,
×
UNCOV
225
                dir,
×
226
                &target_endianness,
×
227
            )?;
228
        }
229
        // just compute the transpose on the fly
230
        (None, None) => {
×
231
            log::info!(
×
232
                "No permutation or transposed graph provided, computing the transpose on the fly"
×
233
            );
234
            // if the .ef file exists, we can use the simplify split
UNCOV
235
            if std::fs::metadata(args.src.with_extension("ef")).is_ok_and(|x| x.is_file()) {
×
236
                log::info!(".ef file found, using simplify split");
×
237

238
                let graph =
×
UNCOV
239
                    webgraph::graphs::bvgraph::random_access::BvGraph::with_basename(&args.src)
×
240
                        .endianness::<E>()
241
                        .load()?;
242

243
                let sorted = webgraph::transform::simplify_split(
244
                    &graph,
×
245
                    args.batch_size.batch_size,
×
UNCOV
246
                    &thread_pool,
×
247
                )?;
248

249
                BvComp::parallel_endianness(
250
                    &args.dst,
×
251
                    &sorted,
×
252
                    graph.num_nodes(),
×
UNCOV
253
                    args.ca.into(),
×
UNCOV
254
                    &thread_pool,
×
UNCOV
255
                    dir,
×
256
                    &target_endianness,
×
257
                )?;
258

259
                return Ok(());
×
260
            }
261

262
            no_ef_warn(&args.src);
×
263

UNCOV
264
            let seq_graph =
×
265
                webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
×
266
                    .endianness::<E>()
267
                    .load()?;
268

UNCOV
269
            let num_nodes = seq_graph.num_nodes();
×
270
            // transpose the graph
271
            let sorted =
×
UNCOV
272
                webgraph::transform::simplify_sorted(seq_graph, args.batch_size.batch_size)
×
273
                    .unwrap();
274

275
            BvComp::parallel_endianness(
UNCOV
276
                &args.dst,
×
277
                &sorted,
×
278
                num_nodes,
×
UNCOV
279
                args.ca.into(),
×
UNCOV
280
                &thread_pool,
×
UNCOV
281
                dir,
×
282
                &target_endianness,
×
283
            )?;
284
        }
285
    }
286

287
    Ok(())
×
288
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc