• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vigna / webgraph-rs / 14134523230

28 Mar 2025 05:27PM UTC coverage: 49.303% (-0.4%) from 49.654%
14134523230

push

github

zommiommy
fixed cli endpoint names so they are as they were before the derive refactor

1 of 2 new or added lines in 2 files covered. (50.0%)

612 existing lines in 31 files now uncovered.

2335 of 4736 relevant lines covered (49.3%)

20371538.12 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

12.4
/src/cli/transform/simplify.rs
1
/*
2
 * SPDX-FileCopyrightText: 2023 Inria
3
 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
4
 *
5
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6
 */
7

8
use crate::cli::*;
9
use crate::graphs::union_graph::UnionGraph;
10
use crate::prelude::*;
11
use anyhow::Result;
12
use dsi_bitstream::{dispatch::factory::CodesReaderFactoryHelper, prelude::*};
13
use mmap_rs::MmapFlags;
14
use std::path::PathBuf;
15
use tempfile::Builder;
16

17
#[derive(Parser, Debug)]
18
#[command(name = "simplify", about = "Makes a BvGraph simple (undirected and loopless) by adding missing arcs and removing loops, optionally applying a permutation.", long_about = None)]
19
pub struct CliArgs {
20
    /// The basename of the graph.
21
    pub src: PathBuf,
22
    /// The basename of the simplified graph.
23
    pub dst: PathBuf,
24

25
    #[arg(long)]
26
    /// The basename of a pre-computed transposed version of the source graph, which
27
    /// will be use to speed up the simplification.
28
    pub transposed: Option<PathBuf>,
29

30
    #[clap(flatten)]
31
    pub num_threads: NumThreadsArg,
32

33
    #[clap(flatten)]
34
    pub batch_size: BatchSizeArg,
35

36
    #[clap(flatten)]
37
    pub ca: CompressArgs,
38

39
    #[arg(long)]
40
    /// The path to an optional permutation in binary big-endian format to apply to the graph.
41
    pub permutation: Option<PathBuf>,
42
}
43

44
pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
2✔
45
    create_parent_dir(&args.dst)?;
2✔
46

47
    match get_endianness(&args.src)?.as_str() {
4✔
48
        #[cfg(any(
49
            feature = "be_bins",
50
            not(any(feature = "be_bins", feature = "le_bins"))
51
        ))]
52
        BE::NAME => simplify::<BE>(global_args, args),
4✔
53
        #[cfg(any(
54
            feature = "le_bins",
55
            not(any(feature = "be_bins", feature = "le_bins"))
56
        ))]
UNCOV
57
        LE::NAME => simplify::<LE>(global_args, args),
×
UNCOV
58
        e => panic!("Unknown endianness: {}", e),
×
59
    }
60
}
61

UNCOV
62
fn no_ef_warn(basepath: impl AsRef<std::path::Path>) {
×
UNCOV
63
    log::warn!("The .ef file was not found so the simplification will proceed sequentially. This may be slow. To speed it up, you can use `webgraph build ef {}` which would allow us create batches in parallel", basepath.as_ref().display());
×
64
}
65

66
pub fn simplify<E: Endianness>(_global_args: GlobalArgs, args: CliArgs) -> Result<()>
2✔
67
where
68
    MmapHelper<u32>: CodesReaderFactoryHelper<E>,
69
    for<'a> LoadModeCodesReader<'a, E, Mmap>: BitSeek + Clone + Send + Sync,
70
{
71
    // TODO!: speed it up by using random access graph if possible
72
    let thread_pool = crate::cli::get_thread_pool(args.num_threads.num_threads);
2✔
73

74
    let target_endianness = args.ca.endianness.clone().unwrap_or_else(|| E::NAME.into());
6✔
75

76
    let dir = Builder::new().prefix("transform_simplify_").tempdir()?;
4✔
77

UNCOV
78
    match (args.permutation, args.transposed) {
×
79
        // load the transposed graph and use it to directly compress the graph
80
        // without doing any sorting
UNCOV
81
        (None, Some(t_path)) => {
×
UNCOV
82
            log::info!("Transposed graph provided, using it to simplify the graph");
×
83

UNCOV
84
            let has_ef_graph =
×
UNCOV
85
                std::fs::metadata(args.src.with_extension("ef")).is_ok_and(|x| x.is_file());
×
UNCOV
86
            let has_ef_t_graph =
×
87
                std::fs::metadata(t_path.with_extension("ef")).is_ok_and(|x| x.is_file());
×
88

UNCOV
89
            match (has_ef_graph, has_ef_t_graph) {
×
90
                (true, true) => {
×
91
                    log::info!("Both .ef files found, using simplify split");
×
92

93
                    let graph =
×
94
                        crate::graphs::bvgraph::random_access::BvGraph::with_basename(&args.src)
×
95
                            .endianness::<E>()
96
                            .load()?;
UNCOV
97
                    let num_nodes = graph.num_nodes();
×
98
                    let graph_t =
×
99
                        crate::graphs::bvgraph::random_access::BvGraph::with_basename(&t_path)
×
100
                            .endianness::<E>()
101
                            .load()?;
102

103
                    if graph_t.num_nodes() != num_nodes {
×
UNCOV
104
                        anyhow::bail!("The number of nodes in the graph and its transpose do not match! {} != {}", num_nodes, graph_t.num_nodes());
×
105
                    }
106

107
                    let sorted = NoSelfLoopsGraph(UnionGraph(graph, graph_t));
×
108

109
                    BvComp::parallel_endianness(
UNCOV
110
                        &args.dst,
×
UNCOV
111
                        &sorted,
×
112
                        num_nodes,
×
113
                        args.ca.into(),
×
UNCOV
114
                        &thread_pool,
×
UNCOV
115
                        dir,
×
116
                        &target_endianness,
×
117
                    )?;
118

119
                    return Ok(());
×
120
                }
121
                (true, false) => {
×
122
                    no_ef_warn(&args.src);
×
123
                }
124
                (false, true) => {
×
125
                    no_ef_warn(&t_path);
×
126
                }
UNCOV
127
                (false, false) => {
×
128
                    no_ef_warn(&args.src);
×
UNCOV
129
                    no_ef_warn(&t_path);
×
130
                }
131
            }
132

133
            let seq_graph =
×
134
                crate::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
×
135
                    .endianness::<E>()
136
                    .load()?;
137
            let num_nodes = seq_graph.num_nodes();
×
138
            let seq_graph_t =
×
UNCOV
139
                crate::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&t_path)
×
140
                    .endianness::<E>()
141
                    .load()?;
142

143
            if seq_graph_t.num_nodes() != num_nodes {
×
UNCOV
144
                anyhow::bail!(
×
UNCOV
145
                    "The number of nodes in the graph and its transpose do not match! {} != {}",
×
146
                    num_nodes,
×
147
                    seq_graph_t.num_nodes()
×
148
                );
149
            }
150

UNCOV
151
            let sorted = NoSelfLoopsGraph(UnionGraph(seq_graph, seq_graph_t));
×
152

153
            BvComp::parallel_endianness(
154
                &args.dst,
×
155
                &sorted,
×
156
                num_nodes,
×
UNCOV
157
                args.ca.into(),
×
UNCOV
158
                &thread_pool,
×
UNCOV
159
                dir,
×
160
                &target_endianness,
×
161
            )?;
162
        }
163
        // apply the permutation, don't care if the transposed graph is already computed
164
        // as we cannot really exploit it
165
        (Some(perm_path), None | Some(_)) => {
2✔
166
            log::info!("Permutation provided, applying it to the graph");
4✔
167

168
            let perm = JavaPermutation::mmap(perm_path, MmapFlags::RANDOM_ACCESS)?;
4✔
169

170
            // if the .ef file exists, we can use the simplify split
171
            if std::fs::metadata(args.src.with_extension("ef")).is_ok_and(|x| x.is_file()) {
2✔
172
                log::info!(".ef file found, using simplify split");
4✔
173
                let graph =
2✔
174
                    crate::graphs::bvgraph::random_access::BvGraph::with_basename(&args.src)
2✔
175
                        .endianness::<E>()
176
                        .load()?;
177

178
                let perm_graph = PermutedGraph {
UNCOV
179
                    graph: &graph,
×
UNCOV
180
                    perm: &perm,
×
181
                };
182

183
                let sorted = crate::transform::simplify_split(
UNCOV
184
                    &perm_graph,
×
UNCOV
185
                    args.batch_size.batch_size,
×
UNCOV
186
                    &thread_pool,
×
187
                )?;
188

189
                BvComp::parallel_endianness(
UNCOV
190
                    &args.dst,
×
UNCOV
191
                    &sorted,
×
UNCOV
192
                    graph.num_nodes(),
×
193
                    args.ca.into(),
×
194
                    &thread_pool,
×
195
                    dir,
×
UNCOV
196
                    &target_endianness,
×
197
                )?;
198

199
                return Ok(());
2✔
200
            }
201

202
            no_ef_warn(&args.src);
×
203

204
            let seq_graph =
×
205
                crate::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
×
206
                    .endianness::<E>()
207
                    .load()?;
208

209
            let perm_graph = PermutedGraph {
UNCOV
210
                graph: &seq_graph,
×
211
                perm: &perm,
×
212
            };
213

214
            // simplify the graph
UNCOV
215
            let sorted =
×
UNCOV
216
                crate::transform::simplify(&perm_graph, args.batch_size.batch_size).unwrap();
×
217

218
            BvComp::parallel_endianness(
219
                &args.dst,
×
220
                &sorted,
×
UNCOV
221
                sorted.num_nodes(),
×
UNCOV
222
                args.ca.into(),
×
UNCOV
223
                &thread_pool,
×
224
                dir,
×
225
                &target_endianness,
×
226
            )?;
227
        }
228
        // just compute the transpose on the fly
229
        (None, None) => {
×
230
            log::info!(
×
231
                "No permutation or transposed graph provided, computing the transpose on the fly"
×
232
            );
233
            // if the .ef file exists, we can use the simplify split
234
            if std::fs::metadata(args.src.with_extension("ef")).is_ok_and(|x| x.is_file()) {
×
UNCOV
235
                log::info!(".ef file found, using simplify split");
×
236

UNCOV
237
                let graph =
×
238
                    crate::graphs::bvgraph::random_access::BvGraph::with_basename(&args.src)
×
239
                        .endianness::<E>()
240
                        .load()?;
241

242
                let sorted = crate::transform::simplify_split(
243
                    &graph,
×
244
                    args.batch_size.batch_size,
×
UNCOV
245
                    &thread_pool,
×
246
                )?;
247

248
                BvComp::parallel_endianness(
UNCOV
249
                    &args.dst,
×
UNCOV
250
                    &sorted,
×
UNCOV
251
                    graph.num_nodes(),
×
252
                    args.ca.into(),
×
253
                    &thread_pool,
×
254
                    dir,
×
UNCOV
255
                    &target_endianness,
×
256
                )?;
257

258
                return Ok(());
×
259
            }
260

261
            no_ef_warn(&args.src);
×
262

263
            let seq_graph =
×
264
                crate::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
×
265
                    .endianness::<E>()
266
                    .load()?;
267

UNCOV
268
            let num_nodes = seq_graph.num_nodes();
×
269
            // transpose the graph
270
            let sorted =
×
UNCOV
271
                crate::transform::simplify_sorted(seq_graph, args.batch_size.batch_size).unwrap();
×
272

273
            BvComp::parallel_endianness(
UNCOV
274
                &args.dst,
×
UNCOV
275
                &sorted,
×
UNCOV
276
                num_nodes,
×
277
                args.ca.into(),
×
UNCOV
278
                &thread_pool,
×
279
                dir,
×
280
                &target_endianness,
×
281
            )?;
282
        }
283
    }
284

285
    Ok(())
×
286
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc