• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vigna / webgraph-rs / 25054814280

28 Apr 2026 01:11PM UTC coverage: 69.158% (-0.04%) from 69.201%
25054814280

push

github

vigna
Further cleanup of method names; sorted symmetrization now uses IntoParLenders, too

24 of 59 new or added lines in 12 files covered. (40.68%)

761 existing lines in 28 files now uncovered.

7505 of 10852 relevant lines covered (69.16%)

50040578.59 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/cli/src/transform/map.rs
1
/*
2
 * SPDX-FileCopyrightText: 2026 Sebastiano Vigna
3
 *
4
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
5
 */
6

7
use crate::*;
8
use anyhow::Result;
9
use dsi_bitstream::{dispatch::factory::CodesReaderFactoryHelper, prelude::*};
10
use dsi_progress_logger::prelude::*;
11
use std::path::PathBuf;
12
use tempfile::Builder;
13
use value_traits::slices::SliceByValue;
14
use webgraph::prelude::*;
15

16
#[derive(Parser, Debug)]
17
#[command(name = "map", about = "Maps a graph in the BV format through an arbitrary function on nodes, deduplicating arcs.", long_about = None, next_line_help = true)]
18
pub struct CliArgs {
19
    /// The basename of the source graph.​
20
    pub src: PathBuf,
21
    /// The basename of the mapped graph.​
22
    pub dst: PathBuf,
23

24
    /// The path to the map to apply to the graph.​
25
    pub map: PathBuf,
26

27
    #[arg(long, value_enum, default_value_t)]
28
    /// The format of the map file.​
29
    pub fmt: IntSliceFormat,
30

31
    #[arg(long)]
32
    /// The number of nodes of the resulting graph; if not specified, it is
33
    /// computed as one plus the maximum value in the map; if specified, it must
34
    /// be strictly greater than the maximum value in the map.​
35
    pub num_nodes: Option<usize>,
36

37
    #[arg(short, long)]
38
    /// Uses the sequential algorithm (does not need offsets).​
39
    pub sequential: bool,
40

41
    #[clap(flatten)]
42
    pub num_threads: NumThreadsArg,
43

44
    #[clap(flatten)]
45
    pub memory_usage: MemoryUsageArg,
46

47
    #[clap(flatten)]
48
    pub ca: CompressArgs,
49

50
    #[arg(long, conflicts_with = "sequential")]
51
    /// Uses the degree cumulative function to balance work by arcs rather than
52
    /// by nodes; the DCF must have been pre-built with `webgraph build dcf`.​
53
    pub dcf: bool,
54

55
    #[clap(flatten)]
56
    pub log_interval: LogIntervalArg,
57
}
58

UNCOV
59
pub fn main(args: CliArgs) -> Result<()> {
×
60
    create_parent_dir(&args.dst)?;
×
61

62
    match get_endianness(&args.src)?.as_str() {
×
63
        #[cfg(feature = "be_bins")]
64
        BE::NAME => {
×
UNCOV
65
            if args.sequential {
×
NEW
66
                map_seq::<BE>(args)
×
67
            } else {
NEW
68
                map_par::<BE>(args)
×
69
            }
70
        }
71
        #[cfg(feature = "le_bins")]
72
        LE::NAME => {
×
UNCOV
73
            if args.sequential {
×
NEW
74
                map_seq::<LE>(args)
×
75
            } else {
NEW
76
                map_par::<LE>(args)
×
77
            }
78
        }
UNCOV
79
        e => panic!("Unknown endianness: {}", e),
×
80
    }
81
}
82

83
/// Computes the number of nodes of the mapped graph.
84
fn mapped_num_nodes<P: SliceByValue<Value = usize>>(
×
85
    node_map: &P,
86
    cli_num_nodes: Option<usize>,
87
) -> Result<usize> {
88
    let max_mapped = (0..node_map.len())
×
89
        .map(|i| node_map.index_value(i))
×
90
        .max()
91
        .unwrap_or(0);
92
    match cli_num_nodes {
×
93
        Some(n) => {
×
94
            anyhow::ensure!(
×
UNCOV
95
                n > max_mapped,
×
96
                "The specified number of nodes ({}) is not strictly greater than the maximum mapped node ({})",
×
UNCOV
97
                n,
×
98
                max_mapped,
×
99
            );
UNCOV
100
            Ok(n)
×
101
        }
102
        None => Ok(max_mapped + 1),
×
103
    }
104
}
105

106
/// Parallel version of [`map_seq`].
NEW
107
pub fn map_par<E: Endianness>(args: CliArgs) -> Result<()>
×
108
where
109
    MmapHelper<u32>: CodesReaderFactoryHelper<E>,
110
    for<'a> LoadModeCodesReader<'a, E, Mmap>: BitSeek + Clone + Send + Sync,
111
{
112
    let thread_pool = crate::get_thread_pool(args.num_threads.num_threads);
×
UNCOV
113
    let use_dcf = args.dcf;
×
114
    let src = args.src.clone();
×
115
    let log_interval = args.log_interval.log_interval;
×
116

117
    let target_endianness = args.ca.endianness.clone().unwrap_or_else(|| E::NAME.into());
×
118

119
    let dir = Builder::new().prefix("transform_map_").tempdir()?;
×
UNCOV
120
    let chunk_size = args.ca.chunk_size;
×
121
    let bvgraphz = args.ca.bvgraphz;
×
122
    let mut builder = BvCompConf::new(&args.dst)
×
UNCOV
123
        .comp_flags(args.ca.into())
×
UNCOV
124
        .tmp_dir(&dir);
×
125

126
    if bvgraphz {
×
127
        builder = builder.chunk_size(chunk_size);
×
128
    }
129

130
    let loaded = args.fmt.load(&args.map)?;
×
UNCOV
131
    let cli_num_nodes = args.num_nodes;
×
132
    let memory_usage = args.memory_usage.memory_usage;
×
133

134
    dispatch_int_slice!(loaded, |node_map| {
×
UNCOV
135
        let num_nodes = mapped_num_nodes(&node_map, cli_num_nodes)?;
×
136

137
        let graph = webgraph::graphs::bvgraph::random_access::BvGraph::with_basename(&args.src)
×
138
            .endianness::<E>()
×
139
            .load()?;
×
140

141
        let graph_num_nodes = graph.num_nodes();
×
142
        let graph_num_arcs_hint = graph.num_arcs_hint();
×
143
        let cp = crate::cutpoints(&src, graph_num_nodes, graph_num_arcs_hint, use_dcf)?;
×
144
        let par_graph = webgraph::graphs::par_graphs::ParGraph::with_cutpoints(graph, cp);
×
145

146
        thread_pool.install(|| {
×
147
            log::info!("Mapping graph with memory usage {}", memory_usage);
×
148
            let mut pl = progress_logger![display_memory = true, log_interval = log_interval];
×
UNCOV
149
            let start = std::time::Instant::now();
×
NEW
150
            let sorted = webgraph::transform::map_par(
×
151
                &par_graph,
×
UNCOV
152
                &node_map,
×
UNCOV
153
                num_nodes,
×
154
                memory_usage,
×
UNCOV
155
                &mut pl,
×
156
            )?;
UNCOV
157
            log::info!(
×
158
                "Mapped the graph. It took {:.3} seconds",
×
UNCOV
159
                start.elapsed().as_secs_f64()
×
160
            );
161

162
            let mut builder = builder.progress_logger(&mut pl);
×
UNCOV
163
            par_comp!(builder, sorted, target_endianness)
×
164
        })?;
165

166
        Ok(())
×
167
    })
168
}
169

170
/// Sequential version of [`map_par`].
NEW
171
pub fn map_seq<E: Endianness>(args: CliArgs) -> Result<()>
×
172
where
173
    MmapHelper<u32>: CodesReaderFactoryHelper<E>,
174
{
175
    let thread_pool = crate::get_thread_pool(args.num_threads.num_threads);
×
UNCOV
176
    let log_interval = args.log_interval.log_interval;
×
177

178
    let target_endianness = args.ca.endianness.clone().unwrap_or_else(|| E::NAME.into());
×
179

180
    let dir = Builder::new().prefix("transform_map_").tempdir()?;
×
UNCOV
181
    let chunk_size = args.ca.chunk_size;
×
182
    let bvgraphz = args.ca.bvgraphz;
×
183
    let mut builder = BvCompConf::new(&args.dst)
×
UNCOV
184
        .comp_flags(args.ca.into())
×
185
        .tmp_dir(&dir);
×
186

187
    if bvgraphz {
×
UNCOV
188
        builder = builder.chunk_size(chunk_size);
×
189
    }
190

191
    let loaded = args.fmt.load(&args.map)?;
×
192
    let cli_num_nodes = args.num_nodes;
×
193
    let memory_usage = args.memory_usage.memory_usage;
×
194

UNCOV
195
    dispatch_int_slice!(loaded, |node_map| {
×
UNCOV
196
        let num_nodes = mapped_num_nodes(&node_map, cli_num_nodes)?;
×
197

UNCOV
198
        let seq_graph = webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
×
199
            .endianness::<E>()
×
UNCOV
200
            .load()?;
×
201

UNCOV
202
        log::info!("Mapping graph with memory usage {}", memory_usage);
×
UNCOV
203
        let mut pl = progress_logger![display_memory = true, log_interval = log_interval];
×
UNCOV
204
        let start = std::time::Instant::now();
×
UNCOV
205
        let sorted =
×
UNCOV
206
            webgraph::transform::map(&seq_graph, &node_map, num_nodes, memory_usage, &mut pl)?;
×
UNCOV
207
        log::info!(
×
UNCOV
208
            "Mapped the graph. It took {:.3} seconds",
×
UNCOV
209
            start.elapsed().as_secs_f64()
×
210
        );
211

UNCOV
212
        let mut builder = builder.progress_logger(&mut pl);
×
UNCOV
213
        thread_pool.install(|| par_comp!(builder, sorted, target_endianness))?;
×
214

UNCOV
215
        Ok(())
×
216
    })
217
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc