• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vigna / webgraph-rs / 24635666654

19 Apr 2026 06:07PM UTC coverage: 66.542% (-0.4%) from 66.986%
24635666654

push

github

vigna
Refined methods

7 of 25 new or added lines in 5 files covered. (28.0%)

670 existing lines in 36 files now uncovered.

6752 of 10147 relevant lines covered (66.54%)

46688126.28 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/cli/src/transform/map.rs
1
/*
2
 * SPDX-FileCopyrightText: 2026 Sebastiano Vigna
3
 *
4
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
5
 */
6

7
use crate::*;
8
use anyhow::Result;
9
use dsi_bitstream::{dispatch::factory::CodesReaderFactoryHelper, prelude::*};
10
use std::path::PathBuf;
11
use tempfile::Builder;
12
use value_traits::slices::SliceByValue;
13
use webgraph::prelude::*;
14

15
#[derive(Parser, Debug)]
16
#[command(name = "map", about = "Maps a graph in the BV format through an arbitrary function on nodes, deduplicating arcs.", long_about = None, next_line_help = true)]
17
pub struct CliArgs {
18
    /// The basename of the source graph.​
19
    pub src: PathBuf,
20
    /// The basename of the mapped graph.​
21
    pub dst: PathBuf,
22

23
    /// The path to the map to apply to the graph.​
24
    pub map: PathBuf,
25

26
    #[arg(long, value_enum, default_value_t)]
27
    /// The format of the map file.​
28
    pub fmt: IntSliceFormat,
29

30
    #[arg(long)]
31
    /// The number of nodes of the resulting graph; if not specified, it is
32
    /// computed as one plus the maximum value in the map; if specified, it must
33
    /// be strictly greater than the maximum value in the map.​
34
    pub num_nodes: Option<usize>,
35

36
    #[arg(short, long)]
37
    /// Uses the sequential algorithm (does not need offsets).​
38
    pub sequential: bool,
39

40
    #[clap(flatten)]
41
    pub num_threads: NumThreadsArg,
42

43
    #[clap(flatten)]
44
    pub memory_usage: MemoryUsageArg,
45

46
    #[clap(flatten)]
47
    pub ca: CompressArgs,
48

49
    #[arg(long, conflicts_with = "sequential")]
50
    /// Uses the degree cumulative function to balance work by arcs rather than
51
    /// by nodes; the DCF must have been pre-built with `webgraph build dcf`.​
52
    pub dcf: bool,
53
}
54

55
pub fn main(args: CliArgs) -> Result<()> {
×
56
    create_parent_dir(&args.dst)?;
×
57

58
    match get_endianness(&args.src)?.as_str() {
×
59
        #[cfg(feature = "be_bins")]
60
        BE::NAME => {
×
61
            if args.sequential {
×
62
                seq_map::<BE>(args)
×
63
            } else {
64
                par_map::<BE>(args)
×
65
            }
66
        }
67
        #[cfg(feature = "le_bins")]
68
        LE::NAME => {
×
69
            if args.sequential {
×
70
                seq_map::<LE>(args)
×
71
            } else {
72
                par_map::<LE>(args)
×
73
            }
74
        }
75
        e => panic!("Unknown endianness: {}", e),
×
76
    }
77
}
78

79
/// Computes the number of nodes of the mapped graph.
80
fn mapped_num_nodes<P: SliceByValue<Value = usize>>(
×
81
    node_map: &P,
82
    cli_num_nodes: Option<usize>,
83
) -> Result<usize> {
84
    let max_mapped = (0..node_map.len())
×
85
        .map(|i| node_map.index_value(i))
×
86
        .max()
87
        .unwrap_or(0);
88
    match cli_num_nodes {
×
89
        Some(n) => {
×
90
            anyhow::ensure!(
×
91
                n > max_mapped,
×
92
                "The specified number of nodes ({}) is not strictly greater than the maximum mapped node ({})",
×
93
                n,
×
94
                max_mapped,
×
95
            );
96
            Ok(n)
×
97
        }
98
        None => Ok(max_mapped + 1),
×
99
    }
100
}
101

102
pub fn par_map<E: Endianness>(args: CliArgs) -> Result<()>
×
103
where
104
    MmapHelper<u32>: CodesReaderFactoryHelper<E>,
105
    for<'a> LoadModeCodesReader<'a, E, Mmap>: BitSeek + Clone + Send + Sync,
106
{
107
    let thread_pool = crate::get_thread_pool(args.num_threads.num_threads);
×
108
    let use_dcf = args.dcf;
×
109
    let src = args.src.clone();
×
110

111
    let target_endianness = args.ca.endianness.clone().unwrap_or_else(|| E::NAME.into());
×
112

113
    let dir = Builder::new().prefix("transform_map_").tempdir()?;
×
114
    let chunk_size = args.ca.chunk_size;
×
115
    let bvgraphz = args.ca.bvgraphz;
×
116
    let mut builder = BvCompConfig::new(&args.dst)
×
117
        .with_comp_flags(args.ca.into())
×
118
        .with_tmp_dir(&dir);
×
119

120
    if bvgraphz {
×
121
        builder = builder.with_chunk_size(chunk_size);
×
122
    }
123

124
    let loaded = args.fmt.load(&args.map)?;
×
125
    let cli_num_nodes = args.num_nodes;
×
126
    let memory_usage = args.memory_usage.memory_usage;
×
127

128
    dispatch_int_slice!(loaded, |node_map| {
×
129
        let num_nodes = mapped_num_nodes(&node_map, cli_num_nodes)?;
×
130

131
        let graph = webgraph::graphs::bvgraph::random_access::BvGraph::with_basename(&args.src)
×
132
            .endianness::<E>()
×
133
            .load()?;
×
134

135
        let cp = crate::cutpoints(&src, graph.num_nodes(), graph.num_arcs_hint(), use_dcf)?;
×
136

137
        thread_pool.install(|| {
×
138
            log::info!("Mapping graph with memory usage {}", memory_usage);
×
139
            let start = std::time::Instant::now();
×
140
            let sorted = webgraph::transform::map_split(
×
141
                &graph,
×
142
                &node_map,
×
143
                num_nodes,
×
144
                memory_usage,
×
145
                Some(cp),
×
146
            )?;
147
            log::info!(
×
148
                "Mapped the graph. It took {:.3} seconds",
×
149
                start.elapsed().as_secs_f64()
×
150
            );
151

152
            par_comp!(builder, sorted, target_endianness)
×
153
        })?;
154

UNCOV
155
        Ok(())
×
156
    })
157
}
158

UNCOV
159
pub fn seq_map<E: Endianness>(args: CliArgs) -> Result<()>
×
160
where
161
    MmapHelper<u32>: CodesReaderFactoryHelper<E>,
162
{
UNCOV
163
    let thread_pool = crate::get_thread_pool(args.num_threads.num_threads);
×
164

UNCOV
165
    let target_endianness = args.ca.endianness.clone().unwrap_or_else(|| E::NAME.into());
×
166

UNCOV
167
    let dir = Builder::new().prefix("transform_map_").tempdir()?;
×
168
    let chunk_size = args.ca.chunk_size;
×
169
    let bvgraphz = args.ca.bvgraphz;
×
170
    let mut builder = BvCompConfig::new(&args.dst)
×
171
        .with_comp_flags(args.ca.into())
×
172
        .with_tmp_dir(&dir);
×
173

UNCOV
174
    if bvgraphz {
×
175
        builder = builder.with_chunk_size(chunk_size);
×
176
    }
177

UNCOV
178
    let loaded = args.fmt.load(&args.map)?;
×
179
    let cli_num_nodes = args.num_nodes;
×
180
    let memory_usage = args.memory_usage.memory_usage;
×
181

UNCOV
182
    dispatch_int_slice!(loaded, |node_map| {
×
183
        let num_nodes = mapped_num_nodes(&node_map, cli_num_nodes)?;
×
184

UNCOV
185
        let seq_graph = webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
×
186
            .endianness::<E>()
×
187
            .load()?;
×
188

UNCOV
189
        log::info!("Mapping graph with memory usage {}", memory_usage);
×
190
        let start = std::time::Instant::now();
×
191
        let sorted = webgraph::transform::map(&seq_graph, &node_map, num_nodes, memory_usage)?;
×
192
        log::info!(
×
193
            "Mapped the graph. It took {:.3} seconds",
×
194
            start.elapsed().as_secs_f64()
×
195
        );
196

UNCOV
197
        thread_pool.install(|| par_comp!(builder, &sorted, target_endianness))?;
×
198

199
        Ok(())
×
200
    })
201
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc