• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

vigna / webgraph-rs / 18279564945

06 Oct 2025 11:36AM UTC coverage: 48.839% (-0.2%) from 49.006%
18279564945

push

github

vigna
Moved SortPairs to MemoryUsage

12 of 23 new or added lines in 6 files covered. (52.17%)

14 existing lines in 3 files now uncovered.

3890 of 7965 relevant lines covered (48.84%)

23444471.85 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

50.85
/webgraph/src/transform/simplify.rs
1
/*
2
 * SPDX-FileCopyrightText: 2023 Inria
3
 *
4
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
5
 */
6

7
use crate::graphs::{
8
    arc_list_graph, no_selfloops_graph::NoSelfLoopsGraph, union_graph::UnionGraph,
9
};
10
use crate::labels::Left;
11
use crate::traits::{LenderIntoIter, SequentialGraph, SortedIterator, SortedLender, SplitLabeling};
12
use crate::utils::sort_pairs::{BatchIterator, KMergeIters, SortPairs};
13
use crate::utils::MemoryUsage;
14
use anyhow::{Context, Result};
15
use dsi_progress_logger::prelude::*;
16
use itertools::Itertools;
17
use lender::*;
18
use rayon::ThreadPool;
19
use tempfile::Builder;
20

21
use super::transpose;
22

23
/// Returns a simplified (i.e., undirected and loopless) version of the provided
24
/// sorted (both on nodes and successors) graph as a [sequential
25
/// graph](crate::traits::SequentialGraph).
26
///
27
/// This method exploits the fact that the input graph is already sorted,
28
/// sorting half the number of arcs of
29
/// [`simplify`](crate::transform::simplify::simplify).
30
#[allow(clippy::type_complexity)]
31
pub fn simplify_sorted<G: SequentialGraph>(
1✔
32
    graph: G,
33
    batch_size: usize,
34
) -> Result<
35
    NoSelfLoopsGraph<
36
        UnionGraph<G, Left<arc_list_graph::ArcListGraph<KMergeIters<BatchIterator<()>, ()>>>>,
37
    >,
38
>
39
where
40
    for<'a> G::Lender<'a>: SortedLender,
41
    for<'a, 'b> LenderIntoIter<'a, G::Lender<'b>>: SortedIterator,
42
{
43
    let transpose = transpose(&graph, batch_size).context("Could not transpose the graph")?;
5✔
44
    Ok(NoSelfLoopsGraph(UnionGraph(graph, transpose)))
×
45
}
46

47
/// Returns a simplified (i.e., undirected and loopless) version of the provided
48
/// graph as a [sequential graph](crate::traits::SequentialGraph).
49
///
50
/// Note that if the graph is sorted (both on nodes and successors), it is
51
/// recommended to use [`simplify_sorted`](crate::transform::simplify::simplify_sorted).
52
///
53
/// For the meaning of the additional parameter, see
54
/// [`SortPairs`](crate::prelude::sort_pairs::SortPairs).
55
#[allow(clippy::type_complexity)]
56
pub fn simplify(
×
57
    graph: &impl SequentialGraph,
58
    batch_size: usize,
59
) -> Result<
60
    Left<
61
        arc_list_graph::ArcListGraph<
62
            impl Iterator<Item = (usize, usize, ())> + Clone + Send + Sync + 'static,
63
        >,
64
    >,
65
> {
66
    let dir = Builder::new().prefix("simplify_").tempdir()?;
×
NEW
67
    let mut sorted = SortPairs::new(MemoryUsage::BatchSize(batch_size), dir.path())?;
×
68

69
    let mut pl = ProgressLogger::default();
×
70
    pl.item_name("node")
×
71
        .expected_updates(Some(graph.num_nodes()));
×
72
    pl.start("Creating batches...");
×
73
    // create batches of sorted edges
74
    let mut iter = graph.iter();
×
75
    while let Some((src, succ)) = iter.next() {
×
76
        for dst in succ {
×
77
            if src != dst {
×
78
                sorted.push(src, dst)?;
×
79
                sorted.push(dst, src)?;
×
80
            }
81
        }
82
        pl.light_update();
×
83
    }
84
    // merge the batches
85
    let map: fn((usize, usize, ())) -> (usize, usize) = |(src, dst, _)| (src, dst);
×
86
    let filter: fn(&(usize, usize)) -> bool = |(src, dst)| src != dst;
×
87
    let iter = Itertools::dedup(sorted.iter()?.map(map).filter(filter));
×
88
    let sorted = arc_list_graph::ArcListGraph::new(graph.num_nodes(), iter);
×
89
    pl.done();
×
90

91
    Ok(sorted)
×
92
}
93

94
/// Returns a simplified (i.e., undirected and loopless) version of the provided
95
/// graph as a [sequential graph](crate::traits::SequentialGraph).
96
///
97
/// This method uses splitting to sort in parallel different parts of the graph.
98
///
99
/// For the meaning of the additional parameter, see
100
/// [`SortPairs`](crate::prelude::sort_pairs::SortPairs).
101
#[allow(clippy::type_complexity)]
102
pub fn simplify_split<S>(
4✔
103
    graph: &S,
104
    batch_size: usize,
105
    threads: &ThreadPool,
106
) -> Result<Left<arc_list_graph::ArcListGraph<itertools::Dedup<KMergeIters<BatchIterator<()>, ()>>>>>
107
where
108
    S: SequentialGraph + SplitLabeling,
109
{
110
    let num_threads = threads.current_num_threads();
12✔
111
    let (tx, rx) = std::sync::mpsc::channel();
12✔
112

113
    let mut dirs = vec![];
8✔
114

115
    threads.in_place_scope(|scope| {
12✔
116
        let mut thread_id = 0;
8✔
117
        #[allow(clippy::explicit_counter_loop)] // enumerate requires some extra bounds here
118
        for iter in graph.split_iter(num_threads) {
28✔
119
            let tx = tx.clone();
×
120
            let dir = Builder::new()
×
121
                .prefix(&format!("simplify_split_{thread_id}_"))
×
122
                .tempdir()
×
123
                .expect("Could not create a temporary directory");
×
124
            let dir_path = dir.path().to_path_buf();
×
125
            dirs.push(dir);
×
126
            scope.spawn(move |_| {
16✔
127
                log::debug!("Spawned thread {thread_id}");
16✔
128
                let mut sorted =
16✔
129
                    SortPairs::new(MemoryUsage::BatchSize(batch_size / num_threads), dir_path)
48✔
130
                        .unwrap();
16✔
131
                for_!( (src, succ) in iter {
1,302,244✔
132
                    for dst in succ {
27,031,444✔
133
                        if src != dst {
12,514,840✔
134
                            sorted.push(src, dst).unwrap();
75,089,040✔
135
                            sorted.push(dst, src).unwrap();
50,059,360✔
136
                        }
137
                    }
138
                });
139
                let result = sorted.iter().context("Could not read arcs").unwrap();
80✔
140
                tx.send(result).expect("Could not send the sorted pairs");
80✔
141
                log::debug!("Thread {thread_id} finished");
16✔
142
            });
143
            thread_id += 1;
×
144
        }
145
    });
146
    drop(tx);
8✔
147

148
    // get a graph on the sorted data
149
    log::debug!("Waiting for threads to finish");
4✔
150
    let edges: KMergeIters<BatchIterator> = rx.iter().sum();
20✔
151
    let edges = edges.dedup();
12✔
152
    log::debug!("All threads finished");
4✔
153
    let sorted = arc_list_graph::ArcListGraph::new_labeled(graph.num_nodes(), edges);
20✔
154

155
    drop(dirs);
8✔
156
    Ok(Left(sorted))
4✔
157
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc