• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 22118953296

17 Feb 2026 10:53PM UTC coverage: 66.459% (+0.02%) from 66.441%
22118953296

Pull #530

github

web-flow
Merge 059dda85b into 3b93d04b5
Pull Request #530: adds map fusion transformation

242 of 353 new or added lines in 4 files covered. (68.56%)

2 existing lines in 1 file now uncovered.

23823 of 35846 relevant lines covered (66.46%)

375.15 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

72.9
/sdfg/src/transformations/map_fusion.cpp
1
#include "sdfg/transformations/map_fusion.h"
2

3
#include "sdfg/analysis/arguments_analysis.h"
4
#include "sdfg/analysis/scope_analysis.h"
5
#include "sdfg/analysis/users.h"
6
#include "sdfg/control_flow/interstate_edge.h"
7
#include "sdfg/data_flow/data_flow_graph.h"
8
#include "sdfg/structured_control_flow/block.h"
9
#include "sdfg/structured_control_flow/for.h"
10
#include "sdfg/symbolic/polynomials.h"
11

12
namespace sdfg {
13
namespace transformations {
14

15
MapFusion::MapFusion(structured_control_flow::Map& first_map, structured_control_flow::StructuredLoop& second_loop)
16
    : first_map_(first_map), second_loop_(second_loop) {}
18✔
17

18
std::string MapFusion::name() const { return "MapFusion"; }
2✔
19

20
bool MapFusion::can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
16✔
21
    fusion_candidates_.clear();
16✔
22

23
    // Criterion: Get parent scope and verify both loops are sequential children
24
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
16✔
25
    auto* first_parent = scope_analysis.parent_scope(&first_map_);
16✔
26
    auto* second_parent = scope_analysis.parent_scope(&second_loop_);
16✔
27

28
    if (first_parent == nullptr || second_parent == nullptr) {
16✔
NEW
29
        return false;
×
NEW
30
    }
×
31

32
    // Both must have the same parent sequence
33
    if (first_parent != second_parent) {
16✔
NEW
34
        return false;
×
NEW
35
    }
×
36

37
    auto* parent_sequence = dynamic_cast<structured_control_flow::Sequence*>(first_parent);
16✔
38
    if (parent_sequence == nullptr) {
16✔
NEW
39
        return false;
×
NEW
40
    }
×
41

42
    // Criterion: first_map must immediately precede second_loop in the sequence
43
    int first_index = parent_sequence->index(first_map_);
16✔
44
    int second_index = parent_sequence->index(second_loop_);
16✔
45

46
    if (first_index == -1 || second_index == -1) {
16✔
NEW
47
        return false;
×
NEW
48
    }
×
49

50
    if (second_index != first_index + 1) {
16✔
51
        return false;
1✔
52
    }
1✔
53

54
    // Criterion: Transition between maps should have no assignments
55
    auto& transition = parent_sequence->at(first_index).second;
15✔
56
    if (!transition.empty()) {
15✔
NEW
57
        return false;
×
NEW
58
    }
×
59

60
    // Criterion: First map must have simple body (single block)
61
    // This ensures no WAR/WAW hazards when replicating the producer
62
    if (first_map_.root().size() != 1) {
15✔
NEW
63
        return false;
×
NEW
64
    }
×
65

66
    auto* first_block = dynamic_cast<structured_control_flow::Block*>(&first_map_.root().at(0).first);
15✔
67
    if (first_block == nullptr) {
15✔
NEW
68
        return false;
×
NEW
69
    }
×
70

71
    // Criterion: First block's transition should be empty
72
    if (!first_map_.root().at(0).second.empty()) {
15✔
NEW
73
        return false;
×
NEW
74
    }
×
75

76
    // Get arguments analysis to identify inputs/outputs of each loop
77
    auto& arguments_analysis = analysis_manager.get<analysis::ArgumentsAnalysis>();
15✔
78
    auto first_args = arguments_analysis.arguments(analysis_manager, first_map_);
15✔
79
    auto second_args = arguments_analysis.arguments(analysis_manager, second_loop_);
15✔
80

81
    // Find containers that are outputs of first map and inputs of second map
82
    std::unordered_set<std::string> first_outputs;
15✔
83
    for (const auto& [name, arg] : first_args) {
45✔
84
        if (arg.is_output) {
45✔
85
            first_outputs.insert(name);
15✔
86
        }
15✔
87
    }
45✔
88

89
    std::unordered_set<std::string> fusion_containers;
15✔
90
    for (const auto& [name, arg] : second_args) {
47✔
91
        if (arg.is_input && first_outputs.contains(name)) {
47✔
92
            // Criterion: Skip scalars - only fuse array/pointer accesses
93
            if (arg.is_scalar) {
14✔
NEW
94
                continue;
×
NEW
95
            }
×
96
            fusion_containers.insert(name);
14✔
97
        }
14✔
98
    }
47✔
99
    if (fusion_containers.empty()) {
15✔
100
        return false;
1✔
101
    }
1✔
102

103
    // Analyze memory access patterns for each fusion candidate
104
    auto& first_dataflow = first_block->dataflow();
14✔
105

106
    auto first_indvar = first_map_.indvar();
14✔
107
    auto second_indvar = second_loop_.indvar();
14✔
108

109
    // For each fusion container, find the producer memlet and collect unique consumer subsets
110
    for (const auto& container : fusion_containers) {
14✔
111
        // Find unique producer in first map (producer)
112
        data_flow::Memlet* producer_memlet = nullptr;
14✔
113

114
        for (auto& node : first_dataflow.nodes()) {
44✔
115
            auto* access = dynamic_cast<data_flow::AccessNode*>(&node);
44✔
116
            if (access == nullptr || access->data() != container) {
44✔
117
                continue;
30✔
118
            }
30✔
119
            if (first_dataflow.in_degree(*access) != 1 || first_dataflow.out_degree(*access) != 0) {
14✔
NEW
120
                return false;
×
NEW
121
            }
×
122
            auto& iedge = *first_dataflow.in_edges(*access).begin();
14✔
123
            if (iedge.type() != data_flow::MemletType::Computational) {
14✔
NEW
124
                return false;
×
NEW
125
            }
×
126
            if (producer_memlet != nullptr) {
14✔
NEW
127
                return false;
×
NEW
128
            }
×
129
            producer_memlet = &iedge;
14✔
130
        }
14✔
131
        if (producer_memlet == nullptr) {
14✔
NEW
132
            return false;
×
NEW
133
        }
×
134

135
        const auto& producer_subset = producer_memlet->subset();
14✔
136
        // Assume linearized subset
137
        if (producer_subset.size() != 1) {
14✔
NEW
138
            return false;
×
NEW
139
        }
×
140

141
        // Extract affine coefficients for producer (done once per container)
142
        auto producer_expr = producer_subset.at(0);
14✔
143
        symbolic::SymbolVec producer_symbols = {first_indvar};
14✔
144
        auto producer_poly = symbolic::polynomial(producer_expr, producer_symbols);
14✔
145
        if (producer_poly.is_null()) {
14✔
NEW
146
            return false; // Not a polynomial
×
NEW
147
        }
×
148
        auto producer_coeffs = symbolic::affine_coefficients(producer_poly, producer_symbols);
14✔
149
        if (producer_coeffs.empty()) {
14✔
NEW
150
            return false; // Not affine
×
NEW
151
        }
×
152
        auto first_coeff = producer_coeffs[first_indvar];
14✔
153
        if (symbolic::eq(first_coeff, symbolic::zero())) {
14✔
NEW
154
            return false;
×
NEW
155
        }
×
156
        auto producer_constant = producer_coeffs[symbolic::symbol("__daisy_constant__")];
14✔
157

158
        // Collect all unique (container, subset) pairs from consumer blocks
159
        // A subset is considered unique if it's not symbolically equal to any existing one
160
        symbolic::ExpressionSet unique_subsets;
14✔
161

162
        for (size_t i = 0; i < second_loop_.root().size(); ++i) {
29✔
163
            auto* block = dynamic_cast<structured_control_flow::Block*>(&second_loop_.root().at(i).first);
15✔
164
            if (block == nullptr) {
15✔
NEW
165
                return false;
×
NEW
166
            }
×
167

168
            auto& dataflow = block->dataflow();
15✔
169
            for (auto& node : dataflow.nodes()) {
55✔
170
                auto* access = dynamic_cast<data_flow::AccessNode*>(&node);
55✔
171
                if (access == nullptr || access->data() != container) {
55✔
172
                    continue;
37✔
173
                }
37✔
174
                if (dataflow.in_degree(*access) != 0 || dataflow.out_degree(*access) == 0) {
18✔
NEW
175
                    return false;
×
NEW
176
                }
×
177

178
                // Check all read memlets from this access
179
                for (auto& memlet : dataflow.out_edges(*access)) {
19✔
180
                    if (memlet.type() != data_flow::MemletType::Computational) {
19✔
NEW
181
                        return false;
×
NEW
182
                    }
×
183

184
                    auto& consumer_subset = memlet.subset();
19✔
185
                    // Assume linearized subset
186
                    if (consumer_subset.size() != 1) {
19✔
NEW
187
                        return false;
×
NEW
188
                    }
×
189
                    unique_subsets.insert(consumer_subset.at(0));
19✔
190
                }
19✔
191
            }
18✔
192
        }
15✔
193

194
        // For each unique subset, compute index_mapping and create a FusionCandidate
195
        for (const auto& consumer_expr : unique_subsets) {
17✔
196
            // index_mapping = (consumer_expr - producer_constant) / first_coeff
197
            auto numerator = symbolic::sub(consumer_expr, producer_constant);
17✔
198
            auto index_mapping = symbolic::div(numerator, first_coeff);
17✔
199
            index_mapping = symbolic::expand(index_mapping);
17✔
200

201
            // Verify the mapping is valid (contains only second_indvar and constants)
202
            bool valid_mapping = true;
17✔
203
            for (const auto& atom : symbolic::atoms(index_mapping)) {
17✔
204
                std::string name = atom->get_name();
17✔
205
                if (name != second_indvar->get_name()) {
17✔
206
                    // Check if it's a parameter (constant for both loops)
NEW
207
                    bool is_param = false;
×
NEW
208
                    for (const auto& [arg_name, arg] : second_args) {
×
NEW
209
                        if (arg_name == name && arg.is_scalar && !arg.is_output) {
×
NEW
210
                            is_param = true;
×
NEW
211
                            break;
×
NEW
212
                        }
×
NEW
213
                    }
×
NEW
214
                    if (!is_param) {
×
NEW
215
                        for (const auto& [arg_name, arg] : first_args) {
×
NEW
216
                            if (arg_name == name && arg.is_scalar && !arg.is_output) {
×
NEW
217
                                is_param = true;
×
NEW
218
                                break;
×
NEW
219
                            }
×
NEW
220
                        }
×
NEW
221
                    }
×
NEW
222
                    if (!is_param) {
×
NEW
223
                        valid_mapping = false;
×
NEW
224
                        break;
×
NEW
225
                    }
×
NEW
226
                }
×
227
            }
17✔
228

229
            if (!valid_mapping) {
17✔
NEW
230
                return false;
×
NEW
231
            }
×
232

233
            // Store the fusion candidate
234
            FusionCandidate candidate;
17✔
235
            candidate.container = container;
17✔
236
            candidate.consumer_subset = {consumer_expr};
17✔
237
            candidate.index_mapping = index_mapping;
17✔
238

239
            fusion_candidates_.push_back(candidate);
17✔
240
        }
17✔
241
    }
14✔
242

243
    // Criterion: At least one valid fusion candidate
244
    return !fusion_candidates_.empty();
14✔
245
}
14✔
246

247
void MapFusion::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
9✔
248
    auto& sdfg = builder.subject();
9✔
249

250
    // Get the producer block from first map
251
    auto* first_block = dynamic_cast<structured_control_flow::Block*>(&first_map_.root().at(0).first);
9✔
252
    auto& first_dataflow = first_block->dataflow();
9✔
253

254
    auto first_indvar = first_map_.indvar();
9✔
255

256
    auto& second_root = second_loop_.root();
9✔
257

258
    // For each fusion candidate (unique container+subset pair), create a temp and insert a producer block
259
    // Track: candidate_index -> temp_name
260
    std::vector<std::string> candidate_temps;
9✔
261

262
    for (size_t cand_idx = 0; cand_idx < fusion_candidates_.size(); ++cand_idx) {
19✔
263
        auto& candidate = fusion_candidates_[cand_idx];
10✔
264

265
        // Create a temp scalar for this candidate
266
        auto& container_type = sdfg.type(candidate.container);
10✔
267
        std::string temp_name = builder.find_new_name("_fused_tmp");
10✔
268
        types::Scalar tmp_type(container_type.primitive_type());
10✔
269
        builder.add_container(temp_name, tmp_type);
10✔
270
        candidate_temps.push_back(temp_name);
10✔
271

272
        // Insert a producer block at the beginning of the second loop's body
273
        auto& first_child = second_root.at(0).first;
10✔
274
        control_flow::Assignments empty_assignments;
10✔
275
        auto& producer_block = builder.add_block_before(second_root, first_child, empty_assignments);
10✔
276

277
        // Deep copy all nodes from first block to producer block
278
        std::unordered_map<const data_flow::DataFlowNode*, data_flow::DataFlowNode*> node_mapping;
10✔
279
        for (auto& node : first_dataflow.nodes()) {
32✔
280
            node_mapping[&node] = &builder.copy_node(producer_block, node);
32✔
281
            auto access = node_mapping[&node];
32✔
282
            if (auto* access_node = dynamic_cast<data_flow::AccessNode*>(access)) {
32✔
283
                if (access_node->data() == candidate.container) {
22✔
284
                    // This is the producer access for this candidate - update to temp
285
                    access_node->data(temp_name);
10✔
286
                }
10✔
287
            }
22✔
288
        }
32✔
289

290
        // Add memlets with index substitution using this candidate's index_mapping
291
        for (auto& edge : first_dataflow.edges()) {
22✔
292
            auto& src_node = edge.src();
22✔
293
            auto& dst_node = edge.dst();
22✔
294

295
            // Substitute indices in subset
296
            const types::IType* base_type = &edge.base_type();
22✔
297
            data_flow::Subset new_subset;
22✔
298
            for (const auto& dim : edge.subset()) {
22✔
299
                auto new_dim = symbolic::subs(dim, first_indvar, candidate.index_mapping);
20✔
300
                new_dim = symbolic::expand(new_dim);
20✔
301
                new_subset.push_back(new_dim);
20✔
302
            }
20✔
303

304
            // For output edges (to temp scalar), use empty subset
305
            auto* dst_access = dynamic_cast<data_flow::AccessNode*>(&dst_node);
22✔
306
            if (dst_access != nullptr && dst_access->data() == candidate.container &&
22✔
307
                first_dataflow.in_degree(*dst_access) > 0) {
22✔
308
                new_subset.clear(); // Scalar has empty subset
10✔
309
                base_type = &tmp_type;
10✔
310
            }
10✔
311

312
            builder.add_memlet(
22✔
313
                producer_block,
22✔
314
                *node_mapping[&src_node],
22✔
315
                edge.src_conn(),
22✔
316
                *node_mapping[&dst_node],
22✔
317
                edge.dst_conn(),
22✔
318
                new_subset,
22✔
319
                *base_type,
22✔
320
                edge.debug_info()
22✔
321
            );
22✔
322
        }
22✔
323
    }
10✔
324

325
    // Now update all read accesses in consumer blocks to point to the appropriate temp
326
    // We need to match each access node's memlet subset to find the right candidate
327
    size_t num_producer_blocks = fusion_candidates_.size();
9✔
328

329
    for (size_t block_idx = num_producer_blocks; block_idx < second_root.size(); ++block_idx) {
19✔
330
        auto* block = dynamic_cast<structured_control_flow::Block*>(&second_root.at(block_idx).first);
10✔
331
        if (block == nullptr) {
10✔
NEW
332
            continue;
×
NEW
333
        }
×
334

335
        auto& dataflow = block->dataflow();
10✔
336

337
        // Find all read access nodes
338
        for (auto& node : dataflow.nodes()) {
35✔
339
            auto* access = dynamic_cast<data_flow::AccessNode*>(&node);
35✔
340
            if (access == nullptr) {
35✔
341
                continue;
11✔
342
            }
11✔
343

344
            // Only update read accesses (out_degree > 0)
345
            if (dataflow.out_degree(*access) == 0) {
24✔
346
                continue;
11✔
347
            }
11✔
348

349
            // Capture original container name before any modifications
350
            std::string original_container = access->data();
13✔
351

352
            // Check each outgoing memlet to find which candidates match
353
            for (auto& memlet : dataflow.out_edges(*access)) {
14✔
354
                if (memlet.type() != data_flow::MemletType::Computational) {
14✔
NEW
355
                    continue;
×
NEW
356
                }
×
357

358
                const auto& memlet_subset = memlet.subset();
14✔
359

360
                // Find matching candidate by container and subset
361
                for (size_t cand_idx = 0; cand_idx < fusion_candidates_.size(); ++cand_idx) {
17✔
362
                    auto& candidate = fusion_candidates_[cand_idx];
15✔
363

364
                    if (original_container != candidate.container) {
15✔
365
                        continue;
2✔
366
                    }
2✔
367

368
                    // Check if subset matches
369
                    if (memlet_subset.size() != candidate.consumer_subset.size()) {
13✔
NEW
370
                        continue;
×
NEW
371
                    }
×
372

373
                    bool subset_matches = true;
13✔
374
                    for (size_t d = 0; d < memlet_subset.size(); ++d) {
25✔
375
                        if (!symbolic::eq(memlet_subset[d], candidate.consumer_subset[d])) {
13✔
376
                            subset_matches = false;
1✔
377
                            break;
1✔
378
                        }
1✔
379
                    }
13✔
380

381
                    if (!subset_matches) {
13✔
382
                        continue;
1✔
383
                    }
1✔
384

385
                    // Found a match - update the access node and memlet
386
                    const auto& temp_name = candidate_temps[cand_idx];
12✔
387
                    auto& temp_type = sdfg.type(temp_name);
12✔
388

389
                    access->data(temp_name);
12✔
390

391
                    // Update this memlet
392
                    memlet.set_subset({});
12✔
393
                    memlet.set_base_type(temp_type);
12✔
394

395
                    // Also update any incoming edges
396
                    for (auto& in_edge : dataflow.in_edges(*access)) {
12✔
NEW
397
                        in_edge.set_subset({});
×
NEW
398
                        in_edge.set_base_type(temp_type);
×
NEW
399
                    }
×
400

401
                    break; // Found the matching candidate
12✔
402
                }
13✔
403
            }
14✔
404
        }
13✔
405
    }
10✔
406

407
    analysis_manager.invalidate_all();
9✔
408
    applied_ = true;
9✔
409
}
9✔
410

411
void MapFusion::to_json(nlohmann::json& j) const {
1✔
412
    std::string second_type = "for";
1✔
413
    if (dynamic_cast<structured_control_flow::Map*>(&second_loop_) != nullptr) {
1✔
414
        second_type = "map";
1✔
415
    }
1✔
416
    j["transformation_type"] = this->name();
1✔
417
    j["subgraph"] = {
1✔
418
        {"0", {{"element_id", first_map_.element_id()}, {"type", "map"}}},
1✔
419
        {"1", {{"element_id", second_loop_.element_id()}, {"type", second_type}}}
1✔
420
    };
1✔
421
}
1✔
422

423
MapFusion MapFusion::from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& desc) {
1✔
424
    auto first_map_id = desc["subgraph"]["0"]["element_id"].get<size_t>();
1✔
425
    auto second_loop_id = desc["subgraph"]["1"]["element_id"].get<size_t>();
1✔
426

427
    auto first_element = builder.find_element_by_id(first_map_id);
1✔
428
    auto second_element = builder.find_element_by_id(second_loop_id);
1✔
429

430
    if (first_element == nullptr) {
1✔
NEW
431
        throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(first_map_id) + " not found.");
×
NEW
432
    }
×
433
    if (second_element == nullptr) {
1✔
NEW
434
        throw InvalidTransformationDescriptionException(
×
NEW
435
            "Element with ID " + std::to_string(second_loop_id) + " not found."
×
NEW
436
        );
×
NEW
437
    }
×
438

439
    auto* first_map = dynamic_cast<structured_control_flow::Map*>(first_element);
1✔
440
    auto* second_loop = dynamic_cast<structured_control_flow::StructuredLoop*>(second_element);
1✔
441

442
    if (first_map == nullptr) {
1✔
NEW
443
        throw InvalidTransformationDescriptionException(
×
NEW
444
            "Element with ID " + std::to_string(first_map_id) + " is not a Map."
×
NEW
445
        );
×
NEW
446
    }
×
447
    if (second_loop == nullptr) {
1✔
NEW
448
        throw InvalidTransformationDescriptionException(
×
NEW
449
            "Element with ID " + std::to_string(second_loop_id) + " is not a StructuredLoop."
×
NEW
450
        );
×
NEW
451
    }
×
452

453
    return MapFusion(*first_map, *second_loop);
1✔
454
}
1✔
455

456
} // namespace transformations
457
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc