• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 22851518064

09 Mar 2026 11:34AM UTC coverage: 64.621% (+0.2%) from 64.441%
22851518064

push

github

web-flow
Merge pull request #562 from daisytuner/map-fusion-multi-dims

Extends MapFusion with support for multi-dimensional loop nests

174 of 215 new or added lines in 3 files covered. (80.93%)

13 existing lines in 4 files now uncovered.

24651 of 38147 relevant lines covered (64.62%)

376.52 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

79.65
/sdfg/src/transformations/map_fusion.cpp
1
#include "sdfg/transformations/map_fusion.h"
2

3
#include <isl/ctx.h>
4
#include <isl/map.h>
5
#include <isl/options.h>
6
#include <isl/set.h>
7
#include <isl/space.h>
8
#include <symengine/solve.h>
9
#include "sdfg/analysis/arguments_analysis.h"
10
#include "sdfg/analysis/loop_analysis.h"
11
#include "sdfg/analysis/scope_analysis.h"
12
#include "sdfg/analysis/users.h"
13

14
#include "sdfg/analysis/assumptions_analysis.h"
15
#include "sdfg/control_flow/interstate_edge.h"
16
#include "sdfg/data_flow/data_flow_graph.h"
17
#include "sdfg/structured_control_flow/block.h"
18
#include "sdfg/structured_control_flow/for.h"
19
#include "sdfg/symbolic/utils.h"
20

21
namespace sdfg {
22
namespace transformations {
23

24
MapFusion::MapFusion(structured_control_flow::Map& first_map, structured_control_flow::StructuredLoop& second_loop)
25
    : first_map_(first_map), second_loop_(second_loop) {}
28✔
26

27
std::string MapFusion::name() const { return "MapFusion"; }
2✔
28

29
std::vector<std::pair<symbolic::Symbol, symbolic::Expression>> MapFusion::solve_subsets(
30
    const data_flow::Subset& producer_subset,
31
    const data_flow::Subset& consumer_subset,
32
    const std::vector<structured_control_flow::StructuredLoop*>& producer_loops,
33
    const std::vector<structured_control_flow::StructuredLoop*>& consumer_loops,
34
    const symbolic::Assumptions& producer_assumptions,
35
    const symbolic::Assumptions& consumer_assumptions
36
) {
26✔
37
    // Delinearize subsets to recover multi-dimensional structure from linearized accesses
38
    // e.g. T[i*N + j] with assumptions on bounds -> T[i, j]
39
    auto producer_sub = symbolic::delinearize(producer_subset, producer_assumptions);
26✔
40
    auto consumer_sub = symbolic::delinearize(consumer_subset, consumer_assumptions);
26✔
41

42
    // Subset dimensions must match
43
    if (producer_sub.size() != consumer_sub.size()) {
26✔
NEW
44
        return {};
×
NEW
45
    }
×
46
    if (producer_sub.empty()) {
26✔
NEW
47
        return {};
×
NEW
48
    }
×
49

50
    // Extract producer indvars
51
    SymEngine::vec_sym producer_vars;
26✔
52
    for (auto* loop : producer_loops) {
32✔
53
        producer_vars.push_back(SymEngine::rcp_static_cast<const SymEngine::Symbol>(loop->indvar()));
32✔
54
    }
32✔
55

56
    // Step 1: Solve the linear equation system using SymEngine
57
    // System: producer_sub[d] - consumer_sub[d] = 0, for each dimension d
58
    // Solve for producer_vars in terms of consumer_vars and parameters
59
    SymEngine::vec_basic equations;
26✔
60
    for (size_t d = 0; d < producer_sub.size(); ++d) {
58✔
61
        equations.push_back(symbolic::sub(producer_sub.at(d), consumer_sub.at(d)));
32✔
62
    }
32✔
63

64
    // Need exactly as many equations as unknowns for a unique solution.
65
    // Underdetermined systems (e.g. linearized access with multiple loop vars)
66
    // cannot be uniquely solved and would crash linsolve.
67
    if (equations.size() != producer_vars.size()) {
26✔
NEW
68
        return {};
×
NEW
69
    }
×
70

71
    SymEngine::vec_basic solution;
26✔
72
    try {
26✔
73
        solution = SymEngine::linsolve(equations, producer_vars);
26✔
74
    } catch (...) {
26✔
NEW
75
        return {};
×
NEW
76
    }
×
77
    if (solution.size() != producer_vars.size()) {
26✔
NEW
78
        return {};
×
NEW
79
    }
×
80
    // Build consumer var set for atom validation
81
    symbolic::SymbolSet consumer_var_set;
26✔
82
    for (auto* loop : consumer_loops) {
32✔
83
        consumer_var_set.insert(loop->indvar());
32✔
84
    }
32✔
85

86
    std::vector<std::pair<symbolic::Symbol, symbolic::Expression>> mappings;
26✔
87
    for (size_t i = 0; i < producer_vars.size(); ++i) {
58✔
88
        auto& sol = solution[i];
32✔
89

90
        // Check for invalid solutions
91
        if (SymEngine::is_a<SymEngine::NaN>(*sol) || SymEngine::is_a<SymEngine::Infty>(*sol)) {
32✔
NEW
92
            return {};
×
NEW
93
        }
×
94

95
        // Validate that solution atoms are consumer vars or parameters
96
        for (const auto& atom : symbolic::atoms(sol)) {
34✔
97
            if (consumer_var_set.count(atom)) {
34✔
98
                continue;
34✔
99
            }
34✔
NEW
100
            bool is_param = false;
×
NEW
101
            auto it = consumer_assumptions.find(atom);
×
NEW
102
            if (it != consumer_assumptions.end() && it->second.constant()) {
×
NEW
103
                is_param = true;
×
NEW
104
            }
×
NEW
105
            if (!is_param) {
×
NEW
106
                it = producer_assumptions.find(atom);
×
NEW
107
                if (it != producer_assumptions.end() && it->second.constant()) {
×
NEW
108
                    is_param = true;
×
NEW
109
                }
×
NEW
110
            }
×
NEW
111
            if (!is_param) {
×
NEW
112
                return {};
×
NEW
113
            }
×
NEW
114
        }
×
115

116
        mappings.push_back({symbolic::symbol(producer_vars[i]->get_name()), symbolic::expand(sol)});
32✔
117
    }
32✔
118
    // Step 2: ISL integrality validation via map composition
119
    // Build an unconstrained producer access map (no domain bounds on producer vars).
120
    // In map fusion, the producer's computation is inlined into the consumer, so
121
    // the producer's original iteration domain is irrelevant. We only need to verify
122
    // that the equation system has an INTEGER solution for every consumer point.
123
    symbolic::Assumptions unconstrained_producer;
26✔
124
    for (auto* loop : producer_loops) {
32✔
125
        symbolic::Assumption a(loop->indvar());
32✔
126
        a.constant(false);
32✔
127
        unconstrained_producer[loop->indvar()] = a;
32✔
128
    }
32✔
129
    for (const auto& [sym, assump] : producer_assumptions) {
64✔
130
        if (assump.constant() && unconstrained_producer.find(sym) == unconstrained_producer.end()) {
64✔
131
            unconstrained_producer[sym] = assump;
32✔
132
        }
32✔
133
    }
64✔
134

135
    std::string producer_map_str = symbolic::expression_to_map_str(producer_sub, unconstrained_producer);
26✔
136
    // Build consumer access map with full domain constraints
137
    std::string consumer_map_str = symbolic::expression_to_map_str(consumer_sub, consumer_assumptions);
26✔
138

139
    isl_ctx* ctx = isl_ctx_alloc();
26✔
140
    isl_options_set_on_error(ctx, ISL_ON_ERROR_CONTINUE);
26✔
141

142
    isl_map* producer_map = isl_map_read_from_str(ctx, producer_map_str.c_str());
26✔
143
    isl_map* consumer_map = isl_map_read_from_str(ctx, consumer_map_str.c_str());
26✔
144

145
    if (!producer_map || !consumer_map) {
26✔
NEW
146
        if (producer_map) isl_map_free(producer_map);
×
NEW
147
        if (consumer_map) isl_map_free(consumer_map);
×
NEW
148
        isl_ctx_free(ctx);
×
NEW
149
        return {};
×
NEW
150
    }
×
151

152
    // Align parameters between the two maps
153
    isl_space* params_p = isl_space_params(isl_map_get_space(producer_map));
26✔
154
    isl_space* params_c = isl_space_params(isl_map_get_space(consumer_map));
26✔
155
    isl_space* unified = isl_space_align_params(isl_space_copy(params_p), isl_space_copy(params_c));
26✔
156
    isl_space_free(params_p);
26✔
157
    isl_space_free(params_c);
26✔
158

159
    producer_map = isl_map_align_params(producer_map, isl_space_copy(unified));
26✔
160
    consumer_map = isl_map_align_params(consumer_map, isl_space_copy(unified));
26✔
161

162
    // Save consumer domain before consuming consumer_map in composition
163
    isl_set* consumer_domain = isl_map_domain(isl_map_copy(consumer_map));
26✔
164

165
    // Compute composition: consumer_access ∘ inverse(producer_access)
166
    // This checks whether the equation system producer_subset = consumer_subset
167
    // has an integer solution for each consumer domain point.
168
    isl_map* producer_inverse = isl_map_reverse(producer_map);
26✔
169
    isl_map* composition = isl_map_apply_range(consumer_map, producer_inverse);
26✔
170

171
    // Check single-valuedness: each consumer point maps to at most one producer point
172
    bool single_valued = isl_map_is_single_valued(composition) == isl_bool_true;
26✔
173

174
    // Check domain coverage: every consumer point has a valid integer mapping
175
    isl_set* comp_domain = isl_map_domain(composition);
26✔
176

177
    bool domain_covered = isl_set_is_subset(consumer_domain, comp_domain) == isl_bool_true;
26✔
178

179
    isl_set_free(comp_domain);
26✔
180
    isl_set_free(consumer_domain);
26✔
181

182
    // Step 3: Verify producer write range covers consumer read range.
183
    // The producer only writes a subset of the array if its loops have restricted bounds.
184
    // Fusion is invalid if the consumer reads elements the producer never writes.
185
    bool range_covered = false;
26✔
186
    if (single_valued && domain_covered) {
26✔
187
        std::string constrained_producer_map_str = symbolic::expression_to_map_str(producer_sub, producer_assumptions);
24✔
188
        isl_map* constrained_producer = isl_map_read_from_str(ctx, constrained_producer_map_str.c_str());
24✔
189
        isl_map* consumer_map_copy = isl_map_read_from_str(ctx, consumer_map_str.c_str());
24✔
190

191
        if (constrained_producer && consumer_map_copy) {
24✔
192
            constrained_producer = isl_map_align_params(constrained_producer, isl_space_copy(unified));
24✔
193
            consumer_map_copy = isl_map_align_params(consumer_map_copy, isl_space_copy(unified));
24✔
194

195
            isl_set* producer_range = isl_map_range(constrained_producer);
24✔
196
            isl_set* consumer_range = isl_map_range(consumer_map_copy);
24✔
197

198
            range_covered = isl_set_is_subset(consumer_range, producer_range) == isl_bool_true;
24✔
199

200
            isl_set_free(producer_range);
24✔
201
            isl_set_free(consumer_range);
24✔
202
        } else {
24✔
NEW
203
            if (constrained_producer) isl_map_free(constrained_producer);
×
NEW
204
            if (consumer_map_copy) isl_map_free(consumer_map_copy);
×
NEW
205
        }
×
206
    }
24✔
207

208
    isl_space_free(unified);
26✔
209
    isl_ctx_free(ctx);
26✔
210

211
    if (!single_valued || !domain_covered || !range_covered) {
26✔
212
        return {};
4✔
213
    }
4✔
214

215
    return mappings;
22✔
216
}
26✔
217

218
bool MapFusion::can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
26✔
219
    fusion_candidates_.clear();
26✔
220

221
    // Criterion: Get parent scope and verify both loops are sequential children
222
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
26✔
223
    auto* first_parent = scope_analysis.parent_scope(&first_map_);
26✔
224
    auto* second_parent = scope_analysis.parent_scope(&second_loop_);
26✔
225
    if (first_parent == nullptr || second_parent == nullptr) {
26✔
UNCOV
226
        return false;
×
227
    }
×
228
    if (first_parent != second_parent) {
26✔
UNCOV
229
        return false;
×
230
    }
×
231

232
    auto* parent_sequence = dynamic_cast<structured_control_flow::Sequence*>(first_parent);
26✔
233
    if (parent_sequence == nullptr) {
26✔
234
        return false;
×
235
    }
×
236

237
    int first_index = parent_sequence->index(first_map_);
26✔
238
    int second_index = parent_sequence->index(second_loop_);
26✔
239
    if (first_index == -1 || second_index == -1) {
26✔
UNCOV
240
        return false;
×
241
    }
×
242
    if (second_index != first_index + 1) {
26✔
243
        return false;
1✔
244
    }
1✔
245

246
    // Criterion: Transition between maps should have no assignments
247
    auto& transition = parent_sequence->at(first_index).second;
25✔
248
    if (!transition.empty()) {
25✔
249
        return false;
×
250
    }
×
251
    // Criterion: First loop is perfectly nested
252
    auto& loop_analysis = analysis_manager.get<analysis::LoopAnalysis>();
25✔
253
    auto first_loop_info = loop_analysis.loop_info(&first_map_);
25✔
254
    if (!first_loop_info.is_perfectly_nested) {
25✔
255
        return false;
×
256
    }
×
257
    if (!first_loop_info.is_perfectly_parallel) {
25✔
NEW
258
        return false;
×
NEW
259
    }
×
260
    std::vector<structured_control_flow::StructuredLoop*> producer_loops = {&first_map_};
25✔
261
    structured_control_flow::Sequence* producer_body = &first_map_.root();
25✔
262
    structured_control_flow::ControlFlowNode* producer_node = &first_map_.root().at(0).first;
25✔
263
    while (auto* nested = dynamic_cast<structured_control_flow::Map*>(producer_node)) {
32✔
264
        producer_loops.push_back(nested);
7✔
265
        producer_body = &nested->root();
7✔
266
        producer_node = &nested->root().at(0).first;
7✔
267
    }
7✔
268
    auto* producer_block_ptr = dynamic_cast<structured_control_flow::Block*>(producer_node);
25✔
269
    if (producer_block_ptr == nullptr) {
25✔
270
        return false;
×
271
    }
×
272

273
    // Criterion: Second loop is perfectly nested (but can have non-parallel loops)
274
    auto second_loop_info = loop_analysis.loop_info(&second_loop_);
25✔
275
    if (!second_loop_info.is_perfectly_nested) {
25✔
276
        return false;
×
277
    }
×
278
    std::vector<structured_control_flow::StructuredLoop*> consumer_loops = {&second_loop_};
25✔
279
    structured_control_flow::Sequence* consumer_body = &second_loop_.root();
25✔
280
    structured_control_flow::ControlFlowNode* consumer_node = &second_loop_.root().at(0).first;
25✔
281
    while (auto* nested = dynamic_cast<structured_control_flow::StructuredLoop*>(consumer_node)) {
31✔
282
        consumer_loops.push_back(nested);
6✔
283
        consumer_body = &nested->root();
6✔
284
        consumer_node = &nested->root().at(0).first;
6✔
285
    }
6✔
286

287
    // Get arguments analysis to identify inputs/outputs of each loop
288
    auto& arguments_analysis = analysis_manager.get<analysis::ArgumentsAnalysis>();
25✔
289
    auto first_args = arguments_analysis.arguments(analysis_manager, first_map_);
25✔
290
    auto second_args = arguments_analysis.arguments(analysis_manager, second_loop_);
25✔
291

292
    std::unordered_set<std::string> first_outputs;
25✔
293
    for (const auto& [name, arg] : first_args) {
82✔
294
        if (arg.is_output) {
82✔
295
            first_outputs.insert(name);
25✔
296
        }
25✔
297
    }
82✔
298

299
    std::unordered_set<std::string> fusion_containers;
25✔
300
    for (const auto& [name, arg] : second_args) {
84✔
301
        if (first_outputs.contains(name)) {
84✔
302
            if (arg.is_output) {
24✔
NEW
303
                return false;
×
NEW
304
            }
×
305
            if (arg.is_input) {
24✔
306
                fusion_containers.insert(name);
24✔
307
            }
24✔
308
        }
24✔
309
    }
84✔
310
    if (fusion_containers.empty()) {
25✔
311
        return false;
1✔
312
    }
1✔
313
    // Get assumptions for the innermost blocks (includes all enclosing loop bounds)
314
    auto& assumptions_analysis = analysis_manager.get<analysis::AssumptionsAnalysis>();
24✔
315
    auto& producer_assumptions = assumptions_analysis.get(*producer_node);
24✔
316
    auto& consumer_assumptions = assumptions_analysis.get(consumer_body->at(0).first);
24✔
317

318
    // For each fusion container, find the producer memlet and collect unique consumer subsets
319
    auto& first_dataflow = producer_block_ptr->dataflow();
24✔
320
    for (const auto& container : fusion_containers) {
24✔
321
        // Find unique producer in first map (producer)
322
        data_flow::Memlet* producer_memlet = nullptr;
24✔
323

324
        for (auto& node : first_dataflow.nodes()) {
75✔
325
            auto* access = dynamic_cast<data_flow::AccessNode*>(&node);
75✔
326
            if (access == nullptr || access->data() != container) {
75✔
327
                continue;
51✔
328
            }
51✔
329
            if (first_dataflow.in_degree(*access) != 1 || first_dataflow.out_degree(*access) != 0) {
24✔
330
                return false;
×
331
            }
×
332
            auto& iedge = *first_dataflow.in_edges(*access).begin();
24✔
333
            if (iedge.type() != data_flow::MemletType::Computational) {
24✔
334
                return false;
×
335
            }
×
336
            if (producer_memlet != nullptr) {
24✔
337
                return false;
×
338
            }
×
339
            producer_memlet = &iedge;
24✔
340
        }
24✔
341
        if (producer_memlet == nullptr) {
24✔
342
            return false;
×
343
        }
×
344

345
        const auto& producer_subset = producer_memlet->subset();
24✔
346
        if (producer_subset.empty()) {
24✔
UNCOV
347
            return false;
×
348
        }
×
349

350
        // Collect all unique subsets from consumer blocks
351
        // Use a vector of subsets and deduplicate manually
352
        std::vector<data_flow::Subset> unique_subsets;
24✔
353
        for (size_t i = 0; i < consumer_body->size(); ++i) {
48✔
354
            auto* block = dynamic_cast<structured_control_flow::Block*>(&consumer_body->at(i).first);
25✔
355
            if (block == nullptr) {
25✔
356
                return false;
×
357
            }
×
358

359
            auto& dataflow = block->dataflow();
25✔
360
            for (auto& node : dataflow.nodes()) {
86✔
361
                auto* access = dynamic_cast<data_flow::AccessNode*>(&node);
86✔
362
                if (access == nullptr || access->data() != container) {
86✔
363
                    continue;
58✔
364
                }
58✔
365
                if (dataflow.in_degree(*access) != 0 || dataflow.out_degree(*access) == 0) {
28✔
366
                    return false;
×
367
                }
×
368

369
                // Check all read memlets from this access
370
                for (auto& memlet : dataflow.out_edges(*access)) {
29✔
371
                    if (memlet.type() != data_flow::MemletType::Computational) {
29✔
372
                        return false;
×
373
                    }
×
374

375
                    auto& consumer_subset = memlet.subset();
29✔
376
                    if (consumer_subset.size() != producer_subset.size()) {
29✔
377
                        return false; // Dimension mismatch
1✔
378
                    }
1✔
379

380
                    // Check if this subset is already in unique_subsets
381
                    bool found = false;
28✔
382
                    for (const auto& existing : unique_subsets) {
28✔
383
                        if (existing.size() != consumer_subset.size()) continue;
6✔
384
                        bool match = true;
6✔
385
                        for (size_t d = 0; d < existing.size(); ++d) {
8✔
386
                            if (!symbolic::eq(existing[d], consumer_subset[d])) {
6✔
387
                                match = false;
4✔
388
                                break;
4✔
389
                            }
4✔
390
                        }
6✔
391
                        if (match) {
6✔
392
                            found = true;
2✔
393
                            break;
2✔
394
                        }
2✔
395
                    }
6✔
396
                    if (!found) {
28✔
397
                        unique_subsets.push_back(consumer_subset);
26✔
398
                    }
26✔
399
                }
28✔
400
            }
28✔
401
        }
25✔
402

403
        // For each unique consumer subset, solve index mappings and create a FusionCandidate
404
        for (const auto& consumer_subset : unique_subsets) {
26✔
405
            auto mappings = solve_subsets(
26✔
406
                producer_subset,
26✔
407
                consumer_subset,
26✔
408
                producer_loops,
26✔
409
                consumer_loops,
26✔
410
                producer_assumptions,
26✔
411
                consumer_assumptions
26✔
412
            );
26✔
413
            if (mappings.empty()) {
26✔
414
                return false;
4✔
415
            }
4✔
416

417
            FusionCandidate candidate;
22✔
418
            candidate.container = container;
22✔
419
            candidate.consumer_subset = consumer_subset;
22✔
420
            candidate.index_mappings = std::move(mappings);
22✔
421

422
            fusion_candidates_.push_back(candidate);
22✔
423
        }
22✔
424
    }
23✔
425

426
    // Criterion: At least one valid fusion candidate
427
    return !fusion_candidates_.empty();
19✔
428
}
24✔
429

430
void MapFusion::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
11✔
431
    auto& sdfg = builder.subject();
11✔
432

433
    // Navigate to the innermost block of the first map (handling nested maps)
434
    structured_control_flow::ControlFlowNode* first_block_node = &first_map_.root().at(0).first;
11✔
435
    while (auto* nested_map = dynamic_cast<structured_control_flow::Map*>(first_block_node)) {
13✔
436
        first_block_node = &nested_map->root().at(0).first;
2✔
437
    }
2✔
438
    auto* first_block = dynamic_cast<structured_control_flow::Block*>(first_block_node);
11✔
439
    auto& first_dataflow = first_block->dataflow();
11✔
440

441
    // Navigate to the innermost consumer sequence (handling nested loops)
442
    structured_control_flow::Sequence* second_root = &second_loop_.root();
11✔
443
    structured_control_flow::ControlFlowNode* consumer_node = &second_loop_.root().at(0).first;
11✔
444
    while (auto* nested = dynamic_cast<structured_control_flow::StructuredLoop*>(consumer_node)) {
13✔
445
        second_root = &nested->root();
2✔
446
        consumer_node = &nested->root().at(0).first;
2✔
447
    }
2✔
448

449
    // For each fusion candidate (unique container+subset pair), create a temp and insert a producer block
450
    // Track: candidate_index -> temp_name
451
    std::vector<std::string> candidate_temps;
11✔
452

453
    for (size_t cand_idx = 0; cand_idx < fusion_candidates_.size(); ++cand_idx) {
23✔
454
        auto& candidate = fusion_candidates_[cand_idx];
12✔
455

456
        // Create a temp scalar for this candidate
457
        auto& container_type = sdfg.type(candidate.container);
12✔
458
        std::string temp_name = builder.find_new_name("_fused_tmp");
12✔
459
        types::Scalar tmp_type(container_type.primitive_type());
12✔
460
        builder.add_container(temp_name, tmp_type);
12✔
461
        candidate_temps.push_back(temp_name);
12✔
462

463
        // Insert a producer block at the beginning of the consumer's innermost body
464
        auto& first_child = second_root->at(0).first;
12✔
465
        control_flow::Assignments empty_assignments;
12✔
466
        auto& producer_block = builder.add_block_before(*second_root, first_child, empty_assignments);
12✔
467

468
        // Deep copy all nodes from first block to producer block
469
        std::unordered_map<const data_flow::DataFlowNode*, data_flow::DataFlowNode*> node_mapping;
12✔
470
        for (auto& node : first_dataflow.nodes()) {
39✔
471
            node_mapping[&node] = &builder.copy_node(producer_block, node);
39✔
472
            auto access = node_mapping[&node];
39✔
473
            if (auto* access_node = dynamic_cast<data_flow::AccessNode*>(access)) {
39✔
474
                if (access_node->data() == candidate.container) {
27✔
475
                    // This is the producer access for this candidate - update to temp
476
                    access_node->data(temp_name);
12✔
477
                }
12✔
478
            }
27✔
479
        }
39✔
480

481
        // Add memlets with index substitution using this candidate's index_mappings
482
        for (auto& edge : first_dataflow.edges()) {
27✔
483
            auto& src_node = edge.src();
27✔
484
            auto& dst_node = edge.dst();
27✔
485

486
            // Substitute all producer indvars in subset
487
            const types::IType* base_type = &edge.base_type();
27✔
488
            data_flow::Subset new_subset;
27✔
489
            for (const auto& dim : edge.subset()) {
28✔
490
                auto new_dim = dim;
28✔
491
                for (const auto& [pvar, mapping] : candidate.index_mappings) {
36✔
492
                    new_dim = symbolic::subs(new_dim, pvar, mapping);
36✔
493
                }
36✔
494
                new_dim = symbolic::expand(new_dim);
28✔
495
                new_subset.push_back(new_dim);
28✔
496
            }
28✔
497

498
            // For output edges (to temp scalar), use empty subset
499
            auto* dst_access = dynamic_cast<data_flow::AccessNode*>(&dst_node);
27✔
500
            if (dst_access != nullptr && dst_access->data() == candidate.container &&
27✔
501
                first_dataflow.in_degree(*dst_access) > 0) {
27✔
502
                new_subset.clear(); // Scalar has empty subset
12✔
503
                base_type = &tmp_type;
12✔
504
            }
12✔
505

506
            builder.add_memlet(
27✔
507
                producer_block,
27✔
508
                *node_mapping[&src_node],
27✔
509
                edge.src_conn(),
27✔
510
                *node_mapping[&dst_node],
27✔
511
                edge.dst_conn(),
27✔
512
                new_subset,
27✔
513
                *base_type,
27✔
514
                edge.debug_info()
27✔
515
            );
27✔
516
        }
27✔
517
    }
12✔
518

519
    // Now update all read accesses in consumer blocks to point to the appropriate temp
520
    // We need to match each access node's memlet subset to find the right candidate
521
    size_t num_producer_blocks = fusion_candidates_.size();
11✔
522

523
    for (size_t block_idx = num_producer_blocks; block_idx < second_root->size(); ++block_idx) {
23✔
524
        auto* block = dynamic_cast<structured_control_flow::Block*>(&second_root->at(block_idx).first);
12✔
525
        if (block == nullptr) {
12✔
526
            continue;
×
527
        }
×
528

529
        auto& dataflow = block->dataflow();
12✔
530

531
        // Find all read access nodes
532
        for (auto& node : dataflow.nodes()) {
42✔
533
            auto* access = dynamic_cast<data_flow::AccessNode*>(&node);
42✔
534
            if (access == nullptr) {
42✔
535
                continue;
13✔
536
            }
13✔
537

538
            // Only update read accesses (out_degree > 0)
539
            if (dataflow.out_degree(*access) == 0) {
29✔
540
                continue;
13✔
541
            }
13✔
542

543
            // Capture original container name before any modifications
544
            std::string original_container = access->data();
16✔
545

546
            // Check each outgoing memlet to find which candidates match
547
            for (auto& memlet : dataflow.out_edges(*access)) {
17✔
548
                if (memlet.type() != data_flow::MemletType::Computational) {
17✔
549
                    continue;
×
550
                }
×
551

552
                const auto& memlet_subset = memlet.subset();
17✔
553

554
                // Find matching candidate by container and subset
555
                for (size_t cand_idx = 0; cand_idx < fusion_candidates_.size(); ++cand_idx) {
21✔
556
                    auto& candidate = fusion_candidates_[cand_idx];
18✔
557

558
                    if (original_container != candidate.container) {
18✔
559
                        continue;
3✔
560
                    }
3✔
561

562
                    // Check if subset matches
563
                    if (memlet_subset.size() != candidate.consumer_subset.size()) {
15✔
564
                        continue;
×
565
                    }
×
566

567
                    bool subset_matches = true;
15✔
568
                    for (size_t d = 0; d < memlet_subset.size(); ++d) {
31✔
569
                        if (!symbolic::eq(memlet_subset[d], candidate.consumer_subset[d])) {
17✔
570
                            subset_matches = false;
1✔
571
                            break;
1✔
572
                        }
1✔
573
                    }
17✔
574

575
                    if (!subset_matches) {
15✔
576
                        continue;
1✔
577
                    }
1✔
578

579
                    // Found a match - update the access node and memlet
580
                    const auto& temp_name = candidate_temps[cand_idx];
14✔
581
                    auto& temp_type = sdfg.type(temp_name);
14✔
582

583
                    access->data(temp_name);
14✔
584

585
                    // Update this memlet
586
                    memlet.set_subset({});
14✔
587
                    memlet.set_base_type(temp_type);
14✔
588

589
                    // Also update any incoming edges
590
                    for (auto& in_edge : dataflow.in_edges(*access)) {
14✔
591
                        in_edge.set_subset({});
×
592
                        in_edge.set_base_type(temp_type);
×
593
                    }
×
594

595
                    break; // Found the matching candidate
14✔
596
                }
15✔
597
            }
17✔
598
        }
16✔
599
    }
12✔
600

601
    analysis_manager.invalidate_all();
11✔
602
    applied_ = true;
11✔
603
}
11✔
604

605
void MapFusion::to_json(nlohmann::json& j) const {
1✔
606
    std::string second_type = "for";
1✔
607
    if (dynamic_cast<structured_control_flow::Map*>(&second_loop_) != nullptr) {
1✔
608
        second_type = "map";
1✔
609
    }
1✔
610
    j["transformation_type"] = this->name();
1✔
611
    j["subgraph"] = {
1✔
612
        {"0", {{"element_id", first_map_.element_id()}, {"type", "map"}}},
1✔
613
        {"1", {{"element_id", second_loop_.element_id()}, {"type", second_type}}}
1✔
614
    };
1✔
615
}
1✔
616

617
MapFusion MapFusion::from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& desc) {
1✔
618
    auto first_map_id = desc["subgraph"]["0"]["element_id"].get<size_t>();
1✔
619
    auto second_loop_id = desc["subgraph"]["1"]["element_id"].get<size_t>();
1✔
620

621
    auto first_element = builder.find_element_by_id(first_map_id);
1✔
622
    auto second_element = builder.find_element_by_id(second_loop_id);
1✔
623

624
    if (first_element == nullptr) {
1✔
625
        throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(first_map_id) + " not found.");
×
626
    }
×
627
    if (second_element == nullptr) {
1✔
628
        throw InvalidTransformationDescriptionException(
×
629
            "Element with ID " + std::to_string(second_loop_id) + " not found."
×
630
        );
×
631
    }
×
632

633
    auto* first_map = dynamic_cast<structured_control_flow::Map*>(first_element);
1✔
634
    auto* second_loop = dynamic_cast<structured_control_flow::StructuredLoop*>(second_element);
1✔
635

636
    if (first_map == nullptr) {
1✔
637
        throw InvalidTransformationDescriptionException(
×
638
            "Element with ID " + std::to_string(first_map_id) + " is not a Map."
×
639
        );
×
640
    }
×
641
    if (second_loop == nullptr) {
1✔
642
        throw InvalidTransformationDescriptionException(
×
643
            "Element with ID " + std::to_string(second_loop_id) + " is not a StructuredLoop."
×
644
        );
×
645
    }
×
646

647
    return MapFusion(*first_map, *second_loop);
1✔
648
}
1✔
649

650
} // namespace transformations
651
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc