• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 23087278095

14 Mar 2026 11:44AM UTC coverage: 63.927% (+0.3%) from 63.617%
23087278095

push

github

web-flow
Merge pull request #568 from daisytuner/dead-data-elimination

Working on memory ownership & escape analysis

475 of 637 new or added lines in 28 files covered. (74.57%)

6 existing lines in 3 files now uncovered.

26010 of 40687 relevant lines covered (63.93%)

402.05 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

79.66
/sdfg/src/transformations/map_fusion.cpp
1
#include "sdfg/transformations/map_fusion.h"
2

3
#include <isl/ctx.h>
4
#include <isl/map.h>
5
#include <isl/options.h>
6
#include <isl/set.h>
7
#include <isl/space.h>
8
#include <symengine/solve.h>
9
#include "sdfg/analysis/arguments_analysis.h"
10
#include "sdfg/analysis/loop_analysis.h"
11
#include "sdfg/analysis/scope_analysis.h"
12
#include "sdfg/analysis/users.h"
13

14
#include "sdfg/analysis/assumptions_analysis.h"
15
#include "sdfg/control_flow/interstate_edge.h"
16
#include "sdfg/data_flow/data_flow_graph.h"
17
#include "sdfg/structured_control_flow/block.h"
18
#include "sdfg/structured_control_flow/for.h"
19
#include "sdfg/symbolic/utils.h"
20

21
namespace sdfg {
22
namespace transformations {
23

24
MapFusion::MapFusion(structured_control_flow::Map& first_map, structured_control_flow::StructuredLoop& second_loop)
25
    : first_map_(first_map), second_loop_(second_loop) {}
29✔
26

27
std::string MapFusion::name() const { return "MapFusion"; }
2✔
28

29
std::vector<std::pair<symbolic::Symbol, symbolic::Expression>> MapFusion::solve_subsets(
30
    const data_flow::Subset& producer_subset,
31
    const data_flow::Subset& consumer_subset,
32
    const std::vector<structured_control_flow::StructuredLoop*>& producer_loops,
33
    const std::vector<structured_control_flow::StructuredLoop*>& consumer_loops,
34
    const symbolic::Assumptions& producer_assumptions,
35
    const symbolic::Assumptions& consumer_assumptions
36
) {
26✔
37
    // Delinearize subsets to recover multi-dimensional structure from linearized accesses
38
    // e.g. T[i*N + j] with assumptions on bounds -> T[i, j]
39
    auto producer_sub = symbolic::delinearize(producer_subset, producer_assumptions);
26✔
40
    auto consumer_sub = symbolic::delinearize(consumer_subset, consumer_assumptions);
26✔
41

42
    // Subset dimensions must match
43
    if (producer_sub.size() != consumer_sub.size()) {
26✔
44
        return {};
×
45
    }
×
46
    if (producer_sub.empty()) {
26✔
47
        return {};
×
48
    }
×
49

50
    // Extract producer indvars
51
    SymEngine::vec_sym producer_vars;
26✔
52
    for (auto* loop : producer_loops) {
32✔
53
        producer_vars.push_back(SymEngine::rcp_static_cast<const SymEngine::Symbol>(loop->indvar()));
32✔
54
    }
32✔
55

56
    // Step 1: Solve the linear equation system using SymEngine
57
    // System: producer_sub[d] - consumer_sub[d] = 0, for each dimension d
58
    // Solve for producer_vars in terms of consumer_vars and parameters
59
    SymEngine::vec_basic equations;
26✔
60
    for (size_t d = 0; d < producer_sub.size(); ++d) {
58✔
61
        equations.push_back(symbolic::sub(producer_sub.at(d), consumer_sub.at(d)));
32✔
62
    }
32✔
63

64
    // Need exactly as many equations as unknowns for a unique solution.
65
    // Underdetermined systems (e.g. linearized access with multiple loop vars)
66
    // cannot be uniquely solved and would crash linsolve.
67
    if (equations.size() != producer_vars.size()) {
26✔
68
        return {};
×
69
    }
×
70

71
    SymEngine::vec_basic solution;
26✔
72
    try {
26✔
73
        solution = SymEngine::linsolve(equations, producer_vars);
26✔
74
    } catch (...) {
26✔
75
        return {};
×
76
    }
×
77
    if (solution.size() != producer_vars.size()) {
26✔
78
        return {};
×
79
    }
×
80
    // Build consumer var set for atom validation
81
    symbolic::SymbolSet consumer_var_set;
26✔
82
    for (auto* loop : consumer_loops) {
32✔
83
        consumer_var_set.insert(loop->indvar());
32✔
84
    }
32✔
85

86
    std::vector<std::pair<symbolic::Symbol, symbolic::Expression>> mappings;
26✔
87
    for (size_t i = 0; i < producer_vars.size(); ++i) {
58✔
88
        auto& sol = solution[i];
32✔
89

90
        // Check for invalid solutions
91
        if (SymEngine::is_a<SymEngine::NaN>(*sol) || SymEngine::is_a<SymEngine::Infty>(*sol)) {
32✔
92
            return {};
×
93
        }
×
94

95
        // Validate that solution atoms are consumer vars or parameters
96
        for (const auto& atom : symbolic::atoms(sol)) {
34✔
97
            if (consumer_var_set.count(atom)) {
34✔
98
                continue;
34✔
99
            }
34✔
100
            bool is_param = false;
×
101
            auto it = consumer_assumptions.find(atom);
×
102
            if (it != consumer_assumptions.end() && it->second.constant()) {
×
103
                is_param = true;
×
104
            }
×
105
            if (!is_param) {
×
106
                it = producer_assumptions.find(atom);
×
107
                if (it != producer_assumptions.end() && it->second.constant()) {
×
108
                    is_param = true;
×
109
                }
×
110
            }
×
111
            if (!is_param) {
×
112
                return {};
×
113
            }
×
114
        }
×
115

116
        mappings.push_back({symbolic::symbol(producer_vars[i]->get_name()), symbolic::expand(sol)});
32✔
117
    }
32✔
118
    // Step 2: ISL integrality validation via map composition
119
    // Build an unconstrained producer access map (no domain bounds on producer vars).
120
    // In map fusion, the producer's computation is inlined into the consumer, so
121
    // the producer's original iteration domain is irrelevant. We only need to verify
122
    // that the equation system has an INTEGER solution for every consumer point.
123
    symbolic::Assumptions unconstrained_producer;
26✔
124
    for (auto* loop : producer_loops) {
32✔
125
        symbolic::Assumption a(loop->indvar());
32✔
126
        a.constant(false);
32✔
127
        unconstrained_producer[loop->indvar()] = a;
32✔
128
    }
32✔
129
    for (const auto& [sym, assump] : producer_assumptions) {
64✔
130
        if (assump.constant() && unconstrained_producer.find(sym) == unconstrained_producer.end()) {
64✔
131
            unconstrained_producer[sym] = assump;
32✔
132
        }
32✔
133
    }
64✔
134

135
    std::string producer_map_str = symbolic::expression_to_map_str(producer_sub, unconstrained_producer);
26✔
136
    // Build consumer access map with full domain constraints
137
    std::string consumer_map_str = symbolic::expression_to_map_str(consumer_sub, consumer_assumptions);
26✔
138

139
    isl_ctx* ctx = isl_ctx_alloc();
26✔
140
    isl_options_set_on_error(ctx, ISL_ON_ERROR_CONTINUE);
26✔
141

142
    isl_map* producer_map = isl_map_read_from_str(ctx, producer_map_str.c_str());
26✔
143
    isl_map* consumer_map = isl_map_read_from_str(ctx, consumer_map_str.c_str());
26✔
144

145
    if (!producer_map || !consumer_map) {
26✔
146
        if (producer_map) isl_map_free(producer_map);
×
147
        if (consumer_map) isl_map_free(consumer_map);
×
148
        isl_ctx_free(ctx);
×
149
        return {};
×
150
    }
×
151

152
    // Align parameters between the two maps
153
    isl_space* params_p = isl_space_params(isl_map_get_space(producer_map));
26✔
154
    isl_space* params_c = isl_space_params(isl_map_get_space(consumer_map));
26✔
155
    isl_space* unified = isl_space_align_params(isl_space_copy(params_p), isl_space_copy(params_c));
26✔
156
    isl_space_free(params_p);
26✔
157
    isl_space_free(params_c);
26✔
158

159
    producer_map = isl_map_align_params(producer_map, isl_space_copy(unified));
26✔
160
    consumer_map = isl_map_align_params(consumer_map, isl_space_copy(unified));
26✔
161

162
    // Save consumer domain before consuming consumer_map in composition
163
    isl_set* consumer_domain = isl_map_domain(isl_map_copy(consumer_map));
26✔
164

165
    // Compute composition: consumer_access ∘ inverse(producer_access)
166
    // This checks whether the equation system producer_subset = consumer_subset
167
    // has an integer solution for each consumer domain point.
168
    isl_map* producer_inverse = isl_map_reverse(producer_map);
26✔
169
    isl_map* composition = isl_map_apply_range(consumer_map, producer_inverse);
26✔
170

171
    // Check single-valuedness: each consumer point maps to at most one producer point
172
    bool single_valued = isl_map_is_single_valued(composition) == isl_bool_true;
26✔
173

174
    // Check domain coverage: every consumer point has a valid integer mapping
175
    isl_set* comp_domain = isl_map_domain(composition);
26✔
176

177
    bool domain_covered = isl_set_is_subset(consumer_domain, comp_domain) == isl_bool_true;
26✔
178

179
    isl_set_free(comp_domain);
26✔
180
    isl_set_free(consumer_domain);
26✔
181

182
    // Step 3: Verify producer write range covers consumer read range.
183
    // The producer only writes a subset of the array if its loops have restricted bounds.
184
    // Fusion is invalid if the consumer reads elements the producer never writes.
185
    bool range_covered = false;
26✔
186
    if (single_valued && domain_covered) {
26✔
187
        std::string constrained_producer_map_str = symbolic::expression_to_map_str(producer_sub, producer_assumptions);
24✔
188
        isl_map* constrained_producer = isl_map_read_from_str(ctx, constrained_producer_map_str.c_str());
24✔
189
        isl_map* consumer_map_copy = isl_map_read_from_str(ctx, consumer_map_str.c_str());
24✔
190

191
        if (constrained_producer && consumer_map_copy) {
24✔
192
            constrained_producer = isl_map_align_params(constrained_producer, isl_space_copy(unified));
24✔
193
            consumer_map_copy = isl_map_align_params(consumer_map_copy, isl_space_copy(unified));
24✔
194

195
            isl_set* producer_range = isl_map_range(constrained_producer);
24✔
196
            isl_set* consumer_range = isl_map_range(consumer_map_copy);
24✔
197

198
            range_covered = isl_set_is_subset(consumer_range, producer_range) == isl_bool_true;
24✔
199

200
            isl_set_free(producer_range);
24✔
201
            isl_set_free(consumer_range);
24✔
202
        } else {
24✔
203
            if (constrained_producer) isl_map_free(constrained_producer);
×
204
            if (consumer_map_copy) isl_map_free(consumer_map_copy);
×
205
        }
×
206
    }
24✔
207

208
    isl_space_free(unified);
26✔
209
    isl_ctx_free(ctx);
26✔
210

211
    if (!single_valued || !domain_covered || !range_covered) {
26✔
212
        return {};
4✔
213
    }
4✔
214

215
    return mappings;
22✔
216
}
26✔
217

218
bool MapFusion::can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
27✔
219
    fusion_candidates_.clear();
27✔
220

221
    // no use in fusing empty loops. Also presumed to not be empty further down
222
    if (first_map_.root().size() == 0 || second_loop_.root().size() == 0) {
27✔
NEW
223
        return false;
×
NEW
224
    }
×
225

226
    // Criterion: Get parent scope and verify both loops are sequential children
227
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
27✔
228
    auto* first_parent = scope_analysis.parent_scope(&first_map_);
27✔
229
    auto* second_parent = scope_analysis.parent_scope(&second_loop_);
27✔
230
    if (first_parent == nullptr || second_parent == nullptr) {
27✔
231
        return false;
×
232
    }
×
233
    if (first_parent != second_parent) {
27✔
234
        return false;
×
235
    }
×
236

237
    auto* parent_sequence = dynamic_cast<structured_control_flow::Sequence*>(first_parent);
27✔
238
    if (parent_sequence == nullptr) {
27✔
239
        return false;
×
240
    }
×
241

242
    int first_index = parent_sequence->index(first_map_);
27✔
243
    int second_index = parent_sequence->index(second_loop_);
27✔
244
    if (first_index == -1 || second_index == -1) {
27✔
245
        return false;
×
246
    }
×
247
    if (second_index != first_index + 1) {
27✔
248
        return false;
1✔
249
    }
1✔
250

251
    // Criterion: Transition between maps should have no assignments
252
    auto& transition = parent_sequence->at(first_index).second;
26✔
253
    if (!transition.empty()) {
26✔
254
        return false;
×
255
    }
×
256
    // Criterion: First loop is perfectly nested
257
    auto& loop_analysis = analysis_manager.get<analysis::LoopAnalysis>();
26✔
258
    auto first_loop_info = loop_analysis.loop_info(&first_map_);
26✔
259
    if (!first_loop_info.is_perfectly_nested) {
26✔
260
        return false;
×
261
    }
×
262
    if (!first_loop_info.is_perfectly_parallel) {
26✔
263
        return false;
×
264
    }
×
265
    std::vector<structured_control_flow::StructuredLoop*> producer_loops = {&first_map_};
26✔
266
    structured_control_flow::Sequence* producer_body = &first_map_.root();
26✔
267
    structured_control_flow::ControlFlowNode* producer_node = &first_map_.root().at(0).first;
26✔
268
    while (auto* nested = dynamic_cast<structured_control_flow::Map*>(producer_node)) {
33✔
269
        producer_loops.push_back(nested);
7✔
270
        producer_body = &nested->root();
7✔
271
        producer_node = &nested->root().at(0).first;
7✔
272
    }
7✔
273
    auto* producer_block_ptr = dynamic_cast<structured_control_flow::Block*>(producer_node);
26✔
274
    if (producer_block_ptr == nullptr) {
26✔
275
        return false;
×
276
    }
×
277

278
    // Criterion: Second loop is perfectly nested (but can have non-parallel loops)
279
    auto second_loop_info = loop_analysis.loop_info(&second_loop_);
26✔
280
    if (!second_loop_info.is_perfectly_nested) {
26✔
281
        return false;
×
282
    }
×
283
    std::vector<structured_control_flow::StructuredLoop*> consumer_loops = {&second_loop_};
26✔
284
    structured_control_flow::Sequence* consumer_body = &second_loop_.root();
26✔
285
    structured_control_flow::ControlFlowNode* consumer_node = &second_loop_.root().at(0).first;
26✔
286
    while (auto* nested = dynamic_cast<structured_control_flow::StructuredLoop*>(consumer_node)) {
32✔
287
        consumer_loops.push_back(nested);
6✔
288
        consumer_body = &nested->root();
6✔
289
        consumer_node = &nested->root().at(0).first;
6✔
290
    }
6✔
291

292
    // Get arguments analysis to identify inputs/outputs of each loop
293
    auto& arguments_analysis = analysis_manager.get<analysis::ArgumentsAnalysis>();
26✔
294
    auto first_args = arguments_analysis.arguments(analysis_manager, first_map_);
26✔
295
    auto second_args = arguments_analysis.arguments(analysis_manager, second_loop_);
26✔
296

297
    std::unordered_set<std::string> first_inputs;
26✔
298
    std::unordered_set<std::string> first_outputs;
26✔
299
    for (const auto& [name, arg] : first_args) {
85✔
300
        if (arg.is_output) {
85✔
301
            first_outputs.insert(name);
26✔
302
        }
26✔
303
        if (arg.is_input) {
85✔
304
            first_inputs.insert(name);
85✔
305
        }
85✔
306
    }
85✔
307

308
    std::unordered_set<std::string> fusion_containers;
26✔
309
    for (const auto& [name, arg] : second_args) {
85✔
310
        if (first_outputs.contains(name)) {
85✔
311
            if (arg.is_output) {
24✔
312
                return false;
×
313
            }
×
314
            if (arg.is_input) {
24✔
315
                fusion_containers.insert(name);
24✔
316
            }
24✔
317
        }
24✔
318
        if (first_inputs.contains(name) && arg.is_output) {
85✔
319
            return false;
1✔
320
        }
1✔
321
    }
85✔
322
    if (fusion_containers.empty()) {
25✔
323
        return false;
1✔
324
    }
1✔
325
    // Get assumptions for the innermost blocks (includes all enclosing loop bounds)
326
    auto& assumptions_analysis = analysis_manager.get<analysis::AssumptionsAnalysis>();
24✔
327
    auto& producer_assumptions = assumptions_analysis.get(*producer_node);
24✔
328
    auto& consumer_assumptions = assumptions_analysis.get(consumer_body->at(0).first);
24✔
329

330
    // For each fusion container, find the producer memlet and collect unique consumer subsets
331
    auto& first_dataflow = producer_block_ptr->dataflow();
24✔
332
    for (const auto& container : fusion_containers) {
24✔
333
        // Find unique producer in first map (producer)
334
        data_flow::Memlet* producer_memlet = nullptr;
24✔
335

336
        for (auto& node : first_dataflow.nodes()) {
75✔
337
            auto* access = dynamic_cast<data_flow::AccessNode*>(&node);
75✔
338
            if (access == nullptr || access->data() != container) {
75✔
339
                continue;
51✔
340
            }
51✔
341
            if (first_dataflow.in_degree(*access) != 1 || first_dataflow.out_degree(*access) != 0) {
24✔
342
                return false;
×
343
            }
×
344
            auto& iedge = *first_dataflow.in_edges(*access).begin();
24✔
345
            if (iedge.type() != data_flow::MemletType::Computational) {
24✔
346
                return false;
×
347
            }
×
348
            if (producer_memlet != nullptr) {
24✔
349
                return false;
×
350
            }
×
351
            producer_memlet = &iedge;
24✔
352
        }
24✔
353
        if (producer_memlet == nullptr) {
24✔
354
            return false;
×
355
        }
×
356

357
        const auto& producer_subset = producer_memlet->subset();
24✔
358
        if (producer_subset.empty()) {
24✔
359
            return false;
×
360
        }
×
361

362
        // Collect all unique subsets from consumer blocks
363
        // Use a vector of subsets and deduplicate manually
364
        std::vector<data_flow::Subset> unique_subsets;
24✔
365
        for (size_t i = 0; i < consumer_body->size(); ++i) {
48✔
366
            auto* block = dynamic_cast<structured_control_flow::Block*>(&consumer_body->at(i).first);
25✔
367
            if (block == nullptr) {
25✔
368
                return false;
×
369
            }
×
370

371
            auto& dataflow = block->dataflow();
25✔
372
            for (auto& node : dataflow.nodes()) {
86✔
373
                auto* access = dynamic_cast<data_flow::AccessNode*>(&node);
86✔
374
                if (access == nullptr || access->data() != container) {
86✔
375
                    continue;
58✔
376
                }
58✔
377
                if (dataflow.in_degree(*access) != 0 || dataflow.out_degree(*access) == 0) {
28✔
378
                    return false;
×
379
                }
×
380

381
                // Check all read memlets from this access
382
                for (auto& memlet : dataflow.out_edges(*access)) {
29✔
383
                    if (memlet.type() != data_flow::MemletType::Computational) {
29✔
384
                        return false;
×
385
                    }
×
386

387
                    auto& consumer_subset = memlet.subset();
29✔
388
                    if (consumer_subset.size() != producer_subset.size()) {
29✔
389
                        return false; // Dimension mismatch
1✔
390
                    }
1✔
391

392
                    // Check if this subset is already in unique_subsets
393
                    bool found = false;
28✔
394
                    for (const auto& existing : unique_subsets) {
28✔
395
                        if (existing.size() != consumer_subset.size()) continue;
6✔
396
                        bool match = true;
6✔
397
                        for (size_t d = 0; d < existing.size(); ++d) {
8✔
398
                            if (!symbolic::eq(existing[d], consumer_subset[d])) {
6✔
399
                                match = false;
4✔
400
                                break;
4✔
401
                            }
4✔
402
                        }
6✔
403
                        if (match) {
6✔
404
                            found = true;
2✔
405
                            break;
2✔
406
                        }
2✔
407
                    }
6✔
408
                    if (!found) {
28✔
409
                        unique_subsets.push_back(consumer_subset);
26✔
410
                    }
26✔
411
                }
28✔
412
            }
28✔
413
        }
25✔
414

415
        // For each unique consumer subset, solve index mappings and create a FusionCandidate
416
        for (const auto& consumer_subset : unique_subsets) {
26✔
417
            auto mappings = solve_subsets(
26✔
418
                producer_subset,
26✔
419
                consumer_subset,
26✔
420
                producer_loops,
26✔
421
                consumer_loops,
26✔
422
                producer_assumptions,
26✔
423
                consumer_assumptions
26✔
424
            );
26✔
425
            if (mappings.empty()) {
26✔
426
                return false;
4✔
427
            }
4✔
428

429
            FusionCandidate candidate;
22✔
430
            candidate.container = container;
22✔
431
            candidate.consumer_subset = consumer_subset;
22✔
432
            candidate.index_mappings = std::move(mappings);
22✔
433

434
            fusion_candidates_.push_back(candidate);
22✔
435
        }
22✔
436
    }
23✔
437

438
    // Criterion: At least one valid fusion candidate
439
    return !fusion_candidates_.empty();
19✔
440
}
24✔
441

442
void MapFusion::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
11✔
443
    auto& sdfg = builder.subject();
11✔
444

445
    // Navigate to the innermost block of the first map (handling nested maps)
446
    structured_control_flow::ControlFlowNode* first_block_node = &first_map_.root().at(0).first;
11✔
447
    while (auto* nested_map = dynamic_cast<structured_control_flow::Map*>(first_block_node)) {
13✔
448
        first_block_node = &nested_map->root().at(0).first;
2✔
449
    }
2✔
450
    auto* first_block = dynamic_cast<structured_control_flow::Block*>(first_block_node);
11✔
451
    auto& first_dataflow = first_block->dataflow();
11✔
452

453
    // Navigate to the innermost consumer sequence (handling nested loops)
454
    structured_control_flow::Sequence* second_root = &second_loop_.root();
11✔
455
    structured_control_flow::ControlFlowNode* consumer_node = &second_loop_.root().at(0).first;
11✔
456
    while (auto* nested = dynamic_cast<structured_control_flow::StructuredLoop*>(consumer_node)) {
13✔
457
        second_root = &nested->root();
2✔
458
        consumer_node = &nested->root().at(0).first;
2✔
459
    }
2✔
460

461
    // For each fusion candidate (unique container+subset pair), create a temp and insert a producer block
462
    // Track: candidate_index -> temp_name
463
    std::vector<std::string> candidate_temps;
11✔
464

465
    for (size_t cand_idx = 0; cand_idx < fusion_candidates_.size(); ++cand_idx) {
23✔
466
        auto& candidate = fusion_candidates_[cand_idx];
12✔
467

468
        // Create a temp scalar for this candidate
469
        auto& container_type = sdfg.type(candidate.container);
12✔
470
        std::string temp_name = builder.find_new_name("_fused_tmp");
12✔
471
        types::Scalar tmp_type(container_type.primitive_type());
12✔
472
        builder.add_container(temp_name, tmp_type);
12✔
473
        candidate_temps.push_back(temp_name);
12✔
474

475
        // Insert a producer block at the beginning of the consumer's innermost body
476
        auto& first_child = second_root->at(0).first;
12✔
477
        control_flow::Assignments empty_assignments;
12✔
478
        auto& producer_block = builder.add_block_before(*second_root, first_child, empty_assignments);
12✔
479

480
        // Deep copy all nodes from first block to producer block
481
        std::unordered_map<const data_flow::DataFlowNode*, data_flow::DataFlowNode*> node_mapping;
12✔
482
        for (auto& node : first_dataflow.nodes()) {
39✔
483
            node_mapping[&node] = &builder.copy_node(producer_block, node);
39✔
484
            auto access = node_mapping[&node];
39✔
485
            if (auto* access_node = dynamic_cast<data_flow::AccessNode*>(access)) {
39✔
486
                if (access_node->data() == candidate.container) {
27✔
487
                    // This is the producer access for this candidate - update to temp
488
                    access_node->data(temp_name);
12✔
489
                }
12✔
490
            }
27✔
491
        }
39✔
492

493
        // Add memlets with index substitution using this candidate's index_mappings
494
        for (auto& edge : first_dataflow.edges()) {
27✔
495
            auto& src_node = edge.src();
27✔
496
            auto& dst_node = edge.dst();
27✔
497

498
            // Substitute all producer indvars in subset
499
            const types::IType* base_type = &edge.base_type();
27✔
500
            data_flow::Subset new_subset;
27✔
501
            for (const auto& dim : edge.subset()) {
28✔
502
                auto new_dim = dim;
28✔
503
                for (const auto& [pvar, mapping] : candidate.index_mappings) {
36✔
504
                    new_dim = symbolic::subs(new_dim, pvar, mapping);
36✔
505
                }
36✔
506
                new_dim = symbolic::expand(new_dim);
28✔
507
                new_subset.push_back(new_dim);
28✔
508
            }
28✔
509

510
            // For output edges (to temp scalar), use empty subset
511
            auto* dst_access = dynamic_cast<data_flow::AccessNode*>(&dst_node);
27✔
512
            if (dst_access != nullptr && dst_access->data() == candidate.container &&
27✔
513
                first_dataflow.in_degree(*dst_access) > 0) {
27✔
514
                new_subset.clear(); // Scalar has empty subset
12✔
515
                base_type = &tmp_type;
12✔
516
            }
12✔
517

518
            builder.add_memlet(
27✔
519
                producer_block,
27✔
520
                *node_mapping[&src_node],
27✔
521
                edge.src_conn(),
27✔
522
                *node_mapping[&dst_node],
27✔
523
                edge.dst_conn(),
27✔
524
                new_subset,
27✔
525
                *base_type,
27✔
526
                edge.debug_info()
27✔
527
            );
27✔
528
        }
27✔
529
    }
12✔
530

531
    // Now update all read accesses in consumer blocks to point to the appropriate temp
532
    // We need to match each access node's memlet subset to find the right candidate
533
    size_t num_producer_blocks = fusion_candidates_.size();
11✔
534

535
    for (size_t block_idx = num_producer_blocks; block_idx < second_root->size(); ++block_idx) {
23✔
536
        auto* block = dynamic_cast<structured_control_flow::Block*>(&second_root->at(block_idx).first);
12✔
537
        if (block == nullptr) {
12✔
538
            continue;
×
539
        }
×
540

541
        auto& dataflow = block->dataflow();
12✔
542

543
        // Find all read access nodes
544
        for (auto& node : dataflow.nodes()) {
42✔
545
            auto* access = dynamic_cast<data_flow::AccessNode*>(&node);
42✔
546
            if (access == nullptr) {
42✔
547
                continue;
13✔
548
            }
13✔
549

550
            // Only update read accesses (out_degree > 0)
551
            if (dataflow.out_degree(*access) == 0) {
29✔
552
                continue;
13✔
553
            }
13✔
554

555
            // Capture original container name before any modifications
556
            std::string original_container = access->data();
16✔
557

558
            // Check each outgoing memlet to find which candidates match
559
            for (auto& memlet : dataflow.out_edges(*access)) {
17✔
560
                if (memlet.type() != data_flow::MemletType::Computational) {
17✔
561
                    continue;
×
562
                }
×
563

564
                const auto& memlet_subset = memlet.subset();
17✔
565

566
                // Find matching candidate by container and subset
567
                for (size_t cand_idx = 0; cand_idx < fusion_candidates_.size(); ++cand_idx) {
21✔
568
                    auto& candidate = fusion_candidates_[cand_idx];
18✔
569

570
                    if (original_container != candidate.container) {
18✔
571
                        continue;
3✔
572
                    }
3✔
573

574
                    // Check if subset matches
575
                    if (memlet_subset.size() != candidate.consumer_subset.size()) {
15✔
576
                        continue;
×
577
                    }
×
578

579
                    bool subset_matches = true;
15✔
580
                    for (size_t d = 0; d < memlet_subset.size(); ++d) {
31✔
581
                        if (!symbolic::eq(memlet_subset[d], candidate.consumer_subset[d])) {
17✔
582
                            subset_matches = false;
1✔
583
                            break;
1✔
584
                        }
1✔
585
                    }
17✔
586

587
                    if (!subset_matches) {
15✔
588
                        continue;
1✔
589
                    }
1✔
590

591
                    // Found a match - update the access node and memlet
592
                    const auto& temp_name = candidate_temps[cand_idx];
14✔
593
                    auto& temp_type = sdfg.type(temp_name);
14✔
594

595
                    access->data(temp_name);
14✔
596

597
                    // Update this memlet
598
                    memlet.set_subset({});
14✔
599
                    memlet.set_base_type(temp_type);
14✔
600

601
                    // Also update any incoming edges
602
                    for (auto& in_edge : dataflow.in_edges(*access)) {
14✔
603
                        in_edge.set_subset({});
×
604
                        in_edge.set_base_type(temp_type);
×
605
                    }
×
606

607
                    break; // Found the matching candidate
14✔
608
                }
15✔
609
            }
17✔
610
        }
16✔
611
    }
12✔
612

613
    analysis_manager.invalidate_all();
11✔
614
    applied_ = true;
11✔
615
}
11✔
616

617
void MapFusion::to_json(nlohmann::json& j) const {
1✔
618
    std::string second_type = "for";
1✔
619
    if (dynamic_cast<structured_control_flow::Map*>(&second_loop_) != nullptr) {
1✔
620
        second_type = "map";
1✔
621
    }
1✔
622
    j["transformation_type"] = this->name();
1✔
623
    j["subgraph"] = {
1✔
624
        {"0", {{"element_id", first_map_.element_id()}, {"type", "map"}}},
1✔
625
        {"1", {{"element_id", second_loop_.element_id()}, {"type", second_type}}}
1✔
626
    };
1✔
627
}
1✔
628

629
MapFusion MapFusion::from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& desc) {
1✔
630
    auto first_map_id = desc["subgraph"]["0"]["element_id"].get<size_t>();
1✔
631
    auto second_loop_id = desc["subgraph"]["1"]["element_id"].get<size_t>();
1✔
632

633
    auto first_element = builder.find_element_by_id(first_map_id);
1✔
634
    auto second_element = builder.find_element_by_id(second_loop_id);
1✔
635

636
    if (first_element == nullptr) {
1✔
637
        throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(first_map_id) + " not found.");
×
638
    }
×
639
    if (second_element == nullptr) {
1✔
640
        throw InvalidTransformationDescriptionException(
×
641
            "Element with ID " + std::to_string(second_loop_id) + " not found."
×
642
        );
×
643
    }
×
644

645
    auto* first_map = dynamic_cast<structured_control_flow::Map*>(first_element);
1✔
646
    auto* second_loop = dynamic_cast<structured_control_flow::StructuredLoop*>(second_element);
1✔
647

648
    if (first_map == nullptr) {
1✔
649
        throw InvalidTransformationDescriptionException(
×
650
            "Element with ID " + std::to_string(first_map_id) + " is not a Map."
×
651
        );
×
652
    }
×
653
    if (second_loop == nullptr) {
1✔
654
        throw InvalidTransformationDescriptionException(
×
655
            "Element with ID " + std::to_string(second_loop_id) + " is not a StructuredLoop."
×
656
        );
×
657
    }
×
658

659
    return MapFusion(*first_map, *second_loop);
1✔
660
}
1✔
661

662
} // namespace transformations
663
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc