• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 22113548439

17 Feb 2026 07:58PM UTC coverage: 66.495% (+0.05%) from 66.441%
22113548439

Pull #530

github

web-flow
Merge e83554cfd into 3b93d04b5
Pull Request #530: adds map fusion transformation

248 of 343 new or added lines in 4 files covered. (72.3%)

2 existing lines in 1 file now uncovered.

23829 of 35836 relevant lines covered (66.49%)

374.66 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

76.68
/sdfg/src/transformations/map_fusion.cpp
1
#include "sdfg/transformations/map_fusion.h"
2

3
#include "sdfg/analysis/arguments_analysis.h"
4
#include "sdfg/analysis/scope_analysis.h"
5
#include "sdfg/analysis/users.h"
6
#include "sdfg/control_flow/interstate_edge.h"
7
#include "sdfg/data_flow/data_flow_graph.h"
8
#include "sdfg/structured_control_flow/block.h"
9
#include "sdfg/structured_control_flow/for.h"
10
#include "sdfg/symbolic/polynomials.h"
11

12
namespace sdfg {
13
namespace transformations {
14

15
MapFusion::MapFusion(structured_control_flow::Map& first_map, structured_control_flow::StructuredLoop& second_loop)
16
    : first_map_(first_map), second_loop_(second_loop) {}
16✔
17

18
std::string MapFusion::name() const { return "MapFusion"; }
2✔
19

20
bool MapFusion::can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
14✔
21
    fusion_candidates_.clear();
14✔
22

23
    // Criterion: Get parent scope and verify both loops are sequential children
24
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
14✔
25
    auto* first_parent = scope_analysis.parent_scope(&first_map_);
14✔
26
    auto* second_parent = scope_analysis.parent_scope(&second_loop_);
14✔
27

28
    if (first_parent == nullptr || second_parent == nullptr) {
14✔
NEW
29
        return false;
×
NEW
30
    }
×
31

32
    // Both must have the same parent sequence
33
    if (first_parent != second_parent) {
14✔
NEW
34
        return false;
×
NEW
35
    }
×
36

37
    auto* parent_sequence = dynamic_cast<structured_control_flow::Sequence*>(first_parent);
14✔
38
    if (parent_sequence == nullptr) {
14✔
NEW
39
        return false;
×
NEW
40
    }
×
41

42
    // Criterion: first_map must immediately precede second_loop in the sequence
43
    int first_index = parent_sequence->index(first_map_);
14✔
44
    int second_index = parent_sequence->index(second_loop_);
14✔
45

46
    if (first_index == -1 || second_index == -1) {
14✔
NEW
47
        return false;
×
NEW
48
    }
×
49

50
    if (second_index != first_index + 1) {
14✔
51
        return false;
1✔
52
    }
1✔
53

54
    // Criterion: Transition between maps should have no assignments
55
    auto& transition = parent_sequence->at(first_index).second;
13✔
56
    if (!transition.empty()) {
13✔
NEW
57
        return false;
×
NEW
58
    }
×
59

60
    // Criterion: First map must have simple body (single block)
61
    // This ensures no WAR/WAW hazards when replicating the producer
62
    if (first_map_.root().size() != 1) {
13✔
NEW
63
        return false;
×
NEW
64
    }
×
65

66
    auto* first_block = dynamic_cast<structured_control_flow::Block*>(&first_map_.root().at(0).first);
13✔
67
    if (first_block == nullptr) {
13✔
NEW
68
        return false;
×
NEW
69
    }
×
70

71
    // Criterion: First block's transition should be empty
72
    if (!first_map_.root().at(0).second.empty()) {
13✔
NEW
73
        return false;
×
NEW
74
    }
×
75

76
    // Get arguments analysis to identify inputs/outputs of each loop
77
    auto& arguments_analysis = analysis_manager.get<analysis::ArgumentsAnalysis>();
13✔
78
    auto first_args = arguments_analysis.arguments(analysis_manager, first_map_);
13✔
79
    auto second_args = arguments_analysis.arguments(analysis_manager, second_loop_);
13✔
80

81
    // Find containers that are outputs of first map and inputs of second map
82
    std::unordered_set<std::string> first_outputs;
13✔
83
    for (const auto& [name, arg] : first_args) {
39✔
84
        if (arg.is_output) {
39✔
85
            first_outputs.insert(name);
13✔
86
        }
13✔
87
    }
39✔
88

89
    std::unordered_set<std::string> fusion_containers;
13✔
90
    for (const auto& [name, arg] : second_args) {
40✔
91
        if (arg.is_input && first_outputs.contains(name)) {
40✔
92
            // Criterion: Skip scalars - only fuse array/pointer accesses
93
            if (arg.is_scalar) {
12✔
NEW
94
                continue;
×
NEW
95
            }
×
96
            fusion_containers.insert(name);
12✔
97
        }
12✔
98
    }
40✔
99

100
    if (fusion_containers.empty()) {
13✔
101
        return false;
1✔
102
    }
1✔
103

104
    // Analyze memory access patterns for each fusion candidate
105
    auto& first_dataflow = first_block->dataflow();
12✔
106

107
    auto first_indvar = first_map_.indvar();
12✔
108
    auto second_indvar = second_loop_.indvar();
12✔
109

110
    // For each fusion container, check if we can solve the access equation
111
    for (const auto& container : fusion_containers) {
12✔
112
        // Find write accesses in first map
113
        data_flow::AccessNode* producer_access = nullptr;
12✔
114
        data_flow::Memlet* producer_memlet = nullptr;
12✔
115

116
        for (auto& node : first_dataflow.nodes()) {
22✔
117
            auto* access = dynamic_cast<data_flow::AccessNode*>(&node);
22✔
118
            if (access == nullptr || access->data() != container) {
22✔
119
                continue;
10✔
120
            }
10✔
121

122
            // Check if this is a write (has incoming edges)
123
            if (first_dataflow.in_degree(*access) > 0) {
12✔
124
                // Get the write memlet
125
                for (auto& memlet : first_dataflow.in_edges(*access)) {
12✔
126
                    if (memlet.type() == data_flow::MemletType::Computational) {
12✔
127
                        producer_access = access;
12✔
128
                        producer_memlet = &memlet;
12✔
129
                        break;
12✔
130
                    }
12✔
131
                }
12✔
132
                if (producer_access != nullptr) {
12✔
133
                    break;
12✔
134
                }
12✔
135
            }
12✔
136
        }
12✔
137

138
        if (producer_access == nullptr || producer_memlet == nullptr) {
12✔
NEW
139
            continue; // No valid producer found for this container
×
NEW
140
        }
×
141

142
        // Find read accesses in any block of the second loop
143
        data_flow::AccessNode* consumer_access = nullptr;
12✔
144
        data_flow::Memlet* consumer_memlet = nullptr;
12✔
145
        structured_control_flow::Block* consumer_block = nullptr;
12✔
146

147
        for (size_t i = 0; i < second_loop_.root().size(); ++i) {
12✔
148
            auto* block = dynamic_cast<structured_control_flow::Block*>(&second_loop_.root().at(i).first);
12✔
149
            if (block == nullptr) {
12✔
NEW
150
                continue;
×
NEW
151
            }
×
152

153
            auto& second_dataflow = block->dataflow();
12✔
154
            for (auto& node : second_dataflow.nodes()) {
42✔
155
                auto* access = dynamic_cast<data_flow::AccessNode*>(&node);
42✔
156
                if (access == nullptr || access->data() != container) {
42✔
157
                    continue;
30✔
158
                }
30✔
159

160
                // Check if this is a read (has outgoing edges)
161
                if (second_dataflow.out_degree(*access) > 0) {
12✔
162
                    // Get the read memlet
163
                    for (auto& memlet : second_dataflow.out_edges(*access)) {
12✔
164
                        if (memlet.type() == data_flow::MemletType::Computational) {
12✔
165
                            consumer_access = access;
12✔
166
                            consumer_memlet = &memlet;
12✔
167
                            consumer_block = block;
12✔
168
                            break;
12✔
169
                        }
12✔
170
                    }
12✔
171
                    if (consumer_access != nullptr) {
12✔
172
                        break;
12✔
173
                    }
12✔
174
                }
12✔
175
            }
12✔
176
            if (consumer_access != nullptr) {
12✔
177
                break;
12✔
178
            }
12✔
179
        }
12✔
180

181
        if (consumer_access == nullptr || consumer_memlet == nullptr) {
12✔
NEW
182
            continue; // No valid consumer found for this container
×
NEW
183
        }
×
184

185
        // Extract subsets
186
        const auto& producer_subset = producer_memlet->subset();
12✔
187
        const auto& consumer_subset = consumer_memlet->subset();
12✔
188

189
        // Criterion: Subsets must have the same dimensionality
190
        if (producer_subset.size() != consumer_subset.size()) {
12✔
NEW
191
            continue;
×
NEW
192
        }
×
193

194
        // Criterion: For now, focus on 1D accesses (simple case)
195
        if (producer_subset.size() != 1) {
12✔
NEW
196
            continue;
×
NEW
197
        }
×
198

199
        // Solve the affine equation: producer_subset[first_indvar] = consumer_subset[second_indvar]
200
        // We need to find first_indvar in terms of second_indvar
201
        //
202
        // Given: producer_subset = a * first_indvar + b
203
        //        consumer_subset = c * second_indvar + d
204
        // If they access the same element: a * first_indvar + b = c * second_indvar + d
205
        // Solve for first_indvar: first_indvar = (c * second_indvar + d - b) / a
206

207
        auto producer_expr = producer_subset[0];
12✔
208
        auto consumer_expr = consumer_subset[0];
12✔
209

210
        // Extract affine coefficients for producer
211
        symbolic::SymbolVec producer_symbols = {first_indvar};
12✔
212
        auto producer_poly = symbolic::polynomial(producer_expr, producer_symbols);
12✔
213
        if (producer_poly.is_null()) {
12✔
NEW
214
            continue; // Not a polynomial
×
NEW
215
        }
×
216
        auto producer_coeffs = symbolic::affine_coefficients(producer_poly, producer_symbols);
12✔
217
        if (producer_coeffs.empty()) {
12✔
NEW
218
            continue; // Not affine
×
NEW
219
        }
×
220

221
        // Check that the coefficient of first_indvar is non-zero
222
        auto first_coeff = producer_coeffs[first_indvar];
12✔
223
        if (symbolic::eq(first_coeff, symbolic::zero())) {
12✔
NEW
224
            continue; // first_indvar doesn't appear in producer expression
×
NEW
225
        }
×
226

227
        // Compute the inverse: first_indvar = (consumer_expr - producer_constant) / first_coeff
228
        auto producer_constant = producer_coeffs[symbolic::symbol("__daisy_constant__")];
12✔
229

230
        // index_mapping = (consumer_expr - producer_constant) / first_coeff
231
        auto numerator = symbolic::sub(consumer_expr, producer_constant);
12✔
232
        auto index_mapping = symbolic::div(numerator, first_coeff);
12✔
233

234
        // Simplify and check that it only uses second_indvar and constants
235
        index_mapping = symbolic::expand(index_mapping);
12✔
236

237
        // Verify the mapping is valid (contains only second_indvar and constants)
238
        bool valid_mapping = true;
12✔
239
        for (const auto& atom : symbolic::atoms(index_mapping)) {
12✔
240
            std::string name = atom->get_name();
12✔
241
            if (name != second_indvar->get_name()) {
12✔
242
                // Check if it's a parameter (constant for both loops)
NEW
243
                bool is_param = false;
×
NEW
244
                for (const auto& [arg_name, arg] : second_args) {
×
NEW
245
                    if (arg_name == name && arg.is_scalar && !arg.is_output) {
×
NEW
246
                        is_param = true;
×
NEW
247
                        break;
×
NEW
248
                    }
×
NEW
249
                }
×
250
                // Also check if it's in first_args as input scalar
NEW
251
                if (!is_param) {
×
NEW
252
                    for (const auto& [arg_name, arg] : first_args) {
×
NEW
253
                        if (arg_name == name && arg.is_scalar && !arg.is_output) {
×
NEW
254
                            is_param = true;
×
NEW
255
                            break;
×
NEW
256
                        }
×
NEW
257
                    }
×
NEW
258
                }
×
NEW
259
                if (!is_param) {
×
NEW
260
                    valid_mapping = false;
×
NEW
261
                    break;
×
NEW
262
                }
×
NEW
263
            }
×
264
        }
12✔
265

266
        if (!valid_mapping) {
12✔
NEW
267
            continue;
×
NEW
268
        }
×
269

270
        // Additional check: verify that the mapping results in integer indices
271
        // For now, we require that the coefficient divides evenly
272
        // This is a simplified check - a more complete implementation would use ISL
273

274
        // Store the fusion candidate
275
        FusionCandidate candidate;
12✔
276
        candidate.container = container;
12✔
277
        candidate.consumer_access = consumer_access;
12✔
278
        candidate.producer_access = producer_access;
12✔
279
        candidate.consumer_memlet = consumer_memlet;
12✔
280
        candidate.producer_memlet = producer_memlet;
12✔
281
        candidate.consumer_block = consumer_block;
12✔
282
        candidate.index_mapping = index_mapping;
12✔
283

284
        fusion_candidates_.push_back(candidate);
12✔
285
    }
12✔
286

287
    // Criterion: At least one valid fusion candidate
288
    return !fusion_candidates_.empty();
12✔
289
}
13✔
290

291
void MapFusion::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
7✔
292
    auto& sdfg = builder.subject();
7✔
293

294
    // Get the producer block from first map
295
    auto* first_block = dynamic_cast<structured_control_flow::Block*>(&first_map_.root().at(0).first);
7✔
296
    auto& first_dataflow = first_block->dataflow();
7✔
297

298
    auto first_indvar = first_map_.indvar();
7✔
299

300
    // Insert a new producer block at the beginning of the second loop's body
301
    auto& second_root = second_loop_.root();
7✔
302
    auto& first_child = second_root.at(0).first;
7✔
303
    control_flow::Assignments empty_assignments;
7✔
304
    auto& producer_block = builder.add_block_before(second_root, first_child, empty_assignments);
7✔
305

306
    // Build a set of containers that are written to in the first block (outputs)
307
    std::unordered_set<std::string> output_containers;
7✔
308
    for (auto node : first_dataflow.data_nodes()) {
16✔
309
        if (dynamic_cast<data_flow::ConstantNode*>(node) != nullptr) {
16✔
310
            continue; // Skip constants
2✔
311
        }
2✔
312
        if (first_dataflow.in_degree(*node) > 0) {
14✔
313
            output_containers.insert(node->data());
7✔
314
        }
7✔
315
    }
14✔
316

317
    // Create temporary scalars for each output container
318
    std::unordered_map<std::string, std::string> container_to_temp;
7✔
319

320
    for (const auto& container : output_containers) {
7✔
321
        auto& type = sdfg.type(container);
7✔
322
        std::string temp_name = builder.find_new_name("_fused_tmp");
7✔
323
        types::Scalar tmp_type(type.primitive_type());
7✔
324
        builder.add_container(temp_name, tmp_type);
7✔
325
        container_to_temp[container] = temp_name;
7✔
326
    }
7✔
327

328
    // Deep copy all nodes from first block to producer block
329
    std::unordered_map<const data_flow::DataFlowNode*, data_flow::DataFlowNode*> node_mapping;
7✔
330

331
    for (auto& node : first_dataflow.nodes()) {
23✔
332
        auto* access = dynamic_cast<data_flow::AccessNode*>(&node);
23✔
333
        if (access != nullptr) {
23✔
334
            // Check if this is an output access node (written to)
335
            if (first_dataflow.in_degree(*access) > 0 && container_to_temp.contains(access->data())) {
16✔
336
                // Replace with temp scalar access
337
                auto& temp_access = builder.add_access(producer_block, container_to_temp[access->data()]);
7✔
338
                node_mapping[&node] = &temp_access;
7✔
339
            } else {
9✔
340
                // Copy the access node as-is
341
                node_mapping[&node] = &builder.copy_node(producer_block, node);
9✔
342
            }
9✔
343
        } else {
16✔
344
            // Copy other nodes (tasklets, library nodes, etc.)
345
            node_mapping[&node] = &builder.copy_node(producer_block, node);
7✔
346
        }
7✔
347
    }
23✔
348

349
    // Add memlets with index substitution
350
    for (auto& edge : first_dataflow.edges()) {
16✔
351
        auto& src_node = edge.src();
16✔
352
        auto& dst_node = edge.dst();
16✔
353

354
        // Substitute indices in subset
355
        data_flow::Subset new_subset;
16✔
356
        for (const auto& dim : edge.subset()) {
16✔
357
            // For each fusion candidate, use its index_mapping
358
            auto new_dim = dim;
14✔
359
            for (auto& candidate : fusion_candidates_) {
14✔
360
                new_dim = symbolic::subs(new_dim, first_indvar, candidate.index_mapping);
14✔
361
            }
14✔
362
            new_dim = symbolic::expand(new_dim);
14✔
363
            new_subset.push_back(new_dim);
14✔
364
        }
14✔
365

366
        // For output edges (to temp scalars), use empty subset
367
        auto* dst_access = dynamic_cast<data_flow::AccessNode*>(&dst_node);
16✔
368
        if (dst_access != nullptr && container_to_temp.contains(dst_access->data())) {
16✔
369
            new_subset.clear(); // Scalar has empty subset
7✔
370
        }
7✔
371

372
        builder.add_memlet(
16✔
373
            producer_block,
16✔
374
            *node_mapping[&src_node],
16✔
375
            edge.src_conn(),
16✔
376
            *node_mapping[&dst_node],
16✔
377
            edge.dst_conn(),
16✔
378
            new_subset,
16✔
379
            edge.base_type(),
16✔
380
            edge.debug_info()
16✔
381
        );
16✔
382
    }
16✔
383

384
    // In the consumer blocks, replace reads from intermediate containers with reads from temp scalars
385
    for (auto& candidate : fusion_candidates_) {
7✔
386
        auto& consumer_block = *candidate.consumer_block;
7✔
387
        auto& consumer_dataflow = consumer_block.dataflow();
7✔
388
        const auto& temp_name = container_to_temp[candidate.container];
7✔
389
        auto& temp_type = sdfg.type(temp_name);
7✔
390

391
        auto* consumer_access = candidate.consumer_access;
7✔
392

393
        // Rename the access node to point to the temp scalar
394
        consumer_access->data(temp_name);
7✔
395

396
        // Update all outgoing edges: set empty subset (scalar) and new type
397
        for (auto& edge : consumer_dataflow.out_edges(*consumer_access)) {
8✔
398
            edge.set_subset({});
8✔
399
            edge.set_base_type(temp_type);
8✔
400
        }
8✔
401

402
        // Update all incoming edges: set empty subset (scalar) and new type
403
        for (auto& edge : consumer_dataflow.in_edges(*consumer_access)) {
7✔
NEW
404
            edge.set_subset({});
×
NEW
405
            edge.set_base_type(temp_type);
×
NEW
406
        }
×
407
    }
7✔
408

409
    analysis_manager.invalidate_all();
7✔
410
    applied_ = true;
7✔
411
}
7✔
412

413
void MapFusion::to_json(nlohmann::json& j) const {
1✔
414
    std::string second_type = "for";
1✔
415
    if (dynamic_cast<structured_control_flow::Map*>(&second_loop_) != nullptr) {
1✔
416
        second_type = "map";
1✔
417
    }
1✔
418
    j["transformation_type"] = this->name();
1✔
419
    j["subgraph"] = {
1✔
420
        {"0", {{"element_id", first_map_.element_id()}, {"type", "map"}}},
1✔
421
        {"1", {{"element_id", second_loop_.element_id()}, {"type", second_type}}}
1✔
422
    };
1✔
423
}
1✔
424

425
MapFusion MapFusion::from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& desc) {
1✔
426
    auto first_map_id = desc["subgraph"]["0"]["element_id"].get<size_t>();
1✔
427
    auto second_loop_id = desc["subgraph"]["1"]["element_id"].get<size_t>();
1✔
428

429
    auto first_element = builder.find_element_by_id(first_map_id);
1✔
430
    auto second_element = builder.find_element_by_id(second_loop_id);
1✔
431

432
    if (first_element == nullptr) {
1✔
NEW
433
        throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(first_map_id) + " not found.");
×
NEW
434
    }
×
435
    if (second_element == nullptr) {
1✔
NEW
436
        throw InvalidTransformationDescriptionException(
×
NEW
437
            "Element with ID " + std::to_string(second_loop_id) + " not found."
×
NEW
438
        );
×
NEW
439
    }
×
440

441
    auto* first_map = dynamic_cast<structured_control_flow::Map*>(first_element);
1✔
442
    auto* second_loop = dynamic_cast<structured_control_flow::StructuredLoop*>(second_element);
1✔
443

444
    if (first_map == nullptr) {
1✔
NEW
445
        throw InvalidTransformationDescriptionException(
×
NEW
446
            "Element with ID " + std::to_string(first_map_id) + " is not a Map."
×
NEW
447
        );
×
NEW
448
    }
×
449
    if (second_loop == nullptr) {
1✔
NEW
450
        throw InvalidTransformationDescriptionException(
×
NEW
451
            "Element with ID " + std::to_string(second_loop_id) + " is not a StructuredLoop."
×
NEW
452
        );
×
NEW
453
    }
×
454

455
    return MapFusion(*first_map, *second_loop);
1✔
456
}
1✔
457

458
} // namespace transformations
459
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc