• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 28106147644

24 Jun 2026 02:32PM UTC coverage: 61.922% (+0.1%) from 61.779%
28106147644

Pull #806

github

web-flow
Merge 2be414d54 into 57cc1db99
Pull Request #806: Map Collapse for Multiple targets in a neste sequence

165 of 185 new or added lines in 2 files covered. (89.19%)

419 existing lines in 30 files now uncovered.

37705 of 60891 relevant lines covered (61.92%)

1004.4 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

83.29
/opt/src/transformations/tile_fusion.cpp
1
#include "sdfg/transformations/tile_fusion.h"
2

3
#include <climits>
4
#include <cmath>
5
#include <stdexcept>
6

7
#include <symengine/solve.h>
8

9
#include "sdfg/analysis/arguments_analysis.h"
10
#include "sdfg/analysis/assumptions_analysis.h"
11
#include "sdfg/analysis/loop_analysis.h"
12
#include "sdfg/analysis/memory_layout_analysis.h"
13
#include "sdfg/analysis/users.h"
14
#include "sdfg/builder/structured_sdfg_builder.h"
15
#include "sdfg/data_flow/data_flow_graph.h"
16
#include "sdfg/structured_control_flow/block.h"
17
#include "sdfg/structured_control_flow/for.h"
18
#include "sdfg/structured_control_flow/if_else.h"
19
#include "sdfg/structured_control_flow/map.h"
20
#include "sdfg/symbolic/conjunctive_normal_form.h"
21
#include "sdfg/symbolic/delinearization.h"
22
#include "sdfg/symbolic/polynomials.h"
23
#include "sdfg/symbolic/symbolic.h"
24
#include "sdfg/symbolic/utils.h"
25
#include "sdfg/types/array.h"
26
#include "sdfg/types/pointer.h"
27
#include "sdfg/types/scalar.h"
28

29
namespace sdfg {
30
namespace transformations {
31

32
namespace {
33

34
/// Collect all Block nodes reachable from a Sequence, including through nested For/Map loops.
35
void collect_blocks(structured_control_flow::Sequence& seq, std::vector<structured_control_flow::Block*>& blocks) {
32✔
36
    for (size_t i = 0; i < seq.size(); ++i) {
102✔
37
        auto& child = seq.at(i).first;
70✔
38
        if (auto* block = dynamic_cast<structured_control_flow::Block*>(&child)) {
70✔
39
            blocks.push_back(block);
62✔
40
        } else if (auto* loop = dynamic_cast<structured_control_flow::StructuredLoop*>(&child)) {
62✔
41
            collect_blocks(loop->root(), blocks);
8✔
42
        }
8✔
43
    }
70✔
44
}
32✔
45

46
/// Get the first block from a sequence, recursively descending into loops.
47
structured_control_flow::Block* get_first_block(structured_control_flow::Sequence& seq) {
32✔
48
    for (size_t i = 0; i < seq.size(); ++i) {
32✔
49
        auto& child = seq.at(i).first;
32✔
50
        if (auto* block = dynamic_cast<structured_control_flow::Block*>(&child)) {
32✔
51
            return block;
24✔
52
        } else if (auto* loop = dynamic_cast<structured_control_flow::StructuredLoop*>(&child)) {
24✔
53
            auto* result = get_first_block(loop->root());
8✔
54
            if (result) return result;
8✔
55
        }
8✔
56
    }
32✔
57
    return nullptr;
×
58
}
32✔
59

60
} // namespace
61

62
TileFusion::TileFusion(structured_control_flow::Map& first_map, structured_control_flow::Map& second_map)
63
    : first_map_(first_map), second_map_(second_map) {}
16✔
64

65
std::string TileFusion::name() const { return "TileFusion"; }
6✔
66

67
int TileFusion::compute_radius(
68
    const data_flow::Subset& producer_write_subset,
69
    const std::vector<data_flow::Subset>& consumer_read_subsets,
70
    const std::vector<structured_control_flow::StructuredLoop*>& producer_loops,
71
    const std::vector<structured_control_flow::StructuredLoop*>& consumer_loops,
72
    const symbolic::Assumptions& producer_assumptions,
73
    const symbolic::Assumptions& consumer_assumptions
74
) {
12✔
75
    if (producer_loops.empty() || consumer_loops.empty()) {
12✔
76
        return -1;
×
77
    }
×
78

79
    // Delinearize the producer write subset
80
    auto producer_sub = producer_write_subset;
12✔
81
    if (producer_sub.size() == 1) {
12✔
82
        auto producer_result = symbolic::delinearize(producer_write_subset.at(0), producer_assumptions);
9✔
83
        if (producer_result.success) {
9✔
84
            producer_sub = producer_result.indices;
9✔
85
        }
9✔
86
    }
9✔
87

88
    // Extract the innermost producer loop indvar (the spatial iteration variable)
89
    auto* producer_inner = producer_loops.back();
12✔
90
    auto producer_indvar = producer_inner->indvar();
12✔
91

92
    // Extract the innermost consumer loop indvar
93
    auto* consumer_inner = consumer_loops.back();
12✔
94
    auto consumer_indvar = consumer_inner->indvar();
12✔
95

96
    int max_radius = 0;
12✔
97

98
    for (const auto& consumer_read_subset : consumer_read_subsets) {
32✔
99
        auto consumer_sub = consumer_read_subset;
32✔
100
        if (consumer_sub.size() == 1) {
32✔
101
            auto consumer_result = symbolic::delinearize(consumer_read_subset.at(0), consumer_assumptions);
26✔
102
            if (consumer_result.success) {
26✔
103
                consumer_sub = consumer_result.indices;
26✔
104
            }
26✔
105
        }
26✔
106
        if (consumer_sub.size() != producer_sub.size()) {
32✔
107
            return -1;
×
108
        }
×
109

110
        // Solve: producer_sub[d] = consumer_sub[d] for producer_indvar
111
        // This gives us: producer_indvar = f(consumer_indvar)
112
        SymEngine::vec_sym producer_vars;
32✔
113
        producer_vars.push_back(SymEngine::rcp_static_cast<const SymEngine::Symbol>(producer_indvar));
32✔
114

115
        SymEngine::vec_basic equations;
32✔
116
        for (size_t d = 0; d < producer_sub.size(); ++d) {
75✔
117
            auto diff = symbolic::sub(producer_sub.at(d), consumer_sub.at(d));
43✔
118
            if (symbolic::uses(diff, producer_indvar->get_name())) {
43✔
119
                // This dimension depends on the producer indvar — include in system
120
                equations.push_back(diff);
32✔
121
            }
32✔
122
            // Dimensions not involving the producer indvar are independent
123
            // (e.g., inner loop variables in 2D stencils) — skip them.
124
        }
43✔
125

126
        if (equations.size() != producer_vars.size()) {
32✔
127
            return -1;
×
128
        }
×
129

130
        SymEngine::vec_basic solution;
32✔
131
        try {
32✔
132
            solution = SymEngine::linsolve(equations, producer_vars);
32✔
133
        } catch (...) {
32✔
134
            return -1;
×
135
        }
×
136

137
        if (solution.size() != 1) {
32✔
138
            return -1;
×
139
        }
×
140

141
        auto& sol = solution[0];
32✔
142
        if (SymEngine::is_a<SymEngine::NaN>(*sol) || SymEngine::is_a<SymEngine::Infty>(*sol)) {
32✔
143
            return -1;
×
144
        }
×
145

146
        // The solution is: producer_indvar = f(consumer_indvar)
147
        // The offset is: f(consumer_indvar) - consumer_indvar
148
        // For a stencil B[1+i] and consumer reads B[j], B[1+j], B[2+j]:
149
        //   solve 1+i=j   -> i = j-1, offset = -1
150
        //   solve 1+i=1+j -> i = j,   offset = 0
151
        //   solve 1+i=2+j -> i = j+1, offset = +1
152
        auto offset_expr = symbolic::expand(symbolic::sub(sol, consumer_indvar));
32✔
153

154
        // The offset should be a constant integer
155
        if (!SymEngine::is_a<SymEngine::Integer>(*offset_expr)) {
32✔
156
            return -1;
×
157
        }
×
158

159
        int offset = std::abs(static_cast<int>(SymEngine::down_cast<const SymEngine::Integer&>(*offset_expr).as_int()));
32✔
160

161
        if (offset > max_radius) {
32✔
162
            max_radius = offset;
8✔
163
        }
8✔
164
    }
32✔
165

166
    return max_radius;
12✔
167
}
12✔
168

169
bool TileFusion::can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
16✔
170
    candidates_.clear();
16✔
171
    radius_ = 0;
16✔
172

173
    // Get analyses upfront
174
    auto& loop_analysis = analysis_manager.get<analysis::LoopAnalysis>();
16✔
175
    auto& assumptions_analysis = analysis_manager.get<analysis::AssumptionsAnalysis>();
16✔
176
    auto& arguments_analysis = analysis_manager.get<analysis::ArgumentsAnalysis>();
16✔
177

178
    // ==========================================================================
179
    // Criterion 1: Both maps are in the same parent sequence, consecutive
180
    // ==========================================================================
181
    auto* first_parent = first_map_.get_parent();
16✔
182
    auto* second_parent = second_map_.get_parent();
16✔
183
    if (first_parent == nullptr || second_parent == nullptr) {
16✔
184
        return false;
×
185
    }
×
186
    if (first_parent != second_parent) {
16✔
187
        return false;
×
188
    }
×
189

190
    auto* parent_sequence = dynamic_cast<structured_control_flow::Sequence*>(first_parent);
16✔
191
    if (parent_sequence == nullptr) {
16✔
192
        return false;
×
193
    }
×
194

195
    int first_index = parent_sequence->index(first_map_);
16✔
196
    int second_index = parent_sequence->index(second_map_);
16✔
197
    if (first_index == -1 || second_index == -1) {
16✔
198
        return false;
×
199
    }
×
200
    if (second_index != first_index + 1) {
16✔
201
        return false;
1✔
202
    }
1✔
203

204
    // ==========================================================================
205
    // Criterion 2: Transition between them is empty
206
    // ==========================================================================
207
    auto& transition = parent_sequence->at(first_index).second;
15✔
208
    if (!transition.empty()) {
15✔
209
        return false;
×
210
    }
×
211

212
    // ==========================================================================
213
    // Criterion 3: Both are perfectly nested tile structures with Maps
214
    // Structure required: outer tile Map -> inner iteration Map(s) -> Block(s)
215
    // ==========================================================================
216
    auto first_loop_info = loop_analysis.loop_info(&first_map_);
15✔
217
    auto second_loop_info = loop_analysis.loop_info(&second_map_);
15✔
218

219
    // Must be perfectly nested (no side code between loop levels)
220
    if (!first_loop_info.is_perfectly_nested) {
15✔
221
        return false;
×
222
    }
×
223
    if (!second_loop_info.is_perfectly_nested) {
15✔
224
        return false;
×
225
    }
×
226

227
    // Must be all Maps (perfectly parallel) - this is a tile fusion on parallel maps
228
    if (!first_loop_info.is_perfectly_parallel) {
15✔
229
        return false;
×
230
    }
×
231
    if (!second_loop_info.is_perfectly_parallel) {
15✔
232
        return false;
×
233
    }
×
234

235
    // Must have at least depth 2 (outer tile + inner iteration)
236
    if (first_loop_info.max_depth < 2) {
15✔
237
        return false;
×
238
    }
×
239
    if (second_loop_info.max_depth < 2) {
15✔
240
        return false;
×
241
    }
×
242

243
    // The immediate child must be a Map (the inner iteration map)
244
    if (first_map_.root().size() != 1) {
15✔
245
        return false;
×
246
    }
×
247
    auto* first_inner_map = dynamic_cast<structured_control_flow::Map*>(&first_map_.root().at(0).first);
15✔
248
    if (first_inner_map == nullptr) {
15✔
249
        return false;
×
250
    }
×
251

252
    if (second_map_.root().size() != 1) {
15✔
253
        return false;
×
254
    }
×
255
    auto* second_inner_map = dynamic_cast<structured_control_flow::Map*>(&second_map_.root().at(0).first);
15✔
256
    if (second_inner_map == nullptr) {
15✔
257
        return false;
×
258
    }
×
259

260
    // ==========================================================================
261
    // Criterion 4: Compatible tile bounds (same init and stride)
262
    // ==========================================================================
263
    if (!symbolic::eq(first_map_.init(), second_map_.init())) {
15✔
264
        return false;
×
265
    }
×
266

267
    auto first_stride = first_map_.stride();
15✔
268
    auto second_stride = second_map_.stride();
15✔
269
    if (first_stride.is_null() || second_stride.is_null()) {
15✔
270
        return false;
×
271
    }
×
272
    if (!symbolic::eq(first_stride, second_stride)) {
15✔
273
        return false;
1✔
274
    }
1✔
275

276
    // Extract tile size as integer
277
    if (!SymEngine::is_a<SymEngine::Integer>(*first_stride)) {
14✔
278
        return false;
×
279
    }
×
280
    int tile_size = static_cast<int>(SymEngine::down_cast<const SymEngine::Integer&>(*first_stride).as_int());
14✔
281
    if (tile_size <= 0) {
14✔
282
        return false;
×
283
    }
×
284

285
    // ==========================================================================
286
    // Criterion 5: Shared intermediate container exists
287
    // First writes C, second reads C, second does NOT write C
288
    // Also detect cyclic containers: second writes D, first reads D
289
    // ==========================================================================
290
    auto first_args = arguments_analysis.arguments(analysis_manager, first_map_);
14✔
291
    auto second_args = arguments_analysis.arguments(analysis_manager, second_map_);
14✔
292

293
    std::unordered_set<std::string> first_outputs;
14✔
294
    std::unordered_set<std::string> first_inputs;
14✔
295
    for (const auto& [name, arg] : first_args) {
47✔
296
        if (arg.is_output) {
47✔
297
            first_outputs.insert(name);
14✔
298
        }
14✔
299
        if (arg.is_input) {
47✔
300
            first_inputs.insert(name);
47✔
301
        }
47✔
302
    }
47✔
303

304
    std::unordered_set<std::string> shared_containers;
14✔
305
    std::unordered_set<std::string> cyclic_container_names;
14✔
306
    for (const auto& [name, arg] : second_args) {
45✔
307
        if (first_outputs.contains(name)) {
45✔
308
            if (arg.is_output) {
13✔
309
                return false; // Consumer also writes the shared container
1✔
310
            }
1✔
311
            if (arg.is_input) {
12✔
312
                shared_containers.insert(name);
12✔
313
            }
12✔
314
        }
12✔
315
        // Detect cyclic: K2 writes container that K1 reads
316
        if (arg.is_output && first_inputs.contains(name) && !first_outputs.contains(name)) {
44✔
317
            cyclic_container_names.insert(name);
11✔
318
        }
11✔
319
    }
44✔
320
    if (shared_containers.empty()) {
13✔
321
        return false;
1✔
322
    }
1✔
323

324
    // ==========================================================================
325
    // Criterion 6: Compute radius for each shared container
326
    // ==========================================================================
327
    // Collect the loop hierarchy for producer and consumer
328
    std::vector<structured_control_flow::StructuredLoop*> producer_loops = {&first_map_, first_inner_map};
12✔
329
    std::vector<structured_control_flow::StructuredLoop*> consumer_loops = {&second_map_, second_inner_map};
12✔
330

331
    // Get assumptions from the innermost block (recursively find it)
332
    auto* first_block = get_first_block(first_inner_map->root());
12✔
333
    if (first_block == nullptr) {
12✔
334
        return false;
×
335
    }
×
336
    auto& producer_assumptions = assumptions_analysis.get(*first_block);
12✔
337

338
    auto* second_block = get_first_block(second_inner_map->root());
12✔
339
    if (second_block == nullptr) {
12✔
340
        return false;
×
341
    }
×
342
    auto& consumer_assumptions = assumptions_analysis.get(*second_block);
12✔
343

344
    // Collect all blocks in producer and consumer
345
    std::vector<structured_control_flow::Block*> producer_blocks;
12✔
346
    collect_blocks(first_inner_map->root(), producer_blocks);
12✔
347
    std::vector<structured_control_flow::Block*> consumer_blocks;
12✔
348
    collect_blocks(second_inner_map->root(), consumer_blocks);
12✔
349

350
    int overall_radius = 0;
12✔
351

352
    for (const auto& container : shared_containers) {
12✔
353
        // Find the producer write subset for this container
354
        data_flow::Subset producer_write_subset;
12✔
355
        bool found_producer = false;
12✔
356

357
        for (auto* block : producer_blocks) {
31✔
358
            auto& dataflow = block->dataflow();
31✔
359
            for (auto& node : dataflow.nodes()) {
112✔
360
                auto* access = dynamic_cast<data_flow::AccessNode*>(&node);
112✔
361
                if (access == nullptr || access->data() != container) {
112✔
362
                    continue;
97✔
363
                }
97✔
364
                // It's a write if it has incoming edges and no outgoing edges
365
                if (dataflow.in_degree(*access) > 0 && dataflow.out_degree(*access) == 0) {
15✔
366
                    auto& iedge = *dataflow.in_edges(*access).begin();
12✔
367
                    if (iedge.type() != data_flow::MemletType::Computational) {
12✔
368
                        continue;
×
369
                    }
×
370
                    if (found_producer) {
12✔
371
                        return false; // Multiple writes to same container
×
372
                    }
×
373
                    producer_write_subset = iedge.subset();
12✔
374
                    found_producer = true;
12✔
375
                }
12✔
376
            }
15✔
377
        }
31✔
378
        if (!found_producer || producer_write_subset.empty()) {
12✔
379
            return false;
×
380
        }
×
381

382
        // Collect all consumer read subsets for this container
383
        std::vector<data_flow::Subset> consumer_read_subsets;
12✔
384
        for (auto* block : consumer_blocks) {
31✔
385
            auto& dataflow = block->dataflow();
31✔
386
            for (auto& node : dataflow.nodes()) {
112✔
387
                auto* access = dynamic_cast<data_flow::AccessNode*>(&node);
112✔
388
                if (access == nullptr || access->data() != container) {
112✔
389
                    continue;
91✔
390
                }
91✔
391
                // It's a read if it has outgoing edges
392
                if (dataflow.out_degree(*access) > 0) {
21✔
393
                    for (auto& memlet : dataflow.out_edges(*access)) {
32✔
394
                        if (memlet.type() != data_flow::MemletType::Computational) {
32✔
395
                            continue;
×
396
                        }
×
397
                        auto& subset = memlet.subset();
32✔
398
                        // Deduplicate
399
                        bool found = false;
32✔
400
                        for (const auto& existing : consumer_read_subsets) {
32✔
401
                            if (existing.size() != subset.size()) continue;
32✔
402
                            bool match = true;
32✔
403
                            for (size_t d = 0; d < existing.size(); ++d) {
35✔
404
                                if (!symbolic::eq(existing[d], subset[d])) {
35✔
405
                                    match = false;
32✔
406
                                    break;
32✔
407
                                }
32✔
408
                            }
35✔
409
                            if (match) {
32✔
410
                                found = true;
×
411
                                break;
×
412
                            }
×
413
                        }
32✔
414
                        if (!found) {
32✔
415
                            consumer_read_subsets.push_back(subset);
32✔
416
                        }
32✔
417
                    }
32✔
418
                }
21✔
419
            }
21✔
420
        }
31✔
421
        if (consumer_read_subsets.empty()) {
12✔
422
            return false;
×
423
        }
×
424

425
        int radius = compute_radius(
12✔
426
            producer_write_subset,
12✔
427
            consumer_read_subsets,
12✔
428
            producer_loops,
12✔
429
            consumer_loops,
12✔
430
            producer_assumptions,
12✔
431
            consumer_assumptions
12✔
432
        );
12✔
433
        if (radius < 0) {
12✔
434
            return false;
×
435
        }
×
436

437
        // ==========================================================================
438
        // Criterion 7: Radius must be less than tile size
439
        // ==========================================================================
440
        if (radius >= tile_size) {
12✔
441
            return false;
×
442
        }
×
443

444
        if (radius > overall_radius) {
12✔
445
            overall_radius = radius;
8✔
446
        }
8✔
447

448
        TileFusionCandidate candidate;
12✔
449
        candidate.container = container;
12✔
450
        candidate.radius = radius;
12✔
451
        candidates_.push_back(candidate);
12✔
452
    }
12✔
453

454
    radius_ = overall_radius;
12✔
455

456
    // ==========================================================================
457
    // Criterion 8: Analyze cyclic containers for double buffering
458
    // Use MemoryLayoutAnalysis to delinearize accesses and reason about
459
    // multi-dimensional stencil patterns (e.g., A[i*N+j] → A[i][j]).
460
    // For each cyclic container, identify the tiled dimension, extract
461
    // stencil offsets in that dimension, and compute buffer geometry.
462
    // Only needed when radius > 0 (tiles overlap).
463
    // ==========================================================================
464
    cyclic_containers_.clear();
12✔
465

466
    if (overall_radius > 0) {
12✔
467
        auto& mla = analysis_manager.get<analysis::MemoryLayoutAnalysis>();
8✔
468

469
        for (const auto& container : cyclic_container_names) {
8✔
470
            // Collect all K1 read memlets for this container
471
            struct ReadMemletInfo {
7✔
472
                const data_flow::Memlet* memlet;
7✔
473
            };
7✔
474
            std::vector<ReadMemletInfo> read_memlets;
7✔
475
            for (auto* block : producer_blocks) {
23✔
476
                auto& dataflow = block->dataflow();
23✔
477
                for (auto& node : dataflow.nodes()) {
85✔
478
                    auto* access = dynamic_cast<data_flow::AccessNode*>(&node);
85✔
479
                    if (access == nullptr || access->data() != container) continue;
85✔
480
                    if (dataflow.out_degree(*access) > 0) {
16✔
481
                        for (auto& memlet : dataflow.out_edges(*access)) {
23✔
482
                            if (memlet.type() != data_flow::MemletType::Computational) continue;
23✔
483
                            read_memlets.push_back({&memlet});
23✔
484
                        }
23✔
485
                    }
16✔
486
                }
16✔
487
            }
23✔
488
            if (read_memlets.empty()) {
7✔
489
                return false;
×
490
            }
×
491

492
            auto inner_indvar = first_inner_map->indvar();
7✔
493

494
            int min_offset = std::numeric_limits<int>::max();
7✔
495
            int max_offset = std::numeric_limits<int>::min();
7✔
496
            int tiled_dim = -1;
7✔
497
            std::vector<symbolic::Expression> layout_dimensions;
7✔
498
            std::vector<symbolic::Expression> layout_strides;
7✔
499
            symbolic::Expression layout_offset = symbolic::integer(0);
7✔
500

501
            for (auto& info : read_memlets) {
23✔
502
                // Use MemoryLayoutAnalysis to get delinearized access
503
                auto* mem_access = mla.access(*info.memlet);
23✔
504
                if (mem_access == nullptr) {
23✔
505
                    return false; // Delinearization failed
×
506
                }
×
507

508
                auto& delin_subset = mem_access->subset;
23✔
509

510
                // Find which dimension contains the inner (tiled) indvar
511
                for (size_t d = 0; d < delin_subset.size(); d++) {
51✔
512
                    if (symbolic::uses(delin_subset[d], inner_indvar->get_name())) {
28✔
513
                        if (tiled_dim == -1) {
23✔
514
                            tiled_dim = static_cast<int>(d);
7✔
515
                        } else if (tiled_dim != static_cast<int>(d)) {
16✔
516
                            return false; // Indvar appears in multiple dimensions
×
517
                        }
×
518

519
                        // Decompose this dimension: should be coeff*indvar + offset
520
                        auto decomp = symbolic::affine_decomposition(delin_subset[d], inner_indvar);
23✔
521
                        if (!decomp.success) {
23✔
522
                            return false;
×
523
                        }
×
524
                        auto coeff_int = SymEngine::rcp_static_cast<const SymEngine::Integer>(decomp.coeff)->as_int();
23✔
525
                        if (coeff_int != 1) {
23✔
526
                            return false; // Non-unit stride in tiled dimension
×
527
                        }
×
528
                        auto offset_expr = decomp.offset;
23✔
529
                        if (!SymEngine::is_a<SymEngine::Integer>(*offset_expr)) {
23✔
530
                            return false; // Non-constant offset
×
531
                        }
×
532
                        int offset =
23✔
533
                            static_cast<int>(SymEngine::down_cast<const SymEngine::Integer&>(*offset_expr).as_int());
23✔
534
                        min_offset = std::min(min_offset, offset);
23✔
535
                        max_offset = std::max(max_offset, offset);
23✔
536
                    }
23✔
537
                }
28✔
538

539
                // Capture layout info from first access (all accesses to same container share layout)
540
                if (layout_dimensions.empty()) {
23✔
541
                    auto& layout = mem_access->layout;
7✔
542
                    for (size_t d = 0; d < layout.dims(); d++) {
15✔
543
                        layout_dimensions.push_back(layout.shape().at(d));
8✔
544
                        layout_strides.push_back(layout.strides().at(d));
8✔
545
                    }
8✔
546
                    layout_offset = layout.offset();
7✔
547
                }
7✔
548
            }
23✔
549

550
            if (tiled_dim == -1) {
7✔
551
                return false; // No dimension depends on inner indvar
×
552
            }
×
553

554
            int tiled_dim_buf_size = tile_size + 2 * overall_radius + max_offset - min_offset;
7✔
555

556
            // Compute total buffer size: tiled_dim_buf_size * product(other dimensions)
557
            symbolic::Expression total_buf_size = symbolic::integer(tiled_dim_buf_size);
7✔
558
            for (size_t d = 0; d < layout_dimensions.size(); d++) {
15✔
559
                if (static_cast<int>(d) != tiled_dim) {
8✔
560
                    total_buf_size = symbolic::mul(total_buf_size, layout_dimensions[d]);
1✔
561
                }
1✔
562
            }
8✔
563

564
            // Total buffer size must be a constant integer for now
565
            // (non-tiled dimensions may be symbolic like N — that's fine, buffer is symbolic-sized)
566
            CyclicContainerInfo cyc_info;
7✔
567
            cyc_info.container = container;
7✔
568
            cyc_info.min_offset = min_offset;
7✔
569
            cyc_info.max_offset = max_offset;
7✔
570
            cyc_info.tiled_dim = tiled_dim;
7✔
571
            cyc_info.tiled_dim_buf_size = tiled_dim_buf_size;
7✔
572
            cyc_info.dimensions = layout_dimensions;
7✔
573
            cyc_info.strides = layout_strides;
7✔
574
            cyc_info.layout_offset = layout_offset;
7✔
575
            // buffer_size is now used as a symbolic expression in apply, but store int for 1D compat
576
            if (SymEngine::is_a<SymEngine::Integer>(*total_buf_size)) {
7✔
577
                cyc_info.buffer_size =
6✔
578
                    static_cast<int>(SymEngine::down_cast<const SymEngine::Integer&>(*total_buf_size).as_int());
6✔
579
            } else {
6✔
580
                cyc_info.buffer_size = -1; // Symbolic — will use expression in apply
1✔
581
            }
1✔
582
            cyclic_containers_.push_back(cyc_info);
7✔
583
        }
7✔
584
    } // if (overall_radius > 0)
8✔
585

586
    return true;
12✔
587
}
12✔
588

589
void TileFusion::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
10✔
590
    auto& sdfg = builder.subject();
10✔
591
    auto& assumptions_analysis = analysis_manager.get<analysis::AssumptionsAnalysis>();
10✔
592

593
    // Get parent sequence
594
    auto* parent = static_cast<structured_control_flow::Sequence*>(first_map_.get_parent());
10✔
595
    size_t first_index = parent->index(first_map_);
10✔
596

597
    // Extract tile loop properties from first map (representative)
598
    auto tile_indvar = first_map_.indvar();
10✔
599
    auto tile_init = first_map_.init();
10✔
600
    auto tile_condition = first_map_.condition();
10✔
601
    auto tile_update = first_map_.update();
10✔
602

603
    // Extract tile size
604
    auto stride = first_map_.stride();
10✔
605
    auto tile_size_expr = stride;
10✔
606

607
    // Get references to inner maps before moving
608
    auto* first_inner_map = dynamic_cast<structured_control_flow::Map*>(&first_map_.root().at(0).first);
10✔
609
    auto* second_inner_map = dynamic_cast<structured_control_flow::Map*>(&second_map_.root().at(0).first);
10✔
610

611
    // Extract inner map properties before they get moved
612
    auto first_inner_indvar = first_inner_map->indvar();
10✔
613
    auto first_inner_init = first_inner_map->init();
10✔
614
    auto first_inner_condition = first_inner_map->condition();
10✔
615
    auto first_inner_update = first_inner_map->update();
10✔
616
    auto first_inner_schedule = first_inner_map->schedule_type();
10✔
617

618
    auto second_inner_indvar = second_inner_map->indvar();
10✔
619
    auto second_inner_init = second_inner_map->init();
10✔
620
    auto second_inner_condition = second_inner_map->condition();
10✔
621
    auto second_inner_update = second_inner_map->update();
10✔
622
    auto second_inner_schedule = second_inner_map->schedule_type();
10✔
623

624
    // Get the old tile indvars to substitute later
625
    auto first_tile_indvar = first_map_.indvar();
10✔
626
    auto second_tile_indvar = second_map_.indvar();
10✔
627

628
    // Step 1: Create a new For tile loop with the same bounds as the first tile Map
629
    // Use the condition from the first map (which uses first_tile_indvar)
630
    // We need a fresh indvar for the new tile loop
631
    auto new_tile_indvar_name = builder.find_new_name(tile_indvar->get_name());
10✔
632
    builder.add_container(new_tile_indvar_name, sdfg.type(tile_indvar->get_name()));
10✔
633
    auto new_tile_indvar = symbolic::symbol(new_tile_indvar_name);
10✔
634

635
    // Substitute the old tile indvar in the condition with the new one
636
    auto new_tile_condition = symbolic::subs(tile_condition, tile_indvar, new_tile_indvar);
10✔
637

638
    auto& fused_for = builder.add_for_before(
10✔
639
        *parent,
10✔
640
        first_map_,
10✔
641
        new_tile_indvar,
10✔
642
        new_tile_condition,
10✔
643
        tile_init,
10✔
644
        symbolic::subs(tile_update, tile_indvar, new_tile_indvar),
10✔
645
        {},
10✔
646
        first_map_.debug_info()
10✔
647
    );
10✔
648

649
    // Step 2: Create the producer inner Map inside the fused For
650
    // Its init and condition need to reference the new tile indvar
651
    auto new_first_inner_init = symbolic::subs(first_inner_init, first_tile_indvar, new_tile_indvar);
10✔
652
    auto new_first_inner_condition = symbolic::subs(first_inner_condition, first_tile_indvar, new_tile_indvar);
10✔
653

654
    // Extend the producer's range by the radius
655
    if (radius_ > 0) {
10✔
656
        // Extend init: max(0, tile - radius) -> take the original init and subtract radius
657
        // Original init is typically: tile_indvar (i.e., the iteration starts at the tile boundary)
658
        // New init: max(original_lower_bound, new_init - radius)
659
        auto radius_expr = symbolic::integer(radius_);
6✔
660
        auto extended_init = symbolic::max(symbolic::integer(0), symbolic::sub(new_first_inner_init, radius_expr));
6✔
661
        new_first_inner_init = extended_init;
6✔
662

663
        // Extend condition: the condition has form like And(i < tile + S, i < N)
664
        // We need to adjust the tile-relative bound: i < tile + S becomes i < tile + S + radius
665
        // We do this by substituting new_tile_indvar with (new_tile_indvar + radius) in the
666
        // tile-relative part. Since the condition is a conjunction, we can reconstruct it:
667
        // Original: And(inner_indvar < new_tile_indvar + S, inner_indvar < N)
668
        // Extended: And(inner_indvar < new_tile_indvar + S + radius, inner_indvar < N)
669
        auto extended_tile_bound =
6✔
670
            symbolic::Lt(first_inner_indvar, symbolic::add(new_tile_indvar, symbolic::add(tile_size_expr, radius_expr)));
6✔
671

672
        // Extract non-tile upper bounds from the original inner map condition.
673
        // canonical_bound() returns min of ALL bounds including the tile bound,
674
        // but we only want bounds independent of the tile indvar (e.g., array size).
675
        auto original_condition = first_inner_map->condition();
6✔
676
        symbolic::CNF cnf;
6✔
677
        try {
6✔
678
            cnf = symbolic::conjunctive_normal_form(original_condition);
6✔
679
        } catch (...) {
6✔
680
            cnf.clear();
×
681
        }
×
682

683
        // Collect non-tile upper bounds on the inner indvar
684
        std::vector<symbolic::Expression> non_tile_bounds;
6✔
685
        for (auto& clause : cnf) {
12✔
686
            for (auto& literal : clause) {
12✔
687
                // Check for i < bound forms
688
                if (SymEngine::is_a<SymEngine::StrictLessThan>(*literal)) {
12✔
689
                    auto lt = SymEngine::rcp_static_cast<const SymEngine::StrictLessThan>(literal);
12✔
690
                    auto lhs = lt->get_arg1();
12✔
691
                    auto rhs = lt->get_arg2();
12✔
692
                    if (symbolic::eq(lhs, first_inner_indvar) && !symbolic::uses(rhs, first_tile_indvar->get_name())) {
12✔
693
                        non_tile_bounds.push_back(rhs);
6✔
694
                    }
6✔
695
                }
12✔
696
            }
12✔
697
        }
12✔
698

699
        if (!non_tile_bounds.empty()) {
6✔
700
            // Build condition: extended_tile_bound AND all non-tile bounds
701
            auto result_condition = extended_tile_bound;
6✔
702
            for (auto& bound : non_tile_bounds) {
6✔
703
                result_condition = symbolic::And(result_condition, symbolic::Lt(first_inner_indvar, bound));
6✔
704
            }
6✔
705
            new_first_inner_condition = result_condition;
6✔
706
        } else {
6✔
707
            new_first_inner_condition = extended_tile_bound;
×
708
        }
×
709
    }
6✔
710

711
    auto& new_first_inner = builder.add_map(
10✔
712
        fused_for.root(),
10✔
713
        first_inner_indvar,
10✔
714
        new_first_inner_condition,
10✔
715
        new_first_inner_init,
10✔
716
        first_inner_update,
10✔
717
        first_inner_schedule
10✔
718
    );
10✔
719

720
    // Move the producer body into the new inner map
721
    builder.move_children(first_inner_map->root(), new_first_inner.root());
10✔
722

723
    // Step 3: Create the consumer inner Map inside the fused For, after the producer
724
    auto new_second_inner_init = symbolic::subs(second_inner_init, second_tile_indvar, new_tile_indvar);
10✔
725
    auto new_second_inner_condition = symbolic::subs(second_inner_condition, second_tile_indvar, new_tile_indvar);
10✔
726

727
    auto& new_second_inner = builder.add_map(
10✔
728
        fused_for.root(),
10✔
729
        second_inner_indvar,
10✔
730
        new_second_inner_condition,
10✔
731
        new_second_inner_init,
10✔
732
        second_inner_update,
10✔
733
        second_inner_schedule
10✔
734
    );
10✔
735

736
    // Move the consumer body into the new inner map
737
    builder.move_children(second_inner_map->root(), new_second_inner.root());
10✔
738

739
    // ==========================================================================
740
    // Step 3.5: Double-buffer pre-fetch for cyclic containers
741
    //
742
    // Uses layout info from MemoryLayoutAnalysis (computed in can_be_applied)
743
    // to handle multi-dimensional stencils. For each cyclic container,
744
    // generates nested copy loops and rewrites K1's reads to use the buffer.
745
    // ==========================================================================
746

747
    for (const auto& cyc : cyclic_containers_) {
10✔
748
        auto& container_type = sdfg.type(cyc.container);
5✔
749
        // Get scalar element type from container (Pointer or Array)
750
        const types::IType* scalar_type_ptr = nullptr;
5✔
751
        if (auto* ptr_type = dynamic_cast<const types::Pointer*>(&container_type)) {
5✔
752
            scalar_type_ptr = &ptr_type->pointee_type();
1✔
753
        } else if (auto* arr_type = dynamic_cast<const types::Array*>(&container_type)) {
4✔
754
            scalar_type_ptr = &arr_type->element_type();
4✔
755
        } else {
4✔
756
            throw std::runtime_error("TileFusion: cyclic container '" + cyc.container + "' has unsupported type");
×
757
        }
×
758
        auto& scalar_type = *scalar_type_ptr;
5✔
759
        bool is_pointer = dynamic_cast<const types::Pointer*>(&container_type) != nullptr;
5✔
760

761
        auto radius_expr = symbolic::integer(radius_);
5✔
762
        auto min_offset_expr = symbolic::integer(cyc.min_offset);
5✔
763
        types::Scalar idx_type(types::PrimitiveType::UInt64);
5✔
764

765
        // Build buffer dimensions: same as layout but with tiled dim replaced by tiled_dim_buf_size
766
        std::vector<symbolic::Expression> buf_dimensions;
5✔
767
        for (size_t d = 0; d < cyc.dimensions.size(); d++) {
11✔
768
            if (static_cast<int>(d) == cyc.tiled_dim) {
6✔
769
                buf_dimensions.push_back(symbolic::integer(cyc.tiled_dim_buf_size));
5✔
770
            } else {
5✔
771
                buf_dimensions.push_back(cyc.dimensions[d]);
1✔
772
            }
1✔
773
        }
6✔
774

775
        // Compute total buffer size
776
        symbolic::Expression total_buf_size = symbolic::integer(1);
5✔
777
        for (auto& dim : buf_dimensions) {
6✔
778
            total_buf_size = symbolic::mul(total_buf_size, dim);
6✔
779
        }
6✔
780

781
        // Create two buffer containers (flat 1D arrays of total_buf_size)
782
        auto buf_cur_name = "__tf_buf_cur_" + cyc.container;
5✔
783
        auto buf_pf_name = "__tf_buf_pf_" + cyc.container;
5✔
784
        types::Array buf_type(scalar_type, total_buf_size);
5✔
785
        builder.add_container(buf_cur_name, buf_type);
5✔
786
        builder.add_container(buf_pf_name, buf_type);
5✔
787

788
        // Helper: linearize multi-dim indices into flat buffer index (row-major)
789
        auto linearize_buf = [&](const std::vector<symbolic::Expression>& indices) -> symbolic::Expression {
27✔
790
            symbolic::Expression linear_idx = symbolic::integer(0);
27✔
791
            symbolic::Expression stride = symbolic::integer(1);
27✔
792
            for (int i = static_cast<int>(indices.size()) - 1; i >= 0; i--) {
57✔
793
                linear_idx = symbolic::add(linear_idx, symbolic::mul(indices[i], stride));
30✔
794
                stride = symbolic::mul(stride, buf_dimensions[i]);
30✔
795
            }
30✔
796
            return linear_idx;
27✔
797
        };
27✔
798

799
        // Helper: build original source subset from multi-dim copy indices
800
        // For the tiled dim, the source index is: base + copy_indvar
801
        // For non-tiled dims, the source index is just: copy_indvar
802
        // If the container is a Pointer, re-linearize using layout strides
803
        auto build_src_subset = [&](const std::vector<symbolic::Expression>& copy_indices,
5✔
804
                                    const symbolic::Expression& tiled_base) -> data_flow::Subset {
10✔
805
            std::vector<symbolic::Expression> full_indices;
10✔
806
            for (size_t d = 0; d < cyc.dimensions.size(); d++) {
22✔
807
                if (static_cast<int>(d) == cyc.tiled_dim) {
12✔
808
                    full_indices.push_back(symbolic::add(tiled_base, copy_indices[d]));
10✔
809
                } else {
10✔
810
                    full_indices.push_back(copy_indices[d]);
2✔
811
                }
2✔
812
            }
12✔
813
            if (is_pointer) {
10✔
814
                // Re-linearize using layout strides: offset + sum(stride[d] * index[d])
815
                symbolic::Expression linear = cyc.layout_offset;
2✔
816
                for (size_t d = 0; d < full_indices.size(); d++) {
6✔
817
                    linear = symbolic::add(linear, symbolic::mul(cyc.strides[d], full_indices[d]));
4✔
818
                }
4✔
819
                return {linear};
2✔
820
            } else {
8✔
821
                return data_flow::Subset(full_indices.begin(), full_indices.end());
8✔
822
            }
8✔
823
        };
10✔
824

825
        // Helper: generate nested copy loops and a copy block
826
        // copy_kind: 0 = src→buf_cur, 1 = src→buf_pf, 2 = buf_pf→buf_cur
827
        auto generate_copy_loops = [&](structured_control_flow::Sequence& parent_scope,
5✔
828
                                       structured_control_flow::ControlFlowNode* insert_before,
5✔
829
                                       const symbolic::Expression& tiled_base,
5✔
830
                                       int copy_kind) {
15✔
831
            // Create indvars for each dimension
832
            std::vector<symbolic::Symbol> copy_indvars;
15✔
833
            structured_control_flow::Sequence* scope = &parent_scope;
15✔
834
            bool first_loop = true;
15✔
835

836
            for (size_t d = 0; d < buf_dimensions.size(); d++) {
33✔
837
                std::string suffix = (copy_kind == 0) ? "init" : (copy_kind == 1) ? "pf" : "sw";
18✔
838
                auto indvar_name = builder.find_new_name("__tf_" + suffix + "_d" + std::to_string(d));
18✔
839
                builder.add_container(indvar_name, idx_type);
18✔
840
                auto indvar = symbolic::symbol(indvar_name);
18✔
841
                copy_indvars.push_back(indvar);
18✔
842

843
                auto init = symbolic::integer(0);
18✔
844
                auto condition = symbolic::Lt(indvar, buf_dimensions[d]);
18✔
845
                auto update = symbolic::add(indvar, symbolic::integer(1));
18✔
846

847
                if (first_loop && insert_before != nullptr) {
18✔
848
                    auto& loop = builder.add_map_before(
5✔
849
                        *scope,
5✔
850
                        *insert_before,
5✔
851
                        indvar,
5✔
852
                        condition,
5✔
853
                        init,
5✔
854
                        update,
5✔
855
                        structured_control_flow::ScheduleType_Sequential::create()
5✔
856
                    );
5✔
857
                    scope = &loop.root();
5✔
858
                    first_loop = false;
5✔
859
                } else if (first_loop) {
13✔
860
                    auto& loop = builder.add_map(
10✔
861
                        *scope,
10✔
862
                        indvar,
10✔
863
                        condition,
10✔
864
                        init,
10✔
865
                        update,
10✔
866
                        structured_control_flow::ScheduleType_Sequential::create()
10✔
867
                    );
10✔
868
                    scope = &loop.root();
10✔
869
                    first_loop = false;
10✔
870
                } else {
10✔
871
                    auto& loop = builder.add_map(
3✔
872
                        *scope,
3✔
873
                        indvar,
3✔
874
                        condition,
3✔
875
                        init,
3✔
876
                        update,
3✔
877
                        structured_control_flow::ScheduleType_Sequential::create()
3✔
878
                    );
3✔
879
                    scope = &loop.root();
3✔
880
                }
3✔
881
            }
18✔
882

883
            // Create copy block
884
            auto& blk = builder.add_block(*scope);
15✔
885
            std::vector<symbolic::Expression> copy_exprs(copy_indvars.begin(), copy_indvars.end());
15✔
886
            auto buf_subset = data_flow::Subset{linearize_buf(copy_exprs)};
15✔
887

888
            if (copy_kind == 0 || copy_kind == 1) {
15✔
889
                // src container → buffer
890
                auto src_subset = build_src_subset(copy_exprs, tiled_base);
10✔
891
                auto& src = builder.add_access(blk, cyc.container);
10✔
892
                auto& dst = builder.add_access(blk, (copy_kind == 0) ? buf_cur_name : buf_pf_name);
10✔
893
                auto& tasklet = builder.add_tasklet(blk, data_flow::TaskletCode::assign, "_out", {"_in"});
10✔
894
                builder.add_computational_memlet(blk, src, tasklet, "_in", src_subset, container_type);
10✔
895
                builder.add_computational_memlet(blk, tasklet, "_out", dst, buf_subset, buf_type);
10✔
896
            } else {
10✔
897
                // buf_pf → buf_cur
898
                auto& src = builder.add_access(blk, buf_pf_name);
5✔
899
                auto& dst = builder.add_access(blk, buf_cur_name);
5✔
900
                auto& tasklet = builder.add_tasklet(blk, data_flow::TaskletCode::assign, "_out", {"_in"});
5✔
901
                builder.add_computational_memlet(blk, src, tasklet, "_in", buf_subset, buf_type);
5✔
902
                builder.add_computational_memlet(blk, tasklet, "_out", dst, buf_subset, buf_type);
5✔
903
            }
5✔
904
        };
15✔
905

906
        // ==================================================================
907
        // 3.5a: Initial copy INSIDE the fused For, guarded by if (tile == init)
908
        // On the first iteration, we load from the original array.
909
        // On subsequent iterations, buf_cur already has the right data from swap.
910
        // Base for tiled dim: tile_init - radius + min_offset
911
        // ==================================================================
912
        auto init_tiled_base = symbolic::sub(symbolic::add(tile_init, min_offset_expr), radius_expr);
5✔
913

914
        // Get a reference to the pre-fetch map (first child of fused_for) to insert before it
915
        auto& first_child_in_fused = fused_for.root().at(0).first;
5✔
916

917
        auto& init_if_else =
5✔
918
            builder.add_if_else_before(fused_for.root(), first_child_in_fused, sdfg::control_flow::Assignments{});
5✔
919
        auto init_cond = symbolic::Eq(new_tile_indvar, tile_init);
5✔
920
        auto& init_then = builder.add_case(init_if_else, init_cond);
5✔
921
        // Empty else branch (on subsequent iters, buf_cur has data from swap)
922
        builder.add_case(init_if_else, symbolic::Not(init_cond));
5✔
923

924
        generate_copy_loops(init_then, nullptr, init_tiled_base, 0);
5✔
925

926
        // ==================================================================
927
        // 3.5b: Pre-fetch INSIDE the fused For, BEFORE K1
928
        // Base for tiled dim: tile + S - radius + min_offset
929
        // ==================================================================
930
        auto pf_tiled_base =
5✔
931
            symbolic::sub(symbolic::add(new_tile_indvar, symbolic::add(tile_size_expr, min_offset_expr)), radius_expr);
5✔
932
        generate_copy_loops(fused_for.root(), &new_first_inner, pf_tiled_base, 1);
5✔
933

934
        // ==================================================================
935
        // 3.5c: Redirect K1's reads of container to buf_cur
936
        // Use MemoryLayoutAnalysis to get delinearized indices, rebase
937
        // the tiled dimension, then re-linearize into the flat buffer.
938
        // ==================================================================
939
        auto buf_cur_base_tiled = symbolic::sub(symbolic::add(new_tile_indvar, min_offset_expr), radius_expr);
5✔
940

941
        // Helper: rebase a raw subset for the buffer.
942
        // For Pointer (linearized) access A[expr]:
943
        //   buf_index = expr - stride[tiled_dim] * buf_cur_base_tiled
944
        // For Array (multi-dim) access A[idx0, idx1, ...]:
945
        //   rebase tiled_dim, linearize into flat buffer index
946
        auto rebase_to_buffer = [&](const data_flow::Subset& subset) -> data_flow::Subset {
17✔
947
            if (subset.size() == 1 && cyc.dimensions.size() > 1 && is_pointer) {
17✔
948
                // Pointer type: subtract the tiled dimension's contribution to the linear base
949
                auto linear_base = symbolic::mul(cyc.strides[cyc.tiled_dim], buf_cur_base_tiled);
5✔
950
                return {symbolic::sub(subset.at(0), linear_base)};
5✔
951
            }
5✔
952
            if (subset.size() == cyc.dimensions.size()) {
12✔
953
                // Multi-dim: rebase tiled dimension, linearize into buffer
954
                std::vector<symbolic::Expression> buf_indices;
12✔
955
                for (size_t d = 0; d < cyc.dimensions.size(); d++) {
24✔
956
                    if (static_cast<int>(d) == cyc.tiled_dim) {
12✔
957
                        buf_indices.push_back(symbolic::sub(subset[d], buf_cur_base_tiled));
12✔
958
                    } else {
12✔
959
                        buf_indices.push_back(subset[d]);
×
960
                    }
×
961
                }
12✔
962
                return {linearize_buf(buf_indices)};
12✔
963
            }
12✔
964
            // 1D Array: rebase directly
965
            if (subset.size() == 1) {
×
966
                return {symbolic::sub(subset.at(0), buf_cur_base_tiled)};
×
967
            }
×
968
            return subset; // Fallback: no rebase
×
969
        };
×
970

971
        std::function<void(structured_control_flow::ControlFlowNode&)> rewrite_accesses;
5✔
972
        rewrite_accesses = [&](structured_control_flow::ControlFlowNode& node) {
24✔
973
            if (auto* block = dynamic_cast<structured_control_flow::Block*>(&node)) {
24✔
974
                auto& dfg = block->dataflow();
17✔
975
                for (auto* access : dfg.data_nodes()) {
46✔
976
                    if (access->data() != cyc.container) continue;
46✔
977
                    bool has_writes = dfg.in_degree(*access) > 0;
12✔
978
                    for (auto& memlet : dfg.out_edges(*access)) {
17✔
979
                        if (memlet.type() != data_flow::MemletType::Computational) continue;
17✔
980
                        memlet.set_subset(rebase_to_buffer(memlet.subset()));
17✔
981
                        memlet.set_base_type(buf_type);
17✔
982
                    }
17✔
983
                    if (!has_writes) {
12✔
984
                        access->data(buf_cur_name);
12✔
985
                    }
12✔
986
                }
12✔
987
            } else if (auto* seq = dynamic_cast<structured_control_flow::Sequence*>(&node)) {
17✔
988
                for (size_t i = 0; i < seq->size(); i++) {
24✔
989
                    rewrite_accesses(seq->at(i).first);
18✔
990
                }
18✔
991
            } else if (auto* loop = dynamic_cast<structured_control_flow::StructuredLoop*>(&node)) {
6✔
992
                rewrite_accesses(loop->root());
1✔
993
            } else if (auto* if_else = dynamic_cast<structured_control_flow::IfElse*>(&node)) {
1✔
994
                for (size_t i = 0; i < if_else->size(); i++) {
×
995
                    rewrite_accesses(if_else->at(i).first);
×
996
                }
×
997
            }
×
998
        };
24✔
999
        rewrite_accesses(new_first_inner.root());
5✔
1000

1001
        // ==================================================================
1002
        // 3.5d: Swap INSIDE the fused For, AFTER K2
1003
        // buf_cur = buf_pf
1004
        // ==================================================================
1005
        generate_copy_loops(fused_for.root(), nullptr, symbolic::integer(0), 2);
5✔
1006
    }
5✔
1007

1008
    // Step 4: Remove the original two tile Map nests
1009
    // After we've moved the children out, remove the old maps from the parent
1010
    // The fused_for was inserted before first_map_, so first_map_ is now at first_index + 1
1011
    // and second_map_ is at first_index + 2
1012
    // Remove second first (higher index) to avoid invalidating first's index
1013
    size_t current_first_index = parent->index(first_map_);
10✔
1014
    size_t current_second_index = parent->index(second_map_);
10✔
1015
    builder.remove_child(*parent, current_second_index);
10✔
1016
    builder.remove_child(*parent, current_first_index);
10✔
1017

1018
    analysis_manager.invalidate_all();
10✔
1019
    applied_ = true;
10✔
1020
    fused_loop_ = &fused_for;
10✔
1021
}
10✔
1022

1023
void TileFusion::to_json(nlohmann::json& j) const {
6✔
1024
    j["transformation_type"] = this->name();
6✔
1025
    j["parameters"] = nlohmann::json::object();
6✔
1026

1027
    serializer::JSONSerializer ser_flat(false);
6✔
1028
    j["subgraph"] = nlohmann::json::object();
6✔
1029
    j["subgraph"]["0"] = nlohmann::json::object();
6✔
1030
    ser_flat.serialize_node(j["subgraph"]["0"], first_map_);
6✔
1031

1032
    j["subgraph"]["1"] = nlohmann::json::object();
6✔
1033
    ser_flat.serialize_node(j["subgraph"]["1"], second_map_);
6✔
1034
}
6✔
1035

1036
TileFusion TileFusion::from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& desc) {
1✔
1037
    auto first_map_id = desc["subgraph"]["0"]["element_id"].get<size_t>();
1✔
1038
    auto second_map_id = desc["subgraph"]["1"]["element_id"].get<size_t>();
1✔
1039

1040
    auto first_element = builder.find_element_by_id(first_map_id);
1✔
1041
    auto second_element = builder.find_element_by_id(second_map_id);
1✔
1042

1043
    if (first_element == nullptr) {
1✔
1044
        throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(first_map_id) + " not found.");
×
1045
    }
×
1046
    if (second_element == nullptr) {
1✔
UNCOV
1047
        throw InvalidTransformationDescriptionException(
×
UNCOV
1048
            "Element with ID " + std::to_string(second_map_id) + " not found."
×
UNCOV
1049
        );
×
UNCOV
1050
    }
×
1051

1052
    auto* first_map = dynamic_cast<structured_control_flow::Map*>(first_element);
1✔
1053
    auto* second_map = dynamic_cast<structured_control_flow::Map*>(second_element);
1✔
1054

1055
    if (first_map == nullptr) {
1✔
UNCOV
1056
        throw InvalidTransformationDescriptionException(
×
1057
            "Element with ID " + std::to_string(first_map_id) + " is not a Map."
×
1058
        );
×
1059
    }
×
1060
    if (second_map == nullptr) {
1✔
UNCOV
1061
        throw InvalidTransformationDescriptionException(
×
UNCOV
1062
            "Element with ID " + std::to_string(second_map_id) + " is not a Map."
×
UNCOV
1063
        );
×
UNCOV
1064
    }
×
1065

1066
    return TileFusion(*first_map, *second_map);
1✔
1067
}
1✔
1068

1069
structured_control_flow::StructuredLoop* TileFusion::fused_loop() const {
×
1070
    if (!applied_) {
×
UNCOV
1071
        throw InvalidTransformationException("Accessing fused loop before apply.");
×
UNCOV
1072
    }
×
UNCOV
1073
    return fused_loop_;
×
UNCOV
1074
}
×
1075

1076
int TileFusion::radius() const { return radius_; }
4✔
1077

1078
} // namespace transformations
1079
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc