• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 25289367548

03 May 2026 08:01PM UTC coverage: 64.317% (+0.1%) from 64.206%
25289367548

push

github

web-flow
Merge pull request #698 from daisytuner/tile-fusion-double-buffering

Tile fusion double buffering

306 of 453 new or added lines in 4 files covered. (67.55%)

3 existing lines in 2 files now uncovered.

31682 of 49259 relevant lines covered (64.32%)

1432.27 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

83.29
/opt/src/transformations/tile_fusion.cpp
1
#include "sdfg/transformations/tile_fusion.h"
2

3
#include <climits>
4
#include <cmath>
5
#include <stdexcept>
6

7
#include <symengine/solve.h>
8

9
#include "sdfg/analysis/arguments_analysis.h"
10
#include "sdfg/analysis/assumptions_analysis.h"
11
#include "sdfg/analysis/loop_analysis.h"
12
#include "sdfg/analysis/memory_layout_analysis.h"
13
#include "sdfg/analysis/scope_analysis.h"
14
#include "sdfg/analysis/users.h"
15
#include "sdfg/builder/structured_sdfg_builder.h"
16
#include "sdfg/data_flow/data_flow_graph.h"
17
#include "sdfg/structured_control_flow/block.h"
18
#include "sdfg/structured_control_flow/for.h"
19
#include "sdfg/structured_control_flow/if_else.h"
20
#include "sdfg/structured_control_flow/map.h"
21
#include "sdfg/symbolic/conjunctive_normal_form.h"
22
#include "sdfg/symbolic/delinearization.h"
23
#include "sdfg/symbolic/polynomials.h"
24
#include "sdfg/symbolic/symbolic.h"
25
#include "sdfg/symbolic/utils.h"
26
#include "sdfg/types/array.h"
27
#include "sdfg/types/pointer.h"
28
#include "sdfg/types/scalar.h"
29

30
namespace sdfg {
31
namespace transformations {
32

33
namespace {
34

35
/// Collect all Block nodes reachable from a Sequence, including through nested For/Map loops.
36
void collect_blocks(structured_control_flow::Sequence& seq, std::vector<structured_control_flow::Block*>& blocks) {
32✔
37
    for (size_t i = 0; i < seq.size(); ++i) {
102✔
38
        auto& child = seq.at(i).first;
70✔
39
        if (auto* block = dynamic_cast<structured_control_flow::Block*>(&child)) {
70✔
40
            blocks.push_back(block);
62✔
41
        } else if (auto* loop = dynamic_cast<structured_control_flow::StructuredLoop*>(&child)) {
62✔
42
            collect_blocks(loop->root(), blocks);
8✔
43
        }
8✔
44
    }
70✔
45
}
32✔
46

47
/// Get the first block from a sequence, recursively descending into loops.
48
structured_control_flow::Block* get_first_block(structured_control_flow::Sequence& seq) {
32✔
49
    for (size_t i = 0; i < seq.size(); ++i) {
32✔
50
        auto& child = seq.at(i).first;
32✔
51
        if (auto* block = dynamic_cast<structured_control_flow::Block*>(&child)) {
32✔
52
            return block;
24✔
53
        } else if (auto* loop = dynamic_cast<structured_control_flow::StructuredLoop*>(&child)) {
24✔
54
            auto* result = get_first_block(loop->root());
8✔
55
            if (result) return result;
8✔
56
        }
8✔
57
    }
32✔
58
    return nullptr;
×
59
}
32✔
60

61
} // namespace
62

63
TileFusion::TileFusion(structured_control_flow::Map& first_map, structured_control_flow::Map& second_map)
64
    : first_map_(first_map), second_map_(second_map) {}
16✔
65

66
std::string TileFusion::name() const { return "TileFusion"; }
6✔
67

68
int TileFusion::compute_radius(
69
    const data_flow::Subset& producer_write_subset,
70
    const std::vector<data_flow::Subset>& consumer_read_subsets,
71
    const std::vector<structured_control_flow::StructuredLoop*>& producer_loops,
72
    const std::vector<structured_control_flow::StructuredLoop*>& consumer_loops,
73
    const symbolic::Assumptions& producer_assumptions,
74
    const symbolic::Assumptions& consumer_assumptions
75
) {
12✔
76
    if (producer_loops.empty() || consumer_loops.empty()) {
12✔
77
        return -1;
×
78
    }
×
79

80
    // Delinearize the producer write subset
81
    auto producer_sub = producer_write_subset;
12✔
82
    if (producer_sub.size() == 1) {
12✔
83
        auto producer_result = symbolic::delinearize(producer_write_subset.at(0), producer_assumptions);
9✔
84
        if (producer_result.success) {
9✔
85
            producer_sub = producer_result.indices;
9✔
86
        }
9✔
87
    }
9✔
88

89
    // Extract the innermost producer loop indvar (the spatial iteration variable)
90
    auto* producer_inner = producer_loops.back();
12✔
91
    auto producer_indvar = producer_inner->indvar();
12✔
92

93
    // Extract the innermost consumer loop indvar
94
    auto* consumer_inner = consumer_loops.back();
12✔
95
    auto consumer_indvar = consumer_inner->indvar();
12✔
96

97
    int max_radius = 0;
12✔
98

99
    for (const auto& consumer_read_subset : consumer_read_subsets) {
32✔
100
        auto consumer_sub = consumer_read_subset;
32✔
101
        if (consumer_sub.size() == 1) {
32✔
102
            auto consumer_result = symbolic::delinearize(consumer_read_subset.at(0), consumer_assumptions);
26✔
103
            if (consumer_result.success) {
26✔
104
                consumer_sub = consumer_result.indices;
26✔
105
            }
26✔
106
        }
26✔
107
        if (consumer_sub.size() != producer_sub.size()) {
32✔
108
            return -1;
×
109
        }
×
110

111
        // Solve: producer_sub[d] = consumer_sub[d] for producer_indvar
112
        // This gives us: producer_indvar = f(consumer_indvar)
113
        SymEngine::vec_sym producer_vars;
32✔
114
        producer_vars.push_back(SymEngine::rcp_static_cast<const SymEngine::Symbol>(producer_indvar));
32✔
115

116
        SymEngine::vec_basic equations;
32✔
117
        for (size_t d = 0; d < producer_sub.size(); ++d) {
75✔
118
            auto diff = symbolic::sub(producer_sub.at(d), consumer_sub.at(d));
43✔
119
            if (symbolic::uses(diff, producer_indvar->get_name())) {
43✔
120
                // This dimension depends on the producer indvar — include in system
121
                equations.push_back(diff);
32✔
122
            }
32✔
123
            // Dimensions not involving the producer indvar are independent
124
            // (e.g., inner loop variables in 2D stencils) — skip them.
125
        }
43✔
126

127
        if (equations.size() != producer_vars.size()) {
32✔
128
            return -1;
×
129
        }
×
130

131
        SymEngine::vec_basic solution;
32✔
132
        try {
32✔
133
            solution = SymEngine::linsolve(equations, producer_vars);
32✔
134
        } catch (...) {
32✔
135
            return -1;
×
136
        }
×
137

138
        if (solution.size() != 1) {
32✔
139
            return -1;
×
140
        }
×
141

142
        auto& sol = solution[0];
32✔
143
        if (SymEngine::is_a<SymEngine::NaN>(*sol) || SymEngine::is_a<SymEngine::Infty>(*sol)) {
32✔
144
            return -1;
×
145
        }
×
146

147
        // The solution is: producer_indvar = f(consumer_indvar)
148
        // The offset is: f(consumer_indvar) - consumer_indvar
149
        // For a stencil B[1+i] and consumer reads B[j], B[1+j], B[2+j]:
150
        //   solve 1+i=j   -> i = j-1, offset = -1
151
        //   solve 1+i=1+j -> i = j,   offset = 0
152
        //   solve 1+i=2+j -> i = j+1, offset = +1
153
        auto offset_expr = symbolic::expand(symbolic::sub(sol, consumer_indvar));
32✔
154

155
        // The offset should be a constant integer
156
        if (!SymEngine::is_a<SymEngine::Integer>(*offset_expr)) {
32✔
157
            return -1;
×
158
        }
×
159

160
        int offset = std::abs(static_cast<int>(SymEngine::down_cast<const SymEngine::Integer&>(*offset_expr).as_int()));
32✔
161

162
        if (offset > max_radius) {
32✔
163
            max_radius = offset;
8✔
164
        }
8✔
165
    }
32✔
166

167
    return max_radius;
12✔
168
}
12✔
169

170
bool TileFusion::can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
16✔
171
    candidates_.clear();
16✔
172
    radius_ = 0;
16✔
173

174
    // Get analyses upfront
175
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
16✔
176
    auto& loop_analysis = analysis_manager.get<analysis::LoopAnalysis>();
16✔
177
    auto& assumptions_analysis = analysis_manager.get<analysis::AssumptionsAnalysis>();
16✔
178
    auto& arguments_analysis = analysis_manager.get<analysis::ArgumentsAnalysis>();
16✔
179

180
    // ==========================================================================
181
    // Criterion 1: Both maps are in the same parent sequence, consecutive
182
    // ==========================================================================
183
    auto* first_parent = scope_analysis.parent_scope(&first_map_);
16✔
184
    auto* second_parent = scope_analysis.parent_scope(&second_map_);
16✔
185
    if (first_parent == nullptr || second_parent == nullptr) {
16✔
186
        return false;
×
187
    }
×
188
    if (first_parent != second_parent) {
16✔
189
        return false;
×
190
    }
×
191

192
    auto* parent_sequence = dynamic_cast<structured_control_flow::Sequence*>(first_parent);
16✔
193
    if (parent_sequence == nullptr) {
16✔
194
        return false;
×
195
    }
×
196

197
    int first_index = parent_sequence->index(first_map_);
16✔
198
    int second_index = parent_sequence->index(second_map_);
16✔
199
    if (first_index == -1 || second_index == -1) {
16✔
200
        return false;
×
201
    }
×
202
    if (second_index != first_index + 1) {
16✔
203
        return false;
1✔
204
    }
1✔
205

206
    // ==========================================================================
207
    // Criterion 2: Transition between them is empty
208
    // ==========================================================================
209
    auto& transition = parent_sequence->at(first_index).second;
15✔
210
    if (!transition.empty()) {
15✔
211
        return false;
×
212
    }
×
213

214
    // ==========================================================================
215
    // Criterion 3: Both are perfectly nested tile structures with Maps
216
    // Structure required: outer tile Map -> inner iteration Map(s) -> Block(s)
217
    // ==========================================================================
218
    auto first_loop_info = loop_analysis.loop_info(&first_map_);
15✔
219
    auto second_loop_info = loop_analysis.loop_info(&second_map_);
15✔
220

221
    // Must be perfectly nested (no side code between loop levels)
222
    if (!first_loop_info.is_perfectly_nested) {
15✔
223
        return false;
×
224
    }
×
225
    if (!second_loop_info.is_perfectly_nested) {
15✔
226
        return false;
×
227
    }
×
228

229
    // Must be all Maps (perfectly parallel) - this is a tile fusion on parallel maps
230
    if (!first_loop_info.is_perfectly_parallel) {
15✔
231
        return false;
×
232
    }
×
233
    if (!second_loop_info.is_perfectly_parallel) {
15✔
234
        return false;
×
235
    }
×
236

237
    // Must have at least depth 2 (outer tile + inner iteration)
238
    if (first_loop_info.max_depth < 2) {
15✔
239
        return false;
×
240
    }
×
241
    if (second_loop_info.max_depth < 2) {
15✔
242
        return false;
×
243
    }
×
244

245
    // The immediate child must be a Map (the inner iteration map)
246
    if (first_map_.root().size() != 1) {
15✔
247
        return false;
×
248
    }
×
249
    auto* first_inner_map = dynamic_cast<structured_control_flow::Map*>(&first_map_.root().at(0).first);
15✔
250
    if (first_inner_map == nullptr) {
15✔
251
        return false;
×
252
    }
×
253

254
    if (second_map_.root().size() != 1) {
15✔
255
        return false;
×
256
    }
×
257
    auto* second_inner_map = dynamic_cast<structured_control_flow::Map*>(&second_map_.root().at(0).first);
15✔
258
    if (second_inner_map == nullptr) {
15✔
259
        return false;
×
260
    }
×
261

262
    // ==========================================================================
263
    // Criterion 4: Compatible tile bounds (same init and stride)
264
    // ==========================================================================
265
    if (!symbolic::eq(first_map_.init(), second_map_.init())) {
15✔
266
        return false;
×
267
    }
×
268

269
    auto first_stride = first_map_.stride();
15✔
270
    auto second_stride = second_map_.stride();
15✔
271
    if (first_stride.is_null() || second_stride.is_null()) {
15✔
272
        return false;
×
273
    }
×
274
    if (!symbolic::eq(first_stride, second_stride)) {
15✔
275
        return false;
1✔
276
    }
1✔
277

278
    // Extract tile size as integer
279
    if (!SymEngine::is_a<SymEngine::Integer>(*first_stride)) {
14✔
280
        return false;
×
281
    }
×
282
    int tile_size = static_cast<int>(SymEngine::down_cast<const SymEngine::Integer&>(*first_stride).as_int());
14✔
283
    if (tile_size <= 0) {
14✔
284
        return false;
×
285
    }
×
286

287
    // ==========================================================================
288
    // Criterion 5: Shared intermediate container exists
289
    // First writes C, second reads C, second does NOT write C
290
    // Also detect cyclic containers: second writes D, first reads D
291
    // ==========================================================================
292
    auto first_args = arguments_analysis.arguments(analysis_manager, first_map_);
14✔
293
    auto second_args = arguments_analysis.arguments(analysis_manager, second_map_);
14✔
294

295
    std::unordered_set<std::string> first_outputs;
14✔
296
    std::unordered_set<std::string> first_inputs;
14✔
297
    for (const auto& [name, arg] : first_args) {
47✔
298
        if (arg.is_output) {
47✔
299
            first_outputs.insert(name);
14✔
300
        }
14✔
301
        if (arg.is_input) {
47✔
302
            first_inputs.insert(name);
47✔
303
        }
47✔
304
    }
47✔
305

306
    std::unordered_set<std::string> shared_containers;
14✔
307
    std::unordered_set<std::string> cyclic_container_names;
14✔
308
    for (const auto& [name, arg] : second_args) {
45✔
309
        if (first_outputs.contains(name)) {
45✔
310
            if (arg.is_output) {
13✔
311
                return false; // Consumer also writes the shared container
1✔
312
            }
1✔
313
            if (arg.is_input) {
12✔
314
                shared_containers.insert(name);
12✔
315
            }
12✔
316
        }
12✔
317
        // Detect cyclic: K2 writes container that K1 reads
318
        if (arg.is_output && first_inputs.contains(name) && !first_outputs.contains(name)) {
44✔
319
            cyclic_container_names.insert(name);
11✔
320
        }
11✔
321
    }
44✔
322
    if (shared_containers.empty()) {
13✔
323
        return false;
1✔
324
    }
1✔
325

326
    // ==========================================================================
327
    // Criterion 6: Compute radius for each shared container
328
    // ==========================================================================
329
    // Collect the loop hierarchy for producer and consumer
330
    std::vector<structured_control_flow::StructuredLoop*> producer_loops = {&first_map_, first_inner_map};
12✔
331
    std::vector<structured_control_flow::StructuredLoop*> consumer_loops = {&second_map_, second_inner_map};
12✔
332

333
    // Get assumptions from the innermost block (recursively find it)
334
    auto* first_block = get_first_block(first_inner_map->root());
12✔
335
    if (first_block == nullptr) {
12✔
336
        return false;
×
337
    }
×
338
    auto& producer_assumptions = assumptions_analysis.get(*first_block);
12✔
339

340
    auto* second_block = get_first_block(second_inner_map->root());
12✔
341
    if (second_block == nullptr) {
12✔
342
        return false;
×
343
    }
×
344
    auto& consumer_assumptions = assumptions_analysis.get(*second_block);
12✔
345

346
    // Collect all blocks in producer and consumer
347
    std::vector<structured_control_flow::Block*> producer_blocks;
12✔
348
    collect_blocks(first_inner_map->root(), producer_blocks);
12✔
349
    std::vector<structured_control_flow::Block*> consumer_blocks;
12✔
350
    collect_blocks(second_inner_map->root(), consumer_blocks);
12✔
351

352
    int overall_radius = 0;
12✔
353

354
    for (const auto& container : shared_containers) {
12✔
355
        // Find the producer write subset for this container
356
        data_flow::Subset producer_write_subset;
12✔
357
        bool found_producer = false;
12✔
358

359
        for (auto* block : producer_blocks) {
31✔
360
            auto& dataflow = block->dataflow();
31✔
361
            for (auto& node : dataflow.nodes()) {
112✔
362
                auto* access = dynamic_cast<data_flow::AccessNode*>(&node);
112✔
363
                if (access == nullptr || access->data() != container) {
112✔
364
                    continue;
97✔
365
                }
97✔
366
                // It's a write if it has incoming edges and no outgoing edges
367
                if (dataflow.in_degree(*access) > 0 && dataflow.out_degree(*access) == 0) {
15✔
368
                    auto& iedge = *dataflow.in_edges(*access).begin();
12✔
369
                    if (iedge.type() != data_flow::MemletType::Computational) {
12✔
370
                        continue;
×
371
                    }
×
372
                    if (found_producer) {
12✔
373
                        return false; // Multiple writes to same container
×
374
                    }
×
375
                    producer_write_subset = iedge.subset();
12✔
376
                    found_producer = true;
12✔
377
                }
12✔
378
            }
15✔
379
        }
31✔
380
        if (!found_producer || producer_write_subset.empty()) {
12✔
381
            return false;
×
382
        }
×
383

384
        // Collect all consumer read subsets for this container
385
        std::vector<data_flow::Subset> consumer_read_subsets;
12✔
386
        for (auto* block : consumer_blocks) {
31✔
387
            auto& dataflow = block->dataflow();
31✔
388
            for (auto& node : dataflow.nodes()) {
112✔
389
                auto* access = dynamic_cast<data_flow::AccessNode*>(&node);
112✔
390
                if (access == nullptr || access->data() != container) {
112✔
391
                    continue;
91✔
392
                }
91✔
393
                // It's a read if it has outgoing edges
394
                if (dataflow.out_degree(*access) > 0) {
21✔
395
                    for (auto& memlet : dataflow.out_edges(*access)) {
32✔
396
                        if (memlet.type() != data_flow::MemletType::Computational) {
32✔
397
                            continue;
×
398
                        }
×
399
                        auto& subset = memlet.subset();
32✔
400
                        // Deduplicate
401
                        bool found = false;
32✔
402
                        for (const auto& existing : consumer_read_subsets) {
32✔
403
                            if (existing.size() != subset.size()) continue;
32✔
404
                            bool match = true;
32✔
405
                            for (size_t d = 0; d < existing.size(); ++d) {
35✔
406
                                if (!symbolic::eq(existing[d], subset[d])) {
35✔
407
                                    match = false;
32✔
408
                                    break;
32✔
409
                                }
32✔
410
                            }
35✔
411
                            if (match) {
32✔
412
                                found = true;
×
413
                                break;
×
414
                            }
×
415
                        }
32✔
416
                        if (!found) {
32✔
417
                            consumer_read_subsets.push_back(subset);
32✔
418
                        }
32✔
419
                    }
32✔
420
                }
21✔
421
            }
21✔
422
        }
31✔
423
        if (consumer_read_subsets.empty()) {
12✔
424
            return false;
×
425
        }
×
426

427
        int radius = compute_radius(
12✔
428
            producer_write_subset,
12✔
429
            consumer_read_subsets,
12✔
430
            producer_loops,
12✔
431
            consumer_loops,
12✔
432
            producer_assumptions,
12✔
433
            consumer_assumptions
12✔
434
        );
12✔
435
        if (radius < 0) {
12✔
436
            return false;
×
437
        }
×
438

439
        // ==========================================================================
440
        // Criterion 7: Radius must be less than tile size
441
        // ==========================================================================
442
        if (radius >= tile_size) {
12✔
443
            return false;
×
444
        }
×
445

446
        if (radius > overall_radius) {
12✔
447
            overall_radius = radius;
8✔
448
        }
8✔
449

450
        TileFusionCandidate candidate;
12✔
451
        candidate.container = container;
12✔
452
        candidate.radius = radius;
12✔
453
        candidates_.push_back(candidate);
12✔
454
    }
12✔
455

456
    radius_ = overall_radius;
12✔
457

458
    // ==========================================================================
459
    // Criterion 8: Analyze cyclic containers for double buffering
460
    // Use MemoryLayoutAnalysis to delinearize accesses and reason about
461
    // multi-dimensional stencil patterns (e.g., A[i*N+j] → A[i][j]).
462
    // For each cyclic container, identify the tiled dimension, extract
463
    // stencil offsets in that dimension, and compute buffer geometry.
464
    // Only needed when radius > 0 (tiles overlap).
465
    // ==========================================================================
466
    cyclic_containers_.clear();
12✔
467

468
    if (overall_radius > 0) {
12✔
469
        auto& mla = analysis_manager.get<analysis::MemoryLayoutAnalysis>();
8✔
470

471
        for (const auto& container : cyclic_container_names) {
8✔
472
            // Collect all K1 read memlets for this container
473
            struct ReadMemletInfo {
7✔
474
                const data_flow::Memlet* memlet;
7✔
475
            };
7✔
476
            std::vector<ReadMemletInfo> read_memlets;
7✔
477
            for (auto* block : producer_blocks) {
23✔
478
                auto& dataflow = block->dataflow();
23✔
479
                for (auto& node : dataflow.nodes()) {
85✔
480
                    auto* access = dynamic_cast<data_flow::AccessNode*>(&node);
85✔
481
                    if (access == nullptr || access->data() != container) continue;
85✔
482
                    if (dataflow.out_degree(*access) > 0) {
16✔
483
                        for (auto& memlet : dataflow.out_edges(*access)) {
23✔
484
                            if (memlet.type() != data_flow::MemletType::Computational) continue;
23✔
485
                            read_memlets.push_back({&memlet});
23✔
486
                        }
23✔
487
                    }
16✔
488
                }
16✔
489
            }
23✔
490
            if (read_memlets.empty()) {
7✔
NEW
491
                return false;
×
NEW
492
            }
×
493

494
            auto inner_indvar = first_inner_map->indvar();
7✔
495

496
            int min_offset = std::numeric_limits<int>::max();
7✔
497
            int max_offset = std::numeric_limits<int>::min();
7✔
498
            int tiled_dim = -1;
7✔
499
            std::vector<symbolic::Expression> layout_dimensions;
7✔
500
            std::vector<symbolic::Expression> layout_strides;
7✔
501
            symbolic::Expression layout_offset = symbolic::integer(0);
7✔
502

503
            for (auto& info : read_memlets) {
23✔
504
                // Use MemoryLayoutAnalysis to get delinearized access
505
                auto* mem_access = mla.access(*info.memlet);
23✔
506
                if (mem_access == nullptr) {
23✔
NEW
507
                    return false; // Delinearization failed
×
NEW
508
                }
×
509

510
                auto& delin_subset = mem_access->subset;
23✔
511

512
                // Find which dimension contains the inner (tiled) indvar
513
                for (size_t d = 0; d < delin_subset.size(); d++) {
51✔
514
                    if (symbolic::uses(delin_subset[d], inner_indvar->get_name())) {
28✔
515
                        if (tiled_dim == -1) {
23✔
516
                            tiled_dim = static_cast<int>(d);
7✔
517
                        } else if (tiled_dim != static_cast<int>(d)) {
16✔
NEW
518
                            return false; // Indvar appears in multiple dimensions
×
NEW
519
                        }
×
520

521
                        // Decompose this dimension: should be coeff*indvar + offset
522
                        auto decomp = symbolic::affine_decomposition(delin_subset[d], inner_indvar);
23✔
523
                        if (!decomp.success) {
23✔
NEW
524
                            return false;
×
NEW
525
                        }
×
526
                        auto coeff_int = SymEngine::rcp_static_cast<const SymEngine::Integer>(decomp.coeff)->as_int();
23✔
527
                        if (coeff_int != 1) {
23✔
NEW
528
                            return false; // Non-unit stride in tiled dimension
×
NEW
529
                        }
×
530
                        auto offset_expr = decomp.offset;
23✔
531
                        if (!SymEngine::is_a<SymEngine::Integer>(*offset_expr)) {
23✔
NEW
532
                            return false; // Non-constant offset
×
NEW
533
                        }
×
534
                        int offset =
23✔
535
                            static_cast<int>(SymEngine::down_cast<const SymEngine::Integer&>(*offset_expr).as_int());
23✔
536
                        min_offset = std::min(min_offset, offset);
23✔
537
                        max_offset = std::max(max_offset, offset);
23✔
538
                    }
23✔
539
                }
28✔
540

541
                // Capture layout info from first access (all accesses to same container share layout)
542
                if (layout_dimensions.empty()) {
23✔
543
                    auto& layout = mem_access->layout;
7✔
544
                    for (size_t d = 0; d < layout.dims(); d++) {
15✔
545
                        layout_dimensions.push_back(layout.shape().at(d));
8✔
546
                        layout_strides.push_back(layout.strides().at(d));
8✔
547
                    }
8✔
548
                    layout_offset = layout.offset();
7✔
549
                }
7✔
550
            }
23✔
551

552
            if (tiled_dim == -1) {
7✔
NEW
553
                return false; // No dimension depends on inner indvar
×
NEW
554
            }
×
555

556
            int tiled_dim_buf_size = tile_size + 2 * overall_radius + max_offset - min_offset;
7✔
557

558
            // Compute total buffer size: tiled_dim_buf_size * product(other dimensions)
559
            symbolic::Expression total_buf_size = symbolic::integer(tiled_dim_buf_size);
7✔
560
            for (size_t d = 0; d < layout_dimensions.size(); d++) {
15✔
561
                if (static_cast<int>(d) != tiled_dim) {
8✔
562
                    total_buf_size = symbolic::mul(total_buf_size, layout_dimensions[d]);
1✔
563
                }
1✔
564
            }
8✔
565

566
            // Total buffer size must be a constant integer for now
567
            // (non-tiled dimensions may be symbolic like N — that's fine, buffer is symbolic-sized)
568
            CyclicContainerInfo cyc_info;
7✔
569
            cyc_info.container = container;
7✔
570
            cyc_info.min_offset = min_offset;
7✔
571
            cyc_info.max_offset = max_offset;
7✔
572
            cyc_info.tiled_dim = tiled_dim;
7✔
573
            cyc_info.tiled_dim_buf_size = tiled_dim_buf_size;
7✔
574
            cyc_info.dimensions = layout_dimensions;
7✔
575
            cyc_info.strides = layout_strides;
7✔
576
            cyc_info.layout_offset = layout_offset;
7✔
577
            // buffer_size is now used as a symbolic expression in apply, but store int for 1D compat
578
            if (SymEngine::is_a<SymEngine::Integer>(*total_buf_size)) {
7✔
579
                cyc_info.buffer_size =
6✔
580
                    static_cast<int>(SymEngine::down_cast<const SymEngine::Integer&>(*total_buf_size).as_int());
6✔
581
            } else {
6✔
582
                cyc_info.buffer_size = -1; // Symbolic — will use expression in apply
1✔
583
            }
1✔
584
            cyclic_containers_.push_back(cyc_info);
7✔
585
        }
7✔
586
    } // if (overall_radius > 0)
8✔
587

588
    return true;
12✔
589
}
12✔
590

591
void TileFusion::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
10✔
592
    auto& sdfg = builder.subject();
10✔
593
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
10✔
594
    auto& assumptions_analysis = analysis_manager.get<analysis::AssumptionsAnalysis>();
10✔
595

596
    // Get parent sequence
597
    auto* parent = static_cast<structured_control_flow::Sequence*>(scope_analysis.parent_scope(&first_map_));
10✔
598
    size_t first_index = parent->index(first_map_);
10✔
599

600
    // Extract tile loop properties from first map (representative)
601
    auto tile_indvar = first_map_.indvar();
10✔
602
    auto tile_init = first_map_.init();
10✔
603
    auto tile_condition = first_map_.condition();
10✔
604
    auto tile_update = first_map_.update();
10✔
605

606
    // Extract tile size
607
    auto stride = first_map_.stride();
10✔
608
    auto tile_size_expr = stride;
10✔
609

610
    // Get references to inner maps before moving
611
    auto* first_inner_map = dynamic_cast<structured_control_flow::Map*>(&first_map_.root().at(0).first);
10✔
612
    auto* second_inner_map = dynamic_cast<structured_control_flow::Map*>(&second_map_.root().at(0).first);
10✔
613

614
    // Extract inner map properties before they get moved
615
    auto first_inner_indvar = first_inner_map->indvar();
10✔
616
    auto first_inner_init = first_inner_map->init();
10✔
617
    auto first_inner_condition = first_inner_map->condition();
10✔
618
    auto first_inner_update = first_inner_map->update();
10✔
619
    auto first_inner_schedule = first_inner_map->schedule_type();
10✔
620

621
    auto second_inner_indvar = second_inner_map->indvar();
10✔
622
    auto second_inner_init = second_inner_map->init();
10✔
623
    auto second_inner_condition = second_inner_map->condition();
10✔
624
    auto second_inner_update = second_inner_map->update();
10✔
625
    auto second_inner_schedule = second_inner_map->schedule_type();
10✔
626

627
    // Get the old tile indvars to substitute later
628
    auto first_tile_indvar = first_map_.indvar();
10✔
629
    auto second_tile_indvar = second_map_.indvar();
10✔
630

631
    // Step 1: Create a new For tile loop with the same bounds as the first tile Map
632
    // Use the condition from the first map (which uses first_tile_indvar)
633
    // We need a fresh indvar for the new tile loop
634
    auto new_tile_indvar_name = builder.find_new_name(tile_indvar->get_name());
10✔
635
    builder.add_container(new_tile_indvar_name, sdfg.type(tile_indvar->get_name()));
10✔
636
    auto new_tile_indvar = symbolic::symbol(new_tile_indvar_name);
10✔
637

638
    // Substitute the old tile indvar in the condition with the new one
639
    auto new_tile_condition = symbolic::subs(tile_condition, tile_indvar, new_tile_indvar);
10✔
640

641
    auto& fused_for = builder.add_for_before(
10✔
642
        *parent,
10✔
643
        first_map_,
10✔
644
        new_tile_indvar,
10✔
645
        new_tile_condition,
10✔
646
        tile_init,
10✔
647
        symbolic::subs(tile_update, tile_indvar, new_tile_indvar),
10✔
648
        {},
10✔
649
        first_map_.debug_info()
10✔
650
    );
10✔
651

652
    // Step 2: Create the producer inner Map inside the fused For
653
    // Its init and condition need to reference the new tile indvar
654
    auto new_first_inner_init = symbolic::subs(first_inner_init, first_tile_indvar, new_tile_indvar);
10✔
655
    auto new_first_inner_condition = symbolic::subs(first_inner_condition, first_tile_indvar, new_tile_indvar);
10✔
656

657
    // Extend the producer's range by the radius
658
    if (radius_ > 0) {
10✔
659
        // Extend init: max(0, tile - radius) -> take the original init and subtract radius
660
        // Original init is typically: tile_indvar (i.e., the iteration starts at the tile boundary)
661
        // New init: max(original_lower_bound, new_init - radius)
662
        auto radius_expr = symbolic::integer(radius_);
6✔
663
        auto extended_init = symbolic::max(symbolic::integer(0), symbolic::sub(new_first_inner_init, radius_expr));
6✔
664
        new_first_inner_init = extended_init;
6✔
665

666
        // Extend condition: the condition has form like And(i < tile + S, i < N)
667
        // We need to adjust the tile-relative bound: i < tile + S becomes i < tile + S + radius
668
        // We do this by substituting new_tile_indvar with (new_tile_indvar + radius) in the
669
        // tile-relative part. Since the condition is a conjunction, we can reconstruct it:
670
        // Original: And(inner_indvar < new_tile_indvar + S, inner_indvar < N)
671
        // Extended: And(inner_indvar < new_tile_indvar + S + radius, inner_indvar < N)
672
        auto extended_tile_bound =
6✔
673
            symbolic::Lt(first_inner_indvar, symbolic::add(new_tile_indvar, symbolic::add(tile_size_expr, radius_expr)));
6✔
674

675
        // Extract non-tile upper bounds from the original inner map condition.
676
        // canonical_bound() returns min of ALL bounds including the tile bound,
677
        // but we only want bounds independent of the tile indvar (e.g., array size).
678
        auto original_condition = first_inner_map->condition();
6✔
679
        symbolic::CNF cnf;
6✔
680
        try {
6✔
681
            cnf = symbolic::conjunctive_normal_form(original_condition);
6✔
682
        } catch (...) {
6✔
NEW
683
            cnf.clear();
×
NEW
684
        }
×
685

686
        // Collect non-tile upper bounds on the inner indvar
687
        std::vector<symbolic::Expression> non_tile_bounds;
6✔
688
        for (auto& clause : cnf) {
12✔
689
            for (auto& literal : clause) {
12✔
690
                // Check for i < bound forms
691
                if (SymEngine::is_a<SymEngine::StrictLessThan>(*literal)) {
12✔
692
                    auto lt = SymEngine::rcp_static_cast<const SymEngine::StrictLessThan>(literal);
12✔
693
                    auto lhs = lt->get_arg1();
12✔
694
                    auto rhs = lt->get_arg2();
12✔
695
                    if (symbolic::eq(lhs, first_inner_indvar) && !symbolic::uses(rhs, first_tile_indvar->get_name())) {
12✔
696
                        non_tile_bounds.push_back(rhs);
6✔
697
                    }
6✔
698
                }
12✔
699
            }
12✔
700
        }
12✔
701

702
        if (!non_tile_bounds.empty()) {
6✔
703
            // Build condition: extended_tile_bound AND all non-tile bounds
704
            auto result_condition = extended_tile_bound;
6✔
705
            for (auto& bound : non_tile_bounds) {
6✔
706
                result_condition = symbolic::And(result_condition, symbolic::Lt(first_inner_indvar, bound));
6✔
707
            }
6✔
708
            new_first_inner_condition = result_condition;
6✔
709
        } else {
6✔
710
            new_first_inner_condition = extended_tile_bound;
×
711
        }
×
712
    }
6✔
713

714
    auto& new_first_inner = builder.add_map(
10✔
715
        fused_for.root(),
10✔
716
        first_inner_indvar,
10✔
717
        new_first_inner_condition,
10✔
718
        new_first_inner_init,
10✔
719
        first_inner_update,
10✔
720
        first_inner_schedule
10✔
721
    );
10✔
722

723
    // Move the producer body into the new inner map
724
    builder.move_children(first_inner_map->root(), new_first_inner.root());
10✔
725

726
    // Step 3: Create the consumer inner Map inside the fused For, after the producer
727
    auto new_second_inner_init = symbolic::subs(second_inner_init, second_tile_indvar, new_tile_indvar);
10✔
728
    auto new_second_inner_condition = symbolic::subs(second_inner_condition, second_tile_indvar, new_tile_indvar);
10✔
729

730
    auto& new_second_inner = builder.add_map(
10✔
731
        fused_for.root(),
10✔
732
        second_inner_indvar,
10✔
733
        new_second_inner_condition,
10✔
734
        new_second_inner_init,
10✔
735
        second_inner_update,
10✔
736
        second_inner_schedule
10✔
737
    );
10✔
738

739
    // Move the consumer body into the new inner map
740
    builder.move_children(second_inner_map->root(), new_second_inner.root());
10✔
741

742
    // ==========================================================================
743
    // Step 3.5: Double-buffer pre-fetch for cyclic containers
744
    //
745
    // Uses layout info from MemoryLayoutAnalysis (computed in can_be_applied)
746
    // to handle multi-dimensional stencils. For each cyclic container,
747
    // generates nested copy loops and rewrites K1's reads to use the buffer.
748
    // ==========================================================================
749

750
    for (const auto& cyc : cyclic_containers_) {
10✔
751
        auto& container_type = sdfg.type(cyc.container);
5✔
752
        // Get scalar element type from container (Pointer or Array)
753
        const types::IType* scalar_type_ptr = nullptr;
5✔
754
        if (auto* ptr_type = dynamic_cast<const types::Pointer*>(&container_type)) {
5✔
755
            scalar_type_ptr = &ptr_type->pointee_type();
1✔
756
        } else if (auto* arr_type = dynamic_cast<const types::Array*>(&container_type)) {
4✔
757
            scalar_type_ptr = &arr_type->element_type();
4✔
758
        } else {
4✔
NEW
759
            throw std::runtime_error("TileFusion: cyclic container '" + cyc.container + "' has unsupported type");
×
NEW
760
        }
×
761
        auto& scalar_type = *scalar_type_ptr;
5✔
762
        bool is_pointer = dynamic_cast<const types::Pointer*>(&container_type) != nullptr;
5✔
763

764
        auto radius_expr = symbolic::integer(radius_);
5✔
765
        auto min_offset_expr = symbolic::integer(cyc.min_offset);
5✔
766
        types::Scalar idx_type(types::PrimitiveType::UInt64);
5✔
767

768
        // Build buffer dimensions: same as layout but with tiled dim replaced by tiled_dim_buf_size
769
        std::vector<symbolic::Expression> buf_dimensions;
5✔
770
        for (size_t d = 0; d < cyc.dimensions.size(); d++) {
11✔
771
            if (static_cast<int>(d) == cyc.tiled_dim) {
6✔
772
                buf_dimensions.push_back(symbolic::integer(cyc.tiled_dim_buf_size));
5✔
773
            } else {
5✔
774
                buf_dimensions.push_back(cyc.dimensions[d]);
1✔
775
            }
1✔
776
        }
6✔
777

778
        // Compute total buffer size
779
        symbolic::Expression total_buf_size = symbolic::integer(1);
5✔
780
        for (auto& dim : buf_dimensions) {
6✔
781
            total_buf_size = symbolic::mul(total_buf_size, dim);
6✔
782
        }
6✔
783

784
        // Create two buffer containers (flat 1D arrays of total_buf_size)
785
        auto buf_cur_name = "__tf_buf_cur_" + cyc.container;
5✔
786
        auto buf_pf_name = "__tf_buf_pf_" + cyc.container;
5✔
787
        types::Array buf_type(scalar_type, total_buf_size);
5✔
788
        builder.add_container(buf_cur_name, buf_type);
5✔
789
        builder.add_container(buf_pf_name, buf_type);
5✔
790

791
        // Helper: linearize multi-dim indices into flat buffer index (row-major)
792
        auto linearize_buf = [&](const std::vector<symbolic::Expression>& indices) -> symbolic::Expression {
27✔
793
            symbolic::Expression linear_idx = symbolic::integer(0);
27✔
794
            symbolic::Expression stride = symbolic::integer(1);
27✔
795
            for (int i = static_cast<int>(indices.size()) - 1; i >= 0; i--) {
57✔
796
                linear_idx = symbolic::add(linear_idx, symbolic::mul(indices[i], stride));
30✔
797
                stride = symbolic::mul(stride, buf_dimensions[i]);
30✔
798
            }
30✔
799
            return linear_idx;
27✔
800
        };
27✔
801

802
        // Helper: build original source subset from multi-dim copy indices
803
        // For the tiled dim, the source index is: base + copy_indvar
804
        // For non-tiled dims, the source index is just: copy_indvar
805
        // If the container is a Pointer, re-linearize using layout strides
806
        auto build_src_subset = [&](const std::vector<symbolic::Expression>& copy_indices,
5✔
807
                                    const symbolic::Expression& tiled_base) -> data_flow::Subset {
10✔
808
            std::vector<symbolic::Expression> full_indices;
10✔
809
            for (size_t d = 0; d < cyc.dimensions.size(); d++) {
22✔
810
                if (static_cast<int>(d) == cyc.tiled_dim) {
12✔
811
                    full_indices.push_back(symbolic::add(tiled_base, copy_indices[d]));
10✔
812
                } else {
10✔
813
                    full_indices.push_back(copy_indices[d]);
2✔
814
                }
2✔
815
            }
12✔
816
            if (is_pointer) {
10✔
817
                // Re-linearize using layout strides: offset + sum(stride[d] * index[d])
818
                symbolic::Expression linear = cyc.layout_offset;
2✔
819
                for (size_t d = 0; d < full_indices.size(); d++) {
6✔
820
                    linear = symbolic::add(linear, symbolic::mul(cyc.strides[d], full_indices[d]));
4✔
821
                }
4✔
822
                return {linear};
2✔
823
            } else {
8✔
824
                return data_flow::Subset(full_indices.begin(), full_indices.end());
8✔
825
            }
8✔
826
        };
10✔
827

828
        // Helper: generate nested copy loops and a copy block
829
        // copy_kind: 0 = src→buf_cur, 1 = src→buf_pf, 2 = buf_pf→buf_cur
830
        auto generate_copy_loops = [&](structured_control_flow::Sequence& parent_scope,
5✔
831
                                       structured_control_flow::ControlFlowNode* insert_before,
5✔
832
                                       const symbolic::Expression& tiled_base,
5✔
833
                                       int copy_kind) {
15✔
834
            // Create indvars for each dimension
835
            std::vector<symbolic::Symbol> copy_indvars;
15✔
836
            structured_control_flow::Sequence* scope = &parent_scope;
15✔
837
            bool first_loop = true;
15✔
838

839
            for (size_t d = 0; d < buf_dimensions.size(); d++) {
33✔
840
                std::string suffix = (copy_kind == 0) ? "init" : (copy_kind == 1) ? "pf" : "sw";
18✔
841
                auto indvar_name = builder.find_new_name("__tf_" + suffix + "_d" + std::to_string(d));
18✔
842
                builder.add_container(indvar_name, idx_type);
18✔
843
                auto indvar = symbolic::symbol(indvar_name);
18✔
844
                copy_indvars.push_back(indvar);
18✔
845

846
                auto init = symbolic::integer(0);
18✔
847
                auto condition = symbolic::Lt(indvar, buf_dimensions[d]);
18✔
848
                auto update = symbolic::add(indvar, symbolic::integer(1));
18✔
849

850
                if (first_loop && insert_before != nullptr) {
18✔
851
                    auto& loop = builder.add_map_before(
5✔
852
                        *scope,
5✔
853
                        *insert_before,
5✔
854
                        indvar,
5✔
855
                        condition,
5✔
856
                        init,
5✔
857
                        update,
5✔
858
                        structured_control_flow::ScheduleType_Sequential::create()
5✔
859
                    );
5✔
860
                    scope = &loop.root();
5✔
861
                    first_loop = false;
5✔
862
                } else if (first_loop) {
13✔
863
                    auto& loop = builder.add_map(
10✔
864
                        *scope,
10✔
865
                        indvar,
10✔
866
                        condition,
10✔
867
                        init,
10✔
868
                        update,
10✔
869
                        structured_control_flow::ScheduleType_Sequential::create()
10✔
870
                    );
10✔
871
                    scope = &loop.root();
10✔
872
                    first_loop = false;
10✔
873
                } else {
10✔
874
                    auto& loop = builder.add_map(
3✔
875
                        *scope,
3✔
876
                        indvar,
3✔
877
                        condition,
3✔
878
                        init,
3✔
879
                        update,
3✔
880
                        structured_control_flow::ScheduleType_Sequential::create()
3✔
881
                    );
3✔
882
                    scope = &loop.root();
3✔
883
                }
3✔
884
            }
18✔
885

886
            // Create copy block
887
            auto& blk = builder.add_block(*scope);
15✔
888
            std::vector<symbolic::Expression> copy_exprs(copy_indvars.begin(), copy_indvars.end());
15✔
889
            auto buf_subset = data_flow::Subset{linearize_buf(copy_exprs)};
15✔
890

891
            if (copy_kind == 0 || copy_kind == 1) {
15✔
892
                // src container → buffer
893
                auto src_subset = build_src_subset(copy_exprs, tiled_base);
10✔
894
                auto& src = builder.add_access(blk, cyc.container);
10✔
895
                auto& dst = builder.add_access(blk, (copy_kind == 0) ? buf_cur_name : buf_pf_name);
10✔
896
                auto& tasklet = builder.add_tasklet(blk, data_flow::TaskletCode::assign, "_out", {"_in"});
10✔
897
                builder.add_computational_memlet(blk, src, tasklet, "_in", src_subset, container_type);
10✔
898
                builder.add_computational_memlet(blk, tasklet, "_out", dst, buf_subset, buf_type);
10✔
899
            } else {
10✔
900
                // buf_pf → buf_cur
901
                auto& src = builder.add_access(blk, buf_pf_name);
5✔
902
                auto& dst = builder.add_access(blk, buf_cur_name);
5✔
903
                auto& tasklet = builder.add_tasklet(blk, data_flow::TaskletCode::assign, "_out", {"_in"});
5✔
904
                builder.add_computational_memlet(blk, src, tasklet, "_in", buf_subset, buf_type);
5✔
905
                builder.add_computational_memlet(blk, tasklet, "_out", dst, buf_subset, buf_type);
5✔
906
            }
5✔
907
        };
15✔
908

909
        // ==================================================================
910
        // 3.5a: Initial copy INSIDE the fused For, guarded by if (tile == init)
911
        // On the first iteration, we load from the original array.
912
        // On subsequent iterations, buf_cur already has the right data from swap.
913
        // Base for tiled dim: tile_init - radius + min_offset
914
        // ==================================================================
915
        auto init_tiled_base = symbolic::sub(symbolic::add(tile_init, min_offset_expr), radius_expr);
5✔
916

917
        // Get a reference to the pre-fetch map (first child of fused_for) to insert before it
918
        auto& first_child_in_fused = fused_for.root().at(0).first;
5✔
919

920
        auto& init_if_else =
5✔
921
            builder.add_if_else_before(fused_for.root(), first_child_in_fused, sdfg::control_flow::Assignments{});
5✔
922
        auto init_cond = symbolic::Eq(new_tile_indvar, tile_init);
5✔
923
        auto& init_then = builder.add_case(init_if_else, init_cond);
5✔
924
        // Empty else branch (on subsequent iters, buf_cur has data from swap)
925
        builder.add_case(init_if_else, symbolic::Not(init_cond));
5✔
926

927
        generate_copy_loops(init_then, nullptr, init_tiled_base, 0);
5✔
928

929
        // ==================================================================
930
        // 3.5b: Pre-fetch INSIDE the fused For, BEFORE K1
931
        // Base for tiled dim: tile + S - radius + min_offset
932
        // ==================================================================
933
        auto pf_tiled_base =
5✔
934
            symbolic::sub(symbolic::add(new_tile_indvar, symbolic::add(tile_size_expr, min_offset_expr)), radius_expr);
5✔
935
        generate_copy_loops(fused_for.root(), &new_first_inner, pf_tiled_base, 1);
5✔
936

937
        // ==================================================================
938
        // 3.5c: Redirect K1's reads of container to buf_cur
939
        // Use MemoryLayoutAnalysis to get delinearized indices, rebase
940
        // the tiled dimension, then re-linearize into the flat buffer.
941
        // ==================================================================
942
        auto buf_cur_base_tiled = symbolic::sub(symbolic::add(new_tile_indvar, min_offset_expr), radius_expr);
5✔
943

944
        // Helper: rebase a raw subset for the buffer.
945
        // For Pointer (linearized) access A[expr]:
946
        //   buf_index = expr - stride[tiled_dim] * buf_cur_base_tiled
947
        // For Array (multi-dim) access A[idx0, idx1, ...]:
948
        //   rebase tiled_dim, linearize into flat buffer index
949
        auto rebase_to_buffer = [&](const data_flow::Subset& subset) -> data_flow::Subset {
17✔
950
            if (subset.size() == 1 && cyc.dimensions.size() > 1 && is_pointer) {
17✔
951
                // Pointer type: subtract the tiled dimension's contribution to the linear base
952
                auto linear_base = symbolic::mul(cyc.strides[cyc.tiled_dim], buf_cur_base_tiled);
5✔
953
                return {symbolic::sub(subset.at(0), linear_base)};
5✔
954
            }
5✔
955
            if (subset.size() == cyc.dimensions.size()) {
12✔
956
                // Multi-dim: rebase tiled dimension, linearize into buffer
957
                std::vector<symbolic::Expression> buf_indices;
12✔
958
                for (size_t d = 0; d < cyc.dimensions.size(); d++) {
24✔
959
                    if (static_cast<int>(d) == cyc.tiled_dim) {
12✔
960
                        buf_indices.push_back(symbolic::sub(subset[d], buf_cur_base_tiled));
12✔
961
                    } else {
12✔
NEW
962
                        buf_indices.push_back(subset[d]);
×
NEW
963
                    }
×
964
                }
12✔
965
                return {linearize_buf(buf_indices)};
12✔
966
            }
12✔
967
            // 1D Array: rebase directly
NEW
968
            if (subset.size() == 1) {
×
NEW
969
                return {symbolic::sub(subset.at(0), buf_cur_base_tiled)};
×
NEW
970
            }
×
NEW
971
            return subset; // Fallback: no rebase
×
NEW
972
        };
×
973

974
        std::function<void(structured_control_flow::ControlFlowNode&)> rewrite_accesses;
5✔
975
        rewrite_accesses = [&](structured_control_flow::ControlFlowNode& node) {
24✔
976
            if (auto* block = dynamic_cast<structured_control_flow::Block*>(&node)) {
24✔
977
                auto& dfg = block->dataflow();
17✔
978
                for (auto* access : dfg.data_nodes()) {
46✔
979
                    if (access->data() != cyc.container) continue;
46✔
980
                    bool has_writes = dfg.in_degree(*access) > 0;
12✔
981
                    for (auto& memlet : dfg.out_edges(*access)) {
17✔
982
                        if (memlet.type() != data_flow::MemletType::Computational) continue;
17✔
983
                        memlet.set_subset(rebase_to_buffer(memlet.subset()));
17✔
984
                        memlet.set_base_type(buf_type);
17✔
985
                    }
17✔
986
                    if (!has_writes) {
12✔
987
                        access->data(buf_cur_name);
12✔
988
                    }
12✔
989
                }
12✔
990
            } else if (auto* seq = dynamic_cast<structured_control_flow::Sequence*>(&node)) {
17✔
991
                for (size_t i = 0; i < seq->size(); i++) {
24✔
992
                    rewrite_accesses(seq->at(i).first);
18✔
993
                }
18✔
994
            } else if (auto* loop = dynamic_cast<structured_control_flow::StructuredLoop*>(&node)) {
6✔
995
                rewrite_accesses(loop->root());
1✔
996
            } else if (auto* if_else = dynamic_cast<structured_control_flow::IfElse*>(&node)) {
1✔
NEW
997
                for (size_t i = 0; i < if_else->size(); i++) {
×
NEW
998
                    rewrite_accesses(if_else->at(i).first);
×
NEW
999
                }
×
NEW
1000
            }
×
1001
        };
24✔
1002
        rewrite_accesses(new_first_inner.root());
5✔
1003

1004
        // ==================================================================
1005
        // 3.5d: Swap INSIDE the fused For, AFTER K2
1006
        // buf_cur = buf_pf
1007
        // ==================================================================
1008
        generate_copy_loops(fused_for.root(), nullptr, symbolic::integer(0), 2);
5✔
1009
    }
5✔
1010

1011
    // Step 4: Remove the original two tile Map nests
1012
    // After we've moved the children out, remove the old maps from the parent
1013
    // The fused_for was inserted before first_map_, so first_map_ is now at first_index + 1
1014
    // and second_map_ is at first_index + 2
1015
    // Remove second first (higher index) to avoid invalidating first's index
1016
    size_t current_first_index = parent->index(first_map_);
10✔
1017
    size_t current_second_index = parent->index(second_map_);
10✔
1018
    builder.remove_child(*parent, current_second_index);
10✔
1019
    builder.remove_child(*parent, current_first_index);
10✔
1020

1021
    analysis_manager.invalidate_all();
10✔
1022
    applied_ = true;
10✔
1023
    fused_loop_ = &fused_for;
10✔
1024
}
10✔
1025

1026
void TileFusion::to_json(nlohmann::json& j) const {
6✔
1027
    j["transformation_type"] = this->name();
6✔
1028
    j["subgraph"] = {
6✔
1029
        {"0", {{"element_id", first_map_.element_id()}, {"type", "map"}}},
6✔
1030
        {"1", {{"element_id", second_map_.element_id()}, {"type", "map"}}}
6✔
1031
    };
6✔
1032
    j["parameters"] = {{"radius", radius_}};
6✔
1033
}
6✔
1034

1035
TileFusion TileFusion::from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& desc) {
1✔
1036
    auto first_map_id = desc["subgraph"]["0"]["element_id"].get<size_t>();
1✔
1037
    auto second_map_id = desc["subgraph"]["1"]["element_id"].get<size_t>();
1✔
1038

1039
    auto first_element = builder.find_element_by_id(first_map_id);
1✔
1040
    auto second_element = builder.find_element_by_id(second_map_id);
1✔
1041

1042
    if (first_element == nullptr) {
1✔
1043
        throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(first_map_id) + " not found.");
×
1044
    }
×
1045
    if (second_element == nullptr) {
1✔
1046
        throw InvalidTransformationDescriptionException(
×
1047
            "Element with ID " + std::to_string(second_map_id) + " not found."
×
1048
        );
×
1049
    }
×
1050

1051
    auto* first_map = dynamic_cast<structured_control_flow::Map*>(first_element);
1✔
1052
    auto* second_map = dynamic_cast<structured_control_flow::Map*>(second_element);
1✔
1053

1054
    if (first_map == nullptr) {
1✔
1055
        throw InvalidTransformationDescriptionException(
×
1056
            "Element with ID " + std::to_string(first_map_id) + " is not a Map."
×
1057
        );
×
1058
    }
×
1059
    if (second_map == nullptr) {
1✔
1060
        throw InvalidTransformationDescriptionException(
×
1061
            "Element with ID " + std::to_string(second_map_id) + " is not a Map."
×
1062
        );
×
1063
    }
×
1064

1065
    return TileFusion(*first_map, *second_map);
1✔
1066
}
1✔
1067

1068
structured_control_flow::StructuredLoop* TileFusion::fused_loop() const {
×
1069
    if (!applied_) {
×
1070
        throw InvalidTransformationException("Accessing fused loop before apply.");
×
1071
    }
×
1072
    return fused_loop_;
×
1073
}
×
1074

1075
int TileFusion::radius() const { return radius_; }
4✔
1076

1077
} // namespace transformations
1078
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc