• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 25316236512

04 May 2026 11:24AM UTC coverage: 64.448% (+0.1%) from 64.317%
25316236512

push

github

web-flow
Merge pull request #699 from daisytuner/memory-tile-groups

Extends memory layout analysis to support groups of memlets

306 of 355 new or added lines in 7 files covered. (86.2%)

2 existing lines in 1 file now uncovered.

31949 of 49573 relevant lines covered (64.45%)

2301.3 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.52
/sdfg/src/analysis/memory_layout_analysis.cpp
1
#include "sdfg/analysis/memory_layout_analysis.h"
2

3
#include <algorithm>
4
#include <optional>
5
#include <set>
6
#include <unordered_set>
7

8
#include "sdfg/analysis/assumptions_analysis.h"
9
#include "sdfg/data_flow/access_node.h"
10
#include "sdfg/structured_control_flow/block.h"
11
#include "sdfg/structured_control_flow/if_else.h"
12
#include "sdfg/structured_control_flow/sequence.h"
13
#include "sdfg/structured_control_flow/structured_loop.h"
14
#include "sdfg/structured_control_flow/while.h"
15
#include "sdfg/symbolic/delinearization.h"
16
#include "sdfg/symbolic/extreme_values.h"
17
#include "sdfg/symbolic/polynomials.h"
18

19
namespace sdfg {
20
namespace analysis {
21

22
namespace {
23
// Collect StructuredLoop nodes that are direct children of the given node,
24
// stopping at loop boundaries (does not recurse into nested loops).
25
void collect_direct_child_loops(
26
    structured_control_flow::ControlFlowNode& node, std::set<const structured_control_flow::StructuredLoop*>& result
27
) {
481✔
28
    if (auto* loop = dynamic_cast<structured_control_flow::StructuredLoop*>(&node)) {
481✔
29
        result.insert(loop);
144✔
30
        return;
144✔
31
    }
144✔
32
    if (auto* seq = dynamic_cast<structured_control_flow::Sequence*>(&node)) {
337✔
33
        for (size_t i = 0; i < seq->size(); i++) {
481✔
34
            collect_direct_child_loops(seq->at(i).first, result);
270✔
35
        }
270✔
36
    } else if (auto* ife = dynamic_cast<structured_control_flow::IfElse*>(&node)) {
211✔
37
        for (size_t i = 0; i < ife->size(); i++) {
12✔
38
            collect_direct_child_loops(ife->at(i).first, result);
6✔
39
        }
6✔
40
    } else if (auto* w = dynamic_cast<structured_control_flow::While*>(&node)) {
120✔
41
        collect_direct_child_loops(w->root(), result);
×
42
    }
×
43
}
337✔
44
} // namespace
45

46
// Constructs the analysis for the given SDFG. Does no work itself; results
// are populated when run() is invoked by the analysis manager.
MemoryLayoutAnalysis::MemoryLayoutAnalysis(StructuredSDFG& sdfg) : Analysis(sdfg) {}
47

48
// Entry point of the analysis: discards any previously computed results and
// re-derives per-memlet accesses, per-loop tiles, and tile groups by walking
// the SDFG's control-flow tree from the root.
void MemoryLayoutAnalysis::run(analysis::AnalysisManager& analysis_manager) {
    // Reset all cached state so repeated runs start from a clean slate.
    tile_groups_.clear();
    tiles_.clear();
    accesses_.clear();

    traverse(sdfg_.root(), analysis_manager);
}
54

55
void MemoryLayoutAnalysis::
56
    traverse(structured_control_flow::ControlFlowNode& node, analysis::AnalysisManager& analysis_manager) {
602✔
57
    if (auto block = dynamic_cast<structured_control_flow::Block*>(&node)) {
602✔
58
        process_block(*block, analysis_manager);
120✔
59
    } else if (auto sequence = dynamic_cast<structured_control_flow::Sequence*>(&node)) {
482✔
60
        for (size_t i = 0; i < sequence->size(); i++) {
602✔
61
            traverse(sequence->at(i).first, analysis_manager);
331✔
62
        }
331✔
63
    } else if (auto if_else = dynamic_cast<structured_control_flow::IfElse*>(&node)) {
271✔
64
        for (size_t i = 0; i < if_else->size(); i++) {
12✔
65
            traverse(if_else->at(i).first, analysis_manager);
6✔
66
        }
6✔
67
    } else if (auto while_stmt = dynamic_cast<structured_control_flow::While*>(&node)) {
205✔
68
        traverse(while_stmt->root(), analysis_manager);
×
69
    } else if (auto loop = dynamic_cast<structured_control_flow::StructuredLoop*>(&node)) {
205✔
70
        // Snapshot current memlets before traversing loop body
71
        std::vector<const data_flow::Memlet*> memlets_before;
205✔
72
        memlets_before.reserve(accesses_.size());
205✔
73
        for (const auto& entry : accesses_) {
205✔
74
            memlets_before.push_back(entry.first);
89✔
75
        }
89✔
76

77
        // Snapshot tile keys before traversal
78
        std::set<std::pair<const structured_control_flow::StructuredLoop*, std::string>> tiles_before;
205✔
79
        for (const auto& entry : tiles_) {
205✔
80
            tiles_before.insert(entry.first);
110✔
81
        }
110✔
82

83
        traverse(loop->root(), analysis_manager);
205✔
84

85
        // Merge layouts for containers accessed within this loop
86
        merge_loop_layouts(*loop, memlets_before, tiles_before, analysis_manager);
205✔
87
    }
205✔
88
    // Break, Continue, Return nodes don't contain blocks
89
}
602✔
90

91
// Records a MemoryAccess for every analyzable memlet in the block's dataflow
// graph. Scalars/structures are skipped; Tensor and Array types carry their
// layout directly; Pointer accesses are delinearized using the block's
// assumptions. Entries are keyed by memlet pointer in accesses_.
void MemoryLayoutAnalysis::
    process_block(structured_control_flow::Block& block, analysis::AnalysisManager& analysis_manager) {
    auto& assumptions_analysis = analysis_manager.get<AssumptionsAnalysis>();
    auto& assumptions = assumptions_analysis.get(block);

    auto& dfg = block.dataflow();
    for (auto& memlet : dfg.edges()) {
        const auto& subset = memlet.subset();
        if (subset.empty()) {
            continue;
        }

        // Get container name from the AccessNode (either src or dst)
        std::string container_name;
        if (auto* access = dynamic_cast<const data_flow::AccessNode*>(&memlet.src())) {
            container_name = access->data();
        } else if (auto* access = dynamic_cast<const data_flow::AccessNode*>(&memlet.dst())) {
            container_name = access->data();
        } else {
            continue; // Skip memlets without AccessNode
        }

        auto& base_type = memlet.base_type();
        switch (base_type.type_id()) {
            case types::TypeID::Scalar:
            case types::TypeID::Structure:
                continue; // Skip scalars and structures
            case types::TypeID::Tensor: {
                // Tensor types already contain layout information, so we can directly store it without delinearization
                auto& tensor_type = dynamic_cast<const types::Tensor&>(memlet.base_type());

                // Final `true` flag marks the subset as direct indices (no delinearization needed).
                MemoryLayout layout(tensor_type.shape(), tensor_type.strides(), tensor_type.offset());
                MemoryAccess layout_info{container_name, subset, layout, true};
                this->accesses_.emplace(&memlet, layout_info);
                continue;
            }
            case types::TypeID::Array: {
                // Arrays are c-like stack array, so we can infer a simple row-major layout without needing
                // delinearization
                auto* array_type = dynamic_cast<const types::Array*>(&memlet.base_type());
                // NOTE(review): shape keeps only the outermost dimension; the loop below
                // unwraps nested arrays solely to check the element type, without appending
                // inner extents — confirm multi-dimensional array subsets are intended to be
                // filtered later via the ndims consistency check in merge_loop_layouts.
                symbolic::MultiExpression shape = {array_type->num_elements()};
                while (array_type->element_type().type_id() == types::TypeID::Array) {
                    array_type = dynamic_cast<const types::Array*>(&array_type->element_type());
                }
                if (array_type->element_type().type_id() != types::TypeID::Scalar) {
                    continue; // Skip non-scalar arrays
                }

                MemoryLayout layout(shape);
                MemoryAccess layout_info{container_name, subset, layout, true};
                this->accesses_.emplace(&memlet, layout_info);
                continue;
            }
            case types::TypeID::Pointer: {
                // For pointers, we attempt to delinearize the access pattern to infer the layout based
                // on assumptions from loop bounds
                auto* pointer_type = dynamic_cast<const types::Pointer*>(&memlet.base_type());
                if (pointer_type->pointee_type().type_id() != types::TypeID::Scalar) {
                    continue; // Skip non-scalar pointers
                }

                if (subset.size() != 1) {
                    continue; // Require full linearization
                }
                auto& linearized_expr = subset.at(0);

                auto result = symbolic::delinearize(linearized_expr, assumptions);
                if (!result.success) {
                    continue; // Delinearization failed, skip
                }

                // Delinearization returns N indices but only N-1 dimensions (from stride division)
                // The first dimension is unbounded - insert a placeholder that will be filled in by merge
                // Using a special symbol as placeholder for the first dimension
                symbolic::MultiExpression shape;
                shape.push_back(symbolic::symbol("__unbounded__"));
                for (const auto& dim : result.dimensions) {
                    shape.push_back(dim);
                }

                // Store symbolic indices and dimensions with unbounded first dimension
                // The merge phase will attempt to bound the first dimension using loop assumptions
                // (final `false` flag marks the layout as not yet fully resolved).
                MemoryLayout layout(shape);
                MemoryAccess layout_info{container_name, result.indices, layout, false};
                this->accesses_.emplace(&memlet, layout_info);
                continue;
            }
            default:
                continue; // Skip unsupported types
        }
    }
}
183

184
// Returns the recorded access information for a memlet, or nullptr when the
// memlet was not analyzed (e.g. skipped due to an unsupported base type).
const MemoryAccess* MemoryLayoutAnalysis::access(const data_flow::Memlet& memlet) const {
    const auto it = accesses_.find(&memlet);
    return (it == accesses_.end()) ? nullptr : &it->second;
}
191

192
// Merges, per container, all memory accesses discovered inside `loop` into a
// single MemoryTile (min/max corner per dimension) stored under {&loop,
// container} in tiles_, and propagates tile groups upward. `memlets_before`
// and `tiles_before` are snapshots taken before the loop body was traversed;
// anything not in them was discovered inside this loop. Bounds are computed
// symbolically via minimum/maximum under the loop's assumptions; containers
// whose bounds cannot be established are skipped.
void MemoryLayoutAnalysis::merge_loop_layouts(
    structured_control_flow::StructuredLoop& loop,
    const std::vector<const data_flow::Memlet*>& memlets_before,
    const std::set<std::pair<const structured_control_flow::StructuredLoop*, std::string>>& tiles_before,
    analysis::AnalysisManager& analysis_manager
) {
    // Convert memlets_before to a set for O(1) lookup
    std::unordered_set<const data_flow::Memlet*> before_set(memlets_before.begin(), memlets_before.end());

    // Group all new accesses by container
    std::unordered_map<std::string, std::vector<const data_flow::Memlet*>> all_container_groups;
    for (auto& [memlet_ptr, acc] : accesses_) {
        if (before_set.find(memlet_ptr) != before_set.end()) {
            continue;
        }
        all_container_groups[acc.container].push_back(memlet_ptr);
    }

    // Sort memlets within each container group by element_id for deterministic processing order
    for (auto& [container, memlets] : all_container_groups) {
        std::sort(memlets.begin(), memlets.end(), [](const data_flow::Memlet* a, const data_flow::Memlet* b) {
            return a->element_id() < b->element_id();
        });
    }

    auto& assumptions_analysis = analysis_manager.get<AssumptionsAnalysis>();
    auto& assumptions = assumptions_analysis.get(loop.root());
    // Start with SDFG-level parameters (read-only arguments like N, M)
    // then add any additional constant symbols from loop assumptions
    symbolic::SymbolSet parameters = assumptions_analysis.parameters();
    for (auto& entry : assumptions) {
        if (symbolic::eq(entry.first, loop.indvar())) {
            continue; // Skip induction variable itself
        }

        if (entry.second.constant()) {
            parameters.insert(entry.first);
        }
    }

    // Find direct child loops of this loop (not grandchildren)
    std::set<const structured_control_flow::StructuredLoop*> direct_child_loops;
    collect_direct_child_loops(loop.root(), direct_child_loops);

    for (auto& [container, memlets] : all_container_groups) {
        if (memlets.empty()) continue;

        // Find inner tiles from direct child loops only
        std::vector<const MemoryTile*> inner_tiles;
        for (auto& [key, tile] : tiles_) {
            if (tiles_before.count(key) > 0) continue;
            if (key.second != container) continue;
            if (direct_child_loops.count(key.first) == 0) continue;
            inner_tiles.push_back(&tile);
        }

        size_t ndims = 0;
        MemoryLayout reference_layout({symbolic::one()});
        // Separate min/max index lists to avoid unnecessary symbolic min/max
        std::vector<std::vector<symbolic::Expression>> min_indices;
        std::vector<std::vector<symbolic::Expression>> max_indices;

        if (!inner_tiles.empty()) {
            // Use inner tile min/max as representative values
            // Inner tiles have already resolved inner loop variables to their bounds
            ndims = inner_tiles[0]->min_subset.size();
            reference_layout = inner_tiles[0]->layout;
            min_indices.resize(ndims);
            max_indices.resize(ndims);

            // Tiles with a mismatched dimensionality are silently skipped.
            for (const auto* tile : inner_tiles) {
                if (tile->min_subset.size() != ndims) continue;
                for (size_t d = 0; d < ndims; ++d) {
                    min_indices[d].push_back(tile->min_subset[d]);
                    max_indices[d].push_back(tile->max_subset[d]);
                }
            }

            // Propagate tile groups from child loops upward using the same
            // base-partitioning logic: group inner groups by their min_subset
            // base at this loop level, then merge each partition.
            std::vector<const MemoryTileGroup*> inner_groups;
            for (auto& [key, groups] : tile_groups_) {
                if (tiles_before.count({key.first, key.second}) > 0) continue;
                if (key.second != container) continue;
                if (direct_child_loops.count(key.first) == 0) continue;
                for (const auto& g : groups) {
                    inner_groups.push_back(&g);
                }
            }

            if (!inner_groups.empty()) {
                // Group inner groups by their base at this level
                struct OuterGroupEntry {
                    data_flow::Subset base;
                    std::vector<const MemoryTileGroup*> constituents;
                };
                std::vector<OuterGroupEntry> outer_partitions;

                for (const auto* ig : inner_groups) {
                    if (ig->tile.min_subset.size() != ndims) continue;

                    // Compute base: minimum of the inner group's min_subset per dim
                    data_flow::Subset base;
                    bool base_ok = true;
                    for (size_t d = 0; d < ndims; ++d) {
                        // First try the strict minimum; fall back to the relaxed variant.
                        auto lb = symbolic::minimum(ig->tile.min_subset[d], parameters, assumptions, true);
                        if (lb.is_null()) {
                            lb = symbolic::minimum(ig->tile.min_subset[d], parameters, assumptions, false);
                        }
                        if (lb.is_null()) {
                            base_ok = false;
                            break;
                        }
                        base.push_back(symbolic::simplify(lb));
                    }
                    if (!base_ok) continue;

                    // Find matching partition (same base OR constant-offset base)
                    bool found = false;
                    for (auto& op : outer_partitions) {
                        bool const_diff = true;
                        for (size_t d = 0; d < ndims; ++d) {
                            auto diff = symbolic::simplify(symbolic::sub(base[d], op.base[d]));
                            if (!SymEngine::is_a<SymEngine::Integer>(*diff)) {
                                const_diff = false;
                                break;
                            }
                        }
                        if (const_diff) {
                            op.constituents.push_back(ig);
                            found = true;
                            break;
                        }
                    }
                    if (!found) {
                        outer_partitions.push_back({base, {ig}});
                    }
                }

                // For each partition, merge constituent tile bounds and collect memlets
                std::vector<MemoryTileGroup> result_groups;
                for (auto& op : outer_partitions) {
                    data_flow::Subset grp_min, grp_max;
                    bool grp_bounded = true;

                    for (size_t d = 0; d < ndims; ++d) {
                        symbolic::Expression d_min = SymEngine::null;
                        symbolic::Expression d_max = SymEngine::null;

                        for (const auto* c : op.constituents) {
                            // min from min_subset
                            auto lb = symbolic::minimum(c->tile.min_subset[d], parameters, assumptions, true);
                            if (lb.is_null())
                                lb = symbolic::minimum(c->tile.min_subset[d], parameters, assumptions, false);
                            if (lb.is_null()) {
                                grp_bounded = false;
                                break;
                            }
                            d_min = d_min.is_null() ? lb : symbolic::min(d_min, lb);

                            // max from max_subset
                            auto ub = symbolic::maximum(c->tile.max_subset[d], parameters, assumptions, true);
                            if (ub.is_null())
                                ub = symbolic::maximum(c->tile.max_subset[d], parameters, assumptions, false);
                            if (ub.is_null()) {
                                grp_bounded = false;
                                break;
                            }
                            d_max = d_max.is_null() ? ub : symbolic::max(d_max, ub);
                        }
                        if (!grp_bounded) break;
                        grp_min.push_back(symbolic::simplify(d_min));
                        grp_max.push_back(symbolic::simplify(d_max));
                    }
                    if (!grp_bounded) continue;

                    // Collect all memlets from constituent groups
                    std::vector<const data_flow::Memlet*> grp_memlets;
                    for (const auto* c : op.constituents) {
                        grp_memlets.insert(grp_memlets.end(), c->memlets.begin(), c->memlets.end());
                    }

                    MemoryTile grp_tile{container, grp_min, grp_max, reference_layout, true};
                    result_groups.push_back({grp_tile, std::move(grp_memlets)});
                }

                if (!result_groups.empty()) {
                    tile_groups_.insert({{&loop, container}, std::move(result_groups)});
                }
            }
        } else {
            // Use raw access indices (no inner tiles available)
            auto& first_access = accesses_.at(memlets[0]);
            auto& reference_shape = first_access.layout.shape();
            ndims = reference_shape.size();
            reference_layout = first_access.layout;
            min_indices.resize(ndims);
            max_indices.resize(ndims);

            // All accesses to a container must agree on rank and inner shape,
            // otherwise the whole container is dropped for this loop.
            bool consistent = true;
            for (const auto* memlet_ptr : memlets) {
                auto& acc = accesses_.at(memlet_ptr);
                auto& shape = acc.layout.shape();

                if (shape.size() != ndims) {
                    consistent = false;
                    break;
                }
                // Check inner dimensions match (all except first which may be unbounded)
                for (size_t d = 1; d < ndims; ++d) {
                    if (!symbolic::eq(shape[d], reference_shape[d])) {
                        consistent = false;
                        break;
                    }
                }
                if (!consistent) break;

                // Collect indices for each dimension
                if (acc.subset.size() != ndims) {
                    consistent = false;
                    break;
                }
                for (size_t d = 0; d < ndims; ++d) {
                    min_indices[d].push_back(acc.subset[d]);
                    max_indices[d].push_back(acc.subset[d]);
                }
            }

            if (!consistent) continue;

            // Compute tile groups for raw memlets
            compute_tile_groups(loop, container, memlets, reference_layout, ndims, parameters, assumptions);
        }

        if (ndims == 0) continue;

        // Compute min/max bounds for each dimension
        data_flow::Subset min_subset;
        data_flow::Subset max_subset;
        bool all_bounded = true;

        for (size_t d = 0; d < ndims; ++d) {
            symbolic::Expression dim_min = SymEngine::null;
            symbolic::Expression dim_max = SymEngine::null;

            // Compute dim_min from min_indices
            for (const auto& idx : min_indices[d]) {
                auto lb = symbolic::minimum(idx, parameters, assumptions, true);
                if (lb.is_null()) {
                    lb = symbolic::minimum(idx, parameters, assumptions, false);
                }
                if (lb.is_null()) {
                    all_bounded = false;
                    break;
                }
                if (dim_min.is_null()) {
                    dim_min = lb;
                } else {
                    dim_min = symbolic::min(dim_min, lb);
                }
            }
            if (!all_bounded) break;

            // Compute dim_max from max_indices
            for (const auto& idx : max_indices[d]) {
                auto ub = symbolic::maximum(idx, parameters, assumptions, true);
                if (ub.is_null()) {
                    ub = symbolic::maximum(idx, parameters, assumptions, false);
                }
                if (ub.is_null()) {
                    all_bounded = false;
                    break;
                }
                if (dim_max.is_null()) {
                    dim_max = ub;
                } else {
                    dim_max = symbolic::max(dim_max, ub);
                }
            }
            if (!all_bounded) break;

            min_subset.push_back(symbolic::simplify(dim_min));
            max_subset.push_back(symbolic::simplify(dim_max));
        }

        if (!all_bounded) continue;

        // Store this loop's tile with the original memory layout
        MemoryTile merged_tile{container, min_subset, max_subset, reference_layout, true};
        tiles_.insert({{&loop, container}, merged_tile});
    }
}
485

486
// Looks up the merged tile computed for (loop, container), or nullptr when
// no tile was stored for that combination.
const MemoryTile* MemoryLayoutAnalysis::
    tile(const structured_control_flow::StructuredLoop& loop, const std::string& container) const {
    const auto it = tiles_.find(std::make_pair(&loop, container));
    return (it == tiles_.end()) ? nullptr : &it->second;
}
495

496
// Partitions the given memlets of `container` into tile groups for `loop`:
// memlets whose per-dimension index minima ("bases") are symbolically equal,
// or differ only by integer constants (e.g. stencil neighbors), share a
// group. Each group gets its own min/max tile; results are stored in
// tile_groups_ under {&loop, container}. Groups whose bounds cannot be
// established symbolically are dropped.
void MemoryLayoutAnalysis::compute_tile_groups(
    structured_control_flow::StructuredLoop& loop,
    const std::string& container,
    const std::vector<const data_flow::Memlet*>& memlets,
    const MemoryLayout& reference_layout,
    size_t ndims,
    const symbolic::SymbolSet& parameters,
    const symbolic::Assumptions& assumptions
) {
    // For each memlet, compute per-dimension base (minimum of index expression)
    // Group memlets whose bases are symbolically equal in all dimensions
    struct GroupEntry {
        data_flow::Subset base; // per-dim minimum
        std::vector<const data_flow::Memlet*> group_memlets;
    };

    std::vector<GroupEntry> groups;

    for (const auto* memlet_ptr : memlets) {
        auto& acc = accesses_.at(memlet_ptr);
        if (acc.subset.size() != ndims) continue;

        // Compute per-dimension base (minimum)
        data_flow::Subset base;
        bool base_ok = true;
        for (size_t d = 0; d < ndims; ++d) {
            // Strict minimum first; fall back to the relaxed variant if it fails.
            auto lb = symbolic::minimum(acc.subset[d], parameters, assumptions, true);
            if (lb.is_null()) {
                lb = symbolic::minimum(acc.subset[d], parameters, assumptions, false);
            }
            if (lb.is_null()) {
                base_ok = false;
                break;
            }
            base.push_back(symbolic::simplify(lb));
        }
        if (!base_ok) continue;

        // Find existing group with same base
        bool found = false;
        for (auto& group : groups) {
            if (group.base.size() != ndims) continue;
            bool match = true;
            for (size_t d = 0; d < ndims; ++d) {
                if (!symbolic::eq(group.base[d], base[d])) {
                    match = false;
                    break;
                }
            }
            if (match) {
                group.group_memlets.push_back(memlet_ptr);
                found = true;
                break;
            }
        }
        if (!found) {
            groups.push_back({base, {memlet_ptr}});
        }
    }

    if (groups.empty()) return;

    // Merge groups whose bases differ only by integer constants.
    // E.g. stencil bases [i-1, j], [i, j], [i+1, j] should merge (constant offsets in dim0).
    // But SYR2K bases [i, 0] vs [j, 0] should NOT merge (symbolic difference).
    std::vector<GroupEntry> merged_groups;
    for (auto& group : groups) {
        bool merged = false;
        for (auto& existing : merged_groups) {
            bool const_diff = true;
            for (size_t d = 0; d < ndims; ++d) {
                auto diff = symbolic::simplify(symbolic::sub(group.base[d], existing.base[d]));
                if (!SymEngine::is_a<SymEngine::Integer>(*diff)) {
                    const_diff = false;
                    break;
                }
            }
            if (const_diff) {
                existing.group_memlets
                    .insert(existing.group_memlets.end(), group.group_memlets.begin(), group.group_memlets.end());
                merged = true;
                break;
            }
        }
        if (!merged) {
            merged_groups.push_back(std::move(group));
        }
    }

    // Compute tile for each merged group
    std::vector<MemoryTileGroup> result_groups;
    for (auto& group : merged_groups) {
        // Gather the raw subset expressions per dimension over all group members.
        std::vector<std::vector<symbolic::Expression>> min_indices(ndims);
        std::vector<std::vector<symbolic::Expression>> max_indices(ndims);

        for (const auto* memlet_ptr : group.group_memlets) {
            auto& acc = accesses_.at(memlet_ptr);
            for (size_t d = 0; d < ndims; ++d) {
                min_indices[d].push_back(acc.subset[d]);
                max_indices[d].push_back(acc.subset[d]);
            }
        }

        data_flow::Subset min_subset;
        data_flow::Subset max_subset;
        bool all_bounded = true;

        for (size_t d = 0; d < ndims; ++d) {
            symbolic::Expression dim_min = SymEngine::null;
            symbolic::Expression dim_max = SymEngine::null;

            // Fold the per-dimension lower bounds into a single symbolic min.
            for (const auto& idx : min_indices[d]) {
                auto lb = symbolic::minimum(idx, parameters, assumptions, true);
                if (lb.is_null()) {
                    lb = symbolic::minimum(idx, parameters, assumptions, false);
                }
                if (lb.is_null()) {
                    all_bounded = false;
                    break;
                }
                if (dim_min.is_null()) {
                    dim_min = lb;
                } else {
                    dim_min = symbolic::min(dim_min, lb);
                }
            }
            if (!all_bounded) break;

            // Fold the per-dimension upper bounds into a single symbolic max.
            for (const auto& idx : max_indices[d]) {
                auto ub = symbolic::maximum(idx, parameters, assumptions, true);
                if (ub.is_null()) {
                    ub = symbolic::maximum(idx, parameters, assumptions, false);
                }
                if (ub.is_null()) {
                    all_bounded = false;
                    break;
                }
                if (dim_max.is_null()) {
                    dim_max = ub;
                } else {
                    dim_max = symbolic::max(dim_max, ub);
                }
            }
            if (!all_bounded) break;

            min_subset.push_back(symbolic::simplify(dim_min));
            max_subset.push_back(symbolic::simplify(dim_max));
        }

        if (!all_bounded) continue;

        MemoryTile tile{container, min_subset, max_subset, reference_layout, true};
        result_groups.push_back({tile, group.group_memlets});
    }

    if (!result_groups.empty()) {
        tile_groups_.insert({{&loop, container}, std::move(result_groups)});
    }
}
655

656
const std::vector<MemoryTileGroup>* MemoryLayoutAnalysis::
657
    tile_groups(const structured_control_flow::StructuredLoop& loop, const std::string& container) const {
4✔
658
    auto key = std::make_pair(&loop, container);
4✔
659
    auto it = tile_groups_.find(key);
4✔
660
    if (it == tile_groups_.end()) {
4✔
NEW
661
        return nullptr;
×
NEW
662
    }
×
663
    return &it->second;
4✔
664
}
4✔
665

666
// Finds the tile group under `loop` that contains the given memlet.
// Returns nullptr if the memlet was never analyzed, if its container has no
// groups for this loop, or if no group lists the memlet as a member.
const MemoryTileGroup* MemoryLayoutAnalysis::
    tile_group_for(const structured_control_flow::StructuredLoop& loop, const data_flow::Memlet& memlet) const {
    // Resolve which container the memlet accesses.
    const auto acc_it = accesses_.find(&memlet);
    if (acc_it == accesses_.end()) {
        return nullptr;
    }

    const auto groups_it = tile_groups_.find(std::make_pair(&loop, acc_it->second.container));
    if (groups_it == tile_groups_.end()) {
        return nullptr;
    }

    // Scan each group's member list for this memlet (pointer identity).
    for (const auto& group : groups_it->second) {
        if (std::find(group.memlets.begin(), group.memlets.end(), &memlet) != group.memlets.end()) {
            return &group;
        }
    }
    return nullptr;
}
690

691
// Per-dimension extent of the tile: (max - min) + 1, expanded and simplified.
symbolic::MultiExpression MemoryTile::extents() const {
    symbolic::MultiExpression out;
    const size_t rank = min_subset.size();
    for (size_t dim = 0; dim < rank; ++dim) {
        auto width = symbolic::add(symbolic::sub(max_subset[dim], min_subset[dim]), symbolic::one());
        out.push_back(symbolic::simplify(symbolic::expand(width)));
    }
    return out;
}
700

701
// Like extents(), but overapproximates each (max - min) + 1 expression before
// expanding/simplifying, yielding conservative per-dimension extents.
symbolic::MultiExpression MemoryTile::extents_approx() const {
    symbolic::MultiExpression out;
    const size_t rank = min_subset.size();
    for (size_t dim = 0; dim < rank; ++dim) {
        auto width = symbolic::add(symbolic::sub(max_subset[dim], min_subset[dim]), symbolic::one());
        out.push_back(symbolic::simplify(symbolic::expand(symbolic::overapproximate(width))));
    }
    return out;
}
710

711
std::pair<symbolic::Expression, symbolic::Expression> MemoryTile::contiguous_range() const {
20✔
712
    auto& strides = layout.strides();
20✔
713
    auto first = layout.offset();
20✔
714
    auto last = layout.offset();
20✔
715
    for (size_t d = 0; d < min_subset.size(); ++d) {
66✔
716
        first = symbolic::add(first, symbolic::mul(strides[d], min_subset[d]));
46✔
717
        last = symbolic::add(last, symbolic::mul(strides[d], max_subset[d]));
46✔
718
    }
46✔
719
    return {symbolic::simplify(symbolic::expand(first)), symbolic::simplify(symbolic::expand(last))};
20✔
720
}
20✔
721

722
} // namespace analysis
723
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc