• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 25310443138

04 May 2026 09:03AM UTC coverage: 64.445%. First build
25310443138

Pull #699

github

web-flow
Merge 1fc263bae into eedda3adc
Pull Request #699: Extends memory layout analysis to support groups of memlets

291 of 338 new or added lines in 4 files covered. (86.09%)

31940 of 49562 relevant lines covered (64.44%)

2310.62 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.41
/sdfg/src/analysis/memory_layout_analysis.cpp
1
#include "sdfg/analysis/memory_layout_analysis.h"
2

3
#include <optional>
4
#include <set>
5
#include <unordered_set>
6

7
#include "sdfg/analysis/assumptions_analysis.h"
8
#include "sdfg/data_flow/access_node.h"
9
#include "sdfg/structured_control_flow/block.h"
10
#include "sdfg/structured_control_flow/if_else.h"
11
#include "sdfg/structured_control_flow/sequence.h"
12
#include "sdfg/structured_control_flow/structured_loop.h"
13
#include "sdfg/structured_control_flow/while.h"
14
#include "sdfg/symbolic/delinearization.h"
15
#include "sdfg/symbolic/extreme_values.h"
16
#include "sdfg/symbolic/polynomials.h"
17

18
namespace sdfg {
19
namespace analysis {
20

21
namespace {
22
// Collect StructuredLoop nodes that are direct children of the given node,
23
// stopping at loop boundaries (does not recurse into nested loops).
24
void collect_direct_child_loops(
25
    structured_control_flow::ControlFlowNode& node, std::set<const structured_control_flow::StructuredLoop*>& result
26
) {
481✔
27
    if (auto* loop = dynamic_cast<structured_control_flow::StructuredLoop*>(&node)) {
481✔
28
        result.insert(loop);
144✔
29
        return;
144✔
30
    }
144✔
31
    if (auto* seq = dynamic_cast<structured_control_flow::Sequence*>(&node)) {
337✔
32
        for (size_t i = 0; i < seq->size(); i++) {
481✔
33
            collect_direct_child_loops(seq->at(i).first, result);
270✔
34
        }
270✔
35
    } else if (auto* ife = dynamic_cast<structured_control_flow::IfElse*>(&node)) {
211✔
36
        for (size_t i = 0; i < ife->size(); i++) {
12✔
37
            collect_direct_child_loops(ife->at(i).first, result);
6✔
38
        }
6✔
39
    } else if (auto* w = dynamic_cast<structured_control_flow::While*>(&node)) {
120✔
40
        collect_direct_child_loops(w->root(), result);
×
41
    }
×
42
}
337✔
43
} // namespace
44

45
MemoryLayoutAnalysis::MemoryLayoutAnalysis(StructuredSDFG& sdfg) : Analysis(sdfg) {}
60✔
46

47
void MemoryLayoutAnalysis::run(analysis::AnalysisManager& analysis_manager) {
60✔
48
    accesses_.clear();
60✔
49
    tiles_.clear();
60✔
50
    tile_groups_.clear();
60✔
51
    traverse(sdfg_.root(), analysis_manager);
60✔
52
}
60✔
53

54
void MemoryLayoutAnalysis::
55
    traverse(structured_control_flow::ControlFlowNode& node, analysis::AnalysisManager& analysis_manager) {
602✔
56
    if (auto block = dynamic_cast<structured_control_flow::Block*>(&node)) {
602✔
57
        process_block(*block, analysis_manager);
120✔
58
    } else if (auto sequence = dynamic_cast<structured_control_flow::Sequence*>(&node)) {
482✔
59
        for (size_t i = 0; i < sequence->size(); i++) {
602✔
60
            traverse(sequence->at(i).first, analysis_manager);
331✔
61
        }
331✔
62
    } else if (auto if_else = dynamic_cast<structured_control_flow::IfElse*>(&node)) {
271✔
63
        for (size_t i = 0; i < if_else->size(); i++) {
12✔
64
            traverse(if_else->at(i).first, analysis_manager);
6✔
65
        }
6✔
66
    } else if (auto while_stmt = dynamic_cast<structured_control_flow::While*>(&node)) {
205✔
67
        traverse(while_stmt->root(), analysis_manager);
×
68
    } else if (auto loop = dynamic_cast<structured_control_flow::StructuredLoop*>(&node)) {
205✔
69
        // Snapshot current memlets before traversing loop body
70
        std::vector<const data_flow::Memlet*> memlets_before;
205✔
71
        memlets_before.reserve(accesses_.size());
205✔
72
        for (const auto& entry : accesses_) {
205✔
73
            memlets_before.push_back(entry.first);
89✔
74
        }
89✔
75

76
        // Snapshot tile keys before traversal
77
        std::set<std::pair<const structured_control_flow::StructuredLoop*, std::string>> tiles_before;
205✔
78
        for (const auto& entry : tiles_) {
205✔
79
            tiles_before.insert(entry.first);
110✔
80
        }
110✔
81

82
        traverse(loop->root(), analysis_manager);
205✔
83

84
        // Merge layouts for containers accessed within this loop
85
        merge_loop_layouts(*loop, memlets_before, tiles_before, analysis_manager);
205✔
86
    }
205✔
87
    // Break, Continue, Return nodes don't contain blocks
88
}
602✔
89

90
void MemoryLayoutAnalysis::
91
    process_block(structured_control_flow::Block& block, analysis::AnalysisManager& analysis_manager) {
120✔
92
    auto& assumptions_analysis = analysis_manager.get<AssumptionsAnalysis>();
120✔
93
    auto& assumptions = assumptions_analysis.get(block);
120✔
94

95
    auto& dfg = block.dataflow();
120✔
96
    for (auto& memlet : dfg.edges()) {
322✔
97
        const auto& subset = memlet.subset();
322✔
98
        if (subset.empty()) {
322✔
99
            continue;
99✔
100
        }
99✔
101

102
        // Get container name from the AccessNode (either src or dst)
103
        std::string container_name;
223✔
104
        if (auto* access = dynamic_cast<const data_flow::AccessNode*>(&memlet.src())) {
223✔
105
            container_name = access->data();
153✔
106
        } else if (auto* access = dynamic_cast<const data_flow::AccessNode*>(&memlet.dst())) {
153✔
107
            container_name = access->data();
70✔
108
        } else {
70✔
109
            continue; // Skip memlets without AccessNode
×
110
        }
×
111

112
        auto& base_type = memlet.base_type();
223✔
113
        switch (base_type.type_id()) {
223✔
114
            case types::TypeID::Scalar:
×
115
            case types::TypeID::Structure:
×
116
                continue; // Skip scalars and structures
×
117
            case types::TypeID::Tensor: {
×
118
                // Tensor types already contain layout information, so we can directly store it without delinearization
119
                auto& tensor_type = dynamic_cast<const types::Tensor&>(memlet.base_type());
×
120

121
                MemoryLayout layout(tensor_type.shape(), tensor_type.strides(), tensor_type.offset());
×
122
                MemoryAccess layout_info{container_name, subset, layout, true};
×
123
                this->accesses_.emplace(&memlet, layout_info);
×
124
                continue;
×
125
            }
×
126
            case types::TypeID::Array: {
64✔
127
                // Arrays are c-like stack array, so we can infer a simple row-major layout without needing
128
                // delinearization
129
                auto* array_type = dynamic_cast<const types::Array*>(&memlet.base_type());
64✔
130
                symbolic::MultiExpression shape = {array_type->num_elements()};
64✔
131
                while (array_type->element_type().type_id() == types::TypeID::Array) {
68✔
132
                    array_type = dynamic_cast<const types::Array*>(&array_type->element_type());
4✔
133
                }
4✔
134
                if (array_type->element_type().type_id() != types::TypeID::Scalar) {
64✔
135
                    continue; // Skip non-scalar arrays
×
136
                }
×
137

138
                MemoryLayout layout(shape);
64✔
139
                MemoryAccess layout_info{container_name, subset, layout, true};
64✔
140
                this->accesses_.emplace(&memlet, layout_info);
64✔
141
                continue;
64✔
142
            }
64✔
143
            case types::TypeID::Pointer: {
159✔
144
                // For pointers, we attempt to delinearize the access pattern to infer the layout based
145
                // on assumptions from loop bounds
146
                auto* pointer_type = dynamic_cast<const types::Pointer*>(&memlet.base_type());
159✔
147
                if (pointer_type->pointee_type().type_id() != types::TypeID::Scalar) {
159✔
148
                    continue; // Skip non-scalar pointers
4✔
149
                }
4✔
150

151
                if (subset.size() != 1) {
155✔
152
                    continue; // Require full linearization
×
153
                }
×
154
                auto& linearized_expr = subset.at(0);
155✔
155

156
                auto result = symbolic::delinearize(linearized_expr, assumptions);
155✔
157
                if (!result.success) {
155✔
158
                    continue; // Delinearization failed, skip
4✔
159
                }
4✔
160

161
                // Delinearization returns N indices but only N-1 dimensions (from stride division)
162
                // The first dimension is unbounded - insert a placeholder that will be filled in by merge
163
                // Using a special symbol as placeholder for the first dimension
164
                symbolic::MultiExpression shape;
151✔
165
                shape.push_back(symbolic::symbol("__unbounded__"));
151✔
166
                for (const auto& dim : result.dimensions) {
151✔
167
                    shape.push_back(dim);
117✔
168
                }
117✔
169

170
                // Store symbolic indices and dimensions with unbounded first dimension
171
                // The merge phase will attempt to bound the first dimension using loop assumptions
172
                MemoryLayout layout(shape);
151✔
173
                MemoryAccess layout_info{container_name, result.indices, layout, false};
151✔
174
                this->accesses_.emplace(&memlet, layout_info);
151✔
175
                continue;
151✔
176
            }
155✔
177
            default:
×
178
                continue; // Skip unsupported types
×
179
        }
223✔
180
    }
223✔
181
}
120✔
182

183
const MemoryAccess* MemoryLayoutAnalysis::access(const data_flow::Memlet& memlet) const {
94✔
184
    auto layout_it = accesses_.find(&memlet);
94✔
185
    if (layout_it == accesses_.end()) {
94✔
186
        return nullptr;
×
187
    }
×
188
    return &layout_it->second;
94✔
189
}
94✔
190

191
void MemoryLayoutAnalysis::merge_loop_layouts(
192
    structured_control_flow::StructuredLoop& loop,
193
    const std::vector<const data_flow::Memlet*>& memlets_before,
194
    const std::set<std::pair<const structured_control_flow::StructuredLoop*, std::string>>& tiles_before,
195
    analysis::AnalysisManager& analysis_manager
196
) {
205✔
197
    // Convert memlets_before to a set for O(1) lookup
198
    std::unordered_set<const data_flow::Memlet*> before_set(memlets_before.begin(), memlets_before.end());
205✔
199

200
    // Group all new accesses by container
201
    std::unordered_map<std::string, std::vector<const data_flow::Memlet*>> all_container_groups;
205✔
202
    for (auto& [memlet_ptr, acc] : accesses_) {
776✔
203
        if (before_set.find(memlet_ptr) != before_set.end()) {
776✔
204
            continue;
89✔
205
        }
89✔
206
        all_container_groups[acc.container].push_back(memlet_ptr);
687✔
207
    }
687✔
208

209
    auto& assumptions_analysis = analysis_manager.get<AssumptionsAnalysis>();
205✔
210
    auto& assumptions = assumptions_analysis.get(loop.root());
205✔
211
    // Start with SDFG-level parameters (read-only arguments like N, M)
212
    // then add any additional constant symbols from loop assumptions
213
    symbolic::SymbolSet parameters = assumptions_analysis.parameters();
205✔
214
    for (auto& entry : assumptions) {
813✔
215
        if (symbolic::eq(entry.first, loop.indvar())) {
813✔
216
            continue; // Skip induction variable itself
205✔
217
        }
205✔
218

219
        if (entry.second.constant()) {
608✔
220
            parameters.insert(entry.first);
608✔
221
        }
608✔
222
    }
608✔
223

224
    // Find direct child loops of this loop (not grandchildren)
225
    std::set<const structured_control_flow::StructuredLoop*> direct_child_loops;
205✔
226
    collect_direct_child_loops(loop.root(), direct_child_loops);
205✔
227

228
    for (auto& [container, memlets] : all_container_groups) {
391✔
229
        if (memlets.empty()) continue;
391✔
230

231
        // Find inner tiles from direct child loops only
232
        std::vector<const MemoryTile*> inner_tiles;
391✔
233
        for (auto& [key, tile] : tiles_) {
1,865✔
234
            if (tiles_before.count(key) > 0) continue;
1,865✔
235
            if (key.second != container) continue;
1,611✔
236
            if (direct_child_loops.count(key.first) == 0) continue;
569✔
237
            inner_tiles.push_back(&tile);
273✔
238
        }
273✔
239

240
        size_t ndims = 0;
391✔
241
        MemoryLayout reference_layout({symbolic::one()});
391✔
242
        // Separate min/max index lists to avoid unnecessary symbolic min/max
243
        std::vector<std::vector<symbolic::Expression>> min_indices;
391✔
244
        std::vector<std::vector<symbolic::Expression>> max_indices;
391✔
245

246
        if (!inner_tiles.empty()) {
391✔
247
            // Use inner tile min/max as representative values
248
            // Inner tiles have already resolved inner loop variables to their bounds
249
            ndims = inner_tiles[0]->min_subset.size();
255✔
250
            reference_layout = inner_tiles[0]->layout;
255✔
251
            min_indices.resize(ndims);
255✔
252
            max_indices.resize(ndims);
255✔
253

254
            for (const auto* tile : inner_tiles) {
273✔
255
                if (tile->min_subset.size() != ndims) continue;
273✔
256
                for (size_t d = 0; d < ndims; ++d) {
706✔
257
                    min_indices[d].push_back(tile->min_subset[d]);
433✔
258
                    max_indices[d].push_back(tile->max_subset[d]);
433✔
259
                }
433✔
260
            }
273✔
261

262
            // Propagate tile groups from child loops upward using the same
263
            // base-partitioning logic: group inner groups by their min_subset
264
            // base at this loop level, then merge each partition.
265
            std::vector<const MemoryTileGroup*> inner_groups;
255✔
266
            for (auto& [key, groups] : tile_groups_) {
1,702✔
267
                if (tiles_before.count({key.first, key.second}) > 0) continue;
1,702✔
268
                if (key.second != container) continue;
1,538✔
269
                if (direct_child_loops.count(key.first) == 0) continue;
569✔
270
                for (const auto& g : groups) {
292✔
271
                    inner_groups.push_back(&g);
292✔
272
                }
292✔
273
            }
273✔
274

275
            if (!inner_groups.empty()) {
255✔
276
                // Group inner groups by their base at this level
277
                struct OuterGroupEntry {
255✔
278
                    data_flow::Subset base;
255✔
279
                    std::vector<const MemoryTileGroup*> constituents;
255✔
280
                };
255✔
281
                std::vector<OuterGroupEntry> outer_partitions;
255✔
282

283
                for (const auto* ig : inner_groups) {
292✔
284
                    if (ig->tile.min_subset.size() != ndims) continue;
292✔
285

286
                    // Compute base: minimum of the inner group's min_subset per dim
287
                    data_flow::Subset base;
292✔
288
                    bool base_ok = true;
292✔
289
                    for (size_t d = 0; d < ndims; ++d) {
755✔
290
                        auto lb = symbolic::minimum(ig->tile.min_subset[d], parameters, assumptions, true);
463✔
291
                        if (lb.is_null()) {
463✔
NEW
292
                            lb = symbolic::minimum(ig->tile.min_subset[d], parameters, assumptions, false);
×
NEW
293
                        }
×
294
                        if (lb.is_null()) {
463✔
NEW
295
                            base_ok = false;
×
NEW
296
                            break;
×
NEW
297
                        }
×
298
                        base.push_back(symbolic::simplify(lb));
463✔
299
                    }
463✔
300
                    if (!base_ok) continue;
292✔
301

302
                    // Find matching partition (same base OR constant-offset base)
303
                    bool found = false;
292✔
304
                    for (auto& op : outer_partitions) {
292✔
305
                        bool const_diff = true;
37✔
306
                        for (size_t d = 0; d < ndims; ++d) {
74✔
307
                            auto diff = symbolic::simplify(symbolic::sub(base[d], op.base[d]));
44✔
308
                            if (!SymEngine::is_a<SymEngine::Integer>(*diff)) {
44✔
309
                                const_diff = false;
7✔
310
                                break;
7✔
311
                            }
7✔
312
                        }
44✔
313
                        if (const_diff) {
37✔
314
                            op.constituents.push_back(ig);
30✔
315
                            found = true;
30✔
316
                            break;
30✔
317
                        }
30✔
318
                    }
37✔
319
                    if (!found) {
292✔
320
                        outer_partitions.push_back({base, {ig}});
262✔
321
                    }
262✔
322
                }
292✔
323

324
                // For each partition, merge constituent tile bounds and collect memlets
325
                std::vector<MemoryTileGroup> result_groups;
255✔
326
                for (auto& op : outer_partitions) {
262✔
327
                    data_flow::Subset grp_min, grp_max;
262✔
328
                    bool grp_bounded = true;
262✔
329

330
                    for (size_t d = 0; d < ndims; ++d) {
688✔
331
                        symbolic::Expression d_min = SymEngine::null;
426✔
332
                        symbolic::Expression d_max = SymEngine::null;
426✔
333

334
                        for (const auto* c : op.constituents) {
463✔
335
                            // min from min_subset
336
                            auto lb = symbolic::minimum(c->tile.min_subset[d], parameters, assumptions, true);
463✔
337
                            if (lb.is_null())
463✔
NEW
338
                                lb = symbolic::minimum(c->tile.min_subset[d], parameters, assumptions, false);
×
339
                            if (lb.is_null()) {
463✔
NEW
340
                                grp_bounded = false;
×
NEW
341
                                break;
×
NEW
342
                            }
×
343
                            d_min = d_min.is_null() ? lb : symbolic::min(d_min, lb);
463✔
344

345
                            // max from max_subset
346
                            auto ub = symbolic::maximum(c->tile.max_subset[d], parameters, assumptions, true);
463✔
347
                            if (ub.is_null())
463✔
NEW
348
                                ub = symbolic::maximum(c->tile.max_subset[d], parameters, assumptions, false);
×
349
                            if (ub.is_null()) {
463✔
NEW
350
                                grp_bounded = false;
×
NEW
351
                                break;
×
NEW
352
                            }
×
353
                            d_max = d_max.is_null() ? ub : symbolic::max(d_max, ub);
463✔
354
                        }
463✔
355
                        if (!grp_bounded) break;
426✔
356
                        grp_min.push_back(symbolic::simplify(d_min));
426✔
357
                        grp_max.push_back(symbolic::simplify(d_max));
426✔
358
                    }
426✔
359
                    if (!grp_bounded) continue;
262✔
360

361
                    // Collect all memlets from constituent groups
362
                    std::vector<const data_flow::Memlet*> grp_memlets;
262✔
363
                    for (const auto* c : op.constituents) {
292✔
364
                        grp_memlets.insert(grp_memlets.end(), c->memlets.begin(), c->memlets.end());
292✔
365
                    }
292✔
366

367
                    MemoryTile grp_tile{container, grp_min, grp_max, reference_layout, true};
262✔
368
                    result_groups.push_back({grp_tile, std::move(grp_memlets)});
262✔
369
                }
262✔
370

371
                if (!result_groups.empty()) {
255✔
372
                    tile_groups_.insert({{&loop, container}, std::move(result_groups)});
255✔
373
                }
255✔
374
            }
255✔
375
        } else {
255✔
376
            // Use raw access indices (no inner tiles available)
377
            auto& first_access = accesses_.at(memlets[0]);
136✔
378
            auto& reference_shape = first_access.layout.shape();
136✔
379
            ndims = reference_shape.size();
136✔
380
            reference_layout = first_access.layout;
136✔
381
            min_indices.resize(ndims);
136✔
382
            max_indices.resize(ndims);
136✔
383

384
            bool consistent = true;
136✔
385
            for (const auto* memlet_ptr : memlets) {
217✔
386
                auto& acc = accesses_.at(memlet_ptr);
217✔
387
                auto& shape = acc.layout.shape();
217✔
388

389
                if (shape.size() != ndims) {
217✔
390
                    consistent = false;
×
391
                    break;
×
392
                }
×
393
                // Check inner dimensions match (all except first which may be unbounded)
394
                for (size_t d = 1; d < ndims; ++d) {
334✔
395
                    if (!symbolic::eq(shape[d], reference_shape[d])) {
117✔
396
                        consistent = false;
×
397
                        break;
×
398
                    }
×
399
                }
117✔
400
                if (!consistent) break;
217✔
401

402
                // Collect indices for each dimension
403
                if (acc.subset.size() != ndims) {
217✔
404
                    consistent = false;
8✔
405
                    break;
8✔
406
                }
8✔
407
                for (size_t d = 0; d < ndims; ++d) {
535✔
408
                    min_indices[d].push_back(acc.subset[d]);
326✔
409
                    max_indices[d].push_back(acc.subset[d]);
326✔
410
                }
326✔
411
            }
209✔
412

413
            if (!consistent) continue;
136✔
414

415
            // Compute tile groups for raw memlets
416
            compute_tile_groups(loop, container, memlets, reference_layout, ndims, parameters, assumptions);
128✔
417
        }
128✔
418

419
        if (ndims == 0) continue;
383✔
420

421
        // Compute min/max bounds for each dimension
422
        data_flow::Subset min_subset;
383✔
423
        data_flow::Subset max_subset;
383✔
424
        bool all_bounded = true;
383✔
425

426
        for (size_t d = 0; d < ndims; ++d) {
986✔
427
            symbolic::Expression dim_min = SymEngine::null;
603✔
428
            symbolic::Expression dim_max = SymEngine::null;
603✔
429

430
            // Compute dim_min from min_indices
431
            for (const auto& idx : min_indices[d]) {
759✔
432
                auto lb = symbolic::minimum(idx, parameters, assumptions, true);
759✔
433
                if (lb.is_null()) {
759✔
434
                    lb = symbolic::minimum(idx, parameters, assumptions, false);
6✔
435
                }
6✔
436
                if (lb.is_null()) {
759✔
437
                    all_bounded = false;
×
438
                    break;
×
439
                }
×
440
                if (dim_min.is_null()) {
759✔
441
                    dim_min = lb;
603✔
442
                } else {
603✔
443
                    dim_min = symbolic::min(dim_min, lb);
156✔
444
                }
156✔
445
            }
759✔
446
            if (!all_bounded) break;
603✔
447

448
            // Compute dim_max from max_indices
449
            for (const auto& idx : max_indices[d]) {
759✔
450
                auto ub = symbolic::maximum(idx, parameters, assumptions, true);
759✔
451
                if (ub.is_null()) {
759✔
452
                    ub = symbolic::maximum(idx, parameters, assumptions, false);
×
453
                }
×
454
                if (ub.is_null()) {
759✔
455
                    all_bounded = false;
×
456
                    break;
×
457
                }
×
458
                if (dim_max.is_null()) {
759✔
459
                    dim_max = ub;
603✔
460
                } else {
603✔
461
                    dim_max = symbolic::max(dim_max, ub);
156✔
462
                }
156✔
463
            }
759✔
464
            if (!all_bounded) break;
603✔
465

466
            min_subset.push_back(symbolic::simplify(dim_min));
603✔
467
            max_subset.push_back(symbolic::simplify(dim_max));
603✔
468
        }
603✔
469

470
        if (!all_bounded) continue;
383✔
471

472
        // Store this loop's tile with the original memory layout
473
        MemoryTile merged_tile{container, min_subset, max_subset, reference_layout, true};
383✔
474
        tiles_.insert({{&loop, container}, merged_tile});
383✔
475
    }
383✔
476
}
205✔
477

478
const MemoryTile* MemoryLayoutAnalysis::
479
    tile(const structured_control_flow::StructuredLoop& loop, const std::string& container) const {
26✔
480
    auto key = std::make_pair(&loop, container);
26✔
481
    auto it = tiles_.find(key);
26✔
482
    if (it == tiles_.end()) {
26✔
483
        return nullptr;
×
484
    }
×
485
    return &it->second;
26✔
486
}
26✔
487

488
void MemoryLayoutAnalysis::compute_tile_groups(
489
    structured_control_flow::StructuredLoop& loop,
490
    const std::string& container,
491
    const std::vector<const data_flow::Memlet*>& memlets,
492
    const MemoryLayout& reference_layout,
493
    size_t ndims,
494
    const symbolic::SymbolSet& parameters,
495
    const symbolic::Assumptions& assumptions
496
) {
128✔
497
    // For each memlet, compute per-dimension base (minimum of index expression)
498
    // Group memlets whose bases are symbolically equal in all dimensions
499
    struct GroupEntry {
128✔
500
        data_flow::Subset base; // per-dim minimum
128✔
501
        std::vector<const data_flow::Memlet*> group_memlets;
128✔
502
    };
128✔
503

504
    std::vector<GroupEntry> groups;
128✔
505

506
    for (const auto* memlet_ptr : memlets) {
209✔
507
        auto& acc = accesses_.at(memlet_ptr);
209✔
508
        if (acc.subset.size() != ndims) continue;
209✔
509

510
        // Compute per-dimension base (minimum)
511
        data_flow::Subset base;
209✔
512
        bool base_ok = true;
209✔
513
        for (size_t d = 0; d < ndims; ++d) {
535✔
514
            auto lb = symbolic::minimum(acc.subset[d], parameters, assumptions, true);
326✔
515
            if (lb.is_null()) {
326✔
516
                lb = symbolic::minimum(acc.subset[d], parameters, assumptions, false);
6✔
517
            }
6✔
518
            if (lb.is_null()) {
326✔
NEW
519
                base_ok = false;
×
NEW
520
                break;
×
NEW
521
            }
×
522
            base.push_back(symbolic::simplify(lb));
326✔
523
        }
326✔
524
        if (!base_ok) continue;
209✔
525

526
        // Find existing group with same base
527
        bool found = false;
209✔
528
        for (auto& group : groups) {
209✔
529
            if (group.base.size() != ndims) continue;
116✔
530
            bool match = true;
116✔
531
            for (size_t d = 0; d < ndims; ++d) {
197✔
532
                if (!symbolic::eq(group.base[d], base[d])) {
160✔
533
                    match = false;
79✔
534
                    break;
79✔
535
                }
79✔
536
            }
160✔
537
            if (match) {
116✔
538
                group.group_memlets.push_back(memlet_ptr);
37✔
539
                found = true;
37✔
540
                break;
37✔
541
            }
37✔
542
        }
116✔
543
        if (!found) {
209✔
544
            groups.push_back({base, {memlet_ptr}});
172✔
545
        }
172✔
546
    }
209✔
547

548
    if (groups.empty()) return;
128✔
549

550
    // Merge groups whose bases differ only by integer constants.
551
    // E.g. stencil bases [i-1, j], [i, j], [i+1, j] should merge (constant offsets in dim0).
552
    // But SYR2K bases [i, 0] vs [j, 0] should NOT merge (symbolic difference).
553
    std::vector<GroupEntry> merged_groups;
128✔
554
    for (auto& group : groups) {
172✔
555
        bool merged = false;
172✔
556
        for (auto& existing : merged_groups) {
172✔
557
            bool const_diff = true;
52✔
558
            for (size_t d = 0; d < ndims; ++d) {
102✔
559
                auto diff = symbolic::simplify(symbolic::sub(group.base[d], existing.base[d]));
70✔
560
                if (!SymEngine::is_a<SymEngine::Integer>(*diff)) {
70✔
561
                    const_diff = false;
20✔
562
                    break;
20✔
563
                }
20✔
564
            }
70✔
565
            if (const_diff) {
52✔
566
                existing.group_memlets
32✔
567
                    .insert(existing.group_memlets.end(), group.group_memlets.begin(), group.group_memlets.end());
32✔
568
                merged = true;
32✔
569
                break;
32✔
570
            }
32✔
571
        }
52✔
572
        if (!merged) {
172✔
573
            merged_groups.push_back(std::move(group));
140✔
574
        }
140✔
575
    }
172✔
576

577
    // Compute tile for each merged group
578
    std::vector<MemoryTileGroup> result_groups;
128✔
579
    for (auto& group : merged_groups) {
140✔
580
        std::vector<std::vector<symbolic::Expression>> min_indices(ndims);
140✔
581
        std::vector<std::vector<symbolic::Expression>> max_indices(ndims);
140✔
582

583
        for (const auto* memlet_ptr : group.group_memlets) {
209✔
584
            auto& acc = accesses_.at(memlet_ptr);
209✔
585
            for (size_t d = 0; d < ndims; ++d) {
535✔
586
                min_indices[d].push_back(acc.subset[d]);
326✔
587
                max_indices[d].push_back(acc.subset[d]);
326✔
588
            }
326✔
589
        }
209✔
590

591
        data_flow::Subset min_subset;
140✔
592
        data_flow::Subset max_subset;
140✔
593
        bool all_bounded = true;
140✔
594

595
        for (size_t d = 0; d < ndims; ++d) {
347✔
596
            symbolic::Expression dim_min = SymEngine::null;
207✔
597
            symbolic::Expression dim_max = SymEngine::null;
207✔
598

599
            for (const auto& idx : min_indices[d]) {
326✔
600
                auto lb = symbolic::minimum(idx, parameters, assumptions, true);
326✔
601
                if (lb.is_null()) {
326✔
602
                    lb = symbolic::minimum(idx, parameters, assumptions, false);
6✔
603
                }
6✔
604
                if (lb.is_null()) {
326✔
NEW
605
                    all_bounded = false;
×
NEW
606
                    break;
×
NEW
607
                }
×
608
                if (dim_min.is_null()) {
326✔
609
                    dim_min = lb;
207✔
610
                } else {
207✔
611
                    dim_min = symbolic::min(dim_min, lb);
119✔
612
                }
119✔
613
            }
326✔
614
            if (!all_bounded) break;
207✔
615

616
            for (const auto& idx : max_indices[d]) {
326✔
617
                auto ub = symbolic::maximum(idx, parameters, assumptions, true);
326✔
618
                if (ub.is_null()) {
326✔
NEW
619
                    ub = symbolic::maximum(idx, parameters, assumptions, false);
×
NEW
620
                }
×
621
                if (ub.is_null()) {
326✔
NEW
622
                    all_bounded = false;
×
NEW
623
                    break;
×
NEW
624
                }
×
625
                if (dim_max.is_null()) {
326✔
626
                    dim_max = ub;
207✔
627
                } else {
207✔
628
                    dim_max = symbolic::max(dim_max, ub);
119✔
629
                }
119✔
630
            }
326✔
631
            if (!all_bounded) break;
207✔
632

633
            min_subset.push_back(symbolic::simplify(dim_min));
207✔
634
            max_subset.push_back(symbolic::simplify(dim_max));
207✔
635
        }
207✔
636

637
        if (!all_bounded) continue;
140✔
638

639
        MemoryTile tile{container, min_subset, max_subset, reference_layout, true};
140✔
640
        result_groups.push_back({tile, group.group_memlets});
140✔
641
    }
140✔
642

643
    if (!result_groups.empty()) {
128✔
644
        tile_groups_.insert({{&loop, container}, std::move(result_groups)});
128✔
645
    }
128✔
646
}
128✔
647

648
const std::vector<MemoryTileGroup>* MemoryLayoutAnalysis::
649
    tile_groups(const structured_control_flow::StructuredLoop& loop, const std::string& container) const {
4✔
650
    auto key = std::make_pair(&loop, container);
4✔
651
    auto it = tile_groups_.find(key);
4✔
652
    if (it == tile_groups_.end()) {
4✔
NEW
653
        return nullptr;
×
NEW
654
    }
×
655
    return &it->second;
4✔
656
}
4✔
657

658
const MemoryTileGroup* MemoryLayoutAnalysis::
659
    tile_group_for(const structured_control_flow::StructuredLoop& loop, const data_flow::Memlet& memlet) const {
58✔
660
    // Find which container this memlet accesses
661
    auto acc_it = accesses_.find(&memlet);
58✔
662
    if (acc_it == accesses_.end()) {
58✔
663
        return nullptr;
1✔
664
    }
1✔
665
    auto& container = acc_it->second.container;
57✔
666

667
    auto key = std::make_pair(&loop, container);
57✔
668
    auto groups_it = tile_groups_.find(key);
57✔
669
    if (groups_it == tile_groups_.end()) {
57✔
NEW
670
        return nullptr;
×
NEW
671
    }
×
672

673
    for (const auto& group : groups_it->second) {
59✔
674
        for (const auto* m : group.memlets) {
70✔
675
            if (m == &memlet) {
70✔
676
                return &group;
57✔
677
            }
57✔
678
        }
70✔
679
    }
59✔
NEW
680
    return nullptr;
×
681
}
57✔
682

683
symbolic::MultiExpression MemoryTile::extents() const {
25✔
684
    symbolic::MultiExpression result;
25✔
685
    for (size_t d = 0; d < min_subset.size(); ++d) {
81✔
686
        result.push_back(symbolic::simplify(
56✔
687
            symbolic::expand(symbolic::add(symbolic::sub(max_subset[d], min_subset[d]), symbolic::one()))
56✔
688
        ));
56✔
689
    }
56✔
690
    return result;
25✔
691
}
25✔
692

693
symbolic::MultiExpression MemoryTile::extents_approx() const {
52✔
694
    symbolic::MultiExpression result;
52✔
695
    for (size_t d = 0; d < min_subset.size(); ++d) {
139✔
696
        result.push_back(symbolic::simplify(symbolic::expand(
87✔
697
            symbolic::overapproximate(symbolic::add(symbolic::sub(max_subset[d], min_subset[d]), symbolic::one()))
87✔
698
        )));
87✔
699
    }
87✔
700
    return result;
52✔
701
}
52✔
702

703
std::pair<symbolic::Expression, symbolic::Expression> MemoryTile::contiguous_range() const {
20✔
704
    auto& strides = layout.strides();
20✔
705
    auto first = layout.offset();
20✔
706
    auto last = layout.offset();
20✔
707
    for (size_t d = 0; d < min_subset.size(); ++d) {
66✔
708
        first = symbolic::add(first, symbolic::mul(strides[d], min_subset[d]));
46✔
709
        last = symbolic::add(last, symbolic::mul(strides[d], max_subset[d]));
46✔
710
    }
46✔
711
    return {symbolic::simplify(symbolic::expand(first)), symbolic::simplify(symbolic::expand(last))};
20✔
712
}
20✔
713

714
} // namespace analysis
715
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc