• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 27159720397

08 Jun 2026 06:51PM UTC coverage: 61.311% (+0.04%) from 61.275%
27159720397

Pull #741

github

web-flow
Merge ba8498347 into aacd50c09
Pull Request #741: replaces MemAccessRangeAnalysis with MemoryLayoutAnalysis

383 of 423 new or added lines in 11 files covered. (90.54%)

56 existing lines in 12 files now uncovered.

35645 of 58138 relevant lines covered (61.31%)

744.5 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

91.47
/sdfg/src/analysis/memory_layout_analysis.cpp
1
#include "sdfg/analysis/memory_layout_analysis.h"
2

3
#include <algorithm>
4
#include <optional>
5
#include <set>
6
#include <unordered_set>
7

8
#include "sdfg/analysis/assumptions_analysis.h"
9
#include "sdfg/data_flow/access_node.h"
10
#include "sdfg/structured_control_flow/block.h"
11
#include "sdfg/structured_control_flow/if_else.h"
12
#include "sdfg/structured_control_flow/sequence.h"
13
#include "sdfg/structured_control_flow/structured_loop.h"
14
#include "sdfg/structured_control_flow/while.h"
15
#include "sdfg/symbolic/delinearization.h"
16
#include "sdfg/symbolic/extreme_values.h"
17
#include "sdfg/symbolic/polynomials.h"
18

19
namespace sdfg {
20
namespace analysis {
21

22
namespace {
23

24
// Sentinel symbol stored in shape[0] of a MemoryLayout when the leading dimension's
25
// extent is unknown (raw pointer accesses). The symbol never escapes the analysis:
26
// any expression that mentions it must be reported to the caller as `SymEngine::null`
27
// from the public size accessors (see `MemoryTile::extents()` etc.).
28
constexpr const char* kUnboundedName = "__unbounded__";
29

30
bool is_unbounded_dim(const symbolic::Expression& e) {
11,079✔
31
    if (e.is_null()) return false;
11,079✔
32
    if (!SymEngine::is_a<SymEngine::Symbol>(*e)) return false;
11,079✔
33
    return SymEngine::down_cast<const SymEngine::Symbol&>(*e).get_name() == kUnboundedName;
10,370✔
34
}
11,079✔
35

36
bool depends_on_unbounded(const symbolic::Expression& e) {
249✔
37
    if (e.is_null()) return false;
249✔
38
    for (const auto& a : symbolic::atoms(e)) {
249✔
39
        if (is_unbounded_dim(a)) return true;
158✔
40
    }
158✔
41
    return false;
249✔
42
}
249✔
43

44
bool layout_has_unbounded_first_dim(const MemoryLayout& layout) {
10,921✔
45
    const auto& shape = layout.shape();
10,921✔
46
    return !shape.empty() && is_unbounded_dim(shape[0]);
10,921✔
47
}
10,921✔
48

49
// Collect immediate child scopes (Sequence/IfElse/While/StructuredLoop) of a given
50
// scope that carry their own MemoryTile entries. Blocks are excluded because their
51
// per-memlet info is held in `accesses_`, not in `tiles_`/`tile_groups_`.
52
void collect_direct_child_scopes(
53
    structured_control_flow::ControlFlowNode& scope, std::set<const structured_control_flow::ControlFlowNode*>& result
54
) {
2,687✔
55
    if (auto* loop = dynamic_cast<structured_control_flow::StructuredLoop*>(&scope)) {
2,687✔
56
        result.insert(&loop->root());
870✔
57
    } else if (auto* w = dynamic_cast<structured_control_flow::While*>(&scope)) {
1,817✔
58
        result.insert(&w->root());
3✔
59
    } else if (auto* seq = dynamic_cast<structured_control_flow::Sequence*>(&scope)) {
1,814✔
60
        for (size_t i = 0; i < seq->size(); i++) {
2,688✔
61
            auto& child = seq->at(i).first;
1,553✔
62
            if (!dynamic_cast<structured_control_flow::Block*>(&child)) {
1,553✔
63
                result.insert(&child);
886✔
64
            }
886✔
65
        }
1,553✔
66
    } else if (auto* ife = dynamic_cast<structured_control_flow::IfElse*>(&scope)) {
1,135✔
67
        for (size_t i = 0; i < ife->size(); i++) {
28✔
68
            result.insert(&ife->at(i).first);
16✔
69
        }
16✔
70
    }
12✔
71
}
2,687✔
72
} // namespace
73

74
MemoryLayoutAnalysis::MemoryLayoutAnalysis(StructuredSDFG& sdfg) : Analysis(sdfg) {}
245✔
75

76
void MemoryLayoutAnalysis::run(analysis::AnalysisManager& analysis_manager) {
245✔
77
    accesses_.clear();
245✔
78
    tiles_.clear();
245✔
79
    tile_groups_.clear();
245✔
80
    traverse(sdfg_.root(), analysis_manager);
245✔
81
}
245✔
82

83
void MemoryLayoutAnalysis::
84
    traverse(structured_control_flow::ControlFlowNode& node, analysis::AnalysisManager& analysis_manager) {
2,687✔
85
    // Snapshot current memlets and tile keys before recursing into the scope's children
86
    std::vector<const data_flow::Memlet*> memlets_before;
2,687✔
87
    memlets_before.reserve(accesses_.size());
2,687✔
88
    for (const auto& entry : accesses_) {
4,257✔
89
        memlets_before.push_back(entry.first);
4,257✔
90
    }
4,257✔
91
    std::set<std::pair<const structured_control_flow::ControlFlowNode*, std::string>> tiles_before;
2,687✔
92
    for (const auto& entry : tiles_) {
10,526✔
93
        tiles_before.insert(entry.first);
10,526✔
94
    }
10,526✔
95

96
    if (auto block = dynamic_cast<structured_control_flow::Block*>(&node)) {
2,687✔
97
        process_block(*block, analysis_manager);
667✔
98
    } else if (auto sequence = dynamic_cast<structured_control_flow::Sequence*>(&node)) {
2,020✔
99
        for (size_t i = 0; i < sequence->size(); i++) {
2,688✔
100
            traverse(sequence->at(i).first, analysis_manager);
1,553✔
101
        }
1,553✔
102
    } else if (auto if_else = dynamic_cast<structured_control_flow::IfElse*>(&node)) {
1,135✔
103
        for (size_t i = 0; i < if_else->size(); i++) {
28✔
104
            traverse(if_else->at(i).first, analysis_manager);
16✔
105
        }
16✔
106
    } else if (auto while_stmt = dynamic_cast<structured_control_flow::While*>(&node)) {
873✔
107
        traverse(while_stmt->root(), analysis_manager);
3✔
108
    } else if (auto loop = dynamic_cast<structured_control_flow::StructuredLoop*>(&node)) {
870✔
109
        traverse(loop->root(), analysis_manager);
870✔
110
    } else {
870✔
111
        // Break, Continue, Return nodes don't contain blocks
NEW
112
        return;
×
UNCOV
113
    }
×
114

115
    // Merge tiles for containers accessed within this scope
116
    merge_scope_layouts(node, memlets_before, tiles_before, analysis_manager);
2,687✔
117
}
2,687✔
118

119
void MemoryLayoutAnalysis::
120
    process_block(structured_control_flow::Block& block, analysis::AnalysisManager& analysis_manager) {
667✔
121
    auto& assumptions_analysis = analysis_manager.get<AssumptionsAnalysis>();
667✔
122
    // Use trivial bounds (type-derived, e.g. unsigned >= 0) so delinearization
123
    // can soundly discharge non-negativity proof obligations on parameters.
124
    auto& assumptions = assumptions_analysis.get(block, /*include_trivial_bounds=*/true);
667✔
125

126
    auto& dfg = block.dataflow();
667✔
127
    for (auto& memlet : dfg.edges()) {
1,938✔
128
        const auto& subset = memlet.subset();
1,938✔
129
        if (subset.empty()) {
1,938✔
130
            continue;
561✔
131
        }
561✔
132

133
        // Get container name from the AccessNode (either src or dst)
134
        std::string container_name;
1,377✔
135
        if (auto* access = dynamic_cast<const data_flow::AccessNode*>(&memlet.src())) {
1,377✔
136
            container_name = access->data();
889✔
137
        } else if (auto* access = dynamic_cast<const data_flow::AccessNode*>(&memlet.dst())) {
889✔
138
            container_name = access->data();
488✔
139
        } else {
488✔
140
            continue; // Skip memlets without AccessNode
×
141
        }
×
142

143
        auto& base_type = memlet.base_type();
1,377✔
144
        switch (base_type.type_id()) {
1,377✔
145
            case types::TypeID::Scalar:
×
146
            case types::TypeID::Structure:
×
147
                continue; // Skip scalars and structures
×
148
            case types::TypeID::Tensor: {
×
149
                // Tensor types already contain layout information, so we can directly store it without delinearization
150
                auto& tensor_type = dynamic_cast<const types::Tensor&>(memlet.base_type());
×
151

152
                MemoryLayout layout(tensor_type.shape(), tensor_type.strides(), tensor_type.offset());
×
153
                MemoryAccess layout_info{container_name, subset, layout, true};
×
154
                this->accesses_.emplace(&memlet, layout_info);
×
155
                continue;
×
156
            }
×
157
            case types::TypeID::Array: {
440✔
158
                // Arrays are c-like stack array, so we can infer a simple row-major layout without needing
159
                // delinearization
160
                auto* array_type = dynamic_cast<const types::Array*>(&memlet.base_type());
440✔
161
                symbolic::MultiExpression shape = {array_type->num_elements()};
440✔
162
                while (array_type->element_type().type_id() == types::TypeID::Array) {
454✔
163
                    array_type = dynamic_cast<const types::Array*>(&array_type->element_type());
14✔
164
                }
14✔
165
                if (array_type->element_type().type_id() != types::TypeID::Scalar) {
440✔
166
                    continue; // Skip non-scalar arrays
×
167
                }
×
168

169
                MemoryLayout layout(shape);
440✔
170
                MemoryAccess layout_info{container_name, subset, layout, true};
440✔
171
                this->accesses_.emplace(&memlet, layout_info);
440✔
172
                continue;
440✔
173
            }
440✔
174
            case types::TypeID::Pointer: {
937✔
175
                // For pointers, we attempt to delinearize the access pattern to infer the layout based
176
                // on assumptions from loop bounds
177
                auto* pointer_type = dynamic_cast<const types::Pointer*>(&memlet.base_type());
937✔
178

179
                // Typed pointer to a (possibly multi-dim) fixed array of scalar,
180
                // e.g. `float (*A)[M]`. The pointer adds one unbounded leading
181
                // dimension; remaining dimensions come from the array shape. The
182
                // subset is expected to be one index per dimension — no
183
                // delinearization needed.
184
                if (pointer_type->pointee_type().type_id() == types::TypeID::Array) {
937✔
185
                    auto* array_type = dynamic_cast<const types::Array*>(&pointer_type->pointee_type());
550✔
186
                    symbolic::MultiExpression array_shape = {array_type->num_elements()};
550✔
187
                    while (array_type->element_type().type_id() == types::TypeID::Array) {
568✔
188
                        array_type = dynamic_cast<const types::Array*>(&array_type->element_type());
18✔
189
                        array_shape.push_back(array_type->num_elements());
18✔
190
                    }
18✔
191
                    if (array_type->element_type().type_id() != types::TypeID::Scalar) {
550✔
NEW
192
                        continue; // Skip non-scalar leaf
×
NEW
193
                    }
×
194
                    if (subset.size() != array_shape.size() + 1) {
550✔
NEW
195
                        continue; // Require one index per dimension (leading pointer + array dims)
×
NEW
196
                    }
×
197

198
                    symbolic::MultiExpression shape;
550✔
199
                    shape.push_back(symbolic::symbol("__unbounded__"));
550✔
200
                    for (const auto& dim : array_shape) {
568✔
201
                        shape.push_back(dim);
568✔
202
                    }
568✔
203

204
                    MemoryLayout layout(shape);
550✔
205
                    MemoryAccess layout_info{container_name, subset, layout, false};
550✔
206
                    this->accesses_.emplace(&memlet, layout_info);
550✔
207
                    continue;
550✔
208
                }
550✔
209

210
                if (pointer_type->pointee_type().type_id() != types::TypeID::Scalar) {
387✔
211
                    continue; // Skip non-scalar pointers
1✔
212
                }
1✔
213

214
                if (subset.size() != 1) {
386✔
215
                    continue; // Require full linearization
×
216
                }
×
217
                auto& linearized_expr = subset.at(0);
386✔
218

219
                auto result = symbolic::delinearize(linearized_expr, assumptions);
386✔
220
                if (!result.success) {
386✔
221
                    continue; // Delinearization failed, skip
15✔
222
                }
15✔
223

224
                // Delinearization returns N indices but only N-1 dimensions (from stride division)
225
                // The first dimension is unbounded - insert a placeholder that will be filled in by merge
226
                // Using a special symbol as placeholder for the first dimension
227
                symbolic::MultiExpression shape;
371✔
228
                shape.push_back(symbolic::symbol("__unbounded__"));
371✔
229
                for (const auto& dim : result.dimensions) {
371✔
230
                    shape.push_back(dim);
257✔
231
                }
257✔
232

233
                // Store symbolic indices and dimensions with unbounded first dimension
234
                // The merge phase will attempt to bound the first dimension using loop assumptions
235
                MemoryLayout layout(shape);
371✔
236
                MemoryAccess layout_info{container_name, result.indices, layout, false};
371✔
237
                this->accesses_.emplace(&memlet, layout_info);
371✔
238
                continue;
371✔
239
            }
386✔
240
            default:
×
241
                continue; // Skip unsupported types
×
242
        }
1,377✔
243
    }
1,377✔
244
}
667✔
245

246
const MemoryAccess* MemoryLayoutAnalysis::access(const data_flow::Memlet& memlet) const {
4,422✔
247
    auto layout_it = accesses_.find(&memlet);
4,422✔
248
    if (layout_it == accesses_.end()) {
4,422✔
249
        return nullptr;
6✔
250
    }
6✔
251
    return &layout_it->second;
4,416✔
252
}
4,422✔
253

254
void MemoryLayoutAnalysis::merge_scope_layouts(
255
    structured_control_flow::ControlFlowNode& scope,
256
    const std::vector<const data_flow::Memlet*>& memlets_before,
257
    const std::set<std::pair<const structured_control_flow::ControlFlowNode*, std::string>>& tiles_before,
258
    analysis::AnalysisManager& analysis_manager
259
) {
2,687✔
260
    // Convert memlets_before to a set for O(1) lookup
261
    std::unordered_set<const data_flow::Memlet*> before_set(memlets_before.begin(), memlets_before.end());
2,687✔
262

263
    // Group all new accesses by container
264
    std::unordered_map<std::string, std::vector<const data_flow::Memlet*>> all_container_groups;
2,687✔
265
    for (auto& [memlet_ptr, acc] : accesses_) {
14,207✔
266
        if (before_set.find(memlet_ptr) != before_set.end()) {
14,207✔
267
            continue;
4,257✔
268
        }
4,257✔
269
        all_container_groups[acc.container].push_back(memlet_ptr);
9,950✔
270
    }
9,950✔
271

272
    // Sort memlets within each container group by element_id for deterministic processing order
273
    for (auto& [container, memlets] : all_container_groups) {
5,358✔
274
        std::sort(memlets.begin(), memlets.end(), [](const data_flow::Memlet* a, const data_flow::Memlet* b) {
9,365✔
275
            return a->element_id() < b->element_id();
9,365✔
276
        });
9,365✔
277
    }
5,358✔
278

279
    auto* loop = dynamic_cast<structured_control_flow::StructuredLoop*>(&scope);
2,687✔
280

281
    auto& assumptions_analysis = analysis_manager.get<AssumptionsAnalysis>();
2,687✔
282
    // For loops, query at the loop body so the induction variable's bounds are visible.
283
    auto& assumption_node = loop ? static_cast<structured_control_flow::ControlFlowNode&>(loop->root()) : scope;
2,687✔
284
    // Trivial-bounds view: includes type-derived defaults (e.g. Int32 ∈ [INT_MIN, INT_MAX]).
285
    // Used as the assumption set passed to symbolic::minimum/maximum so that the
286
    // resolver has sign information for parameters.
287
    auto& assumptions = assumptions_analysis.get(assumption_node, /*include_trivial_bounds=*/true);
2,687✔
288
    // Narrowing-only view: excludes type-derived defaults. A symbol that only
289
    // appears here (or in neither) has at most its type's intrinsic range — any
290
    // min/max resolution would collapse to INT_MIN/INT_MAX-style numerics, which
291
    // is not a sound tile bound. We use this to decide whether to emit a tile.
292
    auto& narrowing_assumptions = assumptions_analysis.get(assumption_node, /*include_trivial_bounds=*/false);
2,687✔
293
    // Parameters of a scope can only be constant symbols (invariant within the
294
    // scope). SDFG-level read-only arguments are constant by construction; for
295
    // each scope-local entry, the constant() flag tells us whether the symbol
296
    // can be treated opaquely by the min/max resolver.
297
    symbolic::SymbolSet parameters = assumptions_analysis.parameters();
2,687✔
298
    for (auto& entry : assumptions) {
23,389✔
299
        if (loop && symbolic::eq(entry.first, loop->indvar())) {
23,389✔
300
            continue; // The induction variable is not a parameter of its own loop scope
870✔
301
        }
870✔
302
        if (entry.second.constant()) {
22,519✔
303
            parameters.insert(entry.first);
8,653✔
304
        }
8,653✔
305
    }
22,519✔
306

307
    // Soundness check: every free (non-parameter) symbol in an index expression
308
    // must have a narrowing assumption at this scope. Otherwise symbolic::minimum/
309
    // maximum would fall back to the symbol's type-default range and produce
310
    // bogus tile bounds (e.g. INT_MAX) that the rest of the pipeline would
311
    // silently consume as truth.
312
    auto has_narrowing = [&](const symbolic::Symbol& sym) -> bool {
2,687✔
313
        auto it = narrowing_assumptions.find(sym);
68✔
314
        if (it == narrowing_assumptions.end()) return false;
68✔
NEW
315
        return !it->second.lower_bounds().empty() || !it->second.upper_bounds().empty();
×
316
    };
68✔
317
    auto bounds_are_sound = [&](const symbolic::Expression& expr) -> bool {
16,358✔
318
        for (const auto& sym : symbolic::atoms(expr)) {
16,358✔
319
            if (parameters.contains(sym)) continue;
15,662✔
320
            if (loop && symbolic::eq(sym, loop->indvar())) continue;
2,957✔
321
            if (!has_narrowing(sym)) return false;
68✔
322
        }
68✔
323
        return true;
16,290✔
324
    };
16,358✔
325

326
    // Per-scope BoundAnalysis pair (tight + loose-fallback). Both instances
327
    // memoize their results, so repeated expressions and shared subterms
328
    // across all per-element lower/upper queries below hit the cache.
329
    symbolic::BoundAnalysis ba_tight(parameters, assumptions, true);
2,687✔
330
    symbolic::BoundAnalysis ba_loose(parameters, assumptions, false);
2,687✔
331
    auto bound_lb = [&](const symbolic::Expression& e) -> symbolic::Expression {
24,179✔
332
        auto r = ba_tight.lower_bound(e);
24,179✔
333
        if (r.is_null()) r = ba_loose.lower_bound(e);
24,179✔
334
        return r;
24,179✔
335
    };
24,179✔
336
    auto bound_ub = [&](const symbolic::Expression& e) -> symbolic::Expression {
17,183✔
337
        auto r = ba_tight.upper_bound(e);
17,183✔
338
        if (r.is_null()) r = ba_loose.upper_bound(e);
17,183✔
339
        return r;
17,183✔
340
    };
17,183✔
341

342
    // Find direct child scopes that may carry tiles for this scope
343
    std::set<const structured_control_flow::ControlFlowNode*> direct_child_scopes;
2,687✔
344
    collect_direct_child_scopes(scope, direct_child_scopes);
2,687✔
345

346
    for (auto& [container, memlets] : all_container_groups) {
5,358✔
347
        if (memlets.empty()) continue;
5,358✔
348

349
        std::vector<const MemoryTile*> inner_tiles;
5,358✔
350
        for (const auto* child : direct_child_scopes) {
5,358✔
351
            auto it = tiles_.find({child, container});
4,219✔
352
            if (it != tiles_.end()) {
4,219✔
353
                inner_tiles.push_back(&it->second);
3,668✔
354
            }
3,668✔
355
        }
4,219✔
356

357
        size_t ndims = 0;
5,358✔
358
        MemoryLayout reference_layout({symbolic::one()});
5,358✔
359
        // Separate min/max index lists to avoid unnecessary symbolic min/max
360
        std::vector<std::vector<symbolic::Expression>> min_indices;
5,358✔
361
        std::vector<std::vector<symbolic::Expression>> max_indices;
5,358✔
362

363
        if (!inner_tiles.empty()) {
5,358✔
364
            // Use inner tile min/max as representative values
365
            // Inner tiles have already resolved inner loop variables to their bounds
366
            ndims = inner_tiles[0]->min_subset.size();
3,474✔
367
            reference_layout = inner_tiles[0]->layout;
3,474✔
368
            min_indices.resize(ndims);
3,474✔
369
            max_indices.resize(ndims);
3,474✔
370

371
            for (const auto* tile : inner_tiles) {
3,668✔
372
                if (tile->min_subset.size() != ndims) continue;
3,668✔
373
                for (size_t d = 0; d < ndims; ++d) {
9,761✔
374
                    min_indices[d].push_back(tile->min_subset[d]);
6,093✔
375
                    max_indices[d].push_back(tile->max_subset[d]);
6,093✔
376
                }
6,093✔
377
            }
3,668✔
378

379
            // Propagate tile groups from child scopes upward using the same
380
            // base-partitioning logic: group inner groups by their min_subset
381
            // base at this scope level, then merge each partition.
382
            // Same direct-lookup optimization as above for inner_tiles.
383
            std::vector<const MemoryTileGroup*> inner_groups;
3,474✔
384
            for (const auto* child : direct_child_scopes) {
4,038✔
385
                auto it = tile_groups_.find({child, container});
4,038✔
386
                if (it == tile_groups_.end()) continue;
4,038✔
387
                for (const auto& g : it->second) {
4,148✔
388
                    inner_groups.push_back(&g);
4,148✔
389
                }
4,148✔
390
            }
3,668✔
391

392
            if (!inner_groups.empty()) {
3,474✔
393
                // Group inner groups by their base at this level
394
                struct OuterGroupEntry {
3,474✔
395
                    data_flow::Subset base;
3,474✔
396
                    std::vector<const MemoryTileGroup*> constituents;
3,474✔
397
                };
3,474✔
398
                std::vector<OuterGroupEntry> outer_partitions;
3,474✔
399

400
                for (const auto* ig : inner_groups) {
4,148✔
401
                    if (ig->tile.min_subset.size() != ndims) continue;
4,148✔
402

403
                    // Compute base: minimum of the inner group's min_subset per dim
404
                    data_flow::Subset base;
4,148✔
405
                    bool base_ok = true;
4,148✔
406
                    for (size_t d = 0; d < ndims; ++d) {
11,134✔
407
                        auto lb = bound_lb(ig->tile.min_subset[d]);
6,986✔
408
                        if (lb.is_null()) {
6,986✔
409
                            base_ok = false;
×
410
                            break;
×
411
                        }
×
412
                        base.push_back(symbolic::simplify(lb));
6,986✔
413
                    }
6,986✔
414
                    if (!base_ok) continue;
4,148✔
415

416
                    // Find matching partition (same base OR constant-offset base)
417
                    bool found = false;
4,148✔
418
                    for (auto& op : outer_partitions) {
4,148✔
419
                        bool const_diff = true;
866✔
420
                        for (size_t d = 0; d < ndims; ++d) {
1,524✔
421
                            auto diff = symbolic::simplify(symbolic::sub(base[d], op.base[d]));
1,226✔
422
                            if (!SymEngine::is_a<SymEngine::Integer>(*diff)) {
1,226✔
423
                                const_diff = false;
568✔
424
                                break;
568✔
425
                            }
568✔
426
                        }
1,226✔
427
                        if (const_diff) {
866✔
428
                            op.constituents.push_back(ig);
298✔
429
                            found = true;
298✔
430
                            break;
298✔
431
                        }
298✔
432
                    }
866✔
433
                    if (!found) {
4,148✔
434
                        outer_partitions.push_back({base, {ig}});
3,850✔
435
                    }
3,850✔
436
                }
4,148✔
437

438
                // For each partition, merge constituent tile bounds and collect memlets
439
                std::vector<MemoryTileGroup> result_groups;
3,474✔
440
                for (auto& op : outer_partitions) {
3,850✔
441
                    data_flow::Subset grp_min, grp_max;
3,850✔
442
                    bool grp_bounded = true;
3,850✔
443

444
                    for (size_t d = 0; d < ndims; ++d) {
10,309✔
445
                        symbolic::Expression d_min = SymEngine::null;
6,466✔
446
                        symbolic::Expression d_max = SymEngine::null;
6,466✔
447
                        for (const auto* c : op.constituents) {
6,984✔
448
                            auto lb = bound_lb(c->tile.min_subset[d]);
6,984✔
449
                            if (lb.is_null()) {
6,984✔
450
                                grp_bounded = false;
×
451
                                break;
×
452
                            }
×
453
                            d_min = d_min.is_null() ? lb : symbolic::min(d_min, lb);
6,984✔
454

455
                            auto ub = bound_ub(c->tile.max_subset[d]);
6,984✔
456
                            if (ub.is_null()) {
6,984✔
457
                                grp_bounded = false;
7✔
458
                                break;
7✔
459
                            }
7✔
460
                            d_max = d_max.is_null() ? ub : symbolic::max(d_max, ub);
6,977✔
461
                        }
6,977✔
462
                        if (!grp_bounded) break;
6,466✔
463
                        grp_min.push_back(symbolic::simplify(d_min));
6,459✔
464
                        grp_max.push_back(symbolic::simplify(d_max));
6,459✔
465
                    }
6,459✔
466
                    if (!grp_bounded) continue;
3,850✔
467

468
                    // Collect all memlets from constituent groups
469
                    std::vector<const data_flow::Memlet*> grp_memlets;
3,843✔
470
                    for (const auto* c : op.constituents) {
4,139✔
471
                        grp_memlets.insert(grp_memlets.end(), c->memlets.begin(), c->memlets.end());
4,139✔
472
                    }
4,139✔
473

474
                    MemoryTile grp_tile{
3,843✔
475
                        container, grp_min, grp_max, reference_layout, !layout_has_unbounded_first_dim(reference_layout)
3,843✔
476
                    };
3,843✔
477
                    result_groups.push_back({grp_tile, std::move(grp_memlets)});
3,843✔
478
                }
3,843✔
479

480
                if (!result_groups.empty()) {
3,474✔
481
                    tile_groups_.insert({{&scope, container}, std::move(result_groups)});
3,467✔
482
                }
3,467✔
483
            }
3,474✔
484
        } else {
3,474✔
485
            // Use raw access indices (no inner tiles available)
486
            auto& first_access = accesses_.at(memlets[0]);
1,884✔
487
            auto& reference_shape = first_access.layout.shape();
1,884✔
488
            ndims = reference_shape.size();
1,884✔
489
            reference_layout = first_access.layout;
1,884✔
490
            min_indices.resize(ndims);
1,884✔
491
            max_indices.resize(ndims);
1,884✔
492

493
            bool consistent = true;
1,884✔
494
            for (const auto* memlet_ptr : memlets) {
2,777✔
495
                auto& acc = accesses_.at(memlet_ptr);
2,777✔
496
                auto& shape = acc.layout.shape();
2,777✔
497

498
                if (shape.size() != ndims) {
2,777✔
499
                    consistent = false;
×
500
                    break;
×
501
                }
×
502
                // Check inner dimensions match (all except first which may be unbounded)
503
                for (size_t d = 1; d < ndims; ++d) {
4,425✔
504
                    if (!symbolic::eq(shape[d], reference_shape[d])) {
1,648✔
505
                        consistent = false;
×
506
                        break;
×
507
                    }
×
508
                }
1,648✔
509
                if (!consistent) break;
2,777✔
510

511
                // Collect indices for each dimension. In the raw-access path,
512
                // min and max indices are identical (both are `acc.subset[d]`),
513
                // so we only populate `min_indices[d]` and leave `max_indices[d]`
514
                // empty. The shared bound-resolution loop below detects this
515
                // alias case and fuses the two passes into one walk, halving
516
                // the soundness-check and outer-loop overhead.
517
                if (acc.subset.size() != ndims) {
2,777✔
518
                    consistent = false;
134✔
519
                    break;
134✔
520
                }
134✔
521
                for (size_t d = 0; d < ndims; ++d) {
6,934✔
522
                    min_indices[d].push_back(acc.subset[d]);
4,291✔
523
                }
4,291✔
524
            }
2,643✔
525

526
            if (!consistent) continue;
1,884✔
527

528
            // Compute tile groups for raw memlets
529
            compute_tile_groups(scope, container, memlets, reference_layout, ndims, ba_tight, ba_loose);
1,750✔
530
        }
1,750✔
531

532
        if (ndims == 0) continue;
5,224✔
533

534
        // Bound each candidate index individually via the reused BoundAnalysis
535
        // (memoized across all per-dim queries below) and combine the per-index
536
        // bounds into the dim's min/max via symbolic::min / symbolic::max.
537
        //
538
        // In the raw-access path, `max_indices[d]` is left empty as a signal
539
        // that it would otherwise alias `min_indices[d]`. When detected, we
540
        // fuse the two passes into a single walk so the soundness check and
541
        // outer-loop bookkeeping run once per index instead of twice.
542
        data_flow::Subset min_subset;
5,224✔
543
        data_flow::Subset max_subset;
5,224✔
544
        bool all_bounded = true;
5,224✔
545

546
        for (size_t d = 0; d < ndims; ++d) {
13,672✔
547
            symbolic::Expression dim_min = SymEngine::null;
8,521✔
548
            symbolic::Expression dim_max = SymEngine::null;
8,521✔
549
            const bool fused = max_indices[d].empty();
8,521✔
550

551
            for (const auto& idx : min_indices[d]) {
10,269✔
552
                if (!bounds_are_sound(idx)) {
10,269✔
553
                    all_bounded = false;
60✔
554
                    break;
60✔
555
                }
60✔
556
                auto lb = bound_lb(idx);
10,209✔
557
                if (lb.is_null()) {
10,209✔
558
                    all_bounded = false;
2✔
559
                    break;
2✔
560
                }
2✔
561
                dim_min = dim_min.is_null() ? lb : symbolic::min(dim_min, lb);
10,207✔
562

563
                if (fused) {
10,207✔
564
                    // Soundness already checked above for the same idx.
565
                    auto ub = bound_ub(idx);
4,118✔
566
                    if (ub.is_null()) {
4,118✔
NEW
567
                        all_bounded = false;
×
NEW
568
                        break;
×
NEW
569
                    }
×
570
                    dim_max = dim_max.is_null() ? ub : symbolic::max(dim_max, ub);
4,118✔
571
                }
4,118✔
572
            }
10,207✔
573
            if (!all_bounded) break;
8,521✔
574

575
            if (!fused) {
8,459✔
576
                for (const auto& idx : max_indices[d]) {
6,089✔
577
                    if (!bounds_are_sound(idx)) {
6,089✔
578
                        all_bounded = false;
8✔
579
                        break;
8✔
580
                    }
8✔
581
                    auto ub = bound_ub(idx);
6,081✔
582
                    if (ub.is_null()) {
6,081✔
583
                        all_bounded = false;
3✔
584
                        break;
3✔
585
                    }
3✔
586
                    dim_max = dim_max.is_null() ? ub : symbolic::max(dim_max, ub);
6,078✔
587
                }
6,078✔
588
                if (!all_bounded) break;
5,774✔
589
            }
5,774✔
590

591
            min_subset.push_back(symbolic::simplify(dim_min));
8,448✔
592
            max_subset.push_back(symbolic::simplify(dim_max));
8,448✔
593
        }
8,448✔
594

595
        if (!all_bounded) continue;
5,224✔
596

597
        // Store this scope's tile with the original memory layout. `first_dim_bounded`
598
        // mirrors the underlying layout: false whenever shape[0] is the unbounded sentinel.
599
        MemoryTile merged_tile{
5,151✔
600
            container, min_subset, max_subset, reference_layout, !layout_has_unbounded_first_dim(reference_layout)
5,151✔
601
        };
5,151✔
602
        tiles_.insert({{&scope, container}, merged_tile});
5,151✔
603
    }
5,151✔
604
}
2,687✔
605

606
const MemoryTile* MemoryLayoutAnalysis::
607
    tile(const structured_control_flow::ControlFlowNode& scope, const std::string& container) const {
45✔
608
    auto key = std::make_pair(&scope, container);
45✔
609
    auto it = tiles_.find(key);
45✔
610
    if (it == tiles_.end()) {
45✔
611
        return nullptr;
2✔
612
    }
2✔
613
    return &it->second;
43✔
614
}
45✔
615

616
void MemoryLayoutAnalysis::compute_tile_groups(
617
    structured_control_flow::ControlFlowNode& scope,
618
    const std::string& container,
619
    const std::vector<const data_flow::Memlet*>& memlets,
620
    const MemoryLayout& reference_layout,
621
    size_t ndims,
622
    symbolic::BoundAnalysis& ba_tight,
623
    symbolic::BoundAnalysis& ba_loose
624
) {
1,750✔
625
    // Bound helpers reusing the caller's BoundAnalysis instances. These share
626
    // their memoization cache with merge_scope_layouts for the same scope, so
627
    // expressions bounded there hit instantly when re-encountered here.
628
    auto bound_lb = [&](const symbolic::Expression& e) -> symbolic::Expression {
8,524✔
629
        auto r = ba_tight.lower_bound(e);
8,524✔
630
        if (r.is_null()) r = ba_loose.lower_bound(e);
8,524✔
631
        return r;
8,524✔
632
    };
8,524✔
633
    auto bound_ub = [&](const symbolic::Expression& e) -> symbolic::Expression {
4,189✔
634
        auto r = ba_tight.upper_bound(e);
4,189✔
635
        if (r.is_null()) r = ba_loose.upper_bound(e);
4,189✔
636
        return r;
4,189✔
637
    };
4,189✔
638

639
    // For each memlet, compute per-dimension base (minimum of index expression)
640
    // Group memlets whose bases are symbolically equal in all dimensions
641
    struct GroupEntry {
1,750✔
642
        data_flow::Subset base; // per-dim minimum
1,750✔
643
        std::vector<const data_flow::Memlet*> group_memlets;
1,750✔
644
    };
1,750✔
645

646
    std::vector<GroupEntry> groups;
1,750✔
647

648
    for (const auto* memlet_ptr : memlets) {
2,643✔
649
        auto& acc = accesses_.at(memlet_ptr);
2,643✔
650
        if (acc.subset.size() != ndims) continue;
2,643✔
651

652
        // Compute per-dimension base (minimum)
653
        data_flow::Subset base;
2,643✔
654
        bool base_ok = true;
2,643✔
655
        for (size_t d = 0; d < ndims; ++d) {
6,932✔
656
            auto lb = bound_lb(acc.subset[d]);
4,291✔
657
            if (lb.is_null()) {
4,291✔
658
                base_ok = false;
2✔
659
                break;
2✔
660
            }
2✔
661
            base.push_back(symbolic::simplify(lb));
4,289✔
662
        }
4,289✔
663
        if (!base_ok) continue;
2,643✔
664

665
        // Find existing group with same base
666
        bool found = false;
2,641✔
667
        for (auto& group : groups) {
2,641✔
668
            if (group.base.size() != ndims) continue;
1,011✔
669
            bool match = true;
1,011✔
670
            for (size_t d = 0; d < ndims; ++d) {
2,105✔
671
                if (!symbolic::eq(group.base[d], base[d])) {
1,533✔
672
                    match = false;
439✔
673
                    break;
439✔
674
                }
439✔
675
            }
1,533✔
676
            if (match) {
1,011✔
677
                group.group_memlets.push_back(memlet_ptr);
572✔
678
                found = true;
572✔
679
                break;
572✔
680
            }
572✔
681
        }
1,011✔
682
        if (!found) {
2,641✔
683
            groups.push_back({base, {memlet_ptr}});
2,069✔
684
        }
2,069✔
685
    }
2,641✔
686

687
    if (groups.empty()) return;
1,750✔
688

689
    // Merge groups whose bases differ only by integer constants.
690
    // E.g. stencil bases [i-1, j], [i, j], [i+1, j] should merge (constant offsets in dim0).
691
    // But SYR2K bases [i, 0] vs [j, 0] should NOT merge (symbolic difference).
692
    std::vector<GroupEntry> merged_groups;
1,748✔
693
    for (auto& group : groups) {
2,069✔
694
        bool merged = false;
2,069✔
695
        for (auto& existing : merged_groups) {
2,069✔
696
            bool const_diff = true;
342✔
697
            for (size_t d = 0; d < ndims; ++d) {
597✔
698
                auto diff = symbolic::simplify(symbolic::sub(group.base[d], existing.base[d]));
485✔
699
                if (!SymEngine::is_a<SymEngine::Integer>(*diff)) {
485✔
700
                    const_diff = false;
230✔
701
                    break;
230✔
702
                }
230✔
703
            }
485✔
704
            if (const_diff) {
342✔
705
                existing.group_memlets
112✔
706
                    .insert(existing.group_memlets.end(), group.group_memlets.begin(), group.group_memlets.end());
112✔
707
                merged = true;
112✔
708
                break;
112✔
709
            }
112✔
710
        }
342✔
711
        if (!merged) {
2,069✔
712
            merged_groups.push_back(std::move(group));
1,957✔
713
        }
1,957✔
714
    }
2,069✔
715

716
    // Compute tile for each merged group
717
    std::vector<MemoryTileGroup> result_groups;
1,748✔
718
    for (auto& group : merged_groups) {
1,957✔
719
        std::vector<std::vector<symbolic::Expression>> min_indices(ndims);
1,957✔
720
        std::vector<std::vector<symbolic::Expression>> max_indices(ndims);
1,957✔
721

722
        for (const auto* memlet_ptr : group.group_memlets) {
2,641✔
723
            auto& acc = accesses_.at(memlet_ptr);
2,641✔
724
            for (size_t d = 0; d < ndims; ++d) {
6,930✔
725
                min_indices[d].push_back(acc.subset[d]);
4,289✔
726
                max_indices[d].push_back(acc.subset[d]);
4,289✔
727
            }
4,289✔
728
        }
2,641✔
729

730
        data_flow::Subset min_subset;
1,957✔
731
        data_flow::Subset max_subset;
1,957✔
732
        bool all_bounded = true;
1,957✔
733

734
        for (size_t d = 0; d < ndims; ++d) {
5,079✔
735
            symbolic::Expression dim_min = SymEngine::null;
3,152✔
736
            symbolic::Expression dim_max = SymEngine::null;
3,152✔
737

738
            for (const auto& idx : min_indices[d]) {
4,233✔
739
                auto lb = bound_lb(idx);
4,233✔
740
                if (lb.is_null()) {
4,233✔
741
                    all_bounded = false;
×
742
                    break;
×
743
                }
×
744
                if (dim_min.is_null()) {
4,233✔
745
                    dim_min = lb;
3,152✔
746
                } else {
3,152✔
747
                    dim_min = symbolic::min(dim_min, lb);
1,081✔
748
                }
1,081✔
749
            }
4,233✔
750
            if (!all_bounded) break;
3,152✔
751

752
            for (const auto& idx : max_indices[d]) {
4,189✔
753
                auto ub = bound_ub(idx);
4,189✔
754
                if (ub.is_null()) {
4,189✔
755
                    all_bounded = false;
30✔
756
                    break;
30✔
757
                }
30✔
758
                if (dim_max.is_null()) {
4,159✔
759
                    dim_max = ub;
3,122✔
760
                } else {
3,122✔
761
                    dim_max = symbolic::max(dim_max, ub);
1,037✔
762
                }
1,037✔
763
            }
4,159✔
764
            if (!all_bounded) break;
3,152✔
765

766
            min_subset.push_back(symbolic::simplify(dim_min));
3,122✔
767
            max_subset.push_back(symbolic::simplify(dim_max));
3,122✔
768
        }
3,122✔
769

770
        if (!all_bounded) continue;
1,957✔
771

772
        MemoryTile tile{
1,927✔
773
            container, min_subset, max_subset, reference_layout, !layout_has_unbounded_first_dim(reference_layout)
1,927✔
774
        };
1,927✔
775
        result_groups.push_back({tile, group.group_memlets});
1,927✔
776
    }
1,927✔
777

778
    if (!result_groups.empty()) {
1,748✔
779
        tile_groups_.insert({{&scope, container}, std::move(result_groups)});
1,720✔
780
    }
1,720✔
781
}
1,748✔
782

783
const std::vector<MemoryTileGroup>* MemoryLayoutAnalysis::
784
    tile_groups(const structured_control_flow::ControlFlowNode& scope, const std::string& container) const {
6✔
785
    auto key = std::make_pair(&scope, container);
6✔
786
    auto it = tile_groups_.find(key);
6✔
787
    if (it == tile_groups_.end()) {
6✔
788
        return nullptr;
×
789
    }
×
790
    return &it->second;
6✔
791
}
6✔
792

793
const MemoryTileGroup* MemoryLayoutAnalysis::
794
    tile_group_for(const structured_control_flow::ControlFlowNode& scope, const data_flow::Memlet& memlet) const {
59✔
795
    // Find which container this memlet accesses
796
    auto acc_it = accesses_.find(&memlet);
59✔
797
    if (acc_it == accesses_.end()) {
59✔
UNCOV
798
        return nullptr;
×
UNCOV
799
    }
×
800
    auto& container = acc_it->second.container;
59✔
801

802
    auto key = std::make_pair(&scope, container);
59✔
803
    auto groups_it = tile_groups_.find(key);
59✔
804
    if (groups_it == tile_groups_.end()) {
59✔
805
        return nullptr;
×
806
    }
×
807

808
    for (const auto& group : groups_it->second) {
61✔
809
        for (const auto* m : group.memlets) {
70✔
810
            if (m == &memlet) {
70✔
811
                return &group;
59✔
812
            }
59✔
813
        }
70✔
814
    }
61✔
815
    return nullptr;
×
816
}
59✔
817

818
symbolic::MultiExpression MemoryTile::extents() const {
25✔
819
    symbolic::MultiExpression result;
25✔
820
    for (size_t d = 0; d < min_subset.size(); ++d) {
81✔
821
        auto ext =
56✔
822
            symbolic::simplify(symbolic::expand(symbolic::add(symbolic::sub(max_subset[d], min_subset[d]), symbolic::one())
56✔
823
            ));
56✔
824
        // Defensive: subset values are always proven-bounded, so this should never trigger
825
        // for row-major layouts. Guards future custom layouts whose subsets could pick up
826
        // the unbounded sentinel.
827
        if (depends_on_unbounded(ext)) {
56✔
NEW
828
            result.push_back(SymEngine::null);
×
829
        } else {
56✔
830
            result.push_back(ext);
56✔
831
        }
56✔
832
    }
56✔
833
    return result;
25✔
834
}
25✔
835

836
symbolic::MultiExpression MemoryTile::extents_approx() const {
82✔
837
    symbolic::MultiExpression result;
82✔
838
    for (size_t d = 0; d < min_subset.size(); ++d) {
219✔
839
        auto ext = symbolic::simplify(symbolic::expand(
137✔
840
            symbolic::overapproximate(symbolic::add(symbolic::sub(max_subset[d], min_subset[d]), symbolic::one()))
137✔
841
        ));
137✔
842
        if (depends_on_unbounded(ext)) {
137✔
NEW
843
            result.push_back(SymEngine::null);
×
844
        } else {
137✔
845
            result.push_back(ext);
137✔
846
        }
137✔
847
    }
137✔
848
    return result;
82✔
849
}
82✔
850

851
std::pair<symbolic::Expression, symbolic::Expression> MemoryTile::contiguous_range() const {
28✔
852
    auto& strides = layout.strides();
28✔
853
    auto first = layout.offset();
28✔
854
    auto last = layout.offset();
28✔
855
    for (size_t d = 0; d < min_subset.size(); ++d) {
84✔
856
        first = symbolic::add(first, symbolic::mul(strides[d], min_subset[d]));
56✔
857
        last = symbolic::add(last, symbolic::mul(strides[d], max_subset[d]));
56✔
858
    }
56✔
859
    first = symbolic::simplify(symbolic::expand(first));
28✔
860
    last = symbolic::simplify(symbolic::expand(last));
28✔
861
    // If either endpoint references the unbounded sentinel, the linear range is undefined
862
    // (e.g. a non-row-major layout whose stride references shape[0]). Report as unknown
863
    // rather than leaking the sentinel symbol to callers.
864
    if (depends_on_unbounded(first) || depends_on_unbounded(last)) {
28✔
NEW
865
        return {SymEngine::null, SymEngine::null};
×
NEW
866
    }
×
867
    return {first, last};
28✔
868
}
28✔
869

870
} // namespace analysis
871
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc