• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 27232600826

09 Jun 2026 08:06PM UTC coverage: 61.329% (+0.05%) from 61.275%
27232600826

Pull #741

github

web-flow
Merge 56267e327 into aacd50c09
Pull Request #741: replaces MemAccessRangeAnalysis with MemoryLayoutAnalysis

384 of 424 new or added lines in 11 files covered. (90.57%)

36 existing lines in 9 files now uncovered.

35654 of 58136 relevant lines covered (61.33%)

763.04 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

91.47
/sdfg/src/analysis/memory_layout_analysis.cpp
1
#include "sdfg/analysis/memory_layout_analysis.h"
2

3
#include <algorithm>
4
#include <optional>
5
#include <set>
6
#include <unordered_set>
7

8
#include "sdfg/analysis/assumptions_analysis.h"
9
#include "sdfg/data_flow/access_node.h"
10
#include "sdfg/structured_control_flow/block.h"
11
#include "sdfg/structured_control_flow/if_else.h"
12
#include "sdfg/structured_control_flow/sequence.h"
13
#include "sdfg/structured_control_flow/structured_loop.h"
14
#include "sdfg/structured_control_flow/while.h"
15
#include "sdfg/symbolic/delinearization.h"
16
#include "sdfg/symbolic/extreme_values.h"
17
#include "sdfg/symbolic/polynomials.h"
18

19
namespace sdfg {
20
namespace analysis {
21

22
namespace {
23

24
// Sentinel symbol stored in shape[0] of a MemoryLayout when the leading dimension's
25
// extent is unknown (raw pointer accesses). The symbol never escapes the analysis:
26
// any expression that mentions it must be reported to the caller as `SymEngine::null`
27
// from the public size accessors (see `MemoryTile::extents()` etc.).
28
constexpr const char* kUnboundedName = "__unbounded__";
29

30
bool is_unbounded_dim(const symbolic::Expression& e) {
11,216✔
31
    if (e.is_null()) return false;
11,216✔
32
    if (!SymEngine::is_a<SymEngine::Symbol>(*e)) return false;
11,216✔
33
    return SymEngine::down_cast<const SymEngine::Symbol&>(*e).get_name() == kUnboundedName;
10,507✔
34
}
11,216✔
35

36
bool depends_on_unbounded(const symbolic::Expression& e) {
267✔
37
    if (e.is_null()) return false;
267✔
38
    for (const auto& a : symbolic::atoms(e)) {
267✔
39
        if (is_unbounded_dim(a)) return true;
158✔
40
    }
158✔
41
    return false;
267✔
42
}
267✔
43

44
bool layout_has_unbounded_first_dim(const MemoryLayout& layout) {
11,058✔
45
    const auto& shape = layout.shape();
11,058✔
46
    return !shape.empty() && is_unbounded_dim(shape[0]);
11,058✔
47
}
11,058✔
48

49
// Collect immediate child scopes (Sequence/IfElse/While/StructuredLoop) of a given
50
// scope that carry their own MemoryTile entries. Blocks are excluded because their
51
// per-memlet info is held in `accesses_`, not in `tiles_`/`tile_groups_`.
52
void collect_direct_child_scopes(
53
    structured_control_flow::ControlFlowNode& scope, std::set<const structured_control_flow::ControlFlowNode*>& result
54
) {
2,776✔
55
    if (auto* loop = dynamic_cast<structured_control_flow::StructuredLoop*>(&scope)) {
2,776✔
56
        result.insert(&loop->root());
891✔
57
    } else if (auto* w = dynamic_cast<structured_control_flow::While*>(&scope)) {
1,885✔
58
        result.insert(&w->root());
3✔
59
    } else if (auto* seq = dynamic_cast<structured_control_flow::Sequence*>(&scope)) {
1,882✔
60
        for (size_t i = 0; i < seq->size(); i++) {
2,777✔
61
            auto& child = seq->at(i).first;
1,597✔
62
            if (!dynamic_cast<structured_control_flow::Block*>(&child)) {
1,597✔
63
                result.insert(&child);
914✔
64
            }
914✔
65
        }
1,597✔
66
    } else if (auto* ife = dynamic_cast<structured_control_flow::IfElse*>(&scope)) {
1,180✔
67
        for (size_t i = 0; i < ife->size(); i++) {
48✔
68
            result.insert(&ife->at(i).first);
29✔
69
        }
29✔
70
    }
19✔
71
}
2,776✔
72
} // namespace
73

74
MemoryLayoutAnalysis::MemoryLayoutAnalysis(StructuredSDFG& sdfg) : Analysis(sdfg) {}
256✔
75

76
void MemoryLayoutAnalysis::run(analysis::AnalysisManager& analysis_manager) {
256✔
77
    accesses_.clear();
256✔
78
    tiles_.clear();
256✔
79
    tile_groups_.clear();
256✔
80
    traverse(sdfg_.root(), analysis_manager);
256✔
81
}
256✔
82

83
void MemoryLayoutAnalysis::
84
    traverse(structured_control_flow::ControlFlowNode& node, analysis::AnalysisManager& analysis_manager) {
2,776✔
85
    // Snapshot current memlets and tile keys before recursing into the scope's children
86
    std::vector<const data_flow::Memlet*> memlets_before;
2,776✔
87
    memlets_before.reserve(accesses_.size());
2,776✔
88
    for (const auto& entry : accesses_) {
4,270✔
89
        memlets_before.push_back(entry.first);
4,270✔
90
    }
4,270✔
91
    std::set<std::pair<const structured_control_flow::ControlFlowNode*, std::string>> tiles_before;
2,776✔
92
    for (const auto& entry : tiles_) {
10,552✔
93
        tiles_before.insert(entry.first);
10,552✔
94
    }
10,552✔
95

96
    if (auto block = dynamic_cast<structured_control_flow::Block*>(&node)) {
2,776✔
97
        process_block(*block, analysis_manager);
683✔
98
    } else if (auto sequence = dynamic_cast<structured_control_flow::Sequence*>(&node)) {
2,093✔
99
        for (size_t i = 0; i < sequence->size(); i++) {
2,777✔
100
            traverse(sequence->at(i).first, analysis_manager);
1,597✔
101
        }
1,597✔
102
    } else if (auto if_else = dynamic_cast<structured_control_flow::IfElse*>(&node)) {
1,180✔
103
        for (size_t i = 0; i < if_else->size(); i++) {
48✔
104
            traverse(if_else->at(i).first, analysis_manager);
29✔
105
        }
29✔
106
    } else if (auto while_stmt = dynamic_cast<structured_control_flow::While*>(&node)) {
894✔
107
        traverse(while_stmt->root(), analysis_manager);
3✔
108
    } else if (auto loop = dynamic_cast<structured_control_flow::StructuredLoop*>(&node)) {
891✔
109
        traverse(loop->root(), analysis_manager);
891✔
110
    } else {
891✔
111
        // Break, Continue, Return nodes don't contain blocks
NEW
112
        return;
×
UNCOV
113
    }
×
114

115
    // Merge tiles for containers accessed within this scope
116
    merge_scope_layouts(node, memlets_before, tiles_before, analysis_manager);
2,776✔
117
}
2,776✔
118

119
void MemoryLayoutAnalysis::
120
    process_block(structured_control_flow::Block& block, analysis::AnalysisManager& analysis_manager) {
683✔
121
    auto& assumptions_analysis = analysis_manager.get<AssumptionsAnalysis>();
683✔
122
    // Use trivial bounds (type-derived, e.g. unsigned >= 0) so delinearization
123
    // can soundly discharge non-negativity proof obligations on parameters.
124
    auto& assumptions = assumptions_analysis.get(block, /*include_trivial_bounds=*/true);
683✔
125

126
    auto& dfg = block.dataflow();
683✔
127
    for (auto& memlet : dfg.edges()) {
1,970✔
128
        const auto& subset = memlet.subset();
1,970✔
129
        if (subset.empty()) {
1,970✔
130
            continue;
567✔
131
        }
567✔
132

133
        // Get container name from the AccessNode (either src or dst)
134
        std::string container_name;
1,403✔
135
        if (auto* access = dynamic_cast<const data_flow::AccessNode*>(&memlet.src())) {
1,403✔
136
            container_name = access->data();
899✔
137
        } else if (auto* access = dynamic_cast<const data_flow::AccessNode*>(&memlet.dst())) {
899✔
138
            container_name = access->data();
504✔
139
        } else {
504✔
140
            continue; // Skip memlets without AccessNode
×
141
        }
×
142

143
        auto& base_type = memlet.base_type();
1,403✔
144
        switch (base_type.type_id()) {
1,403✔
145
            case types::TypeID::Scalar:
×
146
            case types::TypeID::Structure:
×
147
                continue; // Skip scalars and structures
×
148
            case types::TypeID::Tensor: {
×
149
                // Tensor types already contain layout information, so we can directly store it without delinearization
150
                auto& tensor_type = dynamic_cast<const types::Tensor&>(memlet.base_type());
×
151

152
                MemoryLayout layout(tensor_type.shape(), tensor_type.strides(), tensor_type.offset());
×
153
                MemoryAccess layout_info{container_name, subset, layout, true};
×
154
                this->accesses_.emplace(&memlet, layout_info);
×
155
                continue;
×
156
            }
×
157
            case types::TypeID::Array: {
440✔
158
                // Arrays are c-like stack array, so we can infer a simple row-major layout without needing
159
                // delinearization
160
                auto* array_type = dynamic_cast<const types::Array*>(&memlet.base_type());
440✔
161
                symbolic::MultiExpression shape = {array_type->num_elements()};
440✔
162
                while (array_type->element_type().type_id() == types::TypeID::Array) {
454✔
163
                    array_type = dynamic_cast<const types::Array*>(&array_type->element_type());
14✔
164
                }
14✔
165
                if (array_type->element_type().type_id() != types::TypeID::Scalar) {
440✔
166
                    continue; // Skip non-scalar arrays
×
167
                }
×
168

169
                MemoryLayout layout(shape);
440✔
170
                MemoryAccess layout_info{container_name, subset, layout, true};
440✔
171
                this->accesses_.emplace(&memlet, layout_info);
440✔
172
                continue;
440✔
173
            }
440✔
174
            case types::TypeID::Pointer: {
963✔
175
                // For pointers, we attempt to delinearize the access pattern to infer the layout based
176
                // on assumptions from loop bounds
177
                auto* pointer_type = dynamic_cast<const types::Pointer*>(&memlet.base_type());
963✔
178

179
                // Typed pointer to a (possibly multi-dim) fixed array of scalar,
180
                // e.g. `float (*A)[M]`. The pointer adds one unbounded leading
181
                // dimension; remaining dimensions come from the array shape. The
182
                // subset is expected to be one index per dimension — no
183
                // delinearization needed.
184
                if (pointer_type->pointee_type().type_id() == types::TypeID::Array) {
963✔
185
                    auto* array_type = dynamic_cast<const types::Array*>(&pointer_type->pointee_type());
550✔
186
                    symbolic::MultiExpression array_shape = {array_type->num_elements()};
550✔
187
                    while (array_type->element_type().type_id() == types::TypeID::Array) {
568✔
188
                        array_type = dynamic_cast<const types::Array*>(&array_type->element_type());
18✔
189
                        array_shape.push_back(array_type->num_elements());
18✔
190
                    }
18✔
191
                    if (array_type->element_type().type_id() != types::TypeID::Scalar) {
550✔
NEW
192
                        continue; // Skip non-scalar leaf
×
NEW
193
                    }
×
194
                    if (subset.size() != array_shape.size() + 1) {
550✔
NEW
195
                        continue; // Require one index per dimension (leading pointer + array dims)
×
NEW
196
                    }
×
197

198
                    symbolic::MultiExpression shape;
550✔
199
                    shape.push_back(symbolic::symbol("__unbounded__"));
550✔
200
                    for (const auto& dim : array_shape) {
568✔
201
                        shape.push_back(dim);
568✔
202
                    }
568✔
203

204
                    MemoryLayout layout(shape);
550✔
205
                    MemoryAccess layout_info{container_name, subset, layout, false};
550✔
206
                    this->accesses_.emplace(&memlet, layout_info);
550✔
207
                    continue;
550✔
208
                }
550✔
209

210
                if (pointer_type->pointee_type().type_id() != types::TypeID::Scalar) {
413✔
211
                    continue; // Skip non-scalar pointers
1✔
212
                }
1✔
213

214
                if (subset.size() != 1) {
412✔
215
                    continue; // Require full linearization
×
216
                }
×
217
                auto& linearized_expr = subset.at(0);
412✔
218

219
                auto result = symbolic::delinearize(linearized_expr, assumptions);
412✔
220
                if (!result.success) {
412✔
221
                    continue; // Delinearization failed, skip
24✔
222
                }
24✔
223

224
                // Delinearization returns N indices but only N-1 dimensions (from stride division)
225
                // The first dimension is unbounded - insert a placeholder that will be filled in by merge
226
                // Using a special symbol as placeholder for the first dimension
227
                symbolic::MultiExpression shape;
388✔
228
                shape.push_back(symbolic::symbol("__unbounded__"));
388✔
229
                for (const auto& dim : result.dimensions) {
388✔
230
                    shape.push_back(dim);
283✔
231
                }
283✔
232

233
                // Store symbolic indices and dimensions with unbounded first dimension
234
                // The merge phase will attempt to bound the first dimension using loop assumptions
235
                MemoryLayout layout(shape);
388✔
236
                MemoryAccess layout_info{container_name, result.indices, layout, false};
388✔
237
                this->accesses_.emplace(&memlet, layout_info);
388✔
238
                continue;
388✔
239
            }
412✔
240
            default:
×
241
                continue; // Skip unsupported types
×
242
        }
1,403✔
243
    }
1,403✔
244
}
683✔
245

246
const MemoryAccess* MemoryLayoutAnalysis::access(const data_flow::Memlet& memlet) const {
4,422✔
247
    auto layout_it = accesses_.find(&memlet);
4,422✔
248
    if (layout_it == accesses_.end()) {
4,422✔
249
        return nullptr;
6✔
250
    }
6✔
251
    return &layout_it->second;
4,416✔
252
}
4,422✔
253

254
void MemoryLayoutAnalysis::merge_scope_layouts(
255
    structured_control_flow::ControlFlowNode& scope,
256
    const std::vector<const data_flow::Memlet*>& memlets_before,
257
    const std::set<std::pair<const structured_control_flow::ControlFlowNode*, std::string>>& tiles_before,
258
    analysis::AnalysisManager& analysis_manager
259
) {
2,776✔
260
    // Convert memlets_before to a set for O(1) lookup
261
    std::unordered_set<const data_flow::Memlet*> before_set(memlets_before.begin(), memlets_before.end());
2,776✔
262

263
    // Group all new accesses by container
264
    std::unordered_map<std::string, std::vector<const data_flow::Memlet*>> all_container_groups;
2,776✔
265
    for (auto& [memlet_ptr, acc] : accesses_) {
14,322✔
266
        if (before_set.find(memlet_ptr) != before_set.end()) {
14,322✔
267
            continue;
4,270✔
268
        }
4,270✔
269
        all_container_groups[acc.container].push_back(memlet_ptr);
10,052✔
270
    }
10,052✔
271

272
    // Sort memlets within each container group by element_id for deterministic processing order
273
    for (auto& [container, memlets] : all_container_groups) {
5,426✔
274
        std::sort(memlets.begin(), memlets.end(), [](const data_flow::Memlet* a, const data_flow::Memlet* b) {
9,455✔
275
            return a->element_id() < b->element_id();
9,455✔
276
        });
9,455✔
277
    }
5,426✔
278

279
    auto* loop = dynamic_cast<structured_control_flow::StructuredLoop*>(&scope);
2,776✔
280

281
    auto& assumptions_analysis = analysis_manager.get<AssumptionsAnalysis>();
2,776✔
282
    // For loops, query at the loop body so the induction variable's bounds are visible.
283
    auto& assumption_node = loop ? static_cast<structured_control_flow::ControlFlowNode&>(loop->root()) : scope;
2,776✔
284
    // Trivial-bounds view: includes type-derived defaults (e.g. Int32 ∈ [INT_MIN, INT_MAX]).
285
    // Used as the assumption set passed to symbolic::minimum/maximum so that the
286
    // resolver has sign information for parameters.
287
    auto& assumptions = assumptions_analysis.get(assumption_node, /*include_trivial_bounds=*/true);
2,776✔
288
    // Narrowing-only view: excludes type-derived defaults. A symbol that only
289
    // appears here (or in neither) has at most its type's intrinsic range — any
290
    // min/max resolution would collapse to INT_MIN/INT_MAX-style numerics, which
291
    // is not a sound tile bound. We use this to decide whether to emit a tile.
292
    auto& narrowing_assumptions = assumptions_analysis.get(assumption_node, /*include_trivial_bounds=*/false);
2,776✔
293
    // Parameters of a scope can only be constant symbols (invariant within the
294
    // scope). SDFG-level read-only arguments are constant by construction; for
295
    // each scope-local entry, the constant() flag tells us whether the symbol
296
    // can be treated opaquely by the min/max resolver.
297
    symbolic::SymbolSet parameters = assumptions_analysis.parameters();
2,776✔
298
    for (auto& entry : assumptions) {
23,751✔
299
        if (loop && symbolic::eq(entry.first, loop->indvar())) {
23,751✔
300
            continue; // The induction variable is not a parameter of its own loop scope
891✔
301
        }
891✔
302
        if (entry.second.constant()) {
22,860✔
303
            parameters.insert(entry.first);
8,790✔
304
        }
8,790✔
305
    }
22,860✔
306

307
    // Soundness check: every free (non-parameter) symbol in an index expression
308
    // must have a narrowing assumption at this scope. Otherwise symbolic::minimum/
309
    // maximum would fall back to the symbol's type-default range and produce
310
    // bogus tile bounds (e.g. INT_MAX) that the rest of the pipeline would
311
    // silently consume as truth.
312
    auto has_narrowing = [&](const symbolic::Symbol& sym) -> bool {
2,776✔
313
        auto it = narrowing_assumptions.find(sym);
68✔
314
        if (it == narrowing_assumptions.end()) return false;
68✔
NEW
315
        return !it->second.lower_bounds().empty() || !it->second.upper_bounds().empty();
×
316
    };
68✔
317
    auto bounds_are_sound = [&](const symbolic::Expression& expr) -> bool {
16,696✔
318
        for (const auto& sym : symbolic::atoms(expr)) {
16,696✔
319
            if (parameters.contains(sym)) continue;
15,926✔
320
            if (loop && symbolic::eq(sym, loop->indvar())) continue;
3,003✔
321
            if (!has_narrowing(sym)) return false;
68✔
322
        }
68✔
323
        return true;
16,628✔
324
    };
16,696✔
325

326
    // Per-scope BoundAnalysis pair (tight + loose-fallback). Both instances
327
    // memoize their results, so repeated expressions and shared subterms
328
    // across all per-element lower/upper queries below hit the cache.
329
    symbolic::BoundAnalysis ba_tight(parameters, assumptions, true);
2,776✔
330
    symbolic::BoundAnalysis ba_loose(parameters, assumptions, false);
2,776✔
331
    auto bound_lb = [&](const symbolic::Expression& e) -> symbolic::Expression {
24,643✔
332
        auto r = ba_tight.lower_bound(e);
24,643✔
333
        if (r.is_null()) r = ba_loose.lower_bound(e);
24,643✔
334
        return r;
24,643✔
335
    };
24,643✔
336
    auto bound_ub = [&](const symbolic::Expression& e) -> symbolic::Expression {
17,521✔
337
        auto r = ba_tight.upper_bound(e);
17,521✔
338
        if (r.is_null()) r = ba_loose.upper_bound(e);
17,521✔
339
        return r;
17,521✔
340
    };
17,521✔
341

342
    // Find direct child scopes that may carry tiles for this scope
343
    std::set<const structured_control_flow::ControlFlowNode*> direct_child_scopes;
2,776✔
344
    collect_direct_child_scopes(scope, direct_child_scopes);
2,776✔
345

346
    for (auto& [container, memlets] : all_container_groups) {
5,426✔
347
        if (memlets.empty()) continue;
5,426✔
348

349
        std::vector<const MemoryTile*> inner_tiles;
5,426✔
350
        for (const auto* child : direct_child_scopes) {
5,426✔
351
            auto it = tiles_.find({child, container});
4,266✔
352
            if (it != tiles_.end()) {
4,266✔
353
                inner_tiles.push_back(&it->second);
3,712✔
354
            }
3,712✔
355
        }
4,266✔
356

357
        size_t ndims = 0;
5,426✔
358
        MemoryLayout reference_layout({symbolic::one()});
5,426✔
359
        // Separate min/max index lists to avoid unnecessary symbolic min/max
360
        std::vector<std::vector<symbolic::Expression>> min_indices;
5,426✔
361
        std::vector<std::vector<symbolic::Expression>> max_indices;
5,426✔
362

363
        if (!inner_tiles.empty()) {
5,426✔
364
            // Use inner tile min/max as representative values
365
            // Inner tiles have already resolved inner loop variables to their bounds
366
            ndims = inner_tiles[0]->min_subset.size();
3,514✔
367
            reference_layout = inner_tiles[0]->layout;
3,514✔
368
            min_indices.resize(ndims);
3,514✔
369
            max_indices.resize(ndims);
3,514✔
370

371
            for (const auto* tile : inner_tiles) {
3,712✔
372
                if (tile->min_subset.size() != ndims) continue;
3,712✔
373
                for (size_t d = 0; d < ndims; ++d) {
9,931✔
374
                    min_indices[d].push_back(tile->min_subset[d]);
6,219✔
375
                    max_indices[d].push_back(tile->max_subset[d]);
6,219✔
376
                }
6,219✔
377
            }
3,712✔
378

379
            // Propagate tile groups from child scopes upward using the same
380
            // base-partitioning logic: group inner groups by their min_subset
381
            // base at this scope level, then merge each partition.
382
            // Same direct-lookup optimization as above for inner_tiles.
383
            std::vector<const MemoryTileGroup*> inner_groups;
3,514✔
384
            for (const auto* child : direct_child_scopes) {
4,085✔
385
                auto it = tile_groups_.find({child, container});
4,085✔
386
                if (it == tile_groups_.end()) continue;
4,085✔
387
                for (const auto& g : it->second) {
4,193✔
388
                    inner_groups.push_back(&g);
4,193✔
389
                }
4,193✔
390
            }
3,712✔
391

392
            if (!inner_groups.empty()) {
3,514✔
393
                // Group inner groups by their base at this level
394
                struct OuterGroupEntry {
3,514✔
395
                    data_flow::Subset base;
3,514✔
396
                    std::vector<const MemoryTileGroup*> constituents;
3,514✔
397
                };
3,514✔
398
                std::vector<OuterGroupEntry> outer_partitions;
3,514✔
399

400
                for (const auto* ig : inner_groups) {
4,193✔
401
                    if (ig->tile.min_subset.size() != ndims) continue;
4,193✔
402

403
                    // Compute base: minimum of the inner group's min_subset per dim
404
                    data_flow::Subset base;
4,193✔
405
                    bool base_ok = true;
4,193✔
406
                    for (size_t d = 0; d < ndims; ++d) {
11,305✔
407
                        auto lb = bound_lb(ig->tile.min_subset[d]);
7,112✔
408
                        if (lb.is_null()) {
7,112✔
409
                            base_ok = false;
×
410
                            break;
×
411
                        }
×
412
                        base.push_back(symbolic::simplify(lb));
7,112✔
413
                    }
7,112✔
414
                    if (!base_ok) continue;
4,193✔
415

416
                    // Find matching partition (same base OR constant-offset base)
417
                    bool found = false;
4,193✔
418
                    for (auto& op : outer_partitions) {
4,193✔
419
                        bool const_diff = true;
872✔
420
                        for (size_t d = 0; d < ndims; ++d) {
1,538✔
421
                            auto diff = symbolic::simplify(symbolic::sub(base[d], op.base[d]));
1,236✔
422
                            if (!SymEngine::is_a<SymEngine::Integer>(*diff)) {
1,236✔
423
                                const_diff = false;
570✔
424
                                break;
570✔
425
                            }
570✔
426
                        }
1,236✔
427
                        if (const_diff) {
872✔
428
                            op.constituents.push_back(ig);
302✔
429
                            found = true;
302✔
430
                            break;
302✔
431
                        }
302✔
432
                    }
872✔
433
                    if (!found) {
4,193✔
434
                        outer_partitions.push_back({base, {ig}});
3,891✔
435
                    }
3,891✔
436
                }
4,193✔
437

438
                // For each partition, merge constituent tile bounds and collect memlets
439
                std::vector<MemoryTileGroup> result_groups;
3,514✔
440
                for (auto& op : outer_partitions) {
3,891✔
441
                    data_flow::Subset grp_min, grp_max;
3,891✔
442
                    bool grp_bounded = true;
3,891✔
443

444
                    for (size_t d = 0; d < ndims; ++d) {
10,462✔
445
                        symbolic::Expression d_min = SymEngine::null;
6,578✔
446
                        symbolic::Expression d_max = SymEngine::null;
6,578✔
447
                        for (const auto* c : op.constituents) {
7,110✔
448
                            auto lb = bound_lb(c->tile.min_subset[d]);
7,110✔
449
                            if (lb.is_null()) {
7,110✔
450
                                grp_bounded = false;
×
451
                                break;
×
452
                            }
×
453
                            d_min = d_min.is_null() ? lb : symbolic::min(d_min, lb);
7,110✔
454

455
                            auto ub = bound_ub(c->tile.max_subset[d]);
7,110✔
456
                            if (ub.is_null()) {
7,110✔
457
                                grp_bounded = false;
7✔
458
                                break;
7✔
459
                            }
7✔
460
                            d_max = d_max.is_null() ? ub : symbolic::max(d_max, ub);
7,103✔
461
                        }
7,103✔
462
                        if (!grp_bounded) break;
6,578✔
463
                        grp_min.push_back(symbolic::simplify(d_min));
6,571✔
464
                        grp_max.push_back(symbolic::simplify(d_max));
6,571✔
465
                    }
6,571✔
466
                    if (!grp_bounded) continue;
3,891✔
467

468
                    // Collect all memlets from constituent groups
469
                    std::vector<const data_flow::Memlet*> grp_memlets;
3,884✔
470
                    for (const auto* c : op.constituents) {
4,184✔
471
                        grp_memlets.insert(grp_memlets.end(), c->memlets.begin(), c->memlets.end());
4,184✔
472
                    }
4,184✔
473

474
                    MemoryTile grp_tile{
3,884✔
475
                        container, grp_min, grp_max, reference_layout, !layout_has_unbounded_first_dim(reference_layout)
3,884✔
476
                    };
3,884✔
477
                    result_groups.push_back({grp_tile, std::move(grp_memlets)});
3,884✔
478
                }
3,884✔
479

480
                if (!result_groups.empty()) {
3,514✔
481
                    tile_groups_.insert({{&scope, container}, std::move(result_groups)});
3,507✔
482
                }
3,507✔
483
            }
3,514✔
484
        } else {
3,514✔
485
            // Use raw access indices (no inner tiles available)
486
            auto& first_access = accesses_.at(memlets[0]);
1,912✔
487
            auto& reference_shape = first_access.layout.shape();
1,912✔
488
            ndims = reference_shape.size();
1,912✔
489
            reference_layout = first_access.layout;
1,912✔
490
            min_indices.resize(ndims);
1,912✔
491
            max_indices.resize(ndims);
1,912✔
492

493
            bool consistent = true;
1,912✔
494
            for (const auto* memlet_ptr : memlets) {
2,811✔
495
                auto& acc = accesses_.at(memlet_ptr);
2,811✔
496
                auto& shape = acc.layout.shape();
2,811✔
497

498
                if (shape.size() != ndims) {
2,811✔
499
                    consistent = false;
×
500
                    break;
×
501
                }
×
502
                // Check inner dimensions match (all except first which may be unbounded)
503
                for (size_t d = 1; d < ndims; ++d) {
4,511✔
504
                    if (!symbolic::eq(shape[d], reference_shape[d])) {
1,700✔
505
                        consistent = false;
×
506
                        break;
×
507
                    }
×
508
                }
1,700✔
509
                if (!consistent) break;
2,811✔
510

511
                // Collect indices for each dimension. In the raw-access path,
512
                // min and max indices are identical (both are `acc.subset[d]`),
513
                // so we only populate `min_indices[d]` and leave `max_indices[d]`
514
                // empty. The shared bound-resolution loop below detects this
515
                // alias case and fuses the two passes into one walk, halving
516
                // the soundness-check and outer-loop overhead.
517
                if (acc.subset.size() != ndims) {
2,811✔
518
                    consistent = false;
134✔
519
                    break;
134✔
520
                }
134✔
521
                for (size_t d = 0; d < ndims; ++d) {
7,054✔
522
                    min_indices[d].push_back(acc.subset[d]);
4,377✔
523
                }
4,377✔
524
            }
2,677✔
525

526
            if (!consistent) continue;
1,912✔
527

528
            // Compute tile groups for raw memlets
529
            compute_tile_groups(scope, container, memlets, reference_layout, ndims, ba_tight, ba_loose);
1,778✔
530
        }
1,778✔
531

532
        if (ndims == 0) continue;
5,292✔
533

534
        // Bound each candidate index individually via the reused BoundAnalysis
535
        // (memoized across all per-dim queries below) and combine the per-index
536
        // bounds into the dim's min/max via symbolic::min / symbolic::max.
537
        //
538
        // In the raw-access path, `max_indices[d]` is left empty as a signal
539
        // that it would otherwise alias `min_indices[d]`. When detected, we
540
        // fuse the two passes into a single walk so the soundness check and
541
        // outer-loop bookkeeping run once per index instead of twice.
542
        data_flow::Subset min_subset;
5,292✔
543
        data_flow::Subset max_subset;
5,292✔
544
        bool all_bounded = true;
5,292✔
545

546
        for (size_t d = 0; d < ndims; ++d) {
13,926✔
547
            symbolic::Expression dim_min = SymEngine::null;
8,707✔
548
            symbolic::Expression dim_max = SymEngine::null;
8,707✔
549
            const bool fused = max_indices[d].empty();
8,707✔
550

551
            for (const auto& idx : min_indices[d]) {
10,481✔
552
                if (!bounds_are_sound(idx)) {
10,481✔
553
                    all_bounded = false;
60✔
554
                    break;
60✔
555
                }
60✔
556
                auto lb = bound_lb(idx);
10,421✔
557
                if (lb.is_null()) {
10,421✔
558
                    all_bounded = false;
2✔
559
                    break;
2✔
560
                }
2✔
561
                dim_min = dim_min.is_null() ? lb : symbolic::min(dim_min, lb);
10,419✔
562

563
                if (fused) {
10,419✔
564
                    // Soundness already checked above for the same idx.
565
                    auto ub = bound_ub(idx);
4,204✔
566
                    if (ub.is_null()) {
4,204✔
NEW
567
                        all_bounded = false;
×
NEW
568
                        break;
×
NEW
569
                    }
×
570
                    dim_max = dim_max.is_null() ? ub : symbolic::max(dim_max, ub);
4,204✔
571
                }
4,204✔
572
            }
10,419✔
573
            if (!all_bounded) break;
8,707✔
574

575
            if (!fused) {
8,645✔
576
                for (const auto& idx : max_indices[d]) {
6,215✔
577
                    if (!bounds_are_sound(idx)) {
6,215✔
578
                        all_bounded = false;
8✔
579
                        break;
8✔
580
                    }
8✔
581
                    auto ub = bound_ub(idx);
6,207✔
582
                    if (ub.is_null()) {
6,207✔
583
                        all_bounded = false;
3✔
584
                        break;
3✔
585
                    }
3✔
586
                    dim_max = dim_max.is_null() ? ub : symbolic::max(dim_max, ub);
6,204✔
587
                }
6,204✔
588
                if (!all_bounded) break;
5,886✔
589
            }
5,886✔
590

591
            min_subset.push_back(symbolic::simplify(dim_min));
8,634✔
592
            max_subset.push_back(symbolic::simplify(dim_max));
8,634✔
593
        }
8,634✔
594

595
        if (!all_bounded) continue;
5,292✔
596

597
        // Store this scope's tile with the original memory layout. `first_dim_bounded`
598
        // mirrors the underlying layout: false whenever shape[0] is the unbounded sentinel.
599
        MemoryTile merged_tile{
5,219✔
600
            container, min_subset, max_subset, reference_layout, !layout_has_unbounded_first_dim(reference_layout)
5,219✔
601
        };
5,219✔
602
        tiles_.insert({{&scope, container}, merged_tile});
5,219✔
603
    }
5,219✔
604
}
2,776✔
605

606
const MemoryTile* MemoryLayoutAnalysis::
607
    tile(const structured_control_flow::ControlFlowNode& scope, const std::string& container) const {
59✔
608
    auto key = std::make_pair(&scope, container);
59✔
609
    auto it = tiles_.find(key);
59✔
610
    if (it == tiles_.end()) {
59✔
611
        return nullptr;
7✔
612
    }
7✔
613
    return &it->second;
52✔
614
}
59✔
615

616
void MemoryLayoutAnalysis::compute_tile_groups(
617
    structured_control_flow::ControlFlowNode& scope,
618
    const std::string& container,
619
    const std::vector<const data_flow::Memlet*>& memlets,
620
    const MemoryLayout& reference_layout,
621
    size_t ndims,
622
    symbolic::BoundAnalysis& ba_tight,
623
    symbolic::BoundAnalysis& ba_loose
624
) {
1,778✔
625
    // Bound helpers reusing the caller's BoundAnalysis instances. These share
626
    // their memoization cache with merge_scope_layouts for the same scope, so
627
    // expressions bounded there hit instantly when re-encountered here.
628
    auto bound_lb = [&](const symbolic::Expression& e) -> symbolic::Expression {
8,696✔
629
        auto r = ba_tight.lower_bound(e);
8,696✔
630
        if (r.is_null()) r = ba_loose.lower_bound(e);
8,696✔
631
        return r;
8,696✔
632
    };
8,696✔
633
    auto bound_ub = [&](const symbolic::Expression& e) -> symbolic::Expression {
4,275✔
634
        auto r = ba_tight.upper_bound(e);
4,275✔
635
        if (r.is_null()) r = ba_loose.upper_bound(e);
4,275✔
636
        return r;
4,275✔
637
    };
4,275✔
638

639
    // For each memlet, compute per-dimension base (minimum of index expression)
640
    // Group memlets whose bases are symbolically equal in all dimensions
641
    struct GroupEntry {
1,778✔
642
        data_flow::Subset base; // per-dim minimum
1,778✔
643
        std::vector<const data_flow::Memlet*> group_memlets;
1,778✔
644
    };
1,778✔
645

646
    std::vector<GroupEntry> groups;
1,778✔
647

648
    for (const auto* memlet_ptr : memlets) {
2,677✔
649
        auto& acc = accesses_.at(memlet_ptr);
2,677✔
650
        if (acc.subset.size() != ndims) continue;
2,677✔
651

652
        // Compute per-dimension base (minimum)
653
        data_flow::Subset base;
2,677✔
654
        bool base_ok = true;
2,677✔
655
        for (size_t d = 0; d < ndims; ++d) {
7,052✔
656
            auto lb = bound_lb(acc.subset[d]);
4,377✔
657
            if (lb.is_null()) {
4,377✔
658
                base_ok = false;
2✔
659
                break;
2✔
660
            }
2✔
661
            base.push_back(symbolic::simplify(lb));
4,375✔
662
        }
4,375✔
663
        if (!base_ok) continue;
2,677✔
664

665
        // Find existing group with same base
666
        bool found = false;
2,675✔
667
        for (auto& group : groups) {
2,675✔
668
            if (group.base.size() != ndims) continue;
1,017✔
669
            bool match = true;
1,017✔
670
            for (size_t d = 0; d < ndims; ++d) {
2,123✔
671
                if (!symbolic::eq(group.base[d], base[d])) {
1,545✔
672
                    match = false;
439✔
673
                    break;
439✔
674
                }
439✔
675
            }
1,545✔
676
            if (match) {
1,017✔
677
                group.group_memlets.push_back(memlet_ptr);
578✔
678
                found = true;
578✔
679
                break;
578✔
680
            }
578✔
681
        }
1,017✔
682
        if (!found) {
2,675✔
683
            groups.push_back({base, {memlet_ptr}});
2,097✔
684
        }
2,097✔
685
    }
2,675✔
686

687
    if (groups.empty()) return;
1,778✔
688

689
    // Merge groups whose bases differ only by integer constants.
690
    // E.g. stencil bases [i-1, j], [i, j], [i+1, j] should merge (constant offsets in dim0).
691
    // But SYR2K bases [i, 0] vs [j, 0] should NOT merge (symbolic difference).
692
    std::vector<GroupEntry> merged_groups;
1,776✔
693
    for (auto& group : groups) {
2,097✔
694
        bool merged = false;
2,097✔
695
        for (auto& existing : merged_groups) {
2,097✔
696
            bool const_diff = true;
342✔
697
            for (size_t d = 0; d < ndims; ++d) {
597✔
698
                auto diff = symbolic::simplify(symbolic::sub(group.base[d], existing.base[d]));
485✔
699
                if (!SymEngine::is_a<SymEngine::Integer>(*diff)) {
485✔
700
                    const_diff = false;
230✔
701
                    break;
230✔
702
                }
230✔
703
            }
485✔
704
            if (const_diff) {
342✔
705
                existing.group_memlets
112✔
706
                    .insert(existing.group_memlets.end(), group.group_memlets.begin(), group.group_memlets.end());
112✔
707
                merged = true;
112✔
708
                break;
112✔
709
            }
112✔
710
        }
342✔
711
        if (!merged) {
2,097✔
712
            merged_groups.push_back(std::move(group));
1,985✔
713
        }
1,985✔
714
    }
2,097✔
715

716
    // Compute tile for each merged group
717
    std::vector<MemoryTileGroup> result_groups;
1,776✔
718
    for (auto& group : merged_groups) {
1,985✔
719
        std::vector<std::vector<symbolic::Expression>> min_indices(ndims);
1,985✔
720
        std::vector<std::vector<symbolic::Expression>> max_indices(ndims);
1,985✔
721

722
        for (const auto* memlet_ptr : group.group_memlets) {
2,675✔
723
            auto& acc = accesses_.at(memlet_ptr);
2,675✔
724
            for (size_t d = 0; d < ndims; ++d) {
7,050✔
725
                min_indices[d].push_back(acc.subset[d]);
4,375✔
726
                max_indices[d].push_back(acc.subset[d]);
4,375✔
727
            }
4,375✔
728
        }
2,675✔
729

730
        data_flow::Subset min_subset;
1,985✔
731
        data_flow::Subset max_subset;
1,985✔
732
        bool all_bounded = true;
1,985✔
733

734
        for (size_t d = 0; d < ndims; ++d) {
5,181✔
735
            symbolic::Expression dim_min = SymEngine::null;
3,226✔
736
            symbolic::Expression dim_max = SymEngine::null;
3,226✔
737

738
            for (const auto& idx : min_indices[d]) {
4,319✔
739
                auto lb = bound_lb(idx);
4,319✔
740
                if (lb.is_null()) {
4,319✔
741
                    all_bounded = false;
×
742
                    break;
×
743
                }
×
744
                if (dim_min.is_null()) {
4,319✔
745
                    dim_min = lb;
3,226✔
746
                } else {
3,226✔
747
                    dim_min = symbolic::min(dim_min, lb);
1,093✔
748
                }
1,093✔
749
            }
4,319✔
750
            if (!all_bounded) break;
3,226✔
751

752
            for (const auto& idx : max_indices[d]) {
4,275✔
753
                auto ub = bound_ub(idx);
4,275✔
754
                if (ub.is_null()) {
4,275✔
755
                    all_bounded = false;
30✔
756
                    break;
30✔
757
                }
30✔
758
                if (dim_max.is_null()) {
4,245✔
759
                    dim_max = ub;
3,196✔
760
                } else {
3,196✔
761
                    dim_max = symbolic::max(dim_max, ub);
1,049✔
762
                }
1,049✔
763
            }
4,245✔
764
            if (!all_bounded) break;
3,226✔
765

766
            min_subset.push_back(symbolic::simplify(dim_min));
3,196✔
767
            max_subset.push_back(symbolic::simplify(dim_max));
3,196✔
768
        }
3,196✔
769

770
        if (!all_bounded) continue;
1,985✔
771

772
        MemoryTile tile{
1,955✔
773
            container, min_subset, max_subset, reference_layout, !layout_has_unbounded_first_dim(reference_layout)
1,955✔
774
        };
1,955✔
775
        result_groups.push_back({tile, group.group_memlets});
1,955✔
776
    }
1,955✔
777

778
    if (!result_groups.empty()) {
1,776✔
779
        tile_groups_.insert({{&scope, container}, std::move(result_groups)});
1,748✔
780
    }
1,748✔
781
}
1,776✔
782

783
const std::vector<MemoryTileGroup>* MemoryLayoutAnalysis::
784
    tile_groups(const structured_control_flow::ControlFlowNode& scope, const std::string& container) const {
6✔
785
    auto key = std::make_pair(&scope, container);
6✔
786
    auto it = tile_groups_.find(key);
6✔
787
    if (it == tile_groups_.end()) {
6✔
788
        return nullptr;
×
789
    }
×
790
    return &it->second;
6✔
791
}
6✔
792

793
const MemoryTileGroup* MemoryLayoutAnalysis::
794
    tile_group_for(const structured_control_flow::ControlFlowNode& scope, const data_flow::Memlet& memlet) const {
59✔
795
    // Find which container this memlet accesses
796
    auto acc_it = accesses_.find(&memlet);
59✔
797
    if (acc_it == accesses_.end()) {
59✔
UNCOV
798
        return nullptr;
×
UNCOV
799
    }
×
800
    auto& container = acc_it->second.container;
59✔
801

802
    auto key = std::make_pair(&scope, container);
59✔
803
    auto groups_it = tile_groups_.find(key);
59✔
804
    if (groups_it == tile_groups_.end()) {
59✔
805
        return nullptr;
×
806
    }
×
807

808
    for (const auto& group : groups_it->second) {
61✔
809
        for (const auto* m : group.memlets) {
72✔
810
            if (m == &memlet) {
72✔
811
                return &group;
59✔
812
            }
59✔
813
        }
72✔
814
    }
61✔
815
    return nullptr;
×
816
}
59✔
817

818
symbolic::MultiExpression MemoryTile::extents() const {
25✔
819
    symbolic::MultiExpression result;
25✔
820
    for (size_t d = 0; d < min_subset.size(); ++d) {
81✔
821
        auto ext =
56✔
822
            symbolic::simplify(symbolic::expand(symbolic::add(symbolic::sub(max_subset[d], min_subset[d]), symbolic::one())
56✔
823
            ));
56✔
824
        // Defensive: subset values are always proven-bounded, so this should never trigger
825
        // for row-major layouts. Guards future custom layouts whose subsets could pick up
826
        // the unbounded sentinel.
827
        if (depends_on_unbounded(ext)) {
56✔
NEW
828
            result.push_back(SymEngine::null);
×
829
        } else {
56✔
830
            result.push_back(ext);
56✔
831
        }
56✔
832
    }
56✔
833
    return result;
25✔
834
}
25✔
835

836
symbolic::MultiExpression MemoryTile::extents_approx() const {
82✔
837
    symbolic::MultiExpression result;
82✔
838
    for (size_t d = 0; d < min_subset.size(); ++d) {
219✔
839
        auto ext = symbolic::simplify(symbolic::expand(
137✔
840
            symbolic::overapproximate(symbolic::add(symbolic::sub(max_subset[d], min_subset[d]), symbolic::one()))
137✔
841
        ));
137✔
842
        if (depends_on_unbounded(ext)) {
137✔
NEW
843
            result.push_back(SymEngine::null);
×
844
        } else {
137✔
845
            result.push_back(ext);
137✔
846
        }
137✔
847
    }
137✔
848
    return result;
82✔
849
}
82✔
850

851
std::pair<symbolic::Expression, symbolic::Expression> MemoryTile::contiguous_range() const {
37✔
852
    auto& strides = layout.strides();
37✔
853
    auto first = layout.offset();
37✔
854
    auto last = layout.offset();
37✔
855
    for (size_t d = 0; d < min_subset.size(); ++d) {
110✔
856
        first = symbolic::add(first, symbolic::mul(strides[d], min_subset[d]));
73✔
857
        last = symbolic::add(last, symbolic::mul(strides[d], max_subset[d]));
73✔
858
    }
73✔
859
    first = symbolic::simplify(symbolic::expand(first));
37✔
860
    last = symbolic::simplify(symbolic::expand(last));
37✔
861
    // If either endpoint references the unbounded sentinel, the linear range is undefined
862
    // (e.g. a non-row-major layout whose stride references shape[0]). Report as unknown
863
    // rather than leaking the sentinel symbol to callers.
864
    if (depends_on_unbounded(first) || depends_on_unbounded(last)) {
37✔
NEW
865
        return {SymEngine::null, SymEngine::null};
×
NEW
866
    }
×
867
    return {first, last};
37✔
868
}
37✔
869

870
} // namespace analysis
871
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc