• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In
Build has been canceled!

daisytuner / docc / 27263464741

10 Jun 2026 08:27AM UTC coverage: 61.383% (+0.1%) from 61.275%
27263464741

push

github

web-flow
Merge pull request #741 from daisytuner/mem-access-range-replacement

replaces MemAccessRangeAnalysis with MemoryLayoutAnalysis

481 of 523 new or added lines in 12 files covered. (91.97%)

44 existing lines in 11 files now uncovered.

35745 of 58233 relevant lines covered (61.38%)

757.21 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

80.35
/opt/src/transformations/in_local_storage.cpp
1
#include "sdfg/transformations/in_local_storage.h"
2

3
#include <cstddef>
4
#include <functional>
5
#include <string>
6

7
#include "sdfg/analysis/memory_layout_analysis.h"
8
#include "sdfg/analysis/scope_analysis.h"
9
#include "sdfg/analysis/users.h"
10
#include "sdfg/builder/structured_sdfg_builder.h"
11
#include "sdfg/data_flow/access_node.h"
12
#include "sdfg/data_flow/library_nodes/barrier_local_node.h"
13
#include "sdfg/data_flow/memlet.h"
14
#include "sdfg/passes/structured_control_flow/dead_cfg_elimination.h"
15
#include "sdfg/passes/structured_control_flow/sequence_fusion.h"
16
#include "sdfg/structured_control_flow/if_else.h"
17
#include "sdfg/structured_control_flow/sequence.h"
18
#include "sdfg/structured_control_flow/structured_loop.h"
19
#include "sdfg/symbolic/symbolic.h"
20
#include "sdfg/targets/gpu/gpu_schedule_type.h"
21
#include "sdfg/types/array.h"
22
#include "sdfg/types/pointer.h"
23
#include "sdfg/types/scalar.h"
24

25
namespace sdfg {
26
namespace transformations {
27

28
// Binds the transformation to a target loop, the access node whose container
// should be staged, and the storage type of the staging buffer.
//
// The container name is taken from `access_node.data()` at construction time.
InLocalStorage::InLocalStorage(
    structured_control_flow::StructuredLoop& loop,
    const data_flow::AccessNode& access_node,
    const types::StorageType& storage_type
)
    : loop_(loop),
      access_node_(access_node),
      container_(access_node.data()),
      storage_type_(storage_type) {
    // All state is set up by the initializer list above.
}
32✔
34

35
// Returns the identifier of this transformation (also written to the
// "transformation_type" field by to_json()).
std::string InLocalStorage::name() const {
    return std::string("InLocalStorage");
}
7✔
36

37
bool InLocalStorage::can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
32✔
38
    auto& sdfg = builder.subject();
32✔
39
    auto& body = this->loop_.root();
32✔
40

41
    tile_info_ = TileInfo{};
32✔
42

43
    // Criterion: Container must exist
44
    if (!sdfg.exists(this->container_)) {
32✔
45
        return false;
×
46
    }
×
47

48
    auto& type = sdfg.type(this->container_);
32✔
49

50
    // Criterion: Container must be Array or Pointer (not Scalar)
51
    if (type.type_id() != types::TypeID::Pointer && type.type_id() != types::TypeID::Array) {
32✔
52
        return false;
1✔
53
    }
1✔
54

55
    // Criterion: Container must be used in the loop body
56
    auto& users = analysis_manager.get<analysis::Users>();
31✔
57
    analysis::UsersView body_users(users, body);
31✔
58
    if (body_users.uses(this->container_).empty()) {
31✔
59
        return false;
2✔
60
    }
2✔
61

62
    // Criterion: Container must be read-only within the loop (no writes)
63
    if (!body_users.writes(this->container_).empty()) {
29✔
64
        return false;
1✔
65
    }
1✔
66

67
    // Use MemoryLayoutAnalysis tile group API
68
    auto& mla = analysis_manager.get<analysis::MemoryLayoutAnalysis>();
28✔
69

70
    // Find a representative memlet from the access node to identify its group.
71
    // An access node may have multiple out-edges belonging to different tile
72
    // groups (e.g., A[i,k] and A[j,k]).  We iterate all out-edges and select
73
    // the first one whose tile group has provably integer extents (i.e., can
74
    // actually be applied).  This avoids picking a group with symbolic extents
75
    // when another group on the same node would succeed.
76
    // For GPU shared memory, extents may be symbolic until GPU block size
77
    // substitution, so we accept the first valid group unconditionally.
78
    const analysis::MemoryTileGroup* group = nullptr;
28✔
79
    auto& dfg = access_node_.get_parent();
28✔
80
    for (auto& memlet : dfg.out_edges(access_node_)) {
28✔
81
        auto* candidate = mla.tile_group_for(loop_, memlet);
28✔
82
        if (!candidate) continue;
28✔
83

84
        auto extents = candidate->tile.extents_approx();
28✔
85
        if (extents.empty()) continue;
28✔
86
        // Reject candidates with any unbounded-dependent extent (returned as null).
87
        bool has_null = false;
28✔
88
        for (auto& ext : extents) {
47✔
89
            if (ext.is_null()) {
47✔
NEW
90
                has_null = true;
×
NEW
91
                break;
×
NEW
92
            }
×
93
        }
47✔
94
        if (has_null) continue;
28✔
95

96
        if (storage_type_.is_nv_shared()) {
28✔
97
            // GPU path: accept first valid group (substitution happens later)
98
            group = candidate;
5✔
99
            break;
5✔
100
        }
5✔
101

102
        // CPU path: require provably integer extents
103
        bool all_integer = true;
23✔
104
        for (auto& ext : extents) {
38✔
105
            if (!SymEngine::is_a<SymEngine::Integer>(*ext)) {
38✔
106
                all_integer = false;
×
107
                break;
×
108
            }
×
109
        }
38✔
110
        if (all_integer) {
23✔
111
            group = candidate;
23✔
112
            break;
23✔
113
        }
23✔
114
    }
23✔
115
    if (!group) {
28✔
116
        return false;
×
117
    }
×
118

119
    auto& tile = group->tile;
28✔
120

121
    // Store group memlets for use in apply()
122
    group_memlets_.clear();
28✔
123
    group_memlets_.insert(group->memlets.begin(), group->memlets.end());
28✔
124

125
    // Get overapproximated extents (integer upper bounds)
126
    auto extents = tile.extents_approx();
28✔
127
    if (extents.empty()) {
28✔
128
        return false;
×
129
    }
×
130
    // Defensive: candidate filtering above already rejects unbounded-dependent extents,
131
    // but guard here too since downstream code dereferences these expressions.
132
    for (auto& ext : extents) {
47✔
133
        if (ext.is_null()) return false;
47✔
134
    }
47✔
135

136
    // Store tile info (before substitution, bases/strides stay symbolic)
137
    tile_info_.dimensions = extents;
28✔
138
    tile_info_.bases = tile.min_subset;
28✔
139
    tile_info_.strides = std::vector<symbolic::Expression>(tile.layout.strides().begin(), tile.layout.strides().end());
28✔
140
    tile_info_.offset = tile.layout.offset();
28✔
141

142
    // GPU shared memory: resolve symbolic extents using GPU block sizes and
143
    // require at least one cooperative dimension
144
    if (storage_type_.is_nv_shared()) {
28✔
145
        auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
5✔
146
        auto ancestors = scope_analysis.ancestor_scopes(&loop_);
5✔
147

148
        // Build substitution map: symbolic GPU map bounds → integer block sizes
149
        // E.g., Map condition "i < N" with block_size=32 → N=32
150
        for (auto* node : ancestors) {
23✔
151
            if (auto* ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
23✔
152
                if (!gpu::is_gpu_schedule(ancestor_map->schedule_type())) {
9✔
153
                    continue;
×
154
                }
×
155
                auto block_size = gpu::gpu_block_size(ancestor_map->schedule_type());
9✔
156
                // Extract symbolic bound from condition: Lt(indvar, BOUND)
157
                auto condition = ancestor_map->condition();
9✔
158
                if (SymEngine::is_a<SymEngine::StrictLessThan>(*condition)) {
9✔
159
                    auto stl = SymEngine::rcp_static_cast<const SymEngine::StrictLessThan>(condition);
9✔
160
                    auto rhs = stl->get_args()[1];
9✔
161
                    auto iter_count = symbolic::sub(rhs, ancestor_map->init());
9✔
162
                    if (!SymEngine::is_a<SymEngine::Integer>(*iter_count)) {
9✔
163
                        // Symbolic bound — substitute with block size in extents and bases
164
                        for (auto& ext : tile_info_.dimensions) {
16✔
165
                            ext = symbolic::simplify(symbolic::subs(ext, iter_count, block_size));
16✔
166
                        }
16✔
167
                        for (auto& base : tile_info_.bases) {
16✔
168
                            base = symbolic::simplify(symbolic::subs(base, iter_count, block_size));
16✔
169
                        }
16✔
170
                    }
9✔
171
                }
9✔
172
            }
9✔
173
        }
23✔
174

175
        // Also resolve the loop's own bound if symbolic and matches a block size
176
        // E.g., For k = 0..K where K is a parameter — check if K can be resolved
177
        // from any GPU ancestor map
178
        // (Already handled above: if K appears as a GPU map bound, it's substituted)
179

180
        // Criterion: All extents must now be provably integer
181
        for (auto& ext : tile_info_.dimensions) {
9✔
182
            if (!SymEngine::is_a<SymEngine::Integer>(*ext)) {
9✔
183
                return false;
2✔
184
            }
2✔
185
        }
9✔
186

187
        // Criterion: At least one cooperative dimension
188
        bool has_cooperative_dim = false;
3✔
189
        for (auto* node : ancestors) {
6✔
190
            if (auto* ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
6✔
191
                if (!gpu::is_gpu_schedule(ancestor_map->schedule_type())) {
3✔
192
                    continue;
×
193
                }
×
194
                // A GPU dim is cooperative if its indvar does NOT appear in any tile base
195
                bool appears_in_bases = false;
3✔
196
                for (auto& base : tile_info_.bases) {
5✔
197
                    if (symbolic::uses(base, ancestor_map->indvar())) {
5✔
198
                        appears_in_bases = true;
×
199
                        break;
×
200
                    }
×
201
                }
5✔
202
                if (!appears_in_bases) {
3✔
203
                    has_cooperative_dim = true;
3✔
204
                    break;
3✔
205
                }
3✔
206
            }
3✔
207
        }
6✔
208
        if (!has_cooperative_dim) {
3✔
209
            return false;
×
210
        }
×
211
    } else {
23✔
212
        // CPU path: All extents must be provably integer
213
        for (auto& ext : tile_info_.dimensions) {
38✔
214
            if (!SymEngine::is_a<SymEngine::Integer>(*ext)) {
38✔
215
                return false;
×
216
            }
×
217
        }
38✔
218
    }
23✔
219

220
    return true;
26✔
221
}
28✔
222

223
void InLocalStorage::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
18✔
224
    auto& sdfg = builder.subject();
18✔
225
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
18✔
226

227
    auto parent_node = scope_analysis.parent_scope(&loop_);
18✔
228
    auto parent = dynamic_cast<structured_control_flow::Sequence*>(parent_node);
18✔
229
    if (!parent) {
18✔
230
        throw InvalidSDFGException("InLocalStorage: Parent of loop must be a Sequence!");
×
231
    }
×
232

233
    // Get type information
234
    auto& type = sdfg.type(this->container_);
18✔
235
    types::Scalar scalar_type(type.primitive_type());
18✔
236

237
    // Create local buffer name
238
    local_name_ = builder.find_new_name("__daisy_in_local_storage_" + this->container_);
18✔
239

240
    // Collect varying dimensions (extent > 1) and compute buffer layout
241
    std::vector<size_t> varying_dims;
18✔
242
    std::vector<symbolic::Expression> dim_sizes;
18✔
243
    for (size_t d = 0; d < tile_info_.dimensions.size(); d++) {
49✔
244
        auto& dim_size = tile_info_.dimensions.at(d);
31✔
245
        if (!symbolic::eq(dim_size, symbolic::integer(1))) {
31✔
246
            varying_dims.push_back(d);
24✔
247
            dim_sizes.push_back(dim_size);
24✔
248
        }
24✔
249
    }
31✔
250

251
    // Compute total buffer size
252
    symbolic::Expression total_size = symbolic::integer(1);
18✔
253
    for (auto& ds : dim_sizes) {
24✔
254
        total_size = symbolic::mul(total_size, ds);
24✔
255
    }
24✔
256

257
    // Helper: build linearized local index from per-dimension symbolic expressions
258
    auto linearize_exprs = [&](const std::vector<symbolic::Expression>& indices) -> symbolic::Expression {
37✔
259
        symbolic::Expression linear_idx = symbolic::integer(0);
37✔
260
        symbolic::Expression stride = symbolic::integer(1);
37✔
261
        for (int i = indices.size() - 1; i >= 0; i--) {
90✔
262
            linear_idx = symbolic::add(linear_idx, symbolic::mul(indices[i], stride));
53✔
263
            stride = symbolic::mul(stride, dim_sizes[i]);
53✔
264
        }
53✔
265
        return linear_idx;
37✔
266
    };
37✔
267

268
    // Helper: build linearized local index from per-dimension indvars (symbols)
269
    auto linearize = [&](const std::vector<symbolic::Symbol>& indvars) -> symbolic::Expression {
18✔
270
        std::vector<symbolic::Expression> exprs(indvars.begin(), indvars.end());
15✔
271
        return linearize_exprs(exprs);
15✔
272
    };
15✔
273

274
    // Helper: build source subset (base[d] + copy_indvar[d]) for original container
275
    bool is_pointer = (type.type_id() == types::TypeID::Pointer);
18✔
276
    auto build_original_subset = [&](const std::vector<symbolic::Expression>& copy_indices) -> data_flow::Subset {
18✔
277
        std::vector<symbolic::Expression> full_indices;
18✔
278
        size_t var_idx = 0;
18✔
279
        for (size_t d = 0; d < tile_info_.dimensions.size(); d++) {
49✔
280
            if (!symbolic::eq(tile_info_.dimensions.at(d), symbolic::integer(1))) {
31✔
281
                full_indices.push_back(symbolic::add(tile_info_.bases.at(d), copy_indices.at(var_idx++)));
24✔
282
            } else {
24✔
283
                full_indices.push_back(tile_info_.bases.at(d));
7✔
284
            }
7✔
285
        }
31✔
286

287
        if (is_pointer) {
18✔
288
            symbolic::Expression linear = tile_info_.offset;
18✔
289
            for (size_t d = 0; d < full_indices.size(); d++) {
49✔
290
                linear = symbolic::add(linear, symbolic::mul(tile_info_.strides.at(d), full_indices.at(d)));
31✔
291
            }
31✔
292
            return {linear};
18✔
293
        } else {
18✔
294
            return data_flow::Subset(full_indices.begin(), full_indices.end());
×
295
        }
×
296
    };
18✔
297

298
    // ==================================================================
299
    // Branch: GPU cooperative path vs CPU sequential path
300
    // ==================================================================
301
    if (storage_type_.is_nv_shared()) {
18✔
302
        // ============================================================
303
        // GPU COOPERATIVE PATH
304
        // ============================================================
305
        auto ancestors = scope_analysis.ancestor_scopes(&loop_);
3✔
306

307
        // Collect cooperative GPU dimensions (indvar not in tile bases)
308
        struct CoopDim {
3✔
309
            symbolic::Symbol indvar;
3✔
310
            symbolic::Integer block_size;
3✔
311
            gpu::GPUDimension dimension;
3✔
312
        };
3✔
313
        std::vector<CoopDim> coop_dims;
3✔
314

315
        for (auto* node : ancestors) {
15✔
316
            if (auto* ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
15✔
317
                if (!gpu::is_gpu_schedule(ancestor_map->schedule_type())) {
6✔
318
                    continue;
×
319
                }
×
320
                bool appears_in_bases = false;
6✔
321
                for (auto& base : tile_info_.bases) {
8✔
322
                    if (symbolic::uses(base, ancestor_map->indvar())) {
8✔
323
                        appears_in_bases = true;
2✔
324
                        break;
2✔
325
                    }
2✔
326
                }
8✔
327
                if (!appears_in_bases) {
6✔
328
                    coop_dims.push_back(
4✔
329
                        {ancestor_map->indvar(),
4✔
330
                         gpu::gpu_block_size(ancestor_map->schedule_type()),
4✔
331
                         gpu::gpu_dimension(ancestor_map->schedule_type())}
4✔
332
                    );
4✔
333
                }
4✔
334
            }
6✔
335
        }
15✔
336

337
        // Compute total cooperative thread count
338
        symbolic::Expression total_coop_threads = symbolic::integer(1);
3✔
339
        for (auto& cd : coop_dims) {
4✔
340
            total_coop_threads = symbolic::mul(total_coop_threads, cd.block_size);
4✔
341
        }
4✔
342

343
        // Create the local buffer with NV_Shared storage
344
        types::Array buffer_type(storage_type_, 0, {}, scalar_type, total_size);
3✔
345
        builder.add_container(local_name_, buffer_type);
3✔
346

347
        // Emit: barrier → guarded cooperative copy → barrier → loop
348
        // 1. Barrier before copy
349
        auto& barrier_block1 = builder.add_block_before(*parent, loop_, {}, loop_.debug_info());
3✔
350
        builder.add_library_node<data_flow::BarrierLocalNode>(barrier_block1, {});
3✔
351

352
        // 2. Cooperative copy with if_else guard
353
        // Flatten cooperative thread index: coop_flat = sum(indvar[i] * product(block_size[j] for j>i))
354
        symbolic::Expression coop_flat = symbolic::integer(0);
3✔
355
        symbolic::Expression coop_stride = symbolic::integer(1);
3✔
356
        for (int i = coop_dims.size() - 1; i >= 0; i--) {
7✔
357
            coop_flat = symbolic::add(coop_flat, symbolic::mul(coop_dims[i].indvar, coop_stride));
4✔
358
            coop_stride = symbolic::mul(coop_stride, coop_dims[i].block_size);
4✔
359
        }
4✔
360

361
        // Each thread loads elements strided by total_coop_threads
362
        // Thread t loads elements: t, t + total_threads, t + 2*total_threads, ...
363
        // We emit a loop: for (idx = coop_flat; idx < total_size; idx += total_coop_threads)
364
        auto idx_name = builder.find_new_name("__daisy_ils_coop_" + this->container_);
3✔
365
        types::Scalar idx_type(types::PrimitiveType::UInt64);
3✔
366
        builder.add_container(idx_name, idx_type);
3✔
367
        auto idx_var = symbolic::symbol(idx_name);
3✔
368

369
        auto copy_init = coop_flat;
3✔
370
        auto copy_condition = symbolic::Lt(idx_var, total_size);
3✔
371
        auto copy_update = symbolic::add(idx_var, total_coop_threads);
3✔
372

373
        auto& copy_loop = builder.add_map_before(
3✔
374
            *parent,
3✔
375
            loop_,
3✔
376
            idx_var,
3✔
377
            copy_condition,
3✔
378
            copy_init,
3✔
379
            copy_update,
3✔
380
            structured_control_flow::ScheduleType_Sequential::create(),
3✔
381
            {},
3✔
382
            loop_.debug_info()
3✔
383
        );
3✔
384

385
        // Decompose flat idx back into per-dimension indices for source subset
386
        // idx maps to varying_dims in row-major order
387
        auto& copy_scope = copy_loop.root();
3✔
388
        auto& copy_block = builder.add_block(copy_scope);
3✔
389
        auto& copy_src = builder.add_access(copy_block, this->container_);
3✔
390
        auto& copy_dst = builder.add_access(copy_block, local_name_);
3✔
391
        auto& copy_tasklet = builder.add_tasklet(copy_block, data_flow::TaskletCode::assign, "_out", {"_in"});
3✔
392

393
        // Decompose idx_var into per-dim indices
394
        std::vector<symbolic::Expression> copy_indices;
3✔
395
        symbolic::Expression remainder = idx_var;
3✔
396
        for (size_t i = 0; i < dim_sizes.size(); i++) {
6✔
397
            if (i < dim_sizes.size() - 1) {
3✔
398
                // integer division: idx / (product of remaining dims)
399
                symbolic::Expression divisor = symbolic::integer(1);
×
400
                for (size_t j = i + 1; j < dim_sizes.size(); j++) {
×
401
                    divisor = symbolic::mul(divisor, dim_sizes[j]);
×
402
                }
×
403
                auto quotient = symbolic::div(remainder, divisor);
×
404
                copy_indices.push_back(quotient);
×
405
                remainder = symbolic::mod(remainder, divisor);
×
406
            } else {
3✔
407
                copy_indices.push_back(remainder);
3✔
408
            }
3✔
409
        }
3✔
410

411
        auto copy_src_subset = build_original_subset(copy_indices);
3✔
412
        data_flow::Subset copy_dst_subset = {idx_var};
3✔
413

414
        builder.add_computational_memlet(copy_block, copy_src, copy_tasklet, "_in", copy_src_subset, type);
3✔
415
        builder.add_computational_memlet(copy_block, copy_tasklet, "_out", copy_dst, copy_dst_subset, buffer_type);
3✔
416

417
        // 3. Barrier after copy
418
        auto& barrier_block2 = builder.add_block_before(*parent, loop_, {}, loop_.debug_info());
3✔
419
        builder.add_library_node<data_flow::BarrierLocalNode>(barrier_block2, {});
3✔
420
    } else {
15✔
421
        // ============================================================
422
        // CPU SEQUENTIAL PATH
423
        // ============================================================
424
        // Create the local buffer with specified storage type
425
        types::Array buffer_type(storage_type_, 0, {}, scalar_type, total_size);
15✔
426
        builder.add_container(local_name_, buffer_type);
15✔
427

428
        std::vector<symbolic::Symbol> copy_indvars;
15✔
429
        structured_control_flow::Sequence* copy_scope =
15✔
430
            &builder.add_sequence_before(*parent, loop_, {}, loop_.debug_info());
15✔
431
        for (size_t i = 0; i < varying_dims.size(); i++) {
36✔
432
            size_t d = varying_dims[i];
21✔
433
            auto indvar_name = builder.find_new_name("__daisy_ils_" + this->container_ + "_d" + std::to_string(d));
21✔
434
            types::Scalar indvar_type(types::PrimitiveType::UInt64);
21✔
435
            builder.add_container(indvar_name, indvar_type);
21✔
436
            auto indvar = symbolic::symbol(indvar_name);
21✔
437
            copy_indvars.push_back(indvar);
21✔
438

439
            auto init = symbolic::integer(0);
21✔
440
            auto condition = symbolic::Lt(indvar, dim_sizes[i]);
21✔
441
            auto update = symbolic::add(indvar, symbolic::integer(1));
21✔
442

443
            auto& copy_loop = builder.add_map(
21✔
444
                *copy_scope,
21✔
445
                indvar,
21✔
446
                condition,
21✔
447
                init,
21✔
448
                update,
21✔
449
                structured_control_flow::ScheduleType_Sequential::create(),
21✔
450
                {},
21✔
451
                loop_.debug_info()
21✔
452
            );
21✔
453
            copy_scope = &copy_loop.root();
21✔
454
        }
21✔
455

456
        // Create copy block
457
        auto& copy_block = builder.add_block(*copy_scope);
15✔
458
        auto& copy_src = builder.add_access(copy_block, this->container_);
15✔
459
        auto& copy_dst = builder.add_access(copy_block, local_name_);
15✔
460
        auto& copy_tasklet = builder.add_tasklet(copy_block, data_flow::TaskletCode::assign, "_out", {"_in"});
15✔
461

462
        std::vector<symbolic::Expression> copy_exprs(copy_indvars.begin(), copy_indvars.end());
15✔
463
        auto copy_src_subset = build_original_subset(copy_exprs);
15✔
464
        data_flow::Subset copy_dst_subset = {linearize(copy_indvars)};
15✔
465

466
        builder.add_computational_memlet(copy_block, copy_src, copy_tasklet, "_in", copy_src_subset, type);
15✔
467
        types::Array buffer_type_ref(storage_type_, 0, {}, scalar_type, total_size);
15✔
468
        builder.add_computational_memlet(copy_block, copy_tasklet, "_out", copy_dst, copy_dst_subset, buffer_type_ref);
15✔
469
    }
15✔
470

471
    // ==================================================================
472
    // Update accesses in the main loop to use the local buffer
473
    // ==================================================================
474
    types::Array buffer_type(storage_type_, 0, {}, scalar_type, total_size);
18✔
475
    auto& mla = analysis_manager.get<analysis::MemoryLayoutAnalysis>();
18✔
476

477
    // Recursive helper to traverse all blocks in the loop body
478
    std::function<void(structured_control_flow::ControlFlowNode&)> rewrite_accesses;
18✔
479
    rewrite_accesses = [&](structured_control_flow::ControlFlowNode& node) {
71✔
480
        if (auto* block = dynamic_cast<structured_control_flow::Block*>(&node)) {
71✔
481
            auto& dfg = block->dataflow();
27✔
482

483
            // Collect access nodes to process (avoid iterator invalidation)
484
            std::vector<data_flow::AccessNode*> access_nodes;
27✔
485
            for (auto* access_node : dfg.data_nodes()) {
77✔
486
                if (access_node->data() == this->container_) {
77✔
487
                    access_nodes.push_back(access_node);
24✔
488
                }
24✔
489
            }
77✔
490

491
            for (auto* access : access_nodes) {
27✔
492
                // Classify memlets: group vs non-group
493
                struct MemletRewrite {
24✔
494
                    data_flow::Memlet* memlet;
24✔
495
                    data_flow::Subset local_subset;
24✔
496
                    bool is_outgoing;
24✔
497
                };
24✔
498
                std::vector<MemletRewrite> group_rewrites;
24✔
499
                bool all_in_group = true;
24✔
500

501
                for (auto& memlet : dfg.out_edges(*access)) {
24✔
502
                    if (group_memlets_.count(&memlet) == 0) {
24✔
503
                        all_in_group = false;
2✔
504
                        continue;
2✔
505
                    }
2✔
506
                    auto* acc = mla.access(memlet);
22✔
507
                    if (acc && acc->subset.size() == tile_info_.dimensions.size()) {
22✔
508
                        std::vector<symbolic::Expression> local_indices;
22✔
509
                        for (size_t d = 0; d < tile_info_.dimensions.size(); d++) {
61✔
510
                            if (!symbolic::eq(tile_info_.dimensions.at(d), symbolic::integer(1))) {
39✔
511
                                local_indices.push_back(symbolic::sub(acc->subset.at(d), tile_info_.bases.at(d)));
32✔
512
                            }
32✔
513
                        }
39✔
514
                        symbolic::Expression linear_idx = linearize_exprs(local_indices);
22✔
515
                        group_rewrites.push_back({&memlet, {linear_idx}, true});
22✔
516
                    }
22✔
517
                }
22✔
518
                for (auto& memlet : dfg.in_edges(*access)) {
24✔
519
                    if (group_memlets_.count(&memlet) == 0) {
×
520
                        all_in_group = false;
×
521
                        continue;
×
522
                    }
×
523
                    auto* acc = mla.access(memlet);
×
524
                    if (acc && acc->subset.size() == tile_info_.dimensions.size()) {
×
525
                        std::vector<symbolic::Expression> local_indices;
×
526
                        for (size_t d = 0; d < tile_info_.dimensions.size(); d++) {
×
527
                            if (!symbolic::eq(tile_info_.dimensions.at(d), symbolic::integer(1))) {
×
528
                                local_indices.push_back(symbolic::sub(acc->subset.at(d), tile_info_.bases.at(d)));
×
529
                            }
×
530
                        }
×
531
                        symbolic::Expression linear_idx = linearize_exprs(local_indices);
×
532
                        group_rewrites.push_back({&memlet, {linear_idx}, false});
×
533
                    }
×
534
                }
×
535

536
                if (group_rewrites.empty()) continue;
24✔
537

538
                if (all_in_group) {
22✔
539
                    // Simple case: all memlets in group → rewrite in-place and rename
540
                    for (auto& rw : group_rewrites) {
22✔
541
                        rw.memlet->set_subset(rw.local_subset);
22✔
542
                        rw.memlet->set_base_type(buffer_type);
22✔
543
                    }
22✔
544
                    access->data(local_name_);
22✔
545
                } else {
22✔
546
                    // Mixed case: split — create new local access node, redirect group memlets
547
                    auto& local_access = builder.add_access(*block, local_name_);
×
548
                    for (auto& rw : group_rewrites) {
×
549
                        if (rw.is_outgoing) {
×
550
                            // outgoing: access→tasklet  →  local_access→tasklet
551
                            auto& dst_node = rw.memlet->dst();
×
552
                            auto dst_conn = rw.memlet->dst_conn();
×
553
                            builder.remove_memlet(*block, *rw.memlet);
×
554
                            builder.add_memlet(
×
555
                                *block, local_access, "void", dst_node, dst_conn, rw.local_subset, buffer_type, {}
×
556
                            );
×
557
                        } else {
×
558
                            // incoming: tasklet→access  →  tasklet→local_access
559
                            auto& src_node = rw.memlet->src();
×
560
                            auto src_conn = rw.memlet->src_conn();
×
561
                            builder.remove_memlet(*block, *rw.memlet);
×
562
                            builder.add_memlet(
×
563
                                *block, src_node, src_conn, local_access, "void", rw.local_subset, buffer_type, {}
×
564
                            );
×
565
                        }
×
566
                    }
×
567
                }
×
568
            }
22✔
569
        } else if (auto* seq = dynamic_cast<structured_control_flow::Sequence*>(&node)) {
44✔
570
            for (size_t i = 0; i < seq->size(); i++) {
71✔
571
                rewrite_accesses(seq->at(i).first);
40✔
572
            }
40✔
573
        } else if (auto* loop = dynamic_cast<structured_control_flow::StructuredLoop*>(&node)) {
31✔
574
            rewrite_accesses(loop->root());
13✔
575
        } else if (auto* if_else = dynamic_cast<structured_control_flow::IfElse*>(&node)) {
13✔
576
            for (size_t i = 0; i < if_else->size(); i++) {
×
577
                rewrite_accesses(if_else->at(i).first);
×
578
            }
×
579
        }
×
580
    };
71✔
581
    rewrite_accesses(loop_.root());
18✔
582

583
    // Cleanup
584
    analysis_manager.invalidate_all();
18✔
585

586
    passes::SequenceFusion sf_pass;
18✔
587
    passes::DeadCFGElimination dce_pass;
18✔
588
    bool applies = false;
18✔
589
    do {
33✔
590
        applies = false;
33✔
591
        applies |= dce_pass.run(builder, analysis_manager);
33✔
592
        applies |= sf_pass.run(builder, analysis_manager);
33✔
593
    } while (applies);
33✔
594
}
18✔
595

596
// Serializes this transformation into `j`:
//   - "subgraph": entry "0" describes the loop (element id and "for"/"map"),
//     entry "1" describes the access node,
//   - "transformation_type": the value of name(),
//   - "container": the container name.
//
// Throws std::runtime_error if the loop is neither a For nor a Map.
// NOTE(review): the storage type is not written here — confirm whether
// round-tripping through from_json() is expected to preserve it.
void InLocalStorage::to_json(nlohmann::json& j) const {
    const bool is_for = dynamic_cast<structured_control_flow::For*>(&loop_) != nullptr;
    const bool is_map = !is_for && dynamic_cast<structured_control_flow::Map*>(&loop_) != nullptr;
    if (!is_for && !is_map) {
        throw std::runtime_error("Unsupported loop type for serialization of loop: " + loop_.indvar()->get_name());
    }
    const std::string loop_type = is_for ? "for" : "map";

    nlohmann::json loop_entry = {{"element_id", this->loop_.element_id()}, {"type", loop_type}};
    nlohmann::json node_entry = {{"element_id", this->access_node_.element_id()}, {"type", "access_node"}};

    j["subgraph"] = {{"0", loop_entry}, {"1", node_entry}};
    j["transformation_type"] = this->name();
    j["container"] = container_;
}
6✔
612

613
// Reconstructs the transformation from a description of the form produced by
// to_json(): `desc["subgraph"]["0"]["element_id"]` identifies the loop and
// `desc["subgraph"]["1"]["element_id"]` the access node.
//
// All lookups use the checked `at()` accessor, so a description with a missing
// key raises a json exception instead of hitting the unchecked const
// `operator[]`.
//
// Throws InvalidTransformationDescriptionException if the loop id does not
// resolve to an element, resolves to something that is not a structured loop,
// or the access-node id does not resolve to an access node.
//
// NOTE(review): the transformation is built from the loop and access node
// only; the storage type is not read from `desc` — confirm this is intended.
InLocalStorage InLocalStorage::from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& desc) {
    const auto& subgraph = desc.at("subgraph");

    const auto loop_id = subgraph.at("0").at("element_id").get<size_t>();
    auto element = builder.find_element_by_id(loop_id);
    if (!element) {
        throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(loop_id) + " not found.");
    }
    auto loop = dynamic_cast<structured_control_flow::StructuredLoop*>(element);
    if (!loop) {
        throw InvalidTransformationDescriptionException(
            "Element with ID " + std::to_string(loop_id) + " is not a structured loop."
        );
    }

    // Parse the access-node id once; it is needed for the lookup and the error message.
    const auto access_node_id = subgraph.at("1").at("element_id").get<size_t>();
    auto access_node = dynamic_cast<data_flow::AccessNode*>(builder.find_element_by_id(access_node_id));
    if (!access_node) {
        throw InvalidTransformationDescriptionException(
            "Access node with ID " + std::to_string(access_node_id) + " not found."
        );
    }

    return InLocalStorage(*loop, *access_node);
}
1✔
637

638
} // namespace transformations
639
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc