• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 27492724151

14 Jun 2026 08:04AM UTC coverage: 61.54% (+0.1%) from 61.42%
27492724151

push

github

web-flow
Extends test suites of local storage and loop transformations (#759)

* extends test suite for loop tiling

* extends the InLocalStorage test suite

* removes clean up passes from OLS transformation

* refactors ILS tests into inductive schema

* re-introduces read-only check in InLocalStorage

* refactors tests for ILS in multi-group cases

* refactors OLS tests

* use correct exception in OMPTransform deserialization

* adds tests for VectorizeTransform

* re-introduces pass-specific cleanup passes

3 of 5 new or added lines in 3 files covered. (60.0%)

2 existing lines in 1 file now uncovered.

36446 of 59223 relevant lines covered (61.54%)

1136.13 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.06
/opt/src/transformations/in_local_storage.cpp
1
#include "sdfg/transformations/in_local_storage.h"
2

3
#include <cstddef>
4
#include <functional>
5
#include <string>
6

7
#include "sdfg/analysis/memory_layout_analysis.h"
8
#include "sdfg/analysis/scope_analysis.h"
9
#include "sdfg/analysis/users.h"
10
#include "sdfg/builder/structured_sdfg_builder.h"
11
#include "sdfg/data_flow/access_node.h"
12
#include "sdfg/data_flow/library_nodes/barrier_local_node.h"
13
#include "sdfg/data_flow/memlet.h"
14
#include "sdfg/passes/structured_control_flow/dead_cfg_elimination.h"
15
#include "sdfg/passes/structured_control_flow/sequence_fusion.h"
16
#include "sdfg/structured_control_flow/if_else.h"
17
#include "sdfg/structured_control_flow/sequence.h"
18
#include "sdfg/structured_control_flow/structured_loop.h"
19
#include "sdfg/symbolic/symbolic.h"
20
#include "sdfg/targets/gpu/gpu_schedule_type.h"
21
#include "sdfg/types/array.h"
22
#include "sdfg/types/pointer.h"
23
#include "sdfg/types/scalar.h"
24

25
namespace sdfg {
26
namespace transformations {
27

28
// Binds the transformation to the loop it targets, to the access node that
// selects the container (the container name is taken from access_node.data()),
// and to the storage type requested for the local buffer.
InLocalStorage::InLocalStorage(
    structured_control_flow::StructuredLoop& loop,
    const data_flow::AccessNode& access_node,
    const types::StorageType& storage_type
)
    : loop_(loop),
      access_node_(access_node),
      container_(access_node.data()),
      storage_type_(storage_type) {
}
34

35
// Name of this transformation; to_json() stores it under "transformation_type".
std::string InLocalStorage::name() const {
    return std::string("InLocalStorage");
}
36

37
bool InLocalStorage::can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
33✔
38
    auto& sdfg = builder.subject();
33✔
39
    auto& body = this->loop_.root();
33✔
40

41
    tile_info_ = TileInfo{};
33✔
42

43
    // Criterion: Container must exist and is pointer
44
    if (!sdfg.exists(this->container_)) {
33✔
45
        return false;
×
46
    }
×
47
    auto& type = sdfg.type(this->container_);
33✔
48
    if (type.type_id() != types::TypeID::Pointer) {
33✔
UNCOV
49
        return false;
×
UNCOV
50
    }
×
51

52
    // Criterion: Container must be used in the loop body
53
    auto& users = analysis_manager.get<analysis::Users>();
33✔
54
    analysis::UsersView body_users(users, body);
33✔
55
    if (body_users.uses(this->container_).empty()) {
33✔
56
        return false;
2✔
57
    }
2✔
58

59
    // Criterion: Container must be read-only within the loop (no writes)
60
    if (!body_users.writes(this->container_).empty()) {
31✔
61
        return false;
1✔
62
    }
1✔
63

64
    // Use MemoryLayoutAnalysis tile group API
65
    // Find a representative memlet from the access node to identify its group.
66
    auto& mla = analysis_manager.get<analysis::MemoryLayoutAnalysis>();
30✔
67
    const analysis::MemoryTileGroup* group = nullptr;
30✔
68
    auto& dfg = access_node_.get_parent();
30✔
69
    for (auto& memlet : dfg.out_edges(access_node_)) {
30✔
70
        auto* candidate = mla.tile_group_for(loop_, memlet);
30✔
71
        if (!candidate) {
30✔
72
            continue;
×
73
        }
×
74

75
        auto extents = candidate->tile.extents_approx();
30✔
76
        if (extents.empty()) {
30✔
77
            continue;
×
78
        }
×
79

80
        // Reject candidates with any unbounded-dependent extent (returned as null).
81
        bool has_null = false;
30✔
82
        for (auto& ext : extents) {
52✔
83
            if (ext.is_null()) {
52✔
84
                has_null = true;
×
85
                break;
×
86
            }
×
87
        }
52✔
88
        if (has_null) {
30✔
89
            continue;
×
90
        }
×
91

92
        // GPU path: accept first valid group (substitution happens later)
93
        if (storage_type_.is_nv_shared()) {
30✔
94
            group = candidate;
5✔
95
            break;
5✔
96
        }
5✔
97

98
        // CPU path: require provably integer extents
99
        bool all_integer = true;
25✔
100
        for (auto& ext : extents) {
43✔
101
            if (!SymEngine::is_a<SymEngine::Integer>(*ext)) {
43✔
102
                all_integer = false;
×
103
                break;
×
104
            }
×
105
        }
43✔
106
        if (all_integer) {
25✔
107
            group = candidate;
25✔
108
            break;
25✔
109
        }
25✔
110
    }
25✔
111
    if (!group) {
30✔
112
        return false;
×
113
    }
×
114

115
    auto& tile = group->tile;
30✔
116
    auto extents = tile.extents_approx();
30✔
117

118
    // Store group memlets for use in apply()
119
    group_memlets_.clear();
30✔
120
    group_memlets_.insert(group->memlets.begin(), group->memlets.end());
30✔
121

122
    // Store tile info (before substitution, bases/strides stay symbolic)
123
    tile_info_.dimensions = extents;
30✔
124
    tile_info_.bases = tile.min_subset;
30✔
125
    tile_info_.strides = std::vector<symbolic::Expression>(tile.layout.strides().begin(), tile.layout.strides().end());
30✔
126
    tile_info_.offset = tile.layout.offset();
30✔
127

128
    // GPU shared memory: resolve symbolic extents using GPU block sizes and
129
    // require at least one cooperative dimension
130
    if (storage_type_.is_nv_shared()) {
30✔
131
        auto ancestors = ControlFlowNode::parent_chain(loop_);
5✔
132

133
        // Build substitution map: symbolic GPU map bounds → integer block sizes
134
        // E.g., Map condition "i < N" with block_size=32 → N=32
135
        for (auto* node : ancestors) {
23✔
136
            if (auto* ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
23✔
137
                if (!gpu::is_gpu_schedule(ancestor_map->schedule_type())) {
9✔
138
                    continue;
×
139
                }
×
140
                auto block_size = gpu::gpu_block_size(ancestor_map->schedule_type());
9✔
141
                // Extract symbolic bound from condition: Lt(indvar, BOUND)
142
                auto condition = ancestor_map->condition();
9✔
143
                if (SymEngine::is_a<SymEngine::StrictLessThan>(*condition)) {
9✔
144
                    auto stl = SymEngine::rcp_static_cast<const SymEngine::StrictLessThan>(condition);
9✔
145
                    auto rhs = stl->get_args()[1];
9✔
146
                    auto iter_count = symbolic::sub(rhs, ancestor_map->init());
9✔
147
                    if (!SymEngine::is_a<SymEngine::Integer>(*iter_count)) {
9✔
148
                        // Symbolic bound — substitute with block size in extents and bases
149
                        for (auto& ext : tile_info_.dimensions) {
16✔
150
                            ext = symbolic::simplify(symbolic::subs(ext, iter_count, block_size));
16✔
151
                        }
16✔
152
                        for (auto& base : tile_info_.bases) {
16✔
153
                            base = symbolic::simplify(symbolic::subs(base, iter_count, block_size));
16✔
154
                        }
16✔
155
                    }
9✔
156
                }
9✔
157
            }
9✔
158
        }
23✔
159

160
        // Also resolve the loop's own bound if symbolic and matches a block size
161
        // E.g., For k = 0..K where K is a parameter — check if K can be resolved
162
        // from any GPU ancestor map
163
        // (Already handled above: if K appears as a GPU map bound, it's substituted)
164

165
        // Criterion: All extents must now be provably integer
166
        for (auto& ext : tile_info_.dimensions) {
9✔
167
            if (!SymEngine::is_a<SymEngine::Integer>(*ext)) {
9✔
168
                return false;
2✔
169
            }
2✔
170
        }
9✔
171

172
        // Criterion: At least one cooperative dimension
173
        bool has_cooperative_dim = false;
3✔
174
        for (auto* node : ancestors) {
6✔
175
            if (auto* ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
6✔
176
                if (!gpu::is_gpu_schedule(ancestor_map->schedule_type())) {
3✔
177
                    continue;
×
178
                }
×
179
                // A GPU dim is cooperative if its indvar does NOT appear in any tile base
180
                bool appears_in_bases = false;
3✔
181
                for (auto& base : tile_info_.bases) {
5✔
182
                    if (symbolic::uses(base, ancestor_map->indvar())) {
5✔
183
                        appears_in_bases = true;
×
184
                        break;
×
185
                    }
×
186
                }
5✔
187
                if (!appears_in_bases) {
3✔
188
                    has_cooperative_dim = true;
3✔
189
                    break;
3✔
190
                }
3✔
191
            }
3✔
192
        }
6✔
193
        if (!has_cooperative_dim) {
3✔
194
            return false;
×
195
        }
×
196
    }
3✔
197

198
    return true;
28✔
199
}
30✔
200

201
void InLocalStorage::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
21✔
202
    auto& sdfg = builder.subject();
21✔
203

204
    auto parent_node = loop_.get_parent();
21✔
205
    auto parent = dynamic_cast<structured_control_flow::Sequence*>(parent_node);
21✔
206
    if (!parent) {
21✔
207
        throw InvalidSDFGException("InLocalStorage: Parent of loop must be a Sequence!");
×
208
    }
×
209

210
    // We replace all relevant memlets with flat local indices
211
    // Thus, we now use a flat pointer to index into container
212
    // Remark: sdfg.type may return an opaque pointer, so use
213
    //         memlet instead
214
    auto* memlet = *group_memlets_.begin();
21✔
215
    types::Scalar scalar_type(memlet->base_type().primitive_type());
21✔
216
    types::Pointer pointer_type(scalar_type);
21✔
217

218
    // Create local buffer name
219
    local_name_ = builder.find_new_name("__daisy_in_local_storage_" + this->container_);
21✔
220

221
    // Collect varying dimensions (extent > 1) and compute buffer layout
222
    std::vector<size_t> varying_dims;
21✔
223
    std::vector<symbolic::Expression> dim_sizes;
21✔
224
    for (size_t d = 0; d < tile_info_.dimensions.size(); d++) {
58✔
225
        auto& dim_size = tile_info_.dimensions.at(d);
37✔
226
        if (!symbolic::eq(dim_size, symbolic::integer(1))) {
37✔
227
            varying_dims.push_back(d);
28✔
228
            dim_sizes.push_back(dim_size);
28✔
229
        }
28✔
230
    }
37✔
231

232
    // Compute total buffer size
233
    symbolic::Expression total_size = symbolic::integer(1);
21✔
234
    for (auto& ds : dim_sizes) {
28✔
235
        total_size = symbolic::mul(total_size, ds);
28✔
236
    }
28✔
237

238
    // Helper: build linearized local index from per-dimension symbolic expressions
239
    auto linearize_exprs = [&](const std::vector<symbolic::Expression>& indices) -> symbolic::Expression {
43✔
240
        symbolic::Expression linear_idx = symbolic::integer(0);
43✔
241
        symbolic::Expression stride = symbolic::integer(1);
43✔
242
        for (int i = indices.size() - 1; i >= 0; i--) {
104✔
243
            linear_idx = symbolic::add(linear_idx, symbolic::mul(indices[i], stride));
61✔
244
            stride = symbolic::mul(stride, dim_sizes[i]);
61✔
245
        }
61✔
246
        return linear_idx;
43✔
247
    };
43✔
248

249
    // Helper: build linearized local index from per-dimension indvars (symbols)
250
    auto linearize = [&](const std::vector<symbolic::Symbol>& indvars) -> symbolic::Expression {
21✔
251
        std::vector<symbolic::Expression> exprs(indvars.begin(), indvars.end());
18✔
252
        return linearize_exprs(exprs);
18✔
253
    };
18✔
254

255
    // Helper: build source subset (base[d] + copy_indvar[d]) for original container
256
    auto build_original_subset = [&](const std::vector<symbolic::Expression>& copy_indices) -> data_flow::Subset {
21✔
257
        std::vector<symbolic::Expression> full_indices;
21✔
258
        size_t var_idx = 0;
21✔
259
        for (size_t d = 0; d < tile_info_.dimensions.size(); d++) {
58✔
260
            if (!symbolic::eq(tile_info_.dimensions.at(d), symbolic::integer(1))) {
37✔
261
                full_indices.push_back(symbolic::add(tile_info_.bases.at(d), copy_indices.at(var_idx++)));
28✔
262
            } else {
28✔
263
                full_indices.push_back(tile_info_.bases.at(d));
9✔
264
            }
9✔
265
        }
37✔
266

267
        symbolic::Expression linear = tile_info_.offset;
21✔
268
        for (size_t d = 0; d < full_indices.size(); d++) {
58✔
269
            linear = symbolic::add(linear, symbolic::mul(tile_info_.strides.at(d), full_indices.at(d)));
37✔
270
        }
37✔
271
        return {linear};
21✔
272
    };
21✔
273

274
    // ==================================================================
275
    // Branch: GPU cooperative path vs CPU sequential path
276
    // ==================================================================
277
    if (storage_type_.is_nv_shared()) {
21✔
278
        // ============================================================
279
        // GPU COOPERATIVE PATH
280
        // ============================================================
281
        auto ancestors = ControlFlowNode::parent_chain(loop_);
3✔
282

283
        // Collect cooperative GPU dimensions (indvar not in tile bases)
284
        struct CoopDim {
3✔
285
            symbolic::Symbol indvar;
3✔
286
            symbolic::Integer block_size;
3✔
287
            gpu::GPUDimension dimension;
3✔
288
        };
3✔
289
        std::vector<CoopDim> coop_dims;
3✔
290

291
        for (auto* node : ancestors) {
15✔
292
            if (auto* ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
15✔
293
                if (!gpu::is_gpu_schedule(ancestor_map->schedule_type())) {
6✔
294
                    continue;
×
295
                }
×
296
                bool appears_in_bases = false;
6✔
297
                for (auto& base : tile_info_.bases) {
8✔
298
                    if (symbolic::uses(base, ancestor_map->indvar())) {
8✔
299
                        appears_in_bases = true;
2✔
300
                        break;
2✔
301
                    }
2✔
302
                }
8✔
303
                if (!appears_in_bases) {
6✔
304
                    coop_dims.push_back(
4✔
305
                        {ancestor_map->indvar(),
4✔
306
                         gpu::gpu_block_size(ancestor_map->schedule_type()),
4✔
307
                         gpu::gpu_dimension(ancestor_map->schedule_type())}
4✔
308
                    );
4✔
309
                }
4✔
310
            }
6✔
311
        }
15✔
312

313
        // Compute total cooperative thread count
314
        symbolic::Expression total_coop_threads = symbolic::integer(1);
3✔
315
        for (auto& cd : coop_dims) {
4✔
316
            total_coop_threads = symbolic::mul(total_coop_threads, cd.block_size);
4✔
317
        }
4✔
318

319
        // Create the local buffer with NV_Shared storage
320
        types::Array buffer_type(storage_type_, 0, {}, scalar_type, total_size);
3✔
321
        builder.add_container(local_name_, buffer_type);
3✔
322

323
        // Emit: barrier → guarded cooperative copy → barrier → loop
324
        // 1. Barrier before copy
325
        auto& barrier_block1 = builder.add_block_before(*parent, loop_, {}, loop_.debug_info());
3✔
326
        builder.add_library_node<data_flow::BarrierLocalNode>(barrier_block1, {});
3✔
327

328
        // 2. Cooperative copy with if_else guard
329
        // Flatten cooperative thread index: coop_flat = sum(indvar[i] * product(block_size[j] for j>i))
330
        symbolic::Expression coop_flat = symbolic::integer(0);
3✔
331
        symbolic::Expression coop_stride = symbolic::integer(1);
3✔
332
        for (int i = coop_dims.size() - 1; i >= 0; i--) {
7✔
333
            coop_flat = symbolic::add(coop_flat, symbolic::mul(coop_dims[i].indvar, coop_stride));
4✔
334
            coop_stride = symbolic::mul(coop_stride, coop_dims[i].block_size);
4✔
335
        }
4✔
336

337
        // Each thread loads elements strided by total_coop_threads
338
        // Thread t loads elements: t, t + total_threads, t + 2*total_threads, ...
339
        // We emit a loop: for (idx = coop_flat; idx < total_size; idx += total_coop_threads)
340
        auto idx_name = builder.find_new_name("__daisy_ils_coop_" + this->container_);
3✔
341
        types::Scalar idx_type(types::PrimitiveType::UInt64);
3✔
342
        builder.add_container(idx_name, idx_type);
3✔
343
        auto idx_var = symbolic::symbol(idx_name);
3✔
344

345
        auto copy_init = coop_flat;
3✔
346
        auto copy_condition = symbolic::Lt(idx_var, total_size);
3✔
347
        auto copy_update = symbolic::add(idx_var, total_coop_threads);
3✔
348

349
        auto& copy_loop = builder.add_map_before(
3✔
350
            *parent,
3✔
351
            loop_,
3✔
352
            idx_var,
3✔
353
            copy_condition,
3✔
354
            copy_init,
3✔
355
            copy_update,
3✔
356
            structured_control_flow::ScheduleType_Sequential::create(),
3✔
357
            {},
3✔
358
            loop_.debug_info()
3✔
359
        );
3✔
360

361
        // Decompose flat idx back into per-dimension indices for source subset
362
        // idx maps to varying_dims in row-major order
363
        auto& copy_scope = copy_loop.root();
3✔
364
        auto& copy_block = builder.add_block(copy_scope);
3✔
365
        auto& copy_src = builder.add_access(copy_block, this->container_);
3✔
366
        auto& copy_dst = builder.add_access(copy_block, local_name_);
3✔
367
        auto& copy_tasklet = builder.add_tasklet(copy_block, data_flow::TaskletCode::assign, "_out", {"_in"});
3✔
368

369
        // Decompose idx_var into per-dim indices
370
        std::vector<symbolic::Expression> copy_indices;
3✔
371
        symbolic::Expression remainder = idx_var;
3✔
372
        for (size_t i = 0; i < dim_sizes.size(); i++) {
6✔
373
            if (i < dim_sizes.size() - 1) {
3✔
374
                // integer division: idx / (product of remaining dims)
375
                symbolic::Expression divisor = symbolic::integer(1);
×
376
                for (size_t j = i + 1; j < dim_sizes.size(); j++) {
×
377
                    divisor = symbolic::mul(divisor, dim_sizes[j]);
×
378
                }
×
379
                auto quotient = symbolic::div(remainder, divisor);
×
380
                copy_indices.push_back(quotient);
×
381
                remainder = symbolic::mod(remainder, divisor);
×
382
            } else {
3✔
383
                copy_indices.push_back(remainder);
3✔
384
            }
3✔
385
        }
3✔
386

387
        auto copy_src_subset = build_original_subset(copy_indices);
3✔
388
        data_flow::Subset copy_dst_subset = {idx_var};
3✔
389

390
        builder.add_computational_memlet(copy_block, copy_src, copy_tasklet, "_in", copy_src_subset, pointer_type);
3✔
391
        builder.add_computational_memlet(copy_block, copy_tasklet, "_out", copy_dst, copy_dst_subset, buffer_type);
3✔
392

393
        // 3. Barrier after copy
394
        auto& barrier_block2 = builder.add_block_before(*parent, loop_, {}, loop_.debug_info());
3✔
395
        builder.add_library_node<data_flow::BarrierLocalNode>(barrier_block2, {});
3✔
396
    } else {
18✔
397
        // ============================================================
398
        // CPU SEQUENTIAL PATH
399
        // ============================================================
400
        // Create the local buffer with specified storage type
401
        types::Array buffer_type(storage_type_, 0, {}, scalar_type, total_size);
18✔
402
        builder.add_container(local_name_, buffer_type);
18✔
403

404
        std::vector<symbolic::Symbol> copy_indvars;
18✔
405
        structured_control_flow::Sequence* copy_scope =
18✔
406
            &builder.add_sequence_before(*parent, loop_, {}, loop_.debug_info());
18✔
407
        for (size_t i = 0; i < varying_dims.size(); i++) {
43✔
408
            size_t d = varying_dims[i];
25✔
409
            auto indvar_name = builder.find_new_name("__daisy_ils_" + this->container_ + "_d" + std::to_string(d));
25✔
410
            types::Scalar indvar_type(types::PrimitiveType::UInt64);
25✔
411
            builder.add_container(indvar_name, indvar_type);
25✔
412
            auto indvar = symbolic::symbol(indvar_name);
25✔
413
            copy_indvars.push_back(indvar);
25✔
414

415
            auto init = symbolic::integer(0);
25✔
416
            auto condition = symbolic::Lt(indvar, dim_sizes[i]);
25✔
417
            auto update = symbolic::add(indvar, symbolic::integer(1));
25✔
418

419
            auto& copy_loop = builder.add_map(
25✔
420
                *copy_scope,
25✔
421
                indvar,
25✔
422
                condition,
25✔
423
                init,
25✔
424
                update,
25✔
425
                structured_control_flow::ScheduleType_Sequential::create(),
25✔
426
                {},
25✔
427
                loop_.debug_info()
25✔
428
            );
25✔
429
            copy_scope = &copy_loop.root();
25✔
430
        }
25✔
431

432
        // Create copy block
433
        auto& copy_block = builder.add_block(*copy_scope);
18✔
434
        auto& copy_src = builder.add_access(copy_block, this->container_);
18✔
435
        auto& copy_dst = builder.add_access(copy_block, local_name_);
18✔
436
        auto& copy_tasklet = builder.add_tasklet(copy_block, data_flow::TaskletCode::assign, "_out", {"_in"});
18✔
437

438
        std::vector<symbolic::Expression> copy_exprs(copy_indvars.begin(), copy_indvars.end());
18✔
439
        auto copy_src_subset = build_original_subset(copy_exprs);
18✔
440
        data_flow::Subset copy_dst_subset = {linearize(copy_indvars)};
18✔
441

442
        builder.add_computational_memlet(copy_block, copy_src, copy_tasklet, "_in", copy_src_subset, pointer_type);
18✔
443
        types::Array buffer_type_ref(storage_type_, 0, {}, scalar_type, total_size);
18✔
444
        builder.add_computational_memlet(copy_block, copy_tasklet, "_out", copy_dst, copy_dst_subset, buffer_type_ref);
18✔
445
    }
18✔
446

447
    // ==================================================================
448
    // Update accesses in the main loop to use the local buffer
449
    // ==================================================================
450
    types::Array buffer_type(storage_type_, 0, {}, scalar_type, total_size);
21✔
451
    auto& mla = analysis_manager.get<analysis::MemoryLayoutAnalysis>();
21✔
452

453
    // Recursive helper to traverse all blocks in the loop body
454
    std::function<void(structured_control_flow::ControlFlowNode&)> rewrite_accesses;
21✔
455
    rewrite_accesses = [&](structured_control_flow::ControlFlowNode& node) {
81✔
456
        if (auto* block = dynamic_cast<structured_control_flow::Block*>(&node)) {
81✔
457
            auto& dfg = block->dataflow();
32✔
458

459
            // Collect access nodes to process (avoid iterator invalidation)
460
            std::vector<data_flow::AccessNode*> access_nodes;
32✔
461
            for (auto* access_node : dfg.data_nodes()) {
91✔
462
                if (access_node->data() == this->container_) {
91✔
463
                    access_nodes.push_back(access_node);
26✔
464
                }
26✔
465
            }
91✔
466

467
            for (auto* access : access_nodes) {
32✔
468
                // Classify memlets: group vs non-group
469
                struct MemletRewrite {
26✔
470
                    data_flow::Memlet* memlet;
26✔
471
                    data_flow::Subset local_subset;
26✔
472
                    bool is_outgoing;
26✔
473
                };
26✔
474
                std::vector<MemletRewrite> group_rewrites;
26✔
475
                bool all_in_group = true;
26✔
476

477
                for (auto& memlet : dfg.out_edges(*access)) {
27✔
478
                    if (group_memlets_.count(&memlet) == 0) {
27✔
479
                        all_in_group = false;
2✔
480
                        continue;
2✔
481
                    }
2✔
482
                    auto* acc = mla.access(memlet);
25✔
483
                    if (acc && acc->subset.size() == tile_info_.dimensions.size()) {
25✔
484
                        std::vector<symbolic::Expression> local_indices;
25✔
485
                        for (size_t d = 0; d < tile_info_.dimensions.size(); d++) {
70✔
486
                            if (!symbolic::eq(tile_info_.dimensions.at(d), symbolic::integer(1))) {
45✔
487
                                local_indices.push_back(symbolic::sub(acc->subset.at(d), tile_info_.bases.at(d)));
36✔
488
                            }
36✔
489
                        }
45✔
490
                        symbolic::Expression linear_idx = linearize_exprs(local_indices);
25✔
491
                        group_rewrites.push_back({&memlet, {linear_idx}, true});
25✔
492
                    }
25✔
493
                }
25✔
494
                for (auto& memlet : dfg.in_edges(*access)) {
26✔
495
                    if (group_memlets_.count(&memlet) == 0) {
×
496
                        all_in_group = false;
×
497
                        continue;
×
498
                    }
×
499
                    auto* acc = mla.access(memlet);
×
500
                    if (acc && acc->subset.size() == tile_info_.dimensions.size()) {
×
501
                        std::vector<symbolic::Expression> local_indices;
×
502
                        for (size_t d = 0; d < tile_info_.dimensions.size(); d++) {
×
503
                            if (!symbolic::eq(tile_info_.dimensions.at(d), symbolic::integer(1))) {
×
504
                                local_indices.push_back(symbolic::sub(acc->subset.at(d), tile_info_.bases.at(d)));
×
505
                            }
×
506
                        }
×
507
                        symbolic::Expression linear_idx = linearize_exprs(local_indices);
×
508
                        group_rewrites.push_back({&memlet, {linear_idx}, false});
×
509
                    }
×
510
                }
×
511

512
                if (group_rewrites.empty()) continue;
26✔
513

514
                if (all_in_group) {
25✔
515
                    // Simple case: all memlets in group → rewrite in-place and rename
516
                    for (auto& rw : group_rewrites) {
24✔
517
                        rw.memlet->set_subset(rw.local_subset);
24✔
518
                        rw.memlet->set_base_type(buffer_type);
24✔
519
                    }
24✔
520
                    access->data(local_name_);
24✔
521
                } else {
24✔
522
                    // Mixed case: split — create new local access node, redirect group memlets
523
                    auto& local_access = builder.add_access(*block, local_name_);
1✔
524
                    for (auto& rw : group_rewrites) {
1✔
525
                        if (rw.is_outgoing) {
1✔
526
                            // outgoing: access→tasklet  →  local_access→tasklet
527
                            auto& dst_node = rw.memlet->dst();
1✔
528
                            auto dst_conn = rw.memlet->dst_conn();
1✔
529
                            builder.remove_memlet(*block, *rw.memlet);
1✔
530
                            builder.add_memlet(
1✔
531
                                *block, local_access, "void", dst_node, dst_conn, rw.local_subset, buffer_type, {}
1✔
532
                            );
1✔
533
                        } else {
1✔
534
                            // incoming: tasklet→access  →  tasklet→local_access
535
                            auto& src_node = rw.memlet->src();
×
536
                            auto src_conn = rw.memlet->src_conn();
×
537
                            builder.remove_memlet(*block, *rw.memlet);
×
538
                            builder.add_memlet(
×
539
                                *block, src_node, src_conn, local_access, "void", rw.local_subset, buffer_type, {}
×
540
                            );
×
541
                        }
×
542
                    }
1✔
543
                }
1✔
544
            }
25✔
545
        } else if (auto* seq = dynamic_cast<structured_control_flow::Sequence*>(&node)) {
49✔
546
            for (size_t i = 0; i < seq->size(); i++) {
81✔
547
                rewrite_accesses(seq->at(i).first);
46✔
548
            }
46✔
549
        } else if (auto* loop = dynamic_cast<structured_control_flow::StructuredLoop*>(&node)) {
35✔
550
            rewrite_accesses(loop->root());
14✔
551
        } else if (auto* if_else = dynamic_cast<structured_control_flow::IfElse*>(&node)) {
14✔
552
            for (size_t i = 0; i < if_else->size(); i++) {
×
553
                rewrite_accesses(if_else->at(i).first);
×
554
            }
×
555
        }
×
556
    };
81✔
557
    rewrite_accesses(loop_.root());
21✔
558

559
    // Cleanup
560
    analysis_manager.invalidate_all();
21✔
561

562
    passes::SequenceFusion sf_pass;
21✔
563
    passes::DeadCFGElimination dce_pass;
21✔
564
    bool applies = false;
21✔
565
    do {
39✔
566
        applies = false;
39✔
567
        applies |= dce_pass.run(builder, analysis_manager);
39✔
568
        applies |= sf_pass.run(builder, analysis_manager);
39✔
569
    } while (applies);
39✔
570
}
21✔
571

572
// Writes a description of this transformation into `j`:
//   "subgraph"            - element ids and kinds of the loop ("for" or "map")
//                           and of the access node,
//   "transformation_type" - name(),
//   "container"           - name of the container.
// Throws std::runtime_error if the loop is neither a For nor a Map; `j` is not
// touched in that case.
void InLocalStorage::to_json(nlohmann::json& j) const {
    const bool is_for = dynamic_cast<structured_control_flow::For*>(&loop_) != nullptr;
    const bool is_map = !is_for && dynamic_cast<structured_control_flow::Map*>(&loop_) != nullptr;
    if (!is_for && !is_map) {
        throw std::runtime_error("Unsupported loop type for serialization of loop: " + loop_.indvar()->get_name());
    }
    const std::string loop_type = is_for ? "for" : "map";

    nlohmann::json subgraph = nlohmann::json::object();
    subgraph["0"] = {{"element_id", this->loop_.element_id()}, {"type", loop_type}};
    subgraph["1"] = {{"element_id", this->access_node_.element_id()}, {"type", "access_node"}};

    j["subgraph"] = subgraph;
    j["transformation_type"] = this->name();
    j["container"] = container_;
}
588

589
// Rebuilds the transformation from a description of the shape written by
// to_json(): desc["subgraph"]["0"]["element_id"] identifies the loop and
// desc["subgraph"]["1"]["element_id"] identifies the access node.
//
// All keys are read with at(), so a missing key raises nlohmann's out_of_range
// exception instead of invoking operator[] on a const json with an absent key.
//
// Throws InvalidTransformationDescriptionException if the loop id does not
// resolve to an element, if that element is not a structured loop, or if the
// access node id does not resolve to an access node.
//
// NOTE(review): the storage type is not part of the description, so the
// instance is built from loop and access node only — presumably relying on a
// default argument declared in the header; confirm.
InLocalStorage InLocalStorage::from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& desc) {
    const auto& subgraph = desc.at("subgraph");

    const auto loop_id = subgraph.at("0").at("element_id").get<size_t>();
    auto* element = builder.find_element_by_id(loop_id);
    if (!element) {
        throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(loop_id) + " not found.");
    }
    auto* loop = dynamic_cast<structured_control_flow::StructuredLoop*>(element);
    if (!loop) {
        throw InvalidTransformationDescriptionException(
            "Element with ID " + std::to_string(loop_id) + " is not a structured loop."
        );
    }

    const auto access_node_id = subgraph.at("1").at("element_id").get<size_t>();
    auto* access_node = dynamic_cast<data_flow::AccessNode*>(builder.find_element_by_id(access_node_id));
    if (!access_node) {
        throw InvalidTransformationDescriptionException(
            "Access node with ID " + std::to_string(access_node_id) + " not found."
        );
    }

    return InLocalStorage(*loop, *access_node);
}
613

614
} // namespace transformations
615
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc