• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 25313866168

04 May 2026 10:25AM UTC coverage: 64.448%. First build
25313866168

Pull #699

github

web-flow
Merge 5c2bf1d67 into eedda3adc
Pull Request #699: Extends memory layout analysis to support groups of memlets

306 of 355 new or added lines in 7 files covered. (86.2%)

31949 of 49573 relevant lines covered (64.45%)

2301.3 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

83.69
/opt/src/transformations/in_local_storage.cpp
1
#include "sdfg/transformations/in_local_storage.h"
2

3
#include <cstddef>
4
#include <functional>
5
#include <string>
6

7
#include "sdfg/analysis/memory_layout_analysis.h"
8
#include "sdfg/analysis/scope_analysis.h"
9
#include "sdfg/analysis/users.h"
10
#include "sdfg/builder/structured_sdfg_builder.h"
11
#include "sdfg/data_flow/access_node.h"
12
#include "sdfg/data_flow/library_nodes/barrier_local_node.h"
13
#include "sdfg/data_flow/memlet.h"
14
#include "sdfg/passes/structured_control_flow/dead_cfg_elimination.h"
15
#include "sdfg/passes/structured_control_flow/sequence_fusion.h"
16
#include "sdfg/structured_control_flow/if_else.h"
17
#include "sdfg/structured_control_flow/sequence.h"
18
#include "sdfg/structured_control_flow/structured_loop.h"
19
#include "sdfg/symbolic/symbolic.h"
20
#include "sdfg/targets/gpu/gpu_schedule_type.h"
21
#include "sdfg/types/array.h"
22
#include "sdfg/types/pointer.h"
23
#include "sdfg/types/scalar.h"
24

25
namespace sdfg {
26
namespace transformations {
27

28
// Constructs the transformation for one (loop, access node) pair.
// `loop` is the loop whose body will read from the new local buffer,
// `access_node` identifies the container to be staged, and `storage_type`
// selects the target storage (e.g. NV shared memory vs. a CPU-local array).
// The container name is snapshotted from the access node at construction
// time; the tile/group state is computed later in can_be_applied().
// NOTE(review): from_json() constructs this with only two arguments, so the
// header presumably declares a default for `storage_type` — confirm there.
InLocalStorage::InLocalStorage(
    structured_control_flow::StructuredLoop& loop,
    const data_flow::AccessNode& access_node,
    const types::StorageType& storage_type
)
    : loop_(loop), access_node_(access_node), container_(access_node.data()), storage_type_(storage_type) {}
34

35
// Human-readable identifier of this transformation, used e.g. as the
// "transformation_type" field during JSON serialization.
std::string InLocalStorage::name() const {
    return "InLocalStorage";
}
36

37
// Checks all preconditions for staging `container_` into local storage for
// `loop_`, and — as a side effect — fills `tile_info_` (tile extents, bases,
// strides, offset) and `group_memlets_` for later use by apply().
// Returns false as soon as any criterion fails. Criteria, in order:
//   1. the container exists and is an Array or Pointer (not Scalar),
//   2. it is used but never written inside the loop body (read-only),
//   3. MemoryLayoutAnalysis can assign the access to a tile group,
//   4. all tile extents are provably integer (on the GPU shared-memory path,
//      after substituting symbolic GPU map bounds with block sizes), and
//   5. (GPU only) at least one cooperative GPU dimension exists.
bool InLocalStorage::can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
    auto& sdfg = builder.subject();
    auto& body = this->loop_.root();

    // Reset any state left over from a previous can_be_applied() call.
    tile_info_ = TileInfo{};

    // Criterion: Container must exist
    if (!sdfg.exists(this->container_)) {
        return false;
    }

    auto& type = sdfg.type(this->container_);

    // Criterion: Container must be Array or Pointer (not Scalar)
    if (type.type_id() != types::TypeID::Pointer && type.type_id() != types::TypeID::Array) {
        return false;
    }

    // Criterion: Container must be used in the loop body
    auto& users = analysis_manager.get<analysis::Users>();
    analysis::UsersView body_users(users, body);
    if (body_users.uses(this->container_).empty()) {
        return false;
    }

    // Criterion: Container must be read-only within the loop (no writes)
    if (!body_users.writes(this->container_).empty()) {
        return false;
    }

    // Use MemoryLayoutAnalysis tile group API
    auto& mla = analysis_manager.get<analysis::MemoryLayoutAnalysis>();

    // Find a representative memlet from the access node to identify its group
    // (any outgoing edge will do — the loop grabs the first one and breaks).
    const data_flow::Memlet* representative_memlet = nullptr;
    auto& dfg = access_node_.get_parent();
    for (auto& memlet : dfg.out_edges(access_node_)) {
        representative_memlet = &memlet;
        break;
    }
    // No outgoing memlet means the access node has no reads to stage.
    if (!representative_memlet) {
        return false;
    }

    auto* group = mla.tile_group_for(loop_, *representative_memlet);
    if (!group) {
        return false;
    }

    auto& tile = group->tile;

    // Store group memlets for use in apply()
    group_memlets_.clear();
    group_memlets_.insert(group->memlets.begin(), group->memlets.end());

    // Get overapproximated extents (integer upper bounds)
    auto extents = tile.extents_approx();
    if (extents.empty()) {
        return false;
    }

    // Store tile info (before substitution, bases/strides stay symbolic)
    tile_info_.dimensions = extents;
    tile_info_.bases = tile.min_subset;
    tile_info_.strides = std::vector<symbolic::Expression>(tile.layout.strides().begin(), tile.layout.strides().end());
    tile_info_.offset = tile.layout.offset();

    // GPU shared memory: resolve symbolic extents using GPU block sizes and
    // require at least one cooperative dimension
    if (storage_type_.is_nv_shared()) {
        auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
        auto ancestors = scope_analysis.ancestor_scopes(&loop_);

        // Build substitution map: symbolic GPU map bounds → integer block sizes
        // E.g., Map condition "i < N" with block_size=32 → N=32
        for (auto* node : ancestors) {
            if (auto* ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
                if (!gpu::is_gpu_schedule(ancestor_map->schedule_type())) {
                    continue;
                }
                auto block_size = gpu::gpu_block_size(ancestor_map->schedule_type());
                // Extract symbolic bound from condition: Lt(indvar, BOUND)
                auto condition = ancestor_map->condition();
                if (SymEngine::is_a<SymEngine::StrictLessThan>(*condition)) {
                    auto stl = SymEngine::rcp_static_cast<const SymEngine::StrictLessThan>(condition);
                    // get_args()[1] is the strict-less-than RHS, i.e. the bound.
                    auto rhs = stl->get_args()[1];
                    // iter_count = BOUND - init, the map's trip count expression.
                    auto iter_count = symbolic::sub(rhs, ancestor_map->init());
                    if (!SymEngine::is_a<SymEngine::Integer>(*iter_count)) {
                        // Symbolic bound — substitute with block size in extents and bases
                        for (auto& ext : tile_info_.dimensions) {
                            ext = symbolic::simplify(symbolic::subs(ext, iter_count, block_size));
                        }
                        for (auto& base : tile_info_.bases) {
                            base = symbolic::simplify(symbolic::subs(base, iter_count, block_size));
                        }
                    }
                }
            }
        }

        // Also resolve the loop's own bound if symbolic and matches a block size
        // E.g., For k = 0..K where K is a parameter — check if K can be resolved
        // from any GPU ancestor map
        // (Already handled above: if K appears as a GPU map bound, it's substituted)

        // Criterion: All extents must now be provably integer
        for (auto& ext : tile_info_.dimensions) {
            if (!SymEngine::is_a<SymEngine::Integer>(*ext)) {
                return false;
            }
        }

        // Criterion: At least one cooperative dimension
        bool has_cooperative_dim = false;
        for (auto* node : ancestors) {
            if (auto* ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
                if (!gpu::is_gpu_schedule(ancestor_map->schedule_type())) {
                    continue;
                }
                // A GPU dim is cooperative if its indvar does NOT appear in any tile base
                bool appears_in_bases = false;
                for (auto& base : tile_info_.bases) {
                    if (symbolic::uses(base, ancestor_map->indvar())) {
                        appears_in_bases = true;
                        break;
                    }
                }
                if (!appears_in_bases) {
                    has_cooperative_dim = true;
                    break;
                }
            }
        }
        if (!has_cooperative_dim) {
            return false;
        }
    } else {
        // CPU path: All extents must be provably integer
        for (auto& ext : tile_info_.dimensions) {
            if (!SymEngine::is_a<SymEngine::Integer>(*ext)) {
                return false;
            }
        }
    }

    return true;
}
184

185
// Performs the transformation prepared by can_be_applied():
//   1. allocates a linearized local buffer sized to the tile's varying
//      dimensions (extent > 1; extent-1 dims are collapsed),
//   2. emits a copy of the tile from `container_` into the buffer before the
//      loop — cooperatively across GPU threads (barrier / strided copy /
//      barrier) on the NV-shared path, or as a nest of sequential maps on
//      the CPU path,
//   3. rewrites all in-loop memlets belonging to the tile group to index the
//      local buffer instead, and renames access nodes whose memlets were all
//      rewritten,
//   4. invalidates analyses and runs DCE + sequence fusion to a fixpoint.
// Precondition: can_be_applied() returned true (tile_info_/group_memlets_
// are populated). Throws InvalidSDFGException if the loop's parent is not a
// Sequence.
void InLocalStorage::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
    auto& sdfg = builder.subject();
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();

    auto parent_node = scope_analysis.parent_scope(&loop_);
    auto parent = dynamic_cast<structured_control_flow::Sequence*>(parent_node);
    if (!parent) {
        throw InvalidSDFGException("InLocalStorage: Parent of loop must be a Sequence!");
    }

    // Get type information
    auto& type = sdfg.type(this->container_);
    types::Scalar scalar_type(type.primitive_type());

    // Create local buffer name
    local_name_ = builder.find_new_name("__daisy_in_local_storage_" + this->container_);

    // Collect varying dimensions (extent > 1) and compute buffer layout
    std::vector<size_t> varying_dims;
    std::vector<symbolic::Expression> dim_sizes;
    for (size_t d = 0; d < tile_info_.dimensions.size(); d++) {
        auto& dim_size = tile_info_.dimensions.at(d);
        if (!symbolic::eq(dim_size, symbolic::integer(1))) {
            varying_dims.push_back(d);
            dim_sizes.push_back(dim_size);
        }
    }

    // Compute total buffer size
    symbolic::Expression total_size = symbolic::integer(1);
    for (auto& ds : dim_sizes) {
        total_size = symbolic::mul(total_size, ds);
    }

    // Helper: build linearized local index from per-dimension symbolic expressions
    // Row-major: last dimension varies fastest. Captures dim_sizes by reference,
    // so `indices` must be ordered to match dim_sizes (i.e. varying_dims order).
    auto linearize_exprs = [&](const std::vector<symbolic::Expression>& indices) -> symbolic::Expression {
        symbolic::Expression linear_idx = symbolic::integer(0);
        symbolic::Expression stride = symbolic::integer(1);
        for (int i = indices.size() - 1; i >= 0; i--) {
            linear_idx = symbolic::add(linear_idx, symbolic::mul(indices[i], stride));
            stride = symbolic::mul(stride, dim_sizes[i]);
        }
        return linear_idx;
    };

    // Helper: build linearized local index from per-dimension indvars (symbols)
    auto linearize = [&](const std::vector<symbolic::Symbol>& indvars) -> symbolic::Expression {
        std::vector<symbolic::Expression> exprs(indvars.begin(), indvars.end());
        return linearize_exprs(exprs);
    };

    // Helper: build source subset (base[d] + copy_indvar[d]) for original container
    // `copy_indices` holds one index per VARYING dimension (var_idx advances only
    // on those); extent-1 dimensions are pinned at their base. For pointers the
    // result is a single flattened index using the tile's strides and offset.
    bool is_pointer = (type.type_id() == types::TypeID::Pointer);
    auto build_original_subset = [&](const std::vector<symbolic::Expression>& copy_indices) -> data_flow::Subset {
        std::vector<symbolic::Expression> full_indices;
        size_t var_idx = 0;
        for (size_t d = 0; d < tile_info_.dimensions.size(); d++) {
            if (!symbolic::eq(tile_info_.dimensions.at(d), symbolic::integer(1))) {
                full_indices.push_back(symbolic::add(tile_info_.bases.at(d), copy_indices.at(var_idx++)));
            } else {
                full_indices.push_back(tile_info_.bases.at(d));
            }
        }

        if (is_pointer) {
            symbolic::Expression linear = tile_info_.offset;
            for (size_t d = 0; d < full_indices.size(); d++) {
                linear = symbolic::add(linear, symbolic::mul(tile_info_.strides.at(d), full_indices.at(d)));
            }
            return {linear};
        } else {
            return data_flow::Subset(full_indices.begin(), full_indices.end());
        }
    };

    // ==================================================================
    // Branch: GPU cooperative path vs CPU sequential path
    // ==================================================================
    if (storage_type_.is_nv_shared()) {
        // ============================================================
        // GPU COOPERATIVE PATH
        // ============================================================
        auto ancestors = scope_analysis.ancestor_scopes(&loop_);

        // Collect cooperative GPU dimensions (indvar not in tile bases)
        // — mirrors the selection logic in can_be_applied().
        struct CoopDim {
            symbolic::Symbol indvar;
            symbolic::Integer block_size;
            gpu::GPUDimension dimension;
        };
        std::vector<CoopDim> coop_dims;

        for (auto* node : ancestors) {
            if (auto* ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
                if (!gpu::is_gpu_schedule(ancestor_map->schedule_type())) {
                    continue;
                }
                bool appears_in_bases = false;
                for (auto& base : tile_info_.bases) {
                    if (symbolic::uses(base, ancestor_map->indvar())) {
                        appears_in_bases = true;
                        break;
                    }
                }
                if (!appears_in_bases) {
                    coop_dims.push_back(
                        {ancestor_map->indvar(),
                         gpu::gpu_block_size(ancestor_map->schedule_type()),
                         gpu::gpu_dimension(ancestor_map->schedule_type())}
                    );
                }
            }
        }

        // Compute total cooperative thread count
        symbolic::Expression total_coop_threads = symbolic::integer(1);
        for (auto& cd : coop_dims) {
            total_coop_threads = symbolic::mul(total_coop_threads, cd.block_size);
        }

        // Create the local buffer with NV_Shared storage
        types::Array buffer_type(storage_type_, 0, {}, scalar_type, total_size);
        builder.add_container(local_name_, buffer_type);

        // Emit: barrier → guarded cooperative copy → barrier → loop
        // 1. Barrier before copy
        auto& barrier_block1 = builder.add_block_before(*parent, loop_, {}, loop_.debug_info());
        builder.add_library_node<data_flow::BarrierLocalNode>(barrier_block1, {});

        // 2. Cooperative copy with if_else guard
        // Flatten cooperative thread index: coop_flat = sum(indvar[i] * product(block_size[j] for j>i))
        symbolic::Expression coop_flat = symbolic::integer(0);
        symbolic::Expression coop_stride = symbolic::integer(1);
        for (int i = coop_dims.size() - 1; i >= 0; i--) {
            coop_flat = symbolic::add(coop_flat, symbolic::mul(coop_dims[i].indvar, coop_stride));
            coop_stride = symbolic::mul(coop_stride, coop_dims[i].block_size);
        }

        // Each thread loads elements strided by total_coop_threads
        // Thread t loads elements: t, t + total_threads, t + 2*total_threads, ...
        // We emit a loop: for (idx = coop_flat; idx < total_size; idx += total_coop_threads)
        auto idx_name = builder.find_new_name("__daisy_ils_coop_" + this->container_);
        types::Scalar idx_type(types::PrimitiveType::UInt64);
        builder.add_container(idx_name, idx_type);
        auto idx_var = symbolic::symbol(idx_name);

        auto copy_init = coop_flat;
        auto copy_condition = symbolic::Lt(idx_var, total_size);
        auto copy_update = symbolic::add(idx_var, total_coop_threads);

        auto& copy_loop = builder.add_map_before(
            *parent,
            loop_,
            idx_var,
            copy_condition,
            copy_init,
            copy_update,
            structured_control_flow::ScheduleType_Sequential::create(),
            {},
            loop_.debug_info()
        );

        // Decompose flat idx back into per-dimension indices for source subset
        // idx maps to varying_dims in row-major order
        auto& copy_scope = copy_loop.root();
        auto& copy_block = builder.add_block(copy_scope);
        auto& copy_src = builder.add_access(copy_block, this->container_);
        auto& copy_dst = builder.add_access(copy_block, local_name_);
        auto& copy_tasklet = builder.add_tasklet(copy_block, data_flow::TaskletCode::assign, "_out", {"_in"});

        // Decompose idx_var into per-dim indices
        std::vector<symbolic::Expression> copy_indices;
        symbolic::Expression remainder = idx_var;
        for (size_t i = 0; i < dim_sizes.size(); i++) {
            if (i < dim_sizes.size() - 1) {
                // integer division: idx / (product of remaining dims)
                symbolic::Expression divisor = symbolic::integer(1);
                for (size_t j = i + 1; j < dim_sizes.size(); j++) {
                    divisor = symbolic::mul(divisor, dim_sizes[j]);
                }
                auto quotient = symbolic::div(remainder, divisor);
                copy_indices.push_back(quotient);
                remainder = symbolic::mod(remainder, divisor);
            } else {
                // Last dimension takes whatever remains.
                copy_indices.push_back(remainder);
            }
        }

        auto copy_src_subset = build_original_subset(copy_indices);
        data_flow::Subset copy_dst_subset = {idx_var};

        builder.add_computational_memlet(copy_block, copy_src, copy_tasklet, "_in", copy_src_subset, type);
        builder.add_computational_memlet(copy_block, copy_tasklet, "_out", copy_dst, copy_dst_subset, buffer_type);

        // 3. Barrier after copy
        auto& barrier_block2 = builder.add_block_before(*parent, loop_, {}, loop_.debug_info());
        builder.add_library_node<data_flow::BarrierLocalNode>(barrier_block2, {});
    } else {
        // ============================================================
        // CPU SEQUENTIAL PATH
        // ============================================================
        // Create the local buffer with specified storage type
        types::Array buffer_type(storage_type_, 0, {}, scalar_type, total_size);
        builder.add_container(local_name_, buffer_type);

        // Build one sequential map per varying dimension; the first is
        // inserted before loop_, the rest are nested inside it.
        std::vector<symbolic::Symbol> copy_indvars;
        structured_control_flow::Sequence* copy_scope = parent;
        bool first_copy_loop = true;

        for (size_t i = 0; i < varying_dims.size(); i++) {
            size_t d = varying_dims[i];
            auto indvar_name = builder.find_new_name("__daisy_ils_" + this->container_ + "_d" + std::to_string(d));
            types::Scalar indvar_type(types::PrimitiveType::UInt64);
            builder.add_container(indvar_name, indvar_type);
            auto indvar = symbolic::symbol(indvar_name);
            copy_indvars.push_back(indvar);

            auto init = symbolic::integer(0);
            auto condition = symbolic::Lt(indvar, dim_sizes[i]);
            auto update = symbolic::add(indvar, symbolic::integer(1));

            if (first_copy_loop) {
                auto& copy_loop = builder.add_map_before(
                    *copy_scope,
                    loop_,
                    indvar,
                    condition,
                    init,
                    update,
                    structured_control_flow::ScheduleType_Sequential::create(),
                    {},
                    loop_.debug_info()
                );
                copy_scope = &copy_loop.root();
                first_copy_loop = false;
            } else {
                auto& copy_loop = builder.add_map(
                    *copy_scope,
                    indvar,
                    condition,
                    init,
                    update,
                    structured_control_flow::ScheduleType_Sequential::create(),
                    {},
                    loop_.debug_info()
                );
                copy_scope = &copy_loop.root();
            }
        }

        // Create copy block
        auto& copy_block = builder.add_block(*copy_scope);
        auto& copy_src = builder.add_access(copy_block, this->container_);
        auto& copy_dst = builder.add_access(copy_block, local_name_);
        auto& copy_tasklet = builder.add_tasklet(copy_block, data_flow::TaskletCode::assign, "_out", {"_in"});

        std::vector<symbolic::Expression> copy_exprs(copy_indvars.begin(), copy_indvars.end());
        auto copy_src_subset = build_original_subset(copy_exprs);
        data_flow::Subset copy_dst_subset = {linearize(copy_indvars)};

        builder.add_computational_memlet(copy_block, copy_src, copy_tasklet, "_in", copy_src_subset, type);
        types::Array buffer_type_ref(storage_type_, 0, {}, scalar_type, total_size);
        builder.add_computational_memlet(copy_block, copy_tasklet, "_out", copy_dst, copy_dst_subset, buffer_type_ref);
    }

    // ==================================================================
    // Update accesses in the main loop to use the local buffer
    // ==================================================================
    types::Array buffer_type(storage_type_, 0, {}, scalar_type, total_size);
    // NOTE(review): mla is queried below while the SDFG is being mutated;
    // presumably MemoryLayoutAnalysis results stay valid until
    // invalidate_all() at the end — confirm against the analysis contract.
    auto& mla = analysis_manager.get<analysis::MemoryLayoutAnalysis>();

    // Recursive helper to traverse all blocks in the loop body
    std::function<void(structured_control_flow::ControlFlowNode&)> rewrite_accesses;
    rewrite_accesses = [&](structured_control_flow::ControlFlowNode& node) {
        if (auto* block = dynamic_cast<structured_control_flow::Block*>(&node)) {
            auto& dfg = block->dataflow();
            for (auto* access : dfg.data_nodes()) {
                if (access->data() != this->container_) continue;
                bool all_rewritten = true;
                // Rewrite outgoing memlets (reads from this access node)
                for (auto& memlet : dfg.out_edges(*access)) {
                    // Memlets outside the tile group are left untouched.
                    if (group_memlets_.count(&memlet) == 0) {
                        all_rewritten = false;
                        continue;
                    }
                    auto* acc = mla.access(memlet);
                    if (acc && acc->subset.size() == tile_info_.dimensions.size()) {
                        // Local index per varying dim = global index - tile base.
                        std::vector<symbolic::Expression> local_indices;
                        for (size_t d = 0; d < tile_info_.dimensions.size(); d++) {
                            if (!symbolic::eq(tile_info_.dimensions.at(d), symbolic::integer(1))) {
                                local_indices.push_back(symbolic::sub(acc->subset.at(d), tile_info_.bases.at(d)));
                            }
                        }
                        symbolic::Expression linear_idx = linearize_exprs(local_indices);
                        memlet.set_subset({linear_idx});
                        memlet.set_base_type(buffer_type);
                    }
                }
                // Rewrite incoming memlets (writes to this access node)
                // NOTE(review): can_be_applied() rejects containers written in
                // the loop, so this branch appears unreachable (coverage: 0 hits)
                // — kept for symmetry/safety, presumably.
                for (auto& memlet : dfg.in_edges(*access)) {
                    if (group_memlets_.count(&memlet) == 0) {
                        all_rewritten = false;
                        continue;
                    }
                    auto* acc = mla.access(memlet);
                    if (acc && acc->subset.size() == tile_info_.dimensions.size()) {
                        std::vector<symbolic::Expression> local_indices;
                        for (size_t d = 0; d < tile_info_.dimensions.size(); d++) {
                            if (!symbolic::eq(tile_info_.dimensions.at(d), symbolic::integer(1))) {
                                local_indices.push_back(symbolic::sub(acc->subset.at(d), tile_info_.bases.at(d)));
                            }
                        }
                        symbolic::Expression linear_idx = linearize_exprs(local_indices);
                        memlet.set_subset({linear_idx});
                        memlet.set_base_type(buffer_type);
                    }
                }
                // Rename the access node only if all its memlets belong to our group
                if (all_rewritten) {
                    access->data(local_name_);
                }
            }
        } else if (auto* seq = dynamic_cast<structured_control_flow::Sequence*>(&node)) {
            for (size_t i = 0; i < seq->size(); i++) {
                rewrite_accesses(seq->at(i).first);
            }
        } else if (auto* loop = dynamic_cast<structured_control_flow::StructuredLoop*>(&node)) {
            rewrite_accesses(loop->root());
        } else if (auto* if_else = dynamic_cast<structured_control_flow::IfElse*>(&node)) {
            for (size_t i = 0; i < if_else->size(); i++) {
                rewrite_accesses(if_else->at(i).first);
            }
        }
    };
    rewrite_accesses(loop_.root());

    // Cleanup
    analysis_manager.invalidate_all();

    // Run DCE + sequence fusion to a fixpoint to tidy up the emitted CFG.
    passes::SequenceFusion sf_pass;
    passes::DeadCFGElimination dce_pass;
    bool applies = false;
    do {
        applies = false;
        applies |= dce_pass.run(builder, analysis_manager);
        applies |= sf_pass.run(builder, analysis_manager);
    } while (applies);
}
533

534
// Serializes this transformation into `j`: the loop and access node go into
// a "subgraph" object keyed "0"/"1" (with element ids and type tags), plus
// the transformation name and the container name.
// Throws std::runtime_error for loop kinds other than For/Map.
void InLocalStorage::to_json(nlohmann::json& j) const {
    // Tag the loop kind based on its dynamic type.
    std::string kind;
    if (dynamic_cast<structured_control_flow::For*>(&loop_) != nullptr) {
        kind = "for";
    } else if (dynamic_cast<structured_control_flow::Map*>(&loop_) != nullptr) {
        kind = "map";
    } else {
        throw std::runtime_error("Unsupported loop type for serialization of loop: " + loop_.indvar()->get_name());
    }

    nlohmann::json loop_entry = {{"element_id", this->loop_.element_id()}, {"type", kind}};
    nlohmann::json access_entry = {{"element_id", this->access_node_.element_id()}, {"type", "access_node"}};

    j["subgraph"] = {{"0", loop_entry}, {"1", access_entry}};
    j["transformation_type"] = this->name();
    j["container"] = container_;
}
550

551
// Deserializes an InLocalStorage transformation from a JSON descriptor
// (the inverse of to_json): looks up the loop ("subgraph"/"0") and access
// node ("subgraph"/"1") by element id in the builder.
// Throws InvalidTransformationDescriptionException if an element is missing
// or has the wrong type, and nlohmann::json::out_of_range for malformed
// descriptors (missing keys).
InLocalStorage InLocalStorage::from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& desc) {
    // Use at() throughout: const operator[] on nlohmann::json has undefined
    // behavior for missing keys, while at() throws json::out_of_range.
    auto loop_id = desc.at("subgraph").at("0").at("element_id").get<size_t>();
    auto element = builder.find_element_by_id(loop_id);
    if (!element) {
        throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(loop_id) + " not found.");
    }
    auto loop = dynamic_cast<structured_control_flow::StructuredLoop*>(element);
    if (!loop) {
        throw InvalidTransformationDescriptionException(
            "Element with ID " + std::to_string(loop_id) + " is not a structured loop."
        );
    }

    // Extract the access-node id once instead of re-reading the descriptor
    // in the error path.
    auto access_node_id = desc.at("subgraph").at("1").at("element_id").get<size_t>();
    auto access_node = dynamic_cast<data_flow::AccessNode*>(builder.find_element_by_id(access_node_id));
    if (!access_node) {
        throw InvalidTransformationDescriptionException(
            "Access node with ID " + std::to_string(access_node_id) + " not found."
        );
    }

    return InLocalStorage(*loop, *access_node);
}
575

576
} // namespace transformations
577
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc