• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 25220149405

01 May 2026 03:23PM UTC coverage: 64.643% (+0.1%) from 64.532%
25220149405

push

github

web-flow
Merge pull request #695 from daisytuner/local-storage-indices

fixes subset updates for local storage transformations

70 of 84 new or added lines in 2 files covered. (83.33%)

2 existing lines in 1 file now uncovered.

31329 of 48465 relevant lines covered (64.64%)

1395.44 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

91.41
/opt/src/transformations/out_local_storage.cpp
1
#include "sdfg/transformations/out_local_storage.h"
2

3
#include <cstddef>
4
#include <functional>
5
#include <string>
6

7
#include "sdfg/analysis/memory_layout_analysis.h"
8
#include "sdfg/analysis/scope_analysis.h"
9
#include "sdfg/analysis/users.h"
10
#include "sdfg/builder/structured_sdfg_builder.h"
11
#include "sdfg/data_flow/access_node.h"
12
#include "sdfg/data_flow/library_nodes/barrier_local_node.h"
13
#include "sdfg/data_flow/memlet.h"
14
#include "sdfg/passes/structured_control_flow/dead_cfg_elimination.h"
15
#include "sdfg/passes/structured_control_flow/sequence_fusion.h"
16
#include "sdfg/structured_control_flow/if_else.h"
17
#include "sdfg/structured_control_flow/sequence.h"
18
#include "sdfg/structured_control_flow/structured_loop.h"
19
#include "sdfg/symbolic/symbolic.h"
20
#include "sdfg/targets/gpu/gpu_schedule_type.h"
21
#include "sdfg/types/array.h"
22
#include "sdfg/types/pointer.h"
23
#include "sdfg/types/scalar.h"
24

25
namespace sdfg {
26
namespace transformations {
27

28
// Constructs the transformation for `loop` and the container referenced by
// `access_node`. Initializes loop_, access_node_ and storage_type_ from the
// arguments and container_ from access_node.data(). No validation is done
// here; applicability is decided by can_be_applied().
OutLocalStorage::OutLocalStorage(
29
    structured_control_flow::StructuredLoop& loop,
30
    const data_flow::AccessNode& access_node,
31
    const types::StorageType& storage_type
32
)
33
    : loop_(loop), access_node_(access_node), container_(access_node.data()), storage_type_(storage_type) {};
25✔
34

35
// Returns the fixed identifier string of this transformation.
std::string OutLocalStorage::name() const { return "OutLocalStorage"; };
5✔
36

37
// Decides whether the transformation can be applied to loop_ / container_ and,
// as a side effect, (re)computes tile_info_ for a later apply().
//
// Returns false when any of the following holds:
//   - container_ does not exist in the SDFG,
//   - container_ is not used, or is never written, inside the loop body,
//   - the container type is neither Scalar, Pointer nor Array,
//   - MemoryLayoutAnalysis yields no tile for (loop_, container_) or the tile
//     has no approximated extents,
//   - some tile extent is not a SymEngine Integer (for nv_shared storage this
//     is checked after substituting GPU block sizes for symbolic map bounds),
//   - storage is nv_shared and no GPU-scheduled ancestor map has an indvar
//     that is absent from all tile bases.
// Returns true for scalar containers right after the read/write checks, with
// tile_info_.dimensions left empty.
//
// Note: tile_info_ is reset at entry and may be left partially filled when the
// function returns false.
bool OutLocalStorage::can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
23✔
38
    auto& sdfg = builder.subject();
23✔
39
    auto& body = this->loop_.root();
23✔
40

41
    // Discard any tile information left over from a previous call
    tile_info_ = TileInfo{};
23✔
42

43
    // Criterion: Container must exist
44
    if (!sdfg.exists(this->container_)) {
23✔
45
        return false;
×
46
    }
×
47

48
    auto& type = sdfg.type(this->container_);
23✔
49

50
    // Criterion: Container must be used in the loop body
51
    auto& users = analysis_manager.get<analysis::Users>();
23✔
52
    analysis::UsersView body_users(users, body);
23✔
53
    if (body_users.uses(this->container_).empty()) {
23✔
54
        return false;
2✔
55
    }
2✔
56

57
    // Criterion: Container must have writes (this is OutLocalStorage, not InLocalStorage)
58
    if (body_users.writes(this->container_).empty()) {
21✔
59
        return false;
1✔
60
    }
1✔
61

62
    // Determine if container is also read (read-write vs write-only)
63
    tile_info_.has_read = !body_users.reads(this->container_).empty();
20✔
64

65
    // Handle scalar containers: no tile needed, dimensions stay empty
66
    if (type.type_id() == types::TypeID::Scalar) {
20✔
67
        return true;
1✔
68
    }
1✔
69

70
    // For Array/Pointer types: use MemoryLayoutAnalysis tile API
71
    if (type.type_id() != types::TypeID::Pointer && type.type_id() != types::TypeID::Array) {
19✔
72
        return false;
×
73
    }
×
74

75
    auto& mla = analysis_manager.get<analysis::MemoryLayoutAnalysis>();
19✔
76
    auto* tile = mla.tile(loop_, this->container_);
19✔
77
    if (!tile) {
19✔
78
        return false;
1✔
79
    }
1✔
80

81
    // Get overapproximated extents (integer upper bounds)
82
    auto extents = tile->extents_approx();
18✔
83
    if (extents.empty()) {
18✔
84
        return false;
×
85
    }
×
86

87
    // Store tile info (before substitution, bases/strides stay symbolic)
88
    tile_info_.dimensions = extents;
18✔
89
    tile_info_.bases = tile->min_subset;
18✔
90
    tile_info_.strides =
18✔
91
        std::vector<symbolic::Expression>(tile->layout.strides().begin(), tile->layout.strides().end());
18✔
92
    tile_info_.offset = tile->layout.offset();
18✔
93

94
    // GPU shared memory: resolve symbolic extents using GPU block sizes and
95
    // require at least one cooperative dimension
96
    if (storage_type_.is_nv_shared()) {
18✔
97
        auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
6✔
98
        auto ancestors = scope_analysis.ancestor_scopes(&loop_);
6✔
99

100
        // Build substitution map: symbolic GPU map bounds → integer block sizes
101
        for (auto* node : ancestors) {
26✔
102
            if (auto* ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
26✔
103
                if (!gpu::is_gpu_schedule(ancestor_map->schedule_type())) {
10✔
104
                    continue;
×
105
                }
×
106
                auto block_size = gpu::gpu_block_size(ancestor_map->schedule_type());
10✔
107
                // Extract symbolic bound from condition: Lt(indvar, BOUND)
108
                auto condition = ancestor_map->condition();
10✔
109
                // NOTE(review): a condition that is not a StrictLessThan is skipped without
                // any substitution; the integer-extent check below then decides the outcome.
                if (SymEngine::is_a<SymEngine::StrictLessThan>(*condition)) {
10✔
110
                    auto stl = SymEngine::rcp_static_cast<const SymEngine::StrictLessThan>(condition);
10✔
111
                    // The second argument is taken as BOUND — assumes the indvar is the first
                    // argument of the relation; TODO confirm SymEngine preserves that order.
                    auto rhs = stl->get_args()[1];
10✔
112
                    auto iter_count = symbolic::sub(rhs, ancestor_map->init());
10✔
113
                    if (!SymEngine::is_a<SymEngine::Integer>(*iter_count)) {
10✔
114
                        // Symbolic bound — substitute with block size in extents and bases
115
                        for (auto& ext : tile_info_.dimensions) {
17✔
116
                            ext = symbolic::simplify(symbolic::subs(ext, iter_count, block_size));
17✔
117
                        }
17✔
118
                        for (auto& base : tile_info_.bases) {
17✔
119
                            base = symbolic::simplify(symbolic::subs(base, iter_count, block_size));
17✔
120
                        }
17✔
121
                    }
10✔
122
                }
10✔
123
            }
10✔
124
        }
26✔
125

126
        // Criterion: All extents must now be provably integer
127
        for (auto& ext : tile_info_.dimensions) {
10✔
128
            if (!SymEngine::is_a<SymEngine::Integer>(*ext)) {
10✔
129
                return false;
2✔
130
            }
2✔
131
        }
10✔
132

133
        // Criterion: At least one cooperative dimension
134
        bool has_cooperative_dim = false;
4✔
135
        for (auto* node : ancestors) {
12✔
136
            if (auto* ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
12✔
137
                if (!gpu::is_gpu_schedule(ancestor_map->schedule_type())) {
6✔
138
                    continue;
×
139
                }
×
140
                // A GPU map counts as cooperative when its indvar occurs in none of the tile bases
                bool appears_in_bases = false;
6✔
141
                for (auto& base : tile_info_.bases) {
9✔
142
                    if (symbolic::uses(base, ancestor_map->indvar())) {
9✔
143
                        appears_in_bases = true;
2✔
144
                        break;
2✔
145
                    }
2✔
146
                }
9✔
147
                if (!appears_in_bases) {
6✔
148
                    has_cooperative_dim = true;
4✔
149
                    break;
4✔
150
                }
4✔
151
            }
6✔
152
        }
12✔
153
        if (!has_cooperative_dim) {
4✔
154
            return false;
×
155
        }
×
156
    } else {
12✔
157
        // CPU path: All extents must be provably integer
158
        for (auto& ext : tile_info_.dimensions) {
17✔
159
            if (!SymEngine::is_a<SymEngine::Integer>(*ext)) {
17✔
160
                return false;
×
161
            }
×
162
        }
17✔
163
    }
12✔
164

165
    return true;
16✔
166
}
18✔
167

168
void OutLocalStorage::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
15✔
169
    auto& sdfg = builder.subject();
15✔
170
    auto& users = analysis_manager.get<analysis::Users>();
15✔
171
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
15✔
172

173
    auto parent_node = scope_analysis.parent_scope(&loop_);
15✔
174
    auto parent = dynamic_cast<structured_control_flow::Sequence*>(parent_node);
15✔
175
    if (!parent) {
15✔
176
        throw InvalidSDFGException("OutLocalStorage: Parent of loop must be a Sequence!");
×
177
    }
×
178

179
    // Get type information
180
    auto& type = sdfg.type(this->container_);
15✔
181
    types::Scalar scalar_type(type.primitive_type());
15✔
182

183
    // Create local buffer name
184
    local_name_ = "__daisy_out_local_storage_" + this->container_;
15✔
185

186
    // ========================================================================
187
    // SCALAR PATH: tile_info_.dimensions is empty
188
    // ========================================================================
189
    if (tile_info_.dimensions.empty()) {
15✔
190
        // Create scalar local buffer
191
        builder.add_container(local_name_, scalar_type);
1✔
192

193
        // Get the access subset from the first user (all scalar, so empty subset)
194
        analysis::UsersView body_users(users, loop_.root());
1✔
195
        auto accesses = body_users.uses(this->container_);
1✔
196
        auto first_access = accesses.at(0);
1✔
197
        auto first_subset = first_access->subsets().at(0);
1✔
198

199
        // Init block (copy from container to local) - before loop
200
        if (tile_info_.has_read) {
1✔
201
            auto& init_block = builder.add_block_before(*parent, loop_, {}, loop_.debug_info());
1✔
202
            auto& init_src = builder.add_access(init_block, this->container_);
1✔
203
            auto& init_dst = builder.add_access(init_block, local_name_);
1✔
204
            auto& init_tasklet = builder.add_tasklet(init_block, data_flow::TaskletCode::assign, "_out", {"_in"});
1✔
205
            builder.add_computational_memlet(init_block, init_src, init_tasklet, "_in", first_subset, type);
1✔
206
            builder.add_computational_memlet(init_block, init_tasklet, "_out", init_dst, {}, scalar_type);
1✔
207
        }
1✔
208

209
        // Writeback block (copy from local to container) - after loop
210
        {
1✔
211
            auto& wb_block = builder.add_block_after(*parent, loop_, {}, loop_.debug_info());
1✔
212
            auto& wb_src = builder.add_access(wb_block, local_name_);
1✔
213
            auto& wb_dst = builder.add_access(wb_block, this->container_);
1✔
214
            auto& wb_tasklet = builder.add_tasklet(wb_block, data_flow::TaskletCode::assign, "_out", {"_in"});
1✔
215
            builder.add_computational_memlet(wb_block, wb_src, wb_tasklet, "_in", {}, scalar_type);
1✔
216
            builder.add_computational_memlet(wb_block, wb_tasklet, "_out", wb_dst, first_subset, type);
1✔
217
        }
1✔
218

219
        // Rewrite body accesses to use scalar local
220
        for (auto* user : body_users.uses(this->container_)) {
2✔
221
            auto element = user->element();
2✔
222
            if (auto access = dynamic_cast<data_flow::AccessNode*>(element)) {
2✔
223
                for (auto& iedge : access->get_parent().in_edges(*access)) {
2✔
224
                    auto memlet = &iedge;
1✔
225
                    memlet->set_subset({});
1✔
226
                    memlet->set_base_type(scalar_type);
1✔
227
                }
1✔
228
                for (auto& oedge : access->get_parent().out_edges(*access)) {
2✔
229
                    auto memlet = &oedge;
1✔
230
                    memlet->set_subset({});
1✔
231
                    memlet->set_base_type(scalar_type);
1✔
232
                }
1✔
233
            }
2✔
234
        }
2✔
235

236
        // Replace container name in the loop body
237
        loop_.replace(symbolic::symbol(this->container_), symbolic::symbol(local_name_));
1✔
238
    }
1✔
239
    // ========================================================================
240
    // ARRAY PATH: tile_info_.dimensions is non-empty
241
    // ========================================================================
242
    else {
14✔
243
        // Collect varying dimensions (extent > 1) and compute buffer layout
244
        std::vector<size_t> varying_dims;
14✔
245
        std::vector<symbolic::Expression> dim_sizes;
14✔
246
        for (size_t d = 0; d < tile_info_.dimensions.size(); d++) {
35✔
247
            auto& dim_size = tile_info_.dimensions.at(d);
21✔
248
            if (!symbolic::eq(dim_size, symbolic::integer(1))) {
21✔
249
                varying_dims.push_back(d);
16✔
250
                dim_sizes.push_back(dim_size);
16✔
251
            }
16✔
252
        }
21✔
253

254
        // Compute total buffer size
255
        symbolic::Expression total_size = symbolic::integer(1);
14✔
256
        for (auto& ds : dim_sizes) {
16✔
257
            total_size = symbolic::mul(total_size, ds);
16✔
258
        }
16✔
259

260
        // Create the local buffer with specified storage type
261
        types::Array buffer_type(storage_type_, 0, {}, scalar_type, total_size);
14✔
262
        builder.add_container(local_name_, buffer_type);
14✔
263

264
        // Helper: build linearized local index from per-dimension expressions
265
        auto linearize_exprs = [&](const std::vector<symbolic::Expression>& indices) -> symbolic::Expression {
39✔
266
            symbolic::Expression linear_idx = symbolic::integer(0);
39✔
267
            symbolic::Expression stride = symbolic::integer(1);
39✔
268
            for (int i = indices.size() - 1; i >= 0; i--) {
86✔
269
                linear_idx = symbolic::add(linear_idx, symbolic::mul(indices[i], stride));
47✔
270
                stride = symbolic::mul(stride, dim_sizes[i]);
47✔
271
            }
47✔
272
            return linear_idx;
39✔
273
        };
39✔
274

275
        // Helper: build linearized local index from per-dimension indvars (symbols)
276
        auto linearize = [&](const std::vector<symbolic::Symbol>& indvars) -> symbolic::Expression {
17✔
277
            std::vector<symbolic::Expression> exprs(indvars.begin(), indvars.end());
17✔
278
            return linearize_exprs(exprs);
17✔
279
        };
17✔
280

281
        // Helper: build source subset (base[d] + copy_indvar[d]) for original container
282
        bool is_pointer = (type.type_id() == types::TypeID::Pointer);
14✔
283
        auto build_original_subset = [&](const std::vector<symbolic::Expression>& copy_indices) -> data_flow::Subset {
22✔
284
            std::vector<symbolic::Expression> full_indices;
22✔
285
            size_t var_idx = 0;
22✔
286
            for (size_t d = 0; d < tile_info_.dimensions.size(); d++) {
55✔
287
                if (!symbolic::eq(tile_info_.dimensions.at(d), symbolic::integer(1))) {
33✔
288
                    full_indices.push_back(symbolic::add(tile_info_.bases.at(d), copy_indices.at(var_idx++)));
26✔
289
                } else {
26✔
290
                    full_indices.push_back(tile_info_.bases.at(d));
7✔
291
                }
7✔
292
            }
33✔
293

294
            if (is_pointer) {
22✔
295
                symbolic::Expression linear = tile_info_.offset;
22✔
296
                for (size_t d = 0; d < full_indices.size(); d++) {
55✔
297
                    linear = symbolic::add(linear, symbolic::mul(tile_info_.strides.at(d), full_indices.at(d)));
33✔
298
                }
33✔
299
                return {linear};
22✔
300
            } else {
22✔
301
                return data_flow::Subset(full_indices.begin(), full_indices.end());
×
302
            }
×
303
        };
22✔
304

305
        if (storage_type_.is_nv_shared()) {
14✔
306
            // ============================================================
307
            // GPU COOPERATIVE PATH
308
            // ============================================================
309
            auto ancestors = scope_analysis.ancestor_scopes(&loop_);
4✔
310

311
            // Collect cooperative GPU dimensions
312
            struct CoopDim {
4✔
313
                symbolic::Symbol indvar;
4✔
314
                symbolic::Integer block_size;
4✔
315
                gpu::GPUDimension dimension;
4✔
316
            };
4✔
317
            std::vector<CoopDim> coop_dims;
4✔
318

319
            for (auto* node : ancestors) {
20✔
320
                if (auto* ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
20✔
321
                    if (!gpu::is_gpu_schedule(ancestor_map->schedule_type())) {
8✔
322
                        continue;
×
323
                    }
×
324
                    bool appears_in_bases = false;
8✔
325
                    for (auto& base : tile_info_.bases) {
11✔
326
                        if (symbolic::uses(base, ancestor_map->indvar())) {
11✔
327
                            appears_in_bases = true;
3✔
328
                            break;
3✔
329
                        }
3✔
330
                    }
11✔
331
                    if (!appears_in_bases) {
8✔
332
                        coop_dims.push_back(
5✔
333
                            {ancestor_map->indvar(),
5✔
334
                             gpu::gpu_block_size(ancestor_map->schedule_type()),
5✔
335
                             gpu::gpu_dimension(ancestor_map->schedule_type())}
5✔
336
                        );
5✔
337
                    }
5✔
338
                }
8✔
339
            }
20✔
340

341
            // Compute total cooperative thread count
342
            symbolic::Expression total_coop_threads = symbolic::integer(1);
4✔
343
            for (auto& cd : coop_dims) {
5✔
344
                total_coop_threads = symbolic::mul(total_coop_threads, cd.block_size);
5✔
345
            }
5✔
346

347
            // Flatten cooperative thread index
348
            symbolic::Expression coop_flat = symbolic::integer(0);
4✔
349
            symbolic::Expression coop_stride = symbolic::integer(1);
4✔
350
            for (int i = coop_dims.size() - 1; i >= 0; i--) {
9✔
351
                coop_flat = symbolic::add(coop_flat, symbolic::mul(coop_dims[i].indvar, coop_stride));
5✔
352
                coop_stride = symbolic::mul(coop_stride, coop_dims[i].block_size);
5✔
353
            }
5✔
354

355
            // INIT: barrier → cooperative copy-in → barrier (if has_read)
356
            if (tile_info_.has_read) {
4✔
357
                // Barrier before init
358
                auto& barrier_block1 = builder.add_block_before(*parent, loop_, {}, loop_.debug_info());
1✔
359
                builder.add_library_node<data_flow::BarrierLocalNode>(barrier_block1, {});
1✔
360

361
                // Cooperative copy-in loop
362
                auto idx_name = "__daisy_ols_coop_init_" + this->container_;
1✔
363
                types::Scalar idx_type(types::PrimitiveType::UInt64);
1✔
364
                builder.add_container(idx_name, idx_type);
1✔
365
                auto idx_var = symbolic::symbol(idx_name);
1✔
366

367
                auto& init_loop = builder.add_map_before(
1✔
368
                    *parent,
1✔
369
                    loop_,
1✔
370
                    idx_var,
1✔
371
                    symbolic::Lt(idx_var, total_size),
1✔
372
                    coop_flat,
1✔
373
                    symbolic::add(idx_var, total_coop_threads),
1✔
374
                    structured_control_flow::ScheduleType_Sequential::create(),
1✔
375
                    {},
1✔
376
                    loop_.debug_info()
1✔
377
                );
1✔
378

379
                auto& init_block = builder.add_block(init_loop.root());
1✔
380
                auto& init_src = builder.add_access(init_block, this->container_);
1✔
381
                auto& init_dst = builder.add_access(init_block, local_name_);
1✔
382
                auto& init_tasklet = builder.add_tasklet(init_block, data_flow::TaskletCode::assign, "_out", {"_in"});
1✔
383

384
                // Decompose idx_var into per-dim indices
385
                std::vector<symbolic::Expression> init_indices;
1✔
386
                symbolic::Expression remainder = idx_var;
1✔
387
                for (size_t i = 0; i < dim_sizes.size(); i++) {
2✔
388
                    if (i < dim_sizes.size() - 1) {
1✔
389
                        symbolic::Expression divisor = symbolic::integer(1);
×
390
                        for (size_t j = i + 1; j < dim_sizes.size(); j++) {
×
391
                            divisor = symbolic::mul(divisor, dim_sizes[j]);
×
392
                        }
×
393
                        init_indices.push_back(symbolic::div(remainder, divisor));
×
394
                        remainder = symbolic::mod(remainder, divisor);
×
395
                    } else {
1✔
396
                        init_indices.push_back(remainder);
1✔
397
                    }
1✔
398
                }
1✔
399

400
                auto init_src_subset = build_original_subset(init_indices);
1✔
401
                builder.add_computational_memlet(init_block, init_src, init_tasklet, "_in", init_src_subset, type);
1✔
402
                builder.add_computational_memlet(init_block, init_tasklet, "_out", init_dst, {idx_var}, buffer_type);
1✔
403

404
                // Barrier after init
405
                auto& barrier_block2 = builder.add_block_before(*parent, loop_, {}, loop_.debug_info());
1✔
406
                builder.add_library_node<data_flow::BarrierLocalNode>(barrier_block2, {});
1✔
407
            }
1✔
408

409
            // WRITEBACK: barrier → cooperative copy-out → barrier
410
            {
4✔
411
                // Barrier before writeback
412
                auto& barrier_block3 = builder.add_block_after(*parent, loop_, {}, loop_.debug_info());
4✔
413
                builder.add_library_node<data_flow::BarrierLocalNode>(barrier_block3, {});
4✔
414

415
                // Cooperative writeback loop
416
                auto idx_name = "__daisy_ols_coop_wb_" + this->container_;
4✔
417
                types::Scalar idx_type(types::PrimitiveType::UInt64);
4✔
418
                builder.add_container(idx_name, idx_type);
4✔
419
                auto idx_var = symbolic::symbol(idx_name);
4✔
420

421
                auto& wb_loop = builder.add_map_after(
4✔
422
                    *parent,
4✔
423
                    loop_,
4✔
424
                    idx_var,
4✔
425
                    symbolic::Lt(idx_var, total_size),
4✔
426
                    coop_flat,
4✔
427
                    symbolic::add(idx_var, total_coop_threads),
4✔
428
                    structured_control_flow::ScheduleType_Sequential::create(),
4✔
429
                    {},
4✔
430
                    loop_.debug_info()
4✔
431
                );
4✔
432

433
                auto& wb_block = builder.add_block(wb_loop.root());
4✔
434
                auto& wb_src = builder.add_access(wb_block, local_name_);
4✔
435
                auto& wb_dst = builder.add_access(wb_block, this->container_);
4✔
436
                auto& wb_tasklet = builder.add_tasklet(wb_block, data_flow::TaskletCode::assign, "_out", {"_in"});
4✔
437

438
                // Decompose idx_var into per-dim indices
439
                std::vector<symbolic::Expression> wb_indices;
4✔
440
                symbolic::Expression remainder = idx_var;
4✔
441
                for (size_t i = 0; i < dim_sizes.size(); i++) {
8✔
442
                    if (i < dim_sizes.size() - 1) {
4✔
443
                        symbolic::Expression divisor = symbolic::integer(1);
×
444
                        for (size_t j = i + 1; j < dim_sizes.size(); j++) {
×
445
                            divisor = symbolic::mul(divisor, dim_sizes[j]);
×
446
                        }
×
447
                        wb_indices.push_back(symbolic::div(remainder, divisor));
×
448
                        remainder = symbolic::mod(remainder, divisor);
×
449
                    } else {
4✔
450
                        wb_indices.push_back(remainder);
4✔
451
                    }
4✔
452
                }
4✔
453

454
                auto wb_dst_subset = build_original_subset(wb_indices);
4✔
455
                builder.add_computational_memlet(wb_block, wb_src, wb_tasklet, "_in", {idx_var}, buffer_type);
4✔
456
                builder.add_computational_memlet(wb_block, wb_tasklet, "_out", wb_dst, wb_dst_subset, type);
4✔
457

458
                // Barrier after writeback
459
                auto& barrier_block4 = builder.add_block_after(*parent, loop_, {}, loop_.debug_info());
4✔
460
                builder.add_library_node<data_flow::BarrierLocalNode>(barrier_block4, {});
4✔
461
            }
4✔
462
        } else {
10✔
463
            // ============================================================
464
            // CPU SEQUENTIAL PATH
465
            // ============================================================
466
            if (tile_info_.has_read) {
10✔
467
                std::vector<symbolic::Symbol> init_indvars;
7✔
468
                structured_control_flow::Sequence* init_scope = parent;
7✔
469
                bool first_init_loop = true;
7✔
470

471
                for (size_t i = 0; i < varying_dims.size(); i++) {
16✔
472
                    size_t d = varying_dims[i];
9✔
473
                    auto indvar_name = "__daisy_ols_init_" + this->container_ + "_d" + std::to_string(d);
9✔
474
                    types::Scalar indvar_type(types::PrimitiveType::UInt64);
9✔
475
                    builder.add_container(indvar_name, indvar_type);
9✔
476
                    auto indvar = symbolic::symbol(indvar_name);
9✔
477
                    init_indvars.push_back(indvar);
9✔
478

479
                    auto init = symbolic::integer(0);
9✔
480
                    auto condition = symbolic::Lt(indvar, dim_sizes[i]);
9✔
481
                    auto update = symbolic::add(indvar, symbolic::integer(1));
9✔
482

483
                    if (first_init_loop) {
9✔
484
                        auto& init_loop = builder.add_map_before(
6✔
485
                            *init_scope,
6✔
486
                            loop_,
6✔
487
                            indvar,
6✔
488
                            condition,
6✔
489
                            init,
6✔
490
                            update,
6✔
491
                            structured_control_flow::ScheduleType_Sequential::create(),
6✔
492
                            {},
6✔
493
                            loop_.debug_info()
6✔
494
                        );
6✔
495
                        init_scope = &init_loop.root();
6✔
496
                        first_init_loop = false;
6✔
497
                    } else {
6✔
498
                        auto& init_loop = builder.add_map(
3✔
499
                            *init_scope,
3✔
500
                            indvar,
3✔
501
                            condition,
3✔
502
                            init,
3✔
503
                            update,
3✔
504
                            structured_control_flow::ScheduleType_Sequential::create(),
3✔
505
                            {},
3✔
506
                            loop_.debug_info()
3✔
507
                        );
3✔
508
                        init_scope = &init_loop.root();
3✔
509
                    }
3✔
510
                }
9✔
511

512
                // Create init copy block
513
                auto& init_block = builder.add_block(*init_scope);
7✔
514
                auto& init_src = builder.add_access(init_block, this->container_);
7✔
515
                auto& init_dst = builder.add_access(init_block, local_name_);
7✔
516
                auto& init_tasklet = builder.add_tasklet(init_block, data_flow::TaskletCode::assign, "_out", {"_in"});
7✔
517

518
                std::vector<symbolic::Expression> init_exprs(init_indvars.begin(), init_indvars.end());
7✔
519
                auto init_src_subset = build_original_subset(init_exprs);
7✔
520
                data_flow::Subset init_dst_subset = {linearize(init_indvars)};
7✔
521

522
                builder.add_computational_memlet(init_block, init_src, init_tasklet, "_in", init_src_subset, type);
7✔
523
                builder
7✔
524
                    .add_computational_memlet(init_block, init_tasklet, "_out", init_dst, init_dst_subset, buffer_type);
7✔
525
            }
7✔
526

527
            // Writeback Maps
528
            {
10✔
529
                std::vector<symbolic::Symbol> wb_indvars;
10✔
530
                structured_control_flow::Sequence* wb_scope = parent;
10✔
531
                bool first_wb_loop = true;
10✔
532

533
                for (size_t i = 0; i < varying_dims.size(); i++) {
22✔
534
                    size_t d = varying_dims[i];
12✔
535
                    auto indvar_name = "__daisy_ols_wb_" + this->container_ + "_d" + std::to_string(d);
12✔
536
                    types::Scalar indvar_type(types::PrimitiveType::UInt64);
12✔
537
                    builder.add_container(indvar_name, indvar_type);
12✔
538
                    auto indvar = symbolic::symbol(indvar_name);
12✔
539
                    wb_indvars.push_back(indvar);
12✔
540

541
                    auto init = symbolic::integer(0);
12✔
542
                    auto condition = symbolic::Lt(indvar, dim_sizes[i]);
12✔
543
                    auto update = symbolic::add(indvar, symbolic::integer(1));
12✔
544

545
                    if (first_wb_loop) {
12✔
546
                        auto& wb_loop = builder.add_map_after(
9✔
547
                            *wb_scope,
9✔
548
                            loop_,
9✔
549
                            indvar,
9✔
550
                            condition,
9✔
551
                            init,
9✔
552
                            update,
9✔
553
                            structured_control_flow::ScheduleType_Sequential::create(),
9✔
554
                            {},
9✔
555
                            loop_.debug_info()
9✔
556
                        );
9✔
557
                        wb_scope = &wb_loop.root();
9✔
558
                        first_wb_loop = false;
9✔
559
                    } else {
9✔
560
                        auto& wb_loop = builder.add_map(
3✔
561
                            *wb_scope,
3✔
562
                            indvar,
3✔
563
                            condition,
3✔
564
                            init,
3✔
565
                            update,
3✔
566
                            structured_control_flow::ScheduleType_Sequential::create(),
3✔
567
                            {},
3✔
568
                            loop_.debug_info()
3✔
569
                        );
3✔
570
                        wb_scope = &wb_loop.root();
3✔
571
                    }
3✔
572
                }
12✔
573

574
                // Create writeback copy block
575
                auto& wb_block = builder.add_block(*wb_scope);
10✔
576
                auto& wb_src = builder.add_access(wb_block, local_name_);
10✔
577
                auto& wb_dst = builder.add_access(wb_block, this->container_);
10✔
578
                auto& wb_tasklet = builder.add_tasklet(wb_block, data_flow::TaskletCode::assign, "_out", {"_in"});
10✔
579

580
                std::vector<symbolic::Expression> wb_exprs(wb_indvars.begin(), wb_indvars.end());
10✔
581
                data_flow::Subset wb_src_subset = {linearize(wb_indvars)};
10✔
582
                auto wb_dst_subset = build_original_subset(wb_exprs);
10✔
583

584
                builder.add_computational_memlet(wb_block, wb_src, wb_tasklet, "_in", wb_src_subset, buffer_type);
10✔
585
                builder.add_computational_memlet(wb_block, wb_tasklet, "_out", wb_dst, wb_dst_subset, type);
10✔
586
            }
10✔
587
        }
10✔
588

589
        // ==================================================================
590
        // Update accesses in the main loop to use the local buffer
591
        // ==================================================================
592
        auto& mla = analysis_manager.get<analysis::MemoryLayoutAnalysis>();
14✔
593

594
        // Recursive helper to traverse all blocks in the loop body
595
        std::function<void(structured_control_flow::ControlFlowNode&)> rewrite_accesses;
14✔
596
        rewrite_accesses = [&](structured_control_flow::ControlFlowNode& node) {
44✔
597
            if (auto* block = dynamic_cast<structured_control_flow::Block*>(&node)) {
44✔
598
                auto& dfg = block->dataflow();
16✔
599
                for (auto* access : dfg.data_nodes()) {
42✔
600
                    if (access->data() != this->container_) continue;
42✔
601
                    // Rewrite outgoing memlets (reads from this access node)
602
                    for (auto& memlet : dfg.out_edges(*access)) {
22✔
603
                        auto* acc = mla.access(memlet);
8✔
604
                        if (acc && acc->subset.size() == tile_info_.dimensions.size()) {
8✔
605
                            std::vector<symbolic::Expression> local_indices;
8✔
606
                            for (size_t d = 0; d < tile_info_.dimensions.size(); d++) {
20✔
607
                                if (!symbolic::eq(tile_info_.dimensions.at(d), symbolic::integer(1))) {
12✔
608
                                    local_indices.push_back(symbolic::sub(acc->subset.at(d), tile_info_.bases.at(d)));
10✔
609
                                }
10✔
610
                            }
12✔
611
                            symbolic::Expression linear_idx = linearize_exprs(local_indices);
8✔
612
                            memlet.set_subset({linear_idx});
8✔
613
                            memlet.set_base_type(buffer_type);
8✔
614
                        }
8✔
615
                    }
8✔
616
                    // Rewrite incoming memlets (writes to this access node)
617
                    for (auto& memlet : dfg.in_edges(*access)) {
22✔
618
                        auto* acc = mla.access(memlet);
14✔
619
                        if (acc && acc->subset.size() == tile_info_.dimensions.size()) {
14✔
620
                            std::vector<symbolic::Expression> local_indices;
14✔
621
                            for (size_t d = 0; d < tile_info_.dimensions.size(); d++) {
35✔
622
                                if (!symbolic::eq(tile_info_.dimensions.at(d), symbolic::integer(1))) {
21✔
623
                                    local_indices.push_back(symbolic::sub(acc->subset.at(d), tile_info_.bases.at(d)));
16✔
624
                                }
16✔
625
                            }
21✔
626
                            symbolic::Expression linear_idx = linearize_exprs(local_indices);
14✔
627
                            memlet.set_subset({linear_idx});
14✔
628
                            memlet.set_base_type(buffer_type);
14✔
629
                        }
14✔
630
                    }
14✔
631
                    // Rename the access node to the local buffer
632
                    access->data(local_name_);
22✔
633
                }
22✔
634
            } else if (auto* seq = dynamic_cast<structured_control_flow::Sequence*>(&node)) {
28✔
635
                for (size_t i = 0; i < seq->size(); i++) {
44✔
636
                    rewrite_accesses(seq->at(i).first);
23✔
637
                }
23✔
638
            } else if (auto* loop = dynamic_cast<structured_control_flow::StructuredLoop*>(&node)) {
21✔
639
                rewrite_accesses(loop->root());
7✔
640
            } else if (auto* if_else = dynamic_cast<structured_control_flow::IfElse*>(&node)) {
7✔
NEW
641
                for (size_t i = 0; i < if_else->size(); i++) {
×
NEW
642
                    rewrite_accesses(if_else->at(i).first);
×
643
                }
×
644
            }
×
645
        };
44✔
646
        rewrite_accesses(loop_.root());
14✔
647
    }
14✔
648

649
    // Cleanup
650
    analysis_manager.invalidate_all();
15✔
651

652
    passes::SequenceFusion sf_pass;
15✔
653
    passes::DeadCFGElimination dce_pass;
15✔
654
    bool applies = false;
15✔
655
    do {
15✔
656
        applies = false;
15✔
657
        applies |= dce_pass.run(builder, analysis_manager);
15✔
658
        applies |= sf_pass.run(builder, analysis_manager);
15✔
659
    } while (applies);
15✔
660
};
15✔
661

662
// Serializes the description of this transformation into `j`.
//
// Written keys:
//   "subgraph"/"0": element id of the loop plus its kind ("for" or "map")
//   "subgraph"/"1": element id of the access node, tagged "access_node"
//   "transformation_type": the value returned by name()
//
// Throws std::runtime_error when the loop is neither a For nor a Map; in that
// case `j` is left untouched.
void OutLocalStorage::to_json(nlohmann::json& j) const {
    // Resolve the serialized tag for the concrete loop class first, so that an
    // unsupported loop is rejected before anything is written to `j`.
    const bool is_for = dynamic_cast<structured_control_flow::For*>(&loop_) != nullptr;
    const bool is_map = !is_for && dynamic_cast<structured_control_flow::Map*>(&loop_) != nullptr;
    if (!is_for && !is_map) {
        throw std::runtime_error("Unsupported loop type for serialization of loop: " + loop_.indvar()->get_name());
    }
    const std::string loop_kind = is_for ? "for" : "map";

    // Entry "0": the loop this transformation is attached to.
    nlohmann::json loop_entry;
    loop_entry["element_id"] = this->loop_.element_id();
    loop_entry["type"] = loop_kind;

    // Entry "1": the access node this transformation is attached to.
    nlohmann::json access_entry;
    access_entry["element_id"] = this->access_node_.element_id();
    access_entry["type"] = "access_node";

    nlohmann::json subgraph;
    subgraph["0"] = loop_entry;
    subgraph["1"] = access_entry;

    j["subgraph"] = subgraph;
    j["transformation_type"] = this->name();
};
3✔
677

678
// Reconstructs an OutLocalStorage transformation from its JSON description.
//
// Expected layout of `desc` (as produced by to_json):
//   desc["subgraph"]["0"]["element_id"] -> element id of the loop
//   desc["subgraph"]["1"]["element_id"] -> element id of the access node
//
// Both ids are resolved through `builder.find_element_by_id`.
//
// Throws InvalidTransformationDescriptionException when
//   - no element with the loop id exists,
//   - the element with the loop id is not a StructuredLoop,
//   - the element with the access node id is missing or is not an AccessNode.
// Missing JSON keys are reported by the checked `.at()` accessors of the JSON
// library instead of invoking unchecked access on a const JSON value.
OutLocalStorage OutLocalStorage::from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& desc) {
    const auto& subgraph = desc.at("subgraph");

    // Resolve the loop element.
    const auto loop_id = subgraph.at("0").at("element_id").get<size_t>();
    auto* element = builder.find_element_by_id(loop_id);
    if (!element) {
        throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(loop_id) + " not found.");
    }
    auto* loop = dynamic_cast<structured_control_flow::StructuredLoop*>(element);
    if (!loop) {
        // The id resolves to an element of another kind; dereferencing the
        // failed cast below would be a null-pointer dereference.
        throw InvalidTransformationDescriptionException(
            "Element with ID " + std::to_string(loop_id) + " is not a loop."
        );
    }

    // Resolve the access node element; the id is read once and reused for the
    // error message.
    const auto access_node_id = subgraph.at("1").at("element_id").get<size_t>();
    auto* access_node = dynamic_cast<data_flow::AccessNode*>(builder.find_element_by_id(access_node_id));
    if (!access_node) {
        throw InvalidTransformationDescriptionException(
            "Access node with ID " + std::to_string(access_node_id) + " not found."
        );
    }

    return OutLocalStorage(*loop, *access_node);
};
1✔
697

698
} // namespace transformations
699
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc