• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 27237522183

09 Jun 2026 09:38PM UTC coverage: 61.388% (+0.1%) from 61.275%
27237522183

Pull #741

github

web-flow
Merge c8be834b4 into aacd50c09
Pull Request #741: replaces MemAccessRangeAnalysis with MemoryLayoutAnalysis

481 of 523 new or added lines in 12 files covered. (91.97%)

41 existing lines in 10 files now uncovered.

35748 of 58233 relevant lines covered (61.39%)

757.2 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.55
/opt/src/transformations/out_local_storage.cpp
1
#include "sdfg/transformations/out_local_storage.h"
2

3
#include <cstddef>
4
#include <functional>
5
#include <string>
6

7
#include "sdfg/analysis/memory_layout_analysis.h"
8
#include "sdfg/analysis/scope_analysis.h"
9
#include "sdfg/analysis/users.h"
10
#include "sdfg/builder/structured_sdfg_builder.h"
11
#include "sdfg/data_flow/access_node.h"
12
#include "sdfg/data_flow/library_nodes/barrier_local_node.h"
13
#include "sdfg/data_flow/memlet.h"
14
#include "sdfg/passes/structured_control_flow/dead_cfg_elimination.h"
15
#include "sdfg/passes/structured_control_flow/sequence_fusion.h"
16
#include "sdfg/structured_control_flow/if_else.h"
17
#include "sdfg/structured_control_flow/sequence.h"
18
#include "sdfg/structured_control_flow/structured_loop.h"
19
#include "sdfg/symbolic/symbolic.h"
20
#include "sdfg/targets/gpu/gpu_schedule_type.h"
21
#include "sdfg/types/array.h"
22
#include "sdfg/types/pointer.h"
23
#include "sdfg/types/scalar.h"
24

25
namespace sdfg {
26
namespace transformations {
27

28
// Builds the transformation for one loop / access-node pair.
//
// loop         - loop whose body accesses are to be served from a local buffer
// access_node  - access node identifying the container; its data() name is cached in container_
// storage_type - storage type requested for the local buffer
//
// Only stores its arguments; no analysis is performed here.
OutLocalStorage::OutLocalStorage(
    structured_control_flow::StructuredLoop& loop,
    const data_flow::AccessNode& access_node,
    const types::StorageType& storage_type
)
    : loop_(loop), access_node_(access_node), container_(access_node.data()), storage_type_(storage_type) {}
28✔
34

35
// Returns the fixed identifier of this transformation.
std::string OutLocalStorage::name() const { return "OutLocalStorage"; }
5✔
36

37
bool OutLocalStorage::can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
26✔
38
    auto& sdfg = builder.subject();
26✔
39
    auto& body = this->loop_.root();
26✔
40

41
    tile_info_ = TileInfo{};
26✔
42

43
    // Criterion: Container must exist
44
    if (!sdfg.exists(this->container_)) {
26✔
45
        return false;
×
46
    }
×
47

48
    auto& type = sdfg.type(this->container_);
26✔
49

50
    // Criterion: Container must be used in the loop body
51
    auto& users = analysis_manager.get<analysis::Users>();
26✔
52
    analysis::UsersView body_users(users, body);
26✔
53
    if (body_users.uses(this->container_).empty()) {
26✔
54
        return false;
2✔
55
    }
2✔
56

57
    // Criterion: Container must have writes (this is OutLocalStorage, not InLocalStorage)
58
    if (body_users.writes(this->container_).empty()) {
24✔
59
        return false;
1✔
60
    }
1✔
61

62
    // Determine if container is also read (read-write vs write-only)
63
    tile_info_.has_read = !body_users.reads(this->container_).empty();
23✔
64

65
    // Handle scalar containers: no tile needed, dimensions stay empty
66
    if (type.type_id() == types::TypeID::Scalar) {
23✔
67
        return true;
1✔
68
    }
1✔
69

70
    // For Array/Pointer types: use MemoryLayoutAnalysis tile group API
71
    if (type.type_id() != types::TypeID::Pointer && type.type_id() != types::TypeID::Array) {
22✔
72
        return false;
×
73
    }
×
74

75
    auto& mla = analysis_manager.get<analysis::MemoryLayoutAnalysis>();
22✔
76

77
    // Find a representative memlet from the access node to identify its group.
78
    // An access node may have multiple edges belonging to different tile groups.
79
    // We iterate all edges and select the first one whose tile group is valid
80
    // at the target loop level.
81
    const analysis::MemoryTileGroup* group = nullptr;
22✔
82
    auto& dfg = access_node_.get_parent();
22✔
83
    for (auto& memlet : dfg.in_edges(access_node_)) {
22✔
84
        auto* candidate = mla.tile_group_for(loop_, memlet);
13✔
85
        if (candidate) {
13✔
86
            group = candidate;
13✔
87
            break;
13✔
88
        }
13✔
89
    }
13✔
90
    if (!group) {
22✔
91
        for (auto& memlet : dfg.out_edges(access_node_)) {
9✔
92
            auto* candidate = mla.tile_group_for(loop_, memlet);
9✔
93
            if (candidate) {
9✔
94
                group = candidate;
9✔
95
                break;
9✔
96
            }
9✔
97
        }
9✔
98
    }
9✔
99
    if (!group) {
22✔
UNCOV
100
        return false;
×
UNCOV
101
    }
×
102

103
    auto& tile = group->tile;
22✔
104

105
    // Store group memlets for use in apply()
106
    group_memlets_.clear();
22✔
107
    group_memlets_.insert(group->memlets.begin(), group->memlets.end());
22✔
108

109
    // Get overapproximated extents (integer upper bounds)
110
    auto extents = tile.extents_approx();
22✔
111
    if (extents.empty()) {
22✔
112
        return false;
×
113
    }
×
114
    // Reject if any extent depends on an unbounded leading dimension (returned as null
115
    // by extents_approx). Downstream code (substitution, stride computation) would
116
    // dereference these.
117
    for (auto& ext : extents) {
35✔
118
        if (ext.is_null()) return false;
35✔
119
    }
35✔
120

121
    // Store tile info (before substitution, bases/strides stay symbolic)
122
    tile_info_.dimensions = extents;
22✔
123
    tile_info_.bases = tile.min_subset;
22✔
124
    tile_info_.strides = std::vector<symbolic::Expression>(tile.layout.strides().begin(), tile.layout.strides().end());
22✔
125
    tile_info_.offset = tile.layout.offset();
22✔
126

127
    // GPU shared memory: resolve symbolic extents using GPU block sizes and
128
    // require at least one cooperative dimension
129
    if (storage_type_.is_nv_shared()) {
22✔
130
        auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
6✔
131
        auto ancestors = scope_analysis.ancestor_scopes(&loop_);
6✔
132

133
        // Build substitution map: symbolic GPU map bounds → integer block sizes
134
        for (auto* node : ancestors) {
26✔
135
            if (auto* ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
26✔
136
                if (!gpu::is_gpu_schedule(ancestor_map->schedule_type())) {
10✔
137
                    continue;
×
138
                }
×
139
                auto block_size = gpu::gpu_block_size(ancestor_map->schedule_type());
10✔
140
                // Extract symbolic bound from condition: Lt(indvar, BOUND)
141
                auto condition = ancestor_map->condition();
10✔
142
                if (SymEngine::is_a<SymEngine::StrictLessThan>(*condition)) {
10✔
143
                    auto stl = SymEngine::rcp_static_cast<const SymEngine::StrictLessThan>(condition);
10✔
144
                    auto rhs = stl->get_args()[1];
10✔
145
                    auto iter_count = symbolic::sub(rhs, ancestor_map->init());
10✔
146
                    if (!SymEngine::is_a<SymEngine::Integer>(*iter_count)) {
10✔
147
                        // Symbolic bound — substitute with block size in extents and bases
148
                        for (auto& ext : tile_info_.dimensions) {
17✔
149
                            ext = symbolic::simplify(symbolic::subs(ext, iter_count, block_size));
17✔
150
                        }
17✔
151
                        for (auto& base : tile_info_.bases) {
17✔
152
                            base = symbolic::simplify(symbolic::subs(base, iter_count, block_size));
17✔
153
                        }
17✔
154
                    }
10✔
155
                }
10✔
156
            }
10✔
157
        }
26✔
158

159
        // Criterion: All extents must now be provably integer
160
        for (auto& ext : tile_info_.dimensions) {
10✔
161
            if (!SymEngine::is_a<SymEngine::Integer>(*ext)) {
10✔
162
                return false;
2✔
163
            }
2✔
164
        }
10✔
165

166
        // Criterion: At least one cooperative dimension
167
        bool has_cooperative_dim = false;
4✔
168
        for (auto* node : ancestors) {
12✔
169
            if (auto* ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
12✔
170
                if (!gpu::is_gpu_schedule(ancestor_map->schedule_type())) {
6✔
171
                    continue;
×
172
                }
×
173
                bool appears_in_bases = false;
6✔
174
                for (auto& base : tile_info_.bases) {
9✔
175
                    if (symbolic::uses(base, ancestor_map->indvar())) {
9✔
176
                        appears_in_bases = true;
2✔
177
                        break;
2✔
178
                    }
2✔
179
                }
9✔
180
                if (!appears_in_bases) {
6✔
181
                    has_cooperative_dim = true;
4✔
182
                    break;
4✔
183
                }
4✔
184
            }
6✔
185
        }
12✔
186
        if (!has_cooperative_dim) {
4✔
187
            return false;
×
188
        }
×
189
    } else {
16✔
190
        // CPU path: All extents must be provably integer
191
        for (auto& ext : tile_info_.dimensions) {
25✔
192
            if (!SymEngine::is_a<SymEngine::Integer>(*ext)) {
25✔
193
                return false;
×
194
            }
×
195
        }
25✔
196
    }
16✔
197

198
    return true;
20✔
199
}
22✔
200

201
void OutLocalStorage::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
19✔
202
    auto& sdfg = builder.subject();
19✔
203
    auto& users = analysis_manager.get<analysis::Users>();
19✔
204
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
19✔
205

206
    auto parent_node = scope_analysis.parent_scope(&loop_);
19✔
207
    auto parent = dynamic_cast<structured_control_flow::Sequence*>(parent_node);
19✔
208
    if (!parent) {
19✔
209
        throw InvalidSDFGException("OutLocalStorage: Parent of loop must be a Sequence!");
×
210
    }
×
211

212
    // Get type information
213
    auto& type = sdfg.type(this->container_);
19✔
214
    types::Scalar scalar_type(type.primitive_type());
19✔
215

216
    // Create local buffer name
217
    local_name_ = builder.find_new_name("__daisy_out_local_storage_" + this->container_);
19✔
218

219
    // ========================================================================
220
    // SCALAR PATH: tile_info_.dimensions is empty
221
    // ========================================================================
222
    if (tile_info_.dimensions.empty()) {
19✔
223
        // Create scalar local buffer
224
        builder.add_container(local_name_, scalar_type);
1✔
225

226
        // Get the access subset from the first user (all scalar, so empty subset)
227
        analysis::UsersView body_users(users, loop_.root());
1✔
228
        auto accesses = body_users.uses(this->container_);
1✔
229
        auto first_access = accesses.at(0);
1✔
230
        auto first_subset = first_access->subsets().at(0);
1✔
231

232
        // Init block (copy from container to local) - before loop
233
        if (tile_info_.has_read) {
1✔
234
            auto& init_block = builder.add_block_before(*parent, loop_, {}, loop_.debug_info());
1✔
235
            auto& init_src = builder.add_access(init_block, this->container_);
1✔
236
            auto& init_dst = builder.add_access(init_block, local_name_);
1✔
237
            auto& init_tasklet = builder.add_tasklet(init_block, data_flow::TaskletCode::assign, "_out", {"_in"});
1✔
238
            builder.add_computational_memlet(init_block, init_src, init_tasklet, "_in", first_subset, type);
1✔
239
            builder.add_computational_memlet(init_block, init_tasklet, "_out", init_dst, {}, scalar_type);
1✔
240
        }
1✔
241

242
        // Writeback block (copy from local to container) - after loop
243
        {
1✔
244
            auto& wb_block = builder.add_block_after(*parent, loop_, {}, loop_.debug_info());
1✔
245
            auto& wb_src = builder.add_access(wb_block, local_name_);
1✔
246
            auto& wb_dst = builder.add_access(wb_block, this->container_);
1✔
247
            auto& wb_tasklet = builder.add_tasklet(wb_block, data_flow::TaskletCode::assign, "_out", {"_in"});
1✔
248
            builder.add_computational_memlet(wb_block, wb_src, wb_tasklet, "_in", {}, scalar_type);
1✔
249
            builder.add_computational_memlet(wb_block, wb_tasklet, "_out", wb_dst, first_subset, type);
1✔
250
        }
1✔
251

252
        // Rewrite body accesses to use scalar local
253
        for (auto* user : body_users.uses(this->container_)) {
2✔
254
            auto element = user->element();
2✔
255
            if (auto access = dynamic_cast<data_flow::AccessNode*>(element)) {
2✔
256
                for (auto& iedge : access->get_parent().in_edges(*access)) {
2✔
257
                    auto memlet = &iedge;
1✔
258
                    memlet->set_subset({});
1✔
259
                    memlet->set_base_type(scalar_type);
1✔
260
                }
1✔
261
                for (auto& oedge : access->get_parent().out_edges(*access)) {
2✔
262
                    auto memlet = &oedge;
1✔
263
                    memlet->set_subset({});
1✔
264
                    memlet->set_base_type(scalar_type);
1✔
265
                }
1✔
266
            }
2✔
267
        }
2✔
268

269
        // Replace container name in the loop body
270
        loop_.replace(symbolic::symbol(this->container_), symbolic::symbol(local_name_));
1✔
271
    }
1✔
272
    // ========================================================================
273
    // ARRAY PATH: tile_info_.dimensions is non-empty
274
    // ========================================================================
275
    else {
18✔
276
        // Compute total buffer size
277
        symbolic::Expression total_size = symbolic::integer(1);
18✔
278
        for (auto& ds : tile_info_.dimensions) {
29✔
279
            total_size = symbolic::mul(total_size, ds);
29✔
280
        }
29✔
281

282
        // Create the local buffer with specified storage type
283
        types::Array buffer_type(storage_type_, 0, {}, scalar_type, total_size);
18✔
284
        builder.add_container(local_name_, buffer_type);
18✔
285

286
        // Helper: build linearized local index from per-dimension expressions
287
        auto linearize_exprs = [&](const std::vector<symbolic::Expression>& indices) -> symbolic::Expression {
57✔
288
            symbolic::Expression linear_idx = symbolic::integer(0);
57✔
289
            symbolic::Expression stride = symbolic::integer(1);
57✔
290
            for (int i = indices.size() - 1; i >= 0; i--) {
123✔
291
                linear_idx = symbolic::add(linear_idx, symbolic::mul(indices[i], stride));
66✔
292
                stride = symbolic::mul(stride, tile_info_.dimensions[i]);
66✔
293
            }
66✔
294
            return linear_idx;
57✔
295
        };
57✔
296

297
        // Helper: build linearized local index from per-dimension indvars (symbols)
298
        auto linearize = [&](const std::vector<symbolic::Symbol>& indvars) -> symbolic::Expression {
25✔
299
            std::vector<symbolic::Expression> exprs(indvars.begin(), indvars.end());
25✔
300
            return linearize_exprs(exprs);
25✔
301
        };
25✔
302

303
        // Helper: build source subset (base[d] + copy_indvar[d]) for original container
304
        bool is_pointer = (type.type_id() == types::TypeID::Pointer);
18✔
305
        auto build_original_subset = [&](const std::vector<symbolic::Expression>& copy_indices) -> data_flow::Subset {
30✔
306
            std::vector<symbolic::Expression> full_indices;
30✔
307
            size_t var_idx = 0;
30✔
308
            for (size_t d = 0; d < tile_info_.dimensions.size(); d++) {
79✔
309
                if (!symbolic::eq(tile_info_.dimensions.at(d), symbolic::integer(1))) {
49✔
310
                    full_indices.push_back(symbolic::add(tile_info_.bases.at(d), copy_indices.at(var_idx++)));
26✔
311
                } else {
26✔
312
                    full_indices.push_back(tile_info_.bases.at(d));
23✔
313
                }
23✔
314
            }
49✔
315

316
            if (is_pointer) {
30✔
317
                symbolic::Expression linear = tile_info_.offset;
30✔
318
                for (size_t d = 0; d < full_indices.size(); d++) {
79✔
319
                    linear = symbolic::add(linear, symbolic::mul(tile_info_.strides.at(d), full_indices.at(d)));
49✔
320
                }
49✔
321
                return {linear};
30✔
322
            } else {
30✔
323
                return data_flow::Subset(full_indices.begin(), full_indices.end());
×
324
            }
×
325
        };
30✔
326

327
        if (storage_type_.is_nv_shared()) {
18✔
328
            // ============================================================
329
            // GPU COOPERATIVE PATH
330
            // ============================================================
331
            auto ancestors = scope_analysis.ancestor_scopes(&loop_);
4✔
332

333
            // Collect cooperative GPU dimensions
334
            struct CoopDim {
4✔
335
                symbolic::Symbol indvar;
4✔
336
                symbolic::Integer block_size;
4✔
337
                gpu::GPUDimension dimension;
4✔
338
            };
4✔
339
            std::vector<CoopDim> coop_dims;
4✔
340

341
            for (auto* node : ancestors) {
20✔
342
                if (auto* ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
20✔
343
                    if (!gpu::is_gpu_schedule(ancestor_map->schedule_type())) {
8✔
344
                        continue;
×
345
                    }
×
346
                    bool appears_in_bases = false;
8✔
347
                    for (auto& base : tile_info_.bases) {
11✔
348
                        if (symbolic::uses(base, ancestor_map->indvar())) {
11✔
349
                            appears_in_bases = true;
3✔
350
                            break;
3✔
351
                        }
3✔
352
                    }
11✔
353
                    if (!appears_in_bases) {
8✔
354
                        coop_dims.push_back(
5✔
355
                            {ancestor_map->indvar(),
5✔
356
                             gpu::gpu_block_size(ancestor_map->schedule_type()),
5✔
357
                             gpu::gpu_dimension(ancestor_map->schedule_type())}
5✔
358
                        );
5✔
359
                    }
5✔
360
                }
8✔
361
            }
20✔
362

363
            // Compute total cooperative thread count
364
            symbolic::Expression total_coop_threads = symbolic::integer(1);
4✔
365
            for (auto& cd : coop_dims) {
5✔
366
                total_coop_threads = symbolic::mul(total_coop_threads, cd.block_size);
5✔
367
            }
5✔
368

369
            // Flatten cooperative thread index
370
            symbolic::Expression coop_flat = symbolic::integer(0);
4✔
371
            symbolic::Expression coop_stride = symbolic::integer(1);
4✔
372
            for (int i = coop_dims.size() - 1; i >= 0; i--) {
9✔
373
                coop_flat = symbolic::add(coop_flat, symbolic::mul(coop_dims[i].indvar, coop_stride));
5✔
374
                coop_stride = symbolic::mul(coop_stride, coop_dims[i].block_size);
5✔
375
            }
5✔
376

377
            // INIT: barrier → cooperative copy-in → barrier (if has_read)
378
            if (tile_info_.has_read) {
4✔
379
                // Barrier before init
380
                auto& barrier_block1 = builder.add_block_before(*parent, loop_, {}, loop_.debug_info());
1✔
381
                builder.add_library_node<data_flow::BarrierLocalNode>(barrier_block1, {});
1✔
382

383
                // Cooperative copy-in loop
384
                auto idx_name = builder.find_new_name("__daisy_ols_coop_init_" + this->container_);
1✔
385
                types::Scalar idx_type(types::PrimitiveType::UInt64);
1✔
386
                builder.add_container(idx_name, idx_type);
1✔
387
                auto idx_var = symbolic::symbol(idx_name);
1✔
388

389
                auto& init_loop = builder.add_map_before(
1✔
390
                    *parent,
1✔
391
                    loop_,
1✔
392
                    idx_var,
1✔
393
                    symbolic::Lt(idx_var, total_size),
1✔
394
                    coop_flat,
1✔
395
                    symbolic::add(idx_var, total_coop_threads),
1✔
396
                    structured_control_flow::ScheduleType_Sequential::create(),
1✔
397
                    {},
1✔
398
                    loop_.debug_info()
1✔
399
                );
1✔
400

401
                auto& init_block = builder.add_block(init_loop.root());
1✔
402
                auto& init_src = builder.add_access(init_block, this->container_);
1✔
403
                auto& init_dst = builder.add_access(init_block, local_name_);
1✔
404
                auto& init_tasklet = builder.add_tasklet(init_block, data_flow::TaskletCode::assign, "_out", {"_in"});
1✔
405

406
                // Decompose idx_var into per-dim indices
407
                std::vector<symbolic::Expression> init_indices;
1✔
408
                symbolic::Expression remainder = idx_var;
1✔
409
                for (size_t i = 0; i < tile_info_.dimensions.size(); i++) {
3✔
410
                    if (i < tile_info_.dimensions.size() - 1) {
2✔
411
                        symbolic::Expression divisor = symbolic::integer(1);
1✔
412
                        for (size_t j = i + 1; j < tile_info_.dimensions.size(); j++) {
2✔
413
                            divisor = symbolic::mul(divisor, tile_info_.dimensions[j]);
1✔
414
                        }
1✔
415
                        init_indices.push_back(symbolic::div(remainder, divisor));
1✔
416
                        remainder = symbolic::mod(remainder, divisor);
1✔
417
                    } else {
1✔
418
                        init_indices.push_back(remainder);
1✔
419
                    }
1✔
420
                }
2✔
421

422
                auto init_src_subset = build_original_subset(init_indices);
1✔
423
                builder.add_computational_memlet(init_block, init_src, init_tasklet, "_in", init_src_subset, type);
1✔
424
                builder.add_computational_memlet(init_block, init_tasklet, "_out", init_dst, {idx_var}, buffer_type);
1✔
425

426
                // Barrier after init
427
                auto& barrier_block2 = builder.add_block_before(*parent, loop_, {}, loop_.debug_info());
1✔
428
                builder.add_library_node<data_flow::BarrierLocalNode>(barrier_block2, {});
1✔
429
            }
1✔
430

431
            // WRITEBACK: barrier → cooperative copy-out → barrier
432
            {
4✔
433
                // Barrier before writeback
434
                auto& barrier_block3 = builder.add_block_after(*parent, loop_, {}, loop_.debug_info());
4✔
435
                builder.add_library_node<data_flow::BarrierLocalNode>(barrier_block3, {});
4✔
436

437
                // Cooperative writeback loop
438
                auto idx_name = builder.find_new_name("__daisy_ols_coop_wb_" + this->container_);
4✔
439
                types::Scalar idx_type(types::PrimitiveType::UInt64);
4✔
440
                builder.add_container(idx_name, idx_type);
4✔
441
                auto idx_var = symbolic::symbol(idx_name);
4✔
442

443
                auto& wb_loop = builder.add_map_after(
4✔
444
                    *parent,
4✔
445
                    loop_,
4✔
446
                    idx_var,
4✔
447
                    symbolic::Lt(idx_var, total_size),
4✔
448
                    coop_flat,
4✔
449
                    symbolic::add(idx_var, total_coop_threads),
4✔
450
                    structured_control_flow::ScheduleType_Sequential::create(),
4✔
451
                    {},
4✔
452
                    loop_.debug_info()
4✔
453
                );
4✔
454

455
                auto& wb_block = builder.add_block(wb_loop.root());
4✔
456
                auto& wb_src = builder.add_access(wb_block, local_name_);
4✔
457
                auto& wb_dst = builder.add_access(wb_block, this->container_);
4✔
458
                auto& wb_tasklet = builder.add_tasklet(wb_block, data_flow::TaskletCode::assign, "_out", {"_in"});
4✔
459

460
                // Decompose idx_var into per-dim indices
461
                std::vector<symbolic::Expression> wb_indices;
4✔
462
                symbolic::Expression remainder = idx_var;
4✔
463
                for (size_t i = 0; i < tile_info_.dimensions.size(); i++) {
11✔
464
                    if (i < tile_info_.dimensions.size() - 1) {
7✔
465
                        symbolic::Expression divisor = symbolic::integer(1);
3✔
466
                        for (size_t j = i + 1; j < tile_info_.dimensions.size(); j++) {
6✔
467
                            divisor = symbolic::mul(divisor, tile_info_.dimensions[j]);
3✔
468
                        }
3✔
469
                        wb_indices.push_back(symbolic::div(remainder, divisor));
3✔
470
                        remainder = symbolic::mod(remainder, divisor);
3✔
471
                    } else {
4✔
472
                        wb_indices.push_back(remainder);
4✔
473
                    }
4✔
474
                }
7✔
475

476
                auto wb_dst_subset = build_original_subset(wb_indices);
4✔
477
                builder.add_computational_memlet(wb_block, wb_src, wb_tasklet, "_in", {idx_var}, buffer_type);
4✔
478
                builder.add_computational_memlet(wb_block, wb_tasklet, "_out", wb_dst, wb_dst_subset, type);
4✔
479

480
                // Barrier after writeback
481
                auto& barrier_block4 = builder.add_block_after(*parent, loop_, {}, loop_.debug_info());
4✔
482
                builder.add_library_node<data_flow::BarrierLocalNode>(barrier_block4, {});
4✔
483
            }
4✔
484
        } else {
14✔
485
            // ============================================================
486
            // CPU SEQUENTIAL PATH
487
            // ============================================================
488
            if (tile_info_.has_read) {
14✔
489
                std::vector<symbolic::Symbol> init_indvars;
11✔
490
                structured_control_flow::Sequence* init_scope = parent;
11✔
491
                bool first_init_loop = true;
11✔
492

493
                for (size_t i = 0; i < tile_info_.dimensions.size(); i++) {
29✔
494
                    size_t d = i;
18✔
495
                    auto indvar_name =
18✔
496
                        builder.find_new_name("__daisy_ols_init_" + this->container_ + "_d" + std::to_string(d));
18✔
497
                    types::Scalar indvar_type(types::PrimitiveType::UInt64);
18✔
498
                    builder.add_container(indvar_name, indvar_type);
18✔
499
                    auto indvar = symbolic::symbol(indvar_name);
18✔
500
                    init_indvars.push_back(indvar);
18✔
501

502
                    auto init = symbolic::integer(0);
18✔
503
                    auto condition = symbolic::Lt(indvar, tile_info_.dimensions[i]);
18✔
504
                    auto update = symbolic::add(indvar, symbolic::integer(1));
18✔
505

506
                    if (first_init_loop) {
18✔
507
                        auto& init_loop = builder.add_map_before(
11✔
508
                            *init_scope,
11✔
509
                            loop_,
11✔
510
                            indvar,
11✔
511
                            condition,
11✔
512
                            init,
11✔
513
                            update,
11✔
514
                            structured_control_flow::ScheduleType_Sequential::create(),
11✔
515
                            {},
11✔
516
                            loop_.debug_info()
11✔
517
                        );
11✔
518
                        init_scope = &init_loop.root();
11✔
519
                        first_init_loop = false;
11✔
520
                    } else {
11✔
521
                        auto& init_loop = builder.add_map(
7✔
522
                            *init_scope,
7✔
523
                            indvar,
7✔
524
                            condition,
7✔
525
                            init,
7✔
526
                            update,
7✔
527
                            structured_control_flow::ScheduleType_Sequential::create(),
7✔
528
                            {},
7✔
529
                            loop_.debug_info()
7✔
530
                        );
7✔
531
                        init_scope = &init_loop.root();
7✔
532
                    }
7✔
533
                }
18✔
534

535
                // Create init copy block
536
                auto& init_block = builder.add_block(*init_scope);
11✔
537
                auto& init_src = builder.add_access(init_block, this->container_);
11✔
538
                auto& init_dst = builder.add_access(init_block, local_name_);
11✔
539
                auto& init_tasklet = builder.add_tasklet(init_block, data_flow::TaskletCode::assign, "_out", {"_in"});
11✔
540

541
                std::vector<symbolic::Expression> init_exprs(init_indvars.begin(), init_indvars.end());
11✔
542
                auto init_src_subset = build_original_subset(init_exprs);
11✔
543
                data_flow::Subset init_dst_subset = {linearize(init_indvars)};
11✔
544

545
                builder.add_computational_memlet(init_block, init_src, init_tasklet, "_in", init_src_subset, type);
11✔
546
                builder
11✔
547
                    .add_computational_memlet(init_block, init_tasklet, "_out", init_dst, init_dst_subset, buffer_type);
11✔
548
            }
11✔
549

550
            // Writeback Maps
551
            {
14✔
552
                std::vector<symbolic::Symbol> wb_indvars;
14✔
553
                structured_control_flow::Sequence* wb_scope = parent;
14✔
554
                bool first_wb_loop = true;
14✔
555

556
                for (size_t i = 0; i < tile_info_.dimensions.size(); i++) {
36✔
557
                    size_t d = i;
22✔
558
                    auto indvar_name =
22✔
559
                        builder.find_new_name("__daisy_ols_wb_" + this->container_ + "_d" + std::to_string(d));
22✔
560
                    types::Scalar indvar_type(types::PrimitiveType::UInt64);
22✔
561
                    builder.add_container(indvar_name, indvar_type);
22✔
562
                    auto indvar = symbolic::symbol(indvar_name);
22✔
563
                    wb_indvars.push_back(indvar);
22✔
564

565
                    auto init = symbolic::integer(0);
22✔
566
                    auto condition = symbolic::Lt(indvar, tile_info_.dimensions[i]);
22✔
567
                    auto update = symbolic::add(indvar, symbolic::integer(1));
22✔
568

569
                    if (first_wb_loop) {
22✔
570
                        auto& wb_loop = builder.add_map_after(
14✔
571
                            *wb_scope,
14✔
572
                            loop_,
14✔
573
                            indvar,
14✔
574
                            condition,
14✔
575
                            init,
14✔
576
                            update,
14✔
577
                            structured_control_flow::ScheduleType_Sequential::create(),
14✔
578
                            {},
14✔
579
                            loop_.debug_info()
14✔
580
                        );
14✔
581
                        wb_scope = &wb_loop.root();
14✔
582
                        first_wb_loop = false;
14✔
583
                    } else {
14✔
584
                        auto& wb_loop = builder.add_map(
8✔
585
                            *wb_scope,
8✔
586
                            indvar,
8✔
587
                            condition,
8✔
588
                            init,
8✔
589
                            update,
8✔
590
                            structured_control_flow::ScheduleType_Sequential::create(),
8✔
591
                            {},
8✔
592
                            loop_.debug_info()
8✔
593
                        );
8✔
594
                        wb_scope = &wb_loop.root();
8✔
595
                    }
8✔
596
                }
22✔
597

598
                // Create writeback copy block
599
                auto& wb_block = builder.add_block(*wb_scope);
14✔
600
                auto& wb_src = builder.add_access(wb_block, local_name_);
14✔
601
                auto& wb_dst = builder.add_access(wb_block, this->container_);
14✔
602
                auto& wb_tasklet = builder.add_tasklet(wb_block, data_flow::TaskletCode::assign, "_out", {"_in"});
14✔
603

604
                std::vector<symbolic::Expression> wb_exprs(wb_indvars.begin(), wb_indvars.end());
14✔
605
                data_flow::Subset wb_src_subset = {linearize(wb_indvars)};
14✔
606
                auto wb_dst_subset = build_original_subset(wb_exprs);
14✔
607

608
                builder.add_computational_memlet(wb_block, wb_src, wb_tasklet, "_in", wb_src_subset, buffer_type);
14✔
609
                builder.add_computational_memlet(wb_block, wb_tasklet, "_out", wb_dst, wb_dst_subset, type);
14✔
610
            }
14✔
611
        }
14✔
612

613
        // ==================================================================
614
        // Update accesses in the main loop to use the local buffer
615
        // ==================================================================
616
        auto& mla = analysis_manager.get<analysis::MemoryLayoutAnalysis>();
18✔
617

618
        // Recursive helper to traverse all blocks in the loop body
619
        std::function<void(structured_control_flow::ControlFlowNode&)> rewrite_accesses;
18✔
620
        rewrite_accesses = [&](structured_control_flow::ControlFlowNode& node) {
53✔
621
            if (auto* block = dynamic_cast<structured_control_flow::Block*>(&node)) {
53✔
622
                auto& dfg = block->dataflow();
21✔
623
                for (auto* access : dfg.data_nodes()) {
58✔
624
                    if (access->data() != this->container_) continue;
58✔
625
                    bool all_rewritten = true;
32✔
626
                    // Rewrite outgoing memlets (reads from this access node)
627
                    for (auto& memlet : dfg.out_edges(*access)) {
32✔
628
                        if (group_memlets_.count(&memlet) == 0) {
13✔
629
                            all_rewritten = false;
×
630
                            continue;
×
631
                        }
×
632
                        auto* acc = mla.access(memlet);
13✔
633
                        if (acc && acc->subset.size() == tile_info_.dimensions.size()) {
13✔
634
                            std::vector<symbolic::Expression> local_indices;
13✔
635
                            for (size_t d = 0; d < tile_info_.dimensions.size(); d++) {
35✔
636
                                if (!symbolic::eq(tile_info_.dimensions.at(d), symbolic::integer(1))) {
22✔
637
                                    local_indices.push_back(symbolic::sub(acc->subset.at(d), tile_info_.bases.at(d)));
10✔
638
                                }
10✔
639
                            }
22✔
640
                            symbolic::Expression linear_idx = linearize_exprs(local_indices);
13✔
641
                            memlet.set_subset({linear_idx});
13✔
642
                            memlet.set_base_type(buffer_type);
13✔
643
                        }
13✔
644
                    }
13✔
645
                    // Rewrite incoming memlets (writes to this access node)
646
                    for (auto& memlet : dfg.in_edges(*access)) {
32✔
647
                        if (group_memlets_.count(&memlet) == 0) {
19✔
648
                            all_rewritten = false;
×
649
                            continue;
×
650
                        }
×
651
                        auto* acc = mla.access(memlet);
19✔
652
                        if (acc && acc->subset.size() == tile_info_.dimensions.size()) {
19✔
653
                            std::vector<symbolic::Expression> local_indices;
19✔
654
                            for (size_t d = 0; d < tile_info_.dimensions.size(); d++) {
50✔
655
                                if (!symbolic::eq(tile_info_.dimensions.at(d), symbolic::integer(1))) {
31✔
656
                                    local_indices.push_back(symbolic::sub(acc->subset.at(d), tile_info_.bases.at(d)));
16✔
657
                                }
16✔
658
                            }
31✔
659
                            symbolic::Expression linear_idx = linearize_exprs(local_indices);
19✔
660
                            memlet.set_subset({linear_idx});
19✔
661
                            memlet.set_base_type(buffer_type);
19✔
662
                        }
19✔
663
                    }
19✔
664
                    // Rename the access node only if all its memlets belong to our group
665
                    if (all_rewritten) {
32✔
666
                        access->data(local_name_);
32✔
667
                    }
32✔
668
                }
32✔
669
            } else if (auto* seq = dynamic_cast<structured_control_flow::Sequence*>(&node)) {
32✔
670
                for (size_t i = 0; i < seq->size(); i++) {
53✔
671
                    rewrite_accesses(seq->at(i).first);
28✔
672
                }
28✔
673
            } else if (auto* loop = dynamic_cast<structured_control_flow::StructuredLoop*>(&node)) {
25✔
674
                rewrite_accesses(loop->root());
7✔
675
            } else if (auto* if_else = dynamic_cast<structured_control_flow::IfElse*>(&node)) {
7✔
676
                for (size_t i = 0; i < if_else->size(); i++) {
×
677
                    rewrite_accesses(if_else->at(i).first);
×
678
                }
×
679
            }
×
680
        };
53✔
681
        rewrite_accesses(loop_.root());
18✔
682
    }
18✔
683

684
    // Cleanup
685
    analysis_manager.invalidate_all();
19✔
686

687
    passes::SequenceFusion sf_pass;
19✔
688
    passes::DeadCFGElimination dce_pass;
19✔
689
    bool applies = false;
19✔
690
    do {
25✔
691
        applies = false;
25✔
692
        applies |= dce_pass.run(builder, analysis_manager);
25✔
693
        applies |= sf_pass.run(builder, analysis_manager);
25✔
694
    } while (applies);
25✔
695
};
19✔
696

697
/// Serializes this transformation into `j`.
///
/// Writes two keys:
///  - "subgraph": an object with entry "0" (element id and kind of the loop)
///    and entry "1" (element id of the access node, kind "access_node").
///  - "transformation_type": the value returned by name().
///
/// @throws std::runtime_error if the loop is neither a For nor a Map; nothing
///         is written to `j` in that case.
void OutLocalStorage::to_json(nlohmann::json& j) const {
    // Classify the loop first so that an unsupported loop leaves `j` untouched.
    const bool is_for = dynamic_cast<structured_control_flow::For*>(&loop_) != nullptr;
    const bool is_map = !is_for && dynamic_cast<structured_control_flow::Map*>(&loop_) != nullptr;
    if (!is_for && !is_map) {
        throw std::runtime_error("Unsupported loop type for serialization of loop: " + loop_.indvar()->get_name());
    }
    const std::string loop_kind = is_for ? "for" : "map";

    // Describe the two matched elements individually, then combine them.
    nlohmann::json loop_entry = {{"element_id", this->loop_.element_id()}, {"type", loop_kind}};
    nlohmann::json access_entry = {{"element_id", this->access_node_.element_id()}, {"type", "access_node"}};

    j["subgraph"] = {{"0", loop_entry}, {"1", access_entry}};
    j["transformation_type"] = this->name();
};
3✔
712

713
/// Reconstructs an OutLocalStorage transformation from its JSON description.
///
/// Expects the layout produced by to_json(): `desc["subgraph"]["0"]["element_id"]`
/// identifies the loop and `desc["subgraph"]["1"]["element_id"]` identifies the
/// access node. Both ids are resolved through `builder.find_element_by_id`.
///
/// @param builder Builder used to look up the referenced elements.
/// @param desc    JSON description of the transformation.
/// @return The transformation bound to the resolved loop and access node.
/// @throws InvalidTransformationDescriptionException if the loop id does not
///         resolve to an element, if that element is not a StructuredLoop, or
///         if the access node id does not resolve to an AccessNode.
/// @note Missing keys in `desc` are reported by the checked `.at()` accessor
///       of the JSON library instead of being read unchecked.
OutLocalStorage OutLocalStorage::from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& desc) {
    // Use checked access throughout: operator[] on a const json object must
    // not be used with a key that may be absent.
    const auto& subgraph = desc.at("subgraph");

    const auto loop_id = subgraph.at("0").at("element_id").get<size_t>();
    auto* element = builder.find_element_by_id(loop_id);
    if (!element) {
        throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(loop_id) + " not found.");
    }
    auto* loop = dynamic_cast<structured_control_flow::StructuredLoop*>(element);
    if (!loop) {
        // The id resolved, but to something that is not a loop; dereferencing
        // the failed cast below would be undefined behaviour.
        throw InvalidTransformationDescriptionException(
            "Element with ID " + std::to_string(loop_id) + " is not a structured loop."
        );
    }

    // Parse the access node id once and reuse it for lookup and diagnostics.
    const auto access_node_id = subgraph.at("1").at("element_id").get<size_t>();
    auto* access_node = dynamic_cast<data_flow::AccessNode*>(builder.find_element_by_id(access_node_id));
    if (!access_node) {
        throw InvalidTransformationDescriptionException(
            "Access node with ID " + std::to_string(access_node_id) + " not found."
        );
    }

    return OutLocalStorage(*loop, *access_node);
};
1✔
732

733
} // namespace transformations
734
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc