• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 23053141451

13 Mar 2026 01:30PM UTC coverage: 63.617% (-0.1%) from 63.722%
23053141451

push

github

web-flow
Merge pull request #579 from daisytuner/SkipNestedTiling

Skip nested GPU tiling on the same container to avoid redefinitions o…

5 of 7 new or added lines in 2 files covered. (71.43%)

22 existing lines in 3 files now uncovered.

25200 of 39612 relevant lines covered (63.62%)

400.84 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

76.15
/opt/src/transformations/offloading/kernel_local_storage.cpp
1
#include "sdfg/transformations/offloading/kernel_local_storage.h"
2

3
#include <set>
4
#include <string>
5
#include <tuple>
6
#include <vector>
7

8
#include "sdfg/analysis/scope_analysis.h"
9
#include "sdfg/analysis/type_analysis.h"
10
#include "sdfg/builder/structured_sdfg_builder.h"
11
#include "sdfg/data_flow/access_node.h"
12
#include "sdfg/data_flow/library_node.h"
13
#include "sdfg/data_flow/library_nodes/barrier_local_node.h"
14
#include "sdfg/data_flow/tasklet.h"
15
#include "sdfg/exceptions.h"
16
#include "sdfg/passes/dataflow/trivial_array_elimination.h"
17
#include "sdfg/passes/structured_control_flow/dead_cfg_elimination.h"
18
#include "sdfg/passes/structured_control_flow/sequence_fusion.h"
19
#include "sdfg/serializer/json_serializer.h"
20
#include "sdfg/structured_control_flow/control_flow_node.h"
21
#include "sdfg/structured_control_flow/for.h"
22
#include "sdfg/structured_control_flow/if_else.h"
23
#include "sdfg/structured_control_flow/map.h"
24
#include "sdfg/structured_control_flow/sequence.h"
25
#include "sdfg/structured_control_flow/structured_loop.h"
26
#include "sdfg/symbolic/polynomials.h"
27
#include "sdfg/symbolic/symbolic.h"
28
#include "sdfg/targets/gpu/gpu_schedule_type.h"
29
#include "sdfg/transformations/utils.h"
30
#include "sdfg/types/array.h"
31
#include "sdfg/types/structure.h"
32
#include "sdfg/types/type.h"
33
#include "sdfg/types/utils.h"
34
#include "symengine/symengine_rcp.h"
35

36
namespace sdfg {
37
namespace transformations {
38

39
KernelLocalStorage::KernelLocalStorage(
40
    structured_control_flow::StructuredLoop& loop, symbolic::Expression offset, const std::string& container
41
)
42
    : loop_(loop), offset_(offset), container_(container) {};
22✔
43

44
/** Returns the transformation's identifier (written as "transformation_type" by to_json()). */
std::string KernelLocalStorage::name() const {
    return std::string("KernelLocalStorage");
}
3✔
45

46
bool KernelLocalStorage::reads_container(std::string container, analysis::UsersView& body_users) {
×
47
    if (body_users.reads(container).size() == 1) {
×
48
        return true;
×
49
    }
×
50
    return false;
×
51
}
×
52

53
/**
 * Returns true if any read of the target container inside `body_users` indexes
 * it with an expression that uses the loop's induction variable.
 * Only reads with exactly one subset are inspected.
 */
bool KernelLocalStorage::uses_inner_indvar(analysis::UsersView& body_users) {
    bool found = false;
    for (auto& read : body_users.reads(this->container_)) {
        auto& subsets = read->subsets();
        // TODO: Handle multiple subsets
        if (subsets.size() != 1) {
            continue;
        }
        for (auto index_expr : subsets.at(0)) {
            found |= symbolic::uses(index_expr, loop_.indvar());
        }
    }
    return found;
}
×
68

69
std::tuple<symbolic::Integer, symbolic::Integer, symbolic::Integer> KernelLocalStorage::
70
    dim_size(const std::vector<structured_control_flow::ControlFlowNode*> ancestors) {
32✔
71
    symbolic::Integer x_dim_size = symbolic::one();
32✔
72
    symbolic::Integer y_dim_size = symbolic::one();
32✔
73
    symbolic::Integer z_dim_size = symbolic::one();
32✔
74

75
    for (auto node : ancestors) {
214✔
76
        if (auto ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
214✔
77
            auto schedule_type = ancestor_map->schedule_type();
64✔
78
            if (!gpu::is_gpu_schedule(schedule_type)) {
64✔
79
                continue;
×
80
            }
×
81
            auto dim = gpu::gpu_dimension(schedule_type);
64✔
82
            if (dim == gpu::GPUDimension::X) {
64✔
83
                x_dim_size = gpu::gpu_block_size(schedule_type);
32✔
84
            } else if (dim == gpu::GPUDimension::Y) {
32✔
85
                y_dim_size = gpu::gpu_block_size(schedule_type);
32✔
86
            } else if (dim == gpu::GPUDimension::Z) {
32✔
87
                z_dim_size = gpu::gpu_block_size(schedule_type);
×
88
            } else {
×
89
                throw InvalidSDFGException("Unknown dimension in GPU Schedule type: " + std::to_string((int) dim));
×
90
            }
×
91
        }
64✔
92
    }
214✔
93

94
    return {x_dim_size, y_dim_size, z_dim_size};
32✔
95
};
32✔
96

97
std::tuple<symbolic::Symbol, symbolic::Symbol, symbolic::Symbol> KernelLocalStorage::
98
    dim_indvars(const std::vector<structured_control_flow::ControlFlowNode*> ancestors) {
24✔
99
    symbolic::Symbol x_dim_indvar = SymEngine::null;
24✔
100
    symbolic::Symbol y_dim_indvar = SymEngine::null;
24✔
101
    symbolic::Symbol z_dim_indvar = SymEngine::null;
24✔
102

103
    for (auto node : ancestors) {
158✔
104
        if (auto ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
158✔
105
            auto schedule_type = ancestor_map->schedule_type();
48✔
106
            if (!gpu::is_gpu_schedule(schedule_type)) {
48✔
107
                continue;
×
108
            }
×
109
            auto dim = gpu::gpu_dimension(schedule_type);
48✔
110
            if (dim == gpu::GPUDimension::X) {
48✔
111
                x_dim_indvar = ancestor_map->indvar();
24✔
112
            } else if (dim == gpu::GPUDimension::Y) {
24✔
113
                y_dim_indvar = ancestor_map->indvar();
24✔
114
            } else if (dim == gpu::GPUDimension::Z) {
24✔
115
                z_dim_indvar = ancestor_map->indvar();
×
116
            } else {
×
117
                throw InvalidSDFGException("Unknown dimension in GPU Schedule type: " + std::to_string((int) dim));
×
118
            }
×
119
        }
48✔
120
    }
158✔
121

122
    return {x_dim_indvar, y_dim_indvar, z_dim_indvar};
24✔
123
}
24✔
124

125
std::tuple<bool, bool, bool> KernelLocalStorage::
126
    available_dims(std::vector<symbolic::Expression> subsets, analysis::AnalysisManager& analysis_manager) {
14✔
127
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
14✔
128
    auto ancestors = scope_analysis.ancestor_scopes(&loop_);
14✔
129

130
    symbolic::Integer iteration_count = get_iteration_count(loop_);
14✔
131

132
    auto [x_dim_size, y_dim_size, z_dim_size] = dim_size(ancestors);
14✔
133
    auto [x_dim_indvar, y_dim_indvar, z_dim_indvar] = dim_indvars(ancestors);
14✔
134

135
    bool x_dim_available = (x_dim_indvar != SymEngine::null);
14✔
136
    bool y_dim_available = (y_dim_indvar != SymEngine::null);
14✔
137
    bool z_dim_available = (z_dim_indvar != SymEngine::null);
14✔
138

139
    if (x_dim_available) {
14✔
140
        bool x_used = false;
14✔
141
        for (auto subset : subsets) {
28✔
142
            for (auto atom : symbolic::atoms(subset)) {
28✔
143
                if (symbolic::eq(atom, x_dim_indvar)) {
28✔
144
                    x_used = true;
10✔
145
                }
10✔
146
            }
28✔
147
        }
28✔
148
        if (x_used) {
14✔
149
            x_dim_available = false;
10✔
150
        }
10✔
151
    }
14✔
152
    if (y_dim_available) {
14✔
153
        bool y_used = false;
14✔
154
        for (auto subset : subsets) {
28✔
155
            for (auto atom : symbolic::atoms(subset)) {
28✔
156
                if (symbolic::eq(atom, y_dim_indvar)) {
28✔
157
                    y_used = true;
4✔
158
                }
4✔
159
            }
28✔
160
        }
28✔
161
        if (y_used) {
14✔
162
            y_dim_available = false;
4✔
163
        }
4✔
164
    }
14✔
165
    if (z_dim_available) {
14✔
166
        bool z_used = false;
×
167
        for (auto subset : subsets) {
×
168
            for (auto atom : symbolic::atoms(subset)) {
×
169
                if (symbolic::eq(atom, z_dim_indvar)) {
×
170
                    z_used = true;
×
171
                }
×
172
            }
×
173
        }
×
174
        if (z_used) {
×
175
            z_dim_available = false;
×
176
        }
×
177
    }
×
178

179
    if (x_dim_available) {
14✔
180
        auto cond = symbolic::Ge(x_dim_size, iteration_count);
4✔
181
        if (symbolic::is_true(cond)) {
4✔
182
            x_dim_available = true;
4✔
183
        }
4✔
184
    }
4✔
185
    if (y_dim_available) {
14✔
186
        auto cond = symbolic::Ge(y_dim_size, iteration_count);
10✔
187
        if (symbolic::is_true(cond)) {
10✔
188
            y_dim_available = true;
10✔
189
        }
10✔
190
    }
10✔
191
    if (z_dim_available) {
14✔
192
        auto cond = symbolic::Ge(z_dim_size, iteration_count);
×
193
        if (symbolic::is_true(cond)) {
×
194
            z_dim_available = true;
×
195
        }
×
196
    }
×
197

198
    return {x_dim_available, y_dim_available, z_dim_available};
14✔
199
}
14✔
200

201
bool KernelLocalStorage::
202
    can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
20✔
203
    auto& sdfg = builder.subject();
20✔
204

205
    // Criterion: transformation cannot be applied twice on the same container
206
    std::set<std::string> containers(sdfg.containers().begin(), sdfg.containers().end());
20✔
207
    std::string shared_container_name = "__daisy_shared_" + container_;
20✔
208
    if (containers.find(shared_container_name) != containers.end()) {
20✔
NEW
209
        return false;
×
NEW
210
    }
×
211

212
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
20✔
213
    auto ancestors = scope_analysis.ancestor_scopes(&loop_);
20✔
214

215
    // Criterion: Must not be a GPU map itself
216
    if (auto loop_map = dynamic_cast<structured_control_flow::Map*>(&loop_)) {
20✔
217
        if (gpu::is_gpu_schedule(loop_map->schedule_type())) {
6✔
218
            return false;
6✔
219
        }
6✔
220
    }
6✔
221

222
    // Criterion: Must be nested in a GPU schedule
223
    bool is_gpu_scope = false;
14✔
224
    for (auto ancestor : ancestors) {
96✔
225
        if (auto ancestor_map = dynamic_cast<structured_control_flow::Map*>(ancestor)) {
96✔
226
            if (gpu::is_gpu_schedule(ancestor_map->schedule_type())) {
28✔
227
                is_gpu_scope = true;
28✔
228
            } else if (ancestor_map->schedule_type().value() == ScheduleType_Sequential::value()) {
28✔
229
                continue;
×
230
            } else {
×
231
                return false;
×
232
            }
×
233
        }
28✔
234
    }
96✔
235
    if (!is_gpu_scope) {
14✔
236
        return false;
×
237
    }
×
238

239
    auto& inner_body = this->loop_.root();
14✔
240

241
    // Criterion: Container is contiguous (Maybe can be relaxed later)
242
    auto& type_analysis = analysis_manager.get<analysis::TypeAnalysis>();
14✔
243
    auto type = type_analysis.get_outer_type(container_);
14✔
244
    auto& peeled_type = types::peel_to_innermost_element(*type);
14✔
245
    if (peeled_type.type_id() == types::TypeID::Pointer) {
14✔
246
        return false;
×
247
    }
×
248

249

250
    // Criterion: Iteration count is known and an Integer
251
    symbolic::Integer iteration_count = get_iteration_count(loop_);
14✔
252
    if (iteration_count == SymEngine::null) {
14✔
253
        return false;
×
254
    }
×
255

256
    // Criterion: All block dimensions are known and an Integer
257
    auto [x_dim_size, y_dim_size, z_dim_size] = dim_size(ancestors);
14✔
258
    if (x_dim_size == SymEngine::null || y_dim_size == SymEngine::null || z_dim_size == SymEngine::null) {
14✔
259
        return false;
×
260
    }
×
261

262
    // Criteria related to memory accesses
263
    auto& users = analysis_manager.get<analysis::Users>();
14✔
264
    analysis::UsersView inner_body_users(users, inner_body);
14✔
265

266
    // Criterion: Container is read-only
267
    if (!inner_body_users.writes(this->container_).empty() || !inner_body_users.views(this->container_).empty() ||
14✔
268
        !inner_body_users.moves(this->container_).empty()) {
14✔
269
        return false;
2✔
270
    }
2✔
271
    if (inner_body_users.reads(this->container_).empty()) {
12✔
272
        return false;
×
273
    }
×
274

275
    // Collect moving symbols
276

277
    // Criterion: Memory accesses do not depend on moving symbols
278
    for (auto& user : inner_body_users.uses(this->container_)) {
20✔
279
        auto& subsets = user->subsets();
20✔
280
        for (auto& subset : subsets) {
20✔
281
            for (auto& expr : subset) {
20✔
282
                for (auto& atom : symbolic::atoms(expr)) {
12✔
283
                    if (SymEngine::is_a<SymEngine::Symbol>(*atom)) {
12✔
284
                        auto symbol = SymEngine::rcp_static_cast<const SymEngine::Symbol>(atom);
12✔
285
                        if (!inner_body_users.moves(symbol->get_name()).empty()) {
12✔
286
                            return false;
×
287
                        }
×
288
                    }
12✔
289
                }
12✔
290
            }
12✔
291
        }
20✔
292
    }
20✔
293

294
    // Criterion: Check if all memory accesses are affine w.r.t the inner loop index
295

296
    // Limitations: single memory access
297
    if (inner_body_users.reads(this->container_).size() != 1) {
12✔
298
        return false;
6✔
299
    }
6✔
300
    auto read = inner_body_users.reads(this->container_).at(0);
6✔
301
    if (read->subsets().size() != 1) {
6✔
302
        return false;
×
303
    }
×
304
    auto subsets = read->subsets().at(0);
6✔
305

306
    // Criterion: more than one dimension is available.
307
    auto [x_dim_indvar, y_dim_indvar, z_dim_indvar] = dim_indvars(ancestors);
6✔
308
    symbolic::SymbolVec indvars;
6✔
309
    if (x_dim_indvar != SymEngine::null) {
6✔
310
        indvars.push_back(x_dim_indvar);
6✔
311
    }
6✔
312
    if (y_dim_indvar != SymEngine::null) {
6✔
313
        indvars.push_back(y_dim_indvar);
6✔
314
    }
6✔
315
    if (z_dim_indvar != SymEngine::null) {
6✔
316
        indvars.push_back(z_dim_indvar);
×
317
    }
×
318

319
    if (indvars.size() <= 1) {
6✔
320
        return false;
×
321
    }
×
322

323
    indvars.push_back(loop_.indvar());
6✔
324

325
    // Criterion: Memory access is polynomial of
326
    // c_0 * a + c_1 * b + c_2 * c + c_3 * k, where a, b, c are x-threads, y-threads, z-threads
327
    // and k is the inner loop index
328

329
    for (auto subset : subsets) {
12✔
330
        if (symbolic::polynomial(subset, indvars) == SymEngine::null) {
12✔
331
            return false;
×
332
        }
×
333
    }
12✔
334

335
    // Criterion: inner indvar is used in memory access
336
    bool uses_inner_indvar = false;
6✔
337
    for (auto subset : subsets) {
12✔
338
        for (auto atom : symbolic::atoms(subset)) {
12✔
339
            if (symbolic::eq(atom, loop_.indvar())) {
12✔
340
                uses_inner_indvar = true;
6✔
341
            }
6✔
342
        }
12✔
343
    }
12✔
344
    if (!uses_inner_indvar) {
6✔
345
        return false;
×
346
    }
×
347

348
    // Criterion: Has a free dimension to map to and that dimension is big enough
349
    auto [x_dim_available, y_dim_available, z_dim_available] = available_dims(subsets, analysis_manager);
6✔
350

351
    if (!x_dim_available && !y_dim_available && !z_dim_available) {
6✔
352
        return false;
×
353
    }
×
354

355
    return true;
6✔
356
};
6✔
357

358
void KernelLocalStorage::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
4✔
359
    auto& sdfg = builder.subject();
4✔
360

361
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
4✔
362
    auto ancestors = scope_analysis.ancestor_scopes(&loop_);
4✔
363

364
    auto& users = analysis_manager.get<analysis::Users>();
4✔
365

366
    auto& inner_body = this->loop_.root();
4✔
367
    analysis::UsersView inner_body_users(users, inner_body);
4✔
368

369
    // Detect GPU backend from ancestor map schedule types
370
    bool is_rocm = false;
4✔
371
    for (auto node : ancestors) {
26✔
372
        if (auto ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
26✔
373
            if (ancestor_map->schedule_type().value() == "ROCM") {
8✔
374
                is_rocm = true;
×
375
                break;
×
376
            }
×
377
        }
8✔
378
    }
26✔
379

380
    std::string thread_prefix = is_rocm ? "__daisy_hip_thread_idx_" : "__daisy_cuda_thread_idx_";
4✔
381
    std::string x_name = thread_prefix + "x";
4✔
382
    std::string y_name = thread_prefix + "y";
4✔
383
    std::string z_name = thread_prefix + "z";
4✔
384
    symbolic::Symbol x_symbol = symbolic::symbol(x_name);
4✔
385
    symbolic::Symbol y_symbol = symbolic::symbol(y_name);
4✔
386
    symbolic::Symbol z_symbol = symbolic::symbol(z_name);
4✔
387

388
    auto index_type = types::Scalar(types::PrimitiveType::Int32);
4✔
389
    index_type.storage_type(types::StorageType::NV_Symbol());
4✔
390

391
    std::set<std::string> containers(sdfg.containers().begin(), sdfg.containers().end());
4✔
392
    if (containers.find(x_name) == containers.end()) {
4✔
393
        builder.add_container(x_name, index_type);
3✔
394
    }
3✔
395
    if (containers.find(y_name) == containers.end()) {
4✔
396
        builder.add_container(y_name, index_type);
3✔
397
    }
3✔
398
    if (containers.find(z_name) == containers.end()) {
4✔
399
        builder.add_container(z_name, index_type);
3✔
400
    }
3✔
401

402
    /**
403
        1. Add new shared memory container
404
        2. Add barrier before loop
405
        3. add copyin branch before loop
406
        4. Add barrier before loop
407
        5. replace container in loop
408
        6. replace subset expressions in loop
409
    */
410

411
    symbolic::Integer iteration_count = get_iteration_count(loop_);
4✔
412

413
    auto [x_dim_size, y_dim_size, z_dim_size] = dim_size(ancestors);
4✔
414
    auto [x_dim_indvar, y_dim_indvar, z_dim_indvar] = dim_indvars(ancestors);
4✔
415

416
    auto parent = scope_analysis.parent_scope(&loop_);
4✔
417
    auto parent_seq = static_cast<structured_control_flow::Sequence*>(parent);
4✔
418
    auto& seq = builder.add_sequence_before(*parent_seq, loop_, {}, loop_.debug_info());
4✔
419

420
    // 1. Add new shared memory container
421
    auto& type_analysis = analysis_manager.get<analysis::TypeAnalysis>();
4✔
422
    auto type = type_analysis.get_outer_type(container_);
4✔
423
    auto& peeled_type = types::peel_to_innermost_element(*type);
4✔
424
    auto read = inner_body_users.reads(this->container_).at(0);
4✔
425
    auto subsets = read->subsets().at(0);
4✔
426

427
    auto [x_dim_available, y_dim_available, z_dim_available] = available_dims(subsets, analysis_manager);
4✔
428

429
    // get free dim
430
    symbolic::Symbol target_dim;
4✔
431
    auto [dim_x, dim_y, dim_z] = available_dims(subsets, analysis_manager);
4✔
432

433
    if (dim_x) {
4✔
434
        target_dim = x_symbol;
1✔
435
    } else if (dim_y) {
3✔
436
        target_dim = y_symbol;
3✔
437
    } else if (dim_z) {
3✔
438
        target_dim = z_symbol;
×
439
    } else {
×
440
        throw InvalidSDFGException("No available GPU tiling dimension found!");
×
441
    }
×
442

443
    // std::unique_ptr<types::IType> element_type;
444

445
    // if (peeled_type.type_id() == types::TypeID::Structure) {
446
    //     auto struct_type = static_cast<const types::Structure&>(peeled_type);
447
    //     types::Structure new_struct_type(
448
    //         types::StorageType::NV_Shared(), 8, {}, struct_type.name()
449
    //     );
450
    //     element_type = new_struct_type.clone();
451
    // } else if (peeled_type.type_id() == types::TypeID::Scalar) {
452
    //     auto scalar_type = static_cast<const types::Scalar&>(peeled_type);
453
    //     types::Scalar new_scalar_type(
454
    //         types::StorageType::NV_Shared(), 8, {}, scalar_type.primitive_type()
455
    //     );
456
    //     element_type = new_scalar_type.clone();
457
    // } else {
458
    //     throw InvalidSDFGException(
459
    //         "Unsupported peeled type for KernelLocalStorage."
460
    //     );
461
    // }
462

463
    auto generic_storage = is_rocm ? types::StorageType("AMD_Generic") : types::StorageType::NV_Generic();
4✔
464

465
    types::Array tile_array_type(types::StorageType::NV_Shared(), 8, {}, peeled_type, iteration_count);
4✔
466
    types::Array z_array_type(generic_storage, 8, {}, tile_array_type, z_dim_size);
4✔
467
    types::Array* pred_y;
4✔
468
    if (symbolic::eq(target_dim, z_symbol)) {
4✔
469
        pred_y = &tile_array_type;
×
470
    } else {
4✔
471
        pred_y = &z_array_type;
4✔
472
    }
4✔
473
    types::Array y_array_type(generic_storage, 8, {}, *pred_y, y_dim_size);
4✔
474
    types::Array* pred_x;
4✔
475
    if (symbolic::eq(target_dim, y_symbol)) {
4✔
476
        pred_x = &z_array_type;
3✔
477
    } else {
3✔
478
        pred_x = &y_array_type;
1✔
479
    }
1✔
480
    types::Array x_array_type(generic_storage, 8, {}, *pred_x, x_dim_size);
4✔
481
    types::Array* final_type;
4✔
482
    if (symbolic::eq(target_dim, x_symbol)) {
4✔
483
        final_type = &y_array_type;
1✔
484
    } else {
3✔
485
        final_type = &x_array_type;
3✔
486
    }
3✔
487

488
    std::string shared_container_name = "__daisy_shared_" + container_;
4✔
489
    builder.add_container(shared_container_name, *final_type);
4✔
490

491
    // 2. Add barrier before loop
492
    auto& sync_block1 = builder.add_block(seq);
4✔
493

494
    builder.add_library_node<data_flow::BarrierLocalNode>(sync_block1, {});
4✔
495

496
    // 3. add copyin branch before loop
497
    auto& if_else = builder.add_if_else(seq);
4✔
498

499
    auto condition = symbolic::subs(loop_.condition(), loop_.indvar(), symbolic::add(target_dim, offset_));
4✔
500
    auto& branch = builder.add_case(if_else, condition);
4✔
501

502
    auto& copyin_block = builder.add_block(branch);
4✔
503

504
    auto& access_in = builder.add_access(copyin_block, container_);
4✔
505
    auto& access_out = builder.add_access(copyin_block, shared_container_name);
4✔
506

507
    auto& tasklet = builder.add_tasklet(copyin_block, data_flow::TaskletCode::assign, "out_", {"in_"});
4✔
508

509
    std::vector<symbolic::Expression> copyin_subsets;
4✔
510
    for (auto subset : subsets) {
8✔
511
        auto substituted = symbolic::subs(subset, loop_.indvar(), symbolic::add(target_dim, offset_));
8✔
512
        copyin_subsets.push_back(substituted);
8✔
513
    }
8✔
514

515
    builder.add_computational_memlet(copyin_block, access_in, tasklet, "in_", copyin_subsets, *type);
4✔
516

517
    std::vector<symbolic::Expression> shared_access_subsets = {x_symbol, y_symbol, z_symbol, target_dim};
4✔
518

519
    if (symbolic::eq(target_dim, x_symbol)) {
4✔
520
        shared_access_subsets.erase(shared_access_subsets.begin());
1✔
521
    } else if (symbolic::eq(target_dim, y_symbol)) {
3✔
522
        shared_access_subsets.erase(shared_access_subsets.begin() + 1);
3✔
523
    } else if (symbolic::eq(target_dim, z_symbol)) {
3✔
524
        shared_access_subsets.erase(shared_access_subsets.begin() + 2);
×
525
    }
×
526

527
    builder.add_computational_memlet(copyin_block, tasklet, "out_", access_out, shared_access_subsets);
4✔
528

529
    // 4. Add barrier before loop
530

531
    auto& sync_block2 = builder.add_block(seq);
4✔
532

533
    builder.add_library_node<data_flow::BarrierLocalNode>(sync_block2, {});
4✔
534

535
    // 5. replace container in loop
536
    loop_.replace(symbolic::symbol(container_), symbolic::symbol(shared_container_name));
4✔
537

538
    // 6. replace subset expressions in loop
539
    std::vector<symbolic::Expression> read_shared_access_subsets;
4✔
540
    symbolic::Expression substituted_dimension;
4✔
541
    for (auto& subset : shared_access_subsets) {
12✔
542
        auto substituted = symbolic::subs(subset, target_dim, symbolic::sub(loop_.indvar(), offset_));
12✔
543
        read_shared_access_subsets.push_back(substituted);
12✔
544
    }
12✔
545

546
    auto access_node = static_cast<data_flow::AccessNode*>(read->element());
4✔
547
    for (auto& oedge : access_node->get_parent().out_edges(*access_node)) {
4✔
548
        oedge.set_subset(read_shared_access_subsets);
4✔
549
        oedge.set_base_type(*final_type);
4✔
550
    }
4✔
551

552
    // End of transformation
553

554
    analysis_manager.invalidate_all();
4✔
555

556
    passes::SequenceFusion sf_pass;
4✔
557
    passes::DeadCFGElimination dce_pass;
4✔
558
    passes::TrivialArrayElimination tae_pass;
4✔
559
    bool applies = false;
4✔
560
    do {
8✔
561
        applies = false;
8✔
562
        applies |= dce_pass.run(builder, analysis_manager);
8✔
563
        applies |= sf_pass.run(builder, analysis_manager);
8✔
564
        applies |= tae_pass.run(builder, analysis_manager);
8✔
565
    } while (applies);
8✔
566
};
4✔
567

568
/**
 * Serializes the transformation into `j`.
 *
 * Writes the transformation type, a "subgraph" entry describing the loop, a
 * "parameters" entry with offset and container, and the same values again as
 * flat top-level fields for backward compatibility.
 */
void KernelLocalStorage::to_json(nlohmann::json& j) const {
    // Classify the loop; the first matching kind wins.
    std::string loop_type = "unknown";
    if (dynamic_cast<structured_control_flow::For*>(&loop_)) {
        loop_type = "for";
    } else if (dynamic_cast<structured_control_flow::While*>(&loop_)) {
        loop_type = "while";
    } else if (dynamic_cast<structured_control_flow::Map*>(&loop_)) {
        loop_type = "map";
    }

    const auto loop_element_id = this->loop_.element_id();
    const auto serialized_offset = serializer::JSONSerializer::expression(offset_);

    j["transformation_type"] = this->name();
    j["subgraph"] = {{"0", {{"element_id", loop_element_id}, {"type", loop_type}}}};
    j["parameters"] = {{"offset", serialized_offset}, {"container", this->container_}};

    // Legacy fields for backward compatibility
    j["loop_element_id"] = loop_element_id;
    j["offset"] = serialized_offset;
    j["container"] = this->container_;
}
1✔
591

592
/**
 * Reconstructs the transformation from its JSON description.
 *
 * The loop id is read from "subgraph"/"0"/"element_id" if present, otherwise
 * from the legacy "loop_element_id" field. Offset and container are read from
 * "parameters" first and fall back to the legacy top-level fields.
 *
 * Any structured loop is accepted, matching the constructor's parameter type.
 *
 * @throws InvalidTransformationDescriptionException if the referenced element
 *         does not exist or is not a structured loop.
 */
KernelLocalStorage KernelLocalStorage::from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& desc) {
    size_t loop_id;
    if (desc.contains("subgraph")) {
        const auto& node_desc = desc.at("subgraph").at("0");
        loop_id = node_desc.at("element_id").get<size_t>();
    } else {
        loop_id = desc.at("loop_element_id").get<size_t>();
    }

    auto element = builder.find_element_by_id(loop_id);
    if (!element) {
        throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(loop_id) + " not found.");
    }
    // Check the cast result: dereferencing a failed dynamic_cast is undefined behavior.
    auto outer_loop = dynamic_cast<structured_control_flow::StructuredLoop*>(element);
    if (!outer_loop) {
        throw InvalidTransformationDescriptionException(
            "Element with ID " + std::to_string(loop_id) + " is not a structured loop."
        );
    }

    nlohmann::json offset_json;
    std::string container;
    if (desc.contains("parameters")) {
        const auto& params = desc.at("parameters");
        if (params.contains("offset")) {
            offset_json = params.at("offset");
        }
        if (params.contains("container")) {
            container = params.at("container").get<std::string>();
        }
    }
    // Fall back to the legacy top-level fields.
    if (offset_json.is_null() && desc.contains("offset")) {
        offset_json = desc.at("offset");
    }
    if (container.empty() && desc.contains("container")) {
        container = desc.at("container").get<std::string>();
    }

    auto offset = symbolic::parse(offset_json);

    return KernelLocalStorage(*outer_loop, offset, container);
}
1✔
629

630
} // namespace transformations
631
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc