• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 22949872003

11 Mar 2026 11:15AM UTC coverage: 63.681% (-0.9%) from 64.6%
22949872003

push

github

web-flow
Merge pull request #569 from daisytuner/HIPtarget

ROCmTarget

191 of 803 new or added lines in 15 files covered. (23.79%)

3 existing lines in 2 files now uncovered.

24700 of 38787 relevant lines covered (63.68%)

370.4 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

76.33
/opt/src/transformations/offloading/kernel_local_storage.cpp
1
#include "sdfg/transformations/offloading/kernel_local_storage.h"
2

3
#include <string>
4
#include <tuple>
5
#include <vector>
6

7
#include "sdfg/analysis/scope_analysis.h"
8
#include "sdfg/analysis/type_analysis.h"
9
#include "sdfg/builder/structured_sdfg_builder.h"
10
#include "sdfg/data_flow/access_node.h"
11
#include "sdfg/data_flow/library_node.h"
12
#include "sdfg/data_flow/library_nodes/barrier_local_node.h"
13
#include "sdfg/data_flow/tasklet.h"
14
#include "sdfg/exceptions.h"
15
#include "sdfg/passes/dataflow/trivial_array_elimination.h"
16
#include "sdfg/passes/structured_control_flow/dead_cfg_elimination.h"
17
#include "sdfg/passes/structured_control_flow/sequence_fusion.h"
18
#include "sdfg/serializer/json_serializer.h"
19
#include "sdfg/structured_control_flow/control_flow_node.h"
20
#include "sdfg/structured_control_flow/if_else.h"
21
#include "sdfg/structured_control_flow/map.h"
22
#include "sdfg/structured_control_flow/sequence.h"
23
#include "sdfg/structured_control_flow/structured_loop.h"
24
#include "sdfg/symbolic/polynomials.h"
25
#include "sdfg/symbolic/symbolic.h"
26
#include "sdfg/targets/gpu/gpu_schedule_type.h"
27
#include "sdfg/transformations/utils.h"
28
#include "sdfg/types/array.h"
29
#include "sdfg/types/structure.h"
30
#include "sdfg/types/type.h"
31
#include "sdfg/types/utils.h"
32
#include "symengine/symengine_rcp.h"
33

34
namespace sdfg {
35
namespace transformations {
36

37
// Builds the transformation for `loop`, staging reads of `container` through a
// new local buffer. `offset` is the expression added to the thread index when
// the loop induction variable is substituted during the copy-in.
KernelLocalStorage::KernelLocalStorage(
    structured_control_flow::StructuredLoop& loop, symbolic::Expression offset, const std::string& container
)
    : loop_(loop),
      offset_(offset),
      container_(container) {
}
22✔
41

42
// Identifier of this transformation; also written to "transformation_type" by to_json().
std::string KernelLocalStorage::name() const {
    return std::string("KernelLocalStorage");
}
3✔
43

44
bool KernelLocalStorage::reads_container(std::string container, analysis::UsersView& body_users) {
×
45
    if (body_users.reads(container).size() == 1) {
×
46
        return true;
×
47
    }
×
48
    return false;
×
49
}
×
50

51
// Returns true iff some read of the container inside `body_users` has exactly
// one subset and at least one index expression of that subset uses the loop's
// induction variable. Reads with zero or with several subsets are ignored.
bool KernelLocalStorage::uses_inner_indvar(analysis::UsersView& body_users) {
    bool indvar_seen = false;
    for (auto& reader : body_users.reads(this->container_)) {
        const auto& all_subsets = reader->subsets();
        // TODO: Handle multiple subsets
        if (all_subsets.size() != 1) {
            continue;
        }
        for (auto index_expr : all_subsets.at(0)) {
            if (symbolic::uses(index_expr, loop_.indvar())) {
                indvar_seen = true;
            }
        }
    }
    return indvar_seen;
}
×
66

67
std::tuple<symbolic::Integer, symbolic::Integer, symbolic::Integer> KernelLocalStorage::
68
    dim_size(const std::vector<structured_control_flow::ControlFlowNode*> ancestors) {
32✔
69
    symbolic::Integer x_dim_size = symbolic::one();
32✔
70
    symbolic::Integer y_dim_size = symbolic::one();
32✔
71
    symbolic::Integer z_dim_size = symbolic::one();
32✔
72

73
    for (auto node : ancestors) {
214✔
74
        if (auto ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
214✔
75
            auto schedule_type = ancestor_map->schedule_type();
64✔
76
            if (!gpu::is_gpu_schedule(schedule_type)) {
64✔
77
                continue;
×
78
            }
×
79
            auto dim = gpu::gpu_dimension(schedule_type);
64✔
80
            if (dim == gpu::GPUDimension::X) {
64✔
81
                x_dim_size = gpu::gpu_block_size(schedule_type);
32✔
82
            } else if (dim == gpu::GPUDimension::Y) {
32✔
83
                y_dim_size = gpu::gpu_block_size(schedule_type);
32✔
84
            } else if (dim == gpu::GPUDimension::Z) {
32✔
NEW
85
                z_dim_size = gpu::gpu_block_size(schedule_type);
×
86
            } else {
×
NEW
87
                throw InvalidSDFGException("Unknown dimension in GPU Schedule type: " + std::to_string((int) dim));
×
88
            }
×
89
        }
64✔
90
    }
214✔
91

92
    return {x_dim_size, y_dim_size, z_dim_size};
32✔
93
};
32✔
94

95
std::tuple<symbolic::Symbol, symbolic::Symbol, symbolic::Symbol> KernelLocalStorage::
96
    dim_indvars(const std::vector<structured_control_flow::ControlFlowNode*> ancestors) {
24✔
97
    symbolic::Symbol x_dim_indvar = SymEngine::null;
24✔
98
    symbolic::Symbol y_dim_indvar = SymEngine::null;
24✔
99
    symbolic::Symbol z_dim_indvar = SymEngine::null;
24✔
100

101
    for (auto node : ancestors) {
158✔
102
        if (auto ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
158✔
103
            auto schedule_type = ancestor_map->schedule_type();
48✔
104
            if (!gpu::is_gpu_schedule(schedule_type)) {
48✔
105
                continue;
×
106
            }
×
107
            auto dim = gpu::gpu_dimension(schedule_type);
48✔
108
            if (dim == gpu::GPUDimension::X) {
48✔
109
                x_dim_indvar = ancestor_map->indvar();
24✔
110
            } else if (dim == gpu::GPUDimension::Y) {
24✔
111
                y_dim_indvar = ancestor_map->indvar();
24✔
112
            } else if (dim == gpu::GPUDimension::Z) {
24✔
113
                z_dim_indvar = ancestor_map->indvar();
×
114
            } else {
×
NEW
115
                throw InvalidSDFGException("Unknown dimension in GPU Schedule type: " + std::to_string((int) dim));
×
116
            }
×
117
        }
48✔
118
    }
158✔
119

120
    return {x_dim_indvar, y_dim_indvar, z_dim_indvar};
24✔
121
}
24✔
122

123
std::tuple<bool, bool, bool> KernelLocalStorage::
124
    available_dims(std::vector<symbolic::Expression> subsets, analysis::AnalysisManager& analysis_manager) {
14✔
125
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
14✔
126
    auto ancestors = scope_analysis.ancestor_scopes(&loop_);
14✔
127

128
    symbolic::Integer iteration_count = get_iteration_count(loop_);
14✔
129

130
    auto [x_dim_size, y_dim_size, z_dim_size] = dim_size(ancestors);
14✔
131
    auto [x_dim_indvar, y_dim_indvar, z_dim_indvar] = dim_indvars(ancestors);
14✔
132

133
    bool x_dim_available = (x_dim_indvar != SymEngine::null);
14✔
134
    bool y_dim_available = (y_dim_indvar != SymEngine::null);
14✔
135
    bool z_dim_available = (z_dim_indvar != SymEngine::null);
14✔
136

137
    if (x_dim_available) {
14✔
138
        bool x_used = false;
14✔
139
        for (auto subset : subsets) {
28✔
140
            for (auto atom : symbolic::atoms(subset)) {
28✔
141
                if (symbolic::eq(atom, x_dim_indvar)) {
28✔
142
                    x_used = true;
10✔
143
                }
10✔
144
            }
28✔
145
        }
28✔
146
        if (x_used) {
14✔
147
            x_dim_available = false;
10✔
148
        }
10✔
149
    }
14✔
150
    if (y_dim_available) {
14✔
151
        bool y_used = false;
14✔
152
        for (auto subset : subsets) {
28✔
153
            for (auto atom : symbolic::atoms(subset)) {
28✔
154
                if (symbolic::eq(atom, y_dim_indvar)) {
28✔
155
                    y_used = true;
4✔
156
                }
4✔
157
            }
28✔
158
        }
28✔
159
        if (y_used) {
14✔
160
            y_dim_available = false;
4✔
161
        }
4✔
162
    }
14✔
163
    if (z_dim_available) {
14✔
164
        bool z_used = false;
×
165
        for (auto subset : subsets) {
×
166
            for (auto atom : symbolic::atoms(subset)) {
×
167
                if (symbolic::eq(atom, z_dim_indvar)) {
×
168
                    z_used = true;
×
169
                }
×
170
            }
×
171
        }
×
172
        if (z_used) {
×
173
            z_dim_available = false;
×
174
        }
×
175
    }
×
176

177
    if (x_dim_available) {
14✔
178
        auto cond = symbolic::Ge(x_dim_size, iteration_count);
4✔
179
        if (symbolic::is_true(cond)) {
4✔
180
            x_dim_available = true;
4✔
181
        }
4✔
182
    }
4✔
183
    if (y_dim_available) {
14✔
184
        auto cond = symbolic::Ge(y_dim_size, iteration_count);
10✔
185
        if (symbolic::is_true(cond)) {
10✔
186
            y_dim_available = true;
10✔
187
        }
10✔
188
    }
10✔
189
    if (z_dim_available) {
14✔
190
        auto cond = symbolic::Ge(z_dim_size, iteration_count);
×
191
        if (symbolic::is_true(cond)) {
×
192
            z_dim_available = true;
×
193
        }
×
194
    }
×
195

196
    return {x_dim_available, y_dim_available, z_dim_available};
14✔
197
}
14✔
198

199
bool KernelLocalStorage::
200
    can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
20✔
201
    auto& sdfg = builder.subject();
20✔
202

203
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
20✔
204
    auto ancestors = scope_analysis.ancestor_scopes(&loop_);
20✔
205

206
    // Criterion: Must not be a GPU map itself
207
    if (auto loop_map = dynamic_cast<structured_control_flow::Map*>(&loop_)) {
20✔
208
        if (gpu::is_gpu_schedule(loop_map->schedule_type())) {
6✔
209
            return false;
6✔
210
        }
6✔
211
    }
6✔
212

213
    // Criterion: Must be nested in a GPU schedule
214
    bool is_gpu_scope = false;
14✔
215
    for (auto ancestor : ancestors) {
96✔
216
        if (auto ancestor_map = dynamic_cast<structured_control_flow::Map*>(ancestor)) {
96✔
217
            if (gpu::is_gpu_schedule(ancestor_map->schedule_type())) {
28✔
218
                is_gpu_scope = true;
28✔
219
            } else if (ancestor_map->schedule_type().value() == ScheduleType_Sequential::value()) {
28✔
220
                continue;
×
221
            } else {
×
222
                return false;
×
223
            }
×
224
        }
28✔
225
    }
96✔
226
    if (!is_gpu_scope) {
14✔
227
        return false;
×
228
    }
×
229

230
    auto& inner_body = this->loop_.root();
14✔
231

232
    // Criterion: Container is contiguous (Maybe can be relaxed later)
233
    auto& type_analysis = analysis_manager.get<analysis::TypeAnalysis>();
14✔
234
    auto type = type_analysis.get_outer_type(container_);
14✔
235
    auto& peeled_type = types::peel_to_innermost_element(*type);
14✔
236
    if (peeled_type.type_id() == types::TypeID::Pointer) {
14✔
237
        return false;
×
238
    }
×
239

240

241
    // Criterion: Iteration count is known and an Integer
242
    symbolic::Integer iteration_count = get_iteration_count(loop_);
14✔
243
    if (iteration_count == SymEngine::null) {
14✔
244
        return false;
×
245
    }
×
246

247
    // Criterion: All block dimensions are known and an Integer
248
    auto [x_dim_size, y_dim_size, z_dim_size] = dim_size(ancestors);
14✔
249
    if (x_dim_size == SymEngine::null || y_dim_size == SymEngine::null || z_dim_size == SymEngine::null) {
14✔
250
        return false;
×
251
    }
×
252

253
    // Criteria related to memory accesses
254
    auto& users = analysis_manager.get<analysis::Users>();
14✔
255
    analysis::UsersView inner_body_users(users, inner_body);
14✔
256

257
    // Criterion: Container is read-only
258
    if (!inner_body_users.writes(this->container_).empty() || !inner_body_users.views(this->container_).empty() ||
14✔
259
        !inner_body_users.moves(this->container_).empty()) {
14✔
260
        return false;
2✔
261
    }
2✔
262
    if (inner_body_users.reads(this->container_).empty()) {
12✔
263
        return false;
×
264
    }
×
265

266
    // Collect moving symbols
267

268
    // Criterion: Memory accesses do not depend on moving symbols
269
    for (auto& user : inner_body_users.uses(this->container_)) {
20✔
270
        auto& subsets = user->subsets();
20✔
271
        for (auto& subset : subsets) {
20✔
272
            for (auto& expr : subset) {
20✔
273
                for (auto& atom : symbolic::atoms(expr)) {
12✔
274
                    if (SymEngine::is_a<SymEngine::Symbol>(*atom)) {
12✔
275
                        auto symbol = SymEngine::rcp_static_cast<const SymEngine::Symbol>(atom);
12✔
276
                        if (!inner_body_users.moves(symbol->get_name()).empty()) {
12✔
277
                            return false;
×
278
                        }
×
279
                    }
12✔
280
                }
12✔
281
            }
12✔
282
        }
20✔
283
    }
20✔
284

285
    // Criterion: Check if all memory accesses are affine w.r.t the inner loop index
286

287
    // Limitations: single memory access
288
    if (inner_body_users.reads(this->container_).size() != 1) {
12✔
289
        return false;
6✔
290
    }
6✔
291
    auto read = inner_body_users.reads(this->container_).at(0);
6✔
292
    if (read->subsets().size() != 1) {
6✔
293
        return false;
×
294
    }
×
295
    auto subsets = read->subsets().at(0);
6✔
296

297
    // Criterion: more than one dimension is available.
298
    auto [x_dim_indvar, y_dim_indvar, z_dim_indvar] = dim_indvars(ancestors);
6✔
299
    symbolic::SymbolVec indvars;
6✔
300
    if (x_dim_indvar != SymEngine::null) {
6✔
301
        indvars.push_back(x_dim_indvar);
6✔
302
    }
6✔
303
    if (y_dim_indvar != SymEngine::null) {
6✔
304
        indvars.push_back(y_dim_indvar);
6✔
305
    }
6✔
306
    if (z_dim_indvar != SymEngine::null) {
6✔
307
        indvars.push_back(z_dim_indvar);
×
308
    }
×
309

310
    if (indvars.size() <= 1) {
6✔
311
        return false;
×
312
    }
×
313

314
    indvars.push_back(loop_.indvar());
6✔
315

316
    // Criterion: Memory access is polynomial of
317
    // c_0 * a + c_1 * b + c_2 * c + c_3 * k, where a, b, c are x-threads, y-threads, z-threads
318
    // and k is the inner loop index
319

320
    for (auto subset : subsets) {
12✔
321
        if (symbolic::polynomial(subset, indvars) == SymEngine::null) {
12✔
322
            return false;
×
323
        }
×
324
    }
12✔
325

326
    // Criterion: inner indvar is used in memory access
327
    bool uses_inner_indvar = false;
6✔
328
    for (auto subset : subsets) {
12✔
329
        for (auto atom : symbolic::atoms(subset)) {
12✔
330
            if (symbolic::eq(atom, loop_.indvar())) {
12✔
331
                uses_inner_indvar = true;
6✔
332
            }
6✔
333
        }
12✔
334
    }
12✔
335
    if (!uses_inner_indvar) {
6✔
336
        return false;
×
337
    }
×
338

339
    // Criterion: Has a free dimension to map to and that dimension is big enough
340
    auto [x_dim_available, y_dim_available, z_dim_available] = available_dims(subsets, analysis_manager);
6✔
341

342
    if (!x_dim_available && !y_dim_available && !z_dim_available) {
6✔
343
        return false;
×
344
    }
×
345

346
    return true;
6✔
347
};
6✔
348

349
void KernelLocalStorage::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
4✔
350
    auto& sdfg = builder.subject();
4✔
351

352
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
4✔
353
    auto ancestors = scope_analysis.ancestor_scopes(&loop_);
4✔
354

355
    auto& users = analysis_manager.get<analysis::Users>();
4✔
356

357
    auto& inner_body = this->loop_.root();
4✔
358
    analysis::UsersView inner_body_users(users, inner_body);
4✔
359

360
    // Detect GPU backend from ancestor map schedule types
361
    bool is_rocm = false;
4✔
362
    for (auto node : ancestors) {
26✔
363
        if (auto ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
26✔
364
            if (ancestor_map->schedule_type().value() == "ROCM") {
8✔
NEW
365
                is_rocm = true;
×
NEW
366
                break;
×
NEW
367
            }
×
368
        }
8✔
369
    }
26✔
370

371
    std::string thread_prefix = is_rocm ? "__daisy_hip_thread_idx_" : "__daisy_cuda_thread_idx_";
4✔
372
    std::string x_name = thread_prefix + "x";
4✔
373
    std::string y_name = thread_prefix + "y";
4✔
374
    std::string z_name = thread_prefix + "z";
4✔
375
    symbolic::Symbol x_symbol = symbolic::symbol(x_name);
4✔
376
    symbolic::Symbol y_symbol = symbolic::symbol(y_name);
4✔
377
    symbolic::Symbol z_symbol = symbolic::symbol(z_name);
4✔
378

379
    auto index_type = types::Scalar(types::PrimitiveType::Int32);
4✔
380
    index_type.storage_type(types::StorageType::NV_Symbol());
4✔
381

382
    std::set<std::string> containers(sdfg.containers().begin(), sdfg.containers().end());
4✔
383
    if (containers.find(x_name) == containers.end()) {
4✔
384
        builder.add_container(x_name, index_type);
3✔
385
    }
3✔
386
    if (containers.find(y_name) == containers.end()) {
4✔
387
        builder.add_container(y_name, index_type);
3✔
388
    }
3✔
389
    if (containers.find(z_name) == containers.end()) {
4✔
390
        builder.add_container(z_name, index_type);
3✔
391
    }
3✔
392

393
    /**
394
        1. Add new shared memory container
395
        2. Add barrier before loop
396
        3. add copyin branch before loop
397
        4. Add barrier before loop
398
        5. replace container in loop
399
        6. replace subset expressions in loop
400
    */
401

402
    symbolic::Integer iteration_count = get_iteration_count(loop_);
4✔
403

404
    auto [x_dim_size, y_dim_size, z_dim_size] = dim_size(ancestors);
4✔
405
    auto [x_dim_indvar, y_dim_indvar, z_dim_indvar] = dim_indvars(ancestors);
4✔
406

407
    auto parent = scope_analysis.parent_scope(&loop_);
4✔
408
    auto parent_seq = static_cast<structured_control_flow::Sequence*>(parent);
4✔
409
    auto& seq = builder.add_sequence_before(*parent_seq, loop_, {}, loop_.debug_info());
4✔
410

411
    // 1. Add new shared memory container
412
    auto& type_analysis = analysis_manager.get<analysis::TypeAnalysis>();
4✔
413
    auto type = type_analysis.get_outer_type(container_);
4✔
414
    auto& peeled_type = types::peel_to_innermost_element(*type);
4✔
415
    auto read = inner_body_users.reads(this->container_).at(0);
4✔
416
    auto subsets = read->subsets().at(0);
4✔
417

418
    auto [x_dim_available, y_dim_available, z_dim_available] = available_dims(subsets, analysis_manager);
4✔
419

420
    // get free dim
421
    symbolic::Symbol target_dim;
4✔
422
    auto [dim_x, dim_y, dim_z] = available_dims(subsets, analysis_manager);
4✔
423

424
    if (dim_x) {
4✔
425
        target_dim = x_symbol;
1✔
426
    } else if (dim_y) {
3✔
427
        target_dim = y_symbol;
3✔
428
    } else if (dim_z) {
3✔
429
        target_dim = z_symbol;
×
430
    } else {
×
431
        throw InvalidSDFGException("No available GPU tiling dimension found!");
×
432
    }
×
433

434
    // std::unique_ptr<types::IType> element_type;
435

436
    // if (peeled_type.type_id() == types::TypeID::Structure) {
437
    //     auto struct_type = static_cast<const types::Structure&>(peeled_type);
438
    //     types::Structure new_struct_type(
439
    //         types::StorageType::NV_Shared(), 8, {}, struct_type.name()
440
    //     );
441
    //     element_type = new_struct_type.clone();
442
    // } else if (peeled_type.type_id() == types::TypeID::Scalar) {
443
    //     auto scalar_type = static_cast<const types::Scalar&>(peeled_type);
444
    //     types::Scalar new_scalar_type(
445
    //         types::StorageType::NV_Shared(), 8, {}, scalar_type.primitive_type()
446
    //     );
447
    //     element_type = new_scalar_type.clone();
448
    // } else {
449
    //     throw InvalidSDFGException(
450
    //         "Unsupported peeled type for KernelLocalStorage."
451
    //     );
452
    // }
453

454
    auto generic_storage = is_rocm ? types::StorageType("AMD_Generic") : types::StorageType::NV_Generic();
4✔
455

456
    types::Array tile_array_type(types::StorageType::NV_Shared(), 8, {}, peeled_type, iteration_count);
4✔
457
    types::Array z_array_type(generic_storage, 8, {}, tile_array_type, z_dim_size);
4✔
458
    types::Array* pred_y;
4✔
459
    if (symbolic::eq(target_dim, z_symbol)) {
4✔
460
        pred_y = &tile_array_type;
×
461
    } else {
4✔
462
        pred_y = &z_array_type;
4✔
463
    }
4✔
464
    types::Array y_array_type(generic_storage, 8, {}, *pred_y, y_dim_size);
4✔
465
    types::Array* pred_x;
4✔
466
    if (symbolic::eq(target_dim, y_symbol)) {
4✔
467
        pred_x = &z_array_type;
3✔
468
    } else {
3✔
469
        pred_x = &y_array_type;
1✔
470
    }
1✔
471
    types::Array x_array_type(generic_storage, 8, {}, *pred_x, x_dim_size);
4✔
472
    types::Array* final_type;
4✔
473
    if (symbolic::eq(target_dim, x_symbol)) {
4✔
474
        final_type = &y_array_type;
1✔
475
    } else {
3✔
476
        final_type = &x_array_type;
3✔
477
    }
3✔
478

479
    std::string shared_container_name = "__daisy_shared_" + container_;
4✔
480
    builder.add_container(shared_container_name, *final_type);
4✔
481

482
    // 2. Add barrier before loop
483
    auto& sync_block1 = builder.add_block(seq);
4✔
484

485
    builder.add_library_node<data_flow::BarrierLocalNode>(sync_block1, {});
4✔
486

487
    // 3. add copyin branch before loop
488
    auto& if_else = builder.add_if_else(seq);
4✔
489

490
    auto condition = symbolic::subs(loop_.condition(), loop_.indvar(), symbolic::add(target_dim, offset_));
4✔
491
    auto& branch = builder.add_case(if_else, condition);
4✔
492

493
    auto& copyin_block = builder.add_block(branch);
4✔
494

495
    auto& access_in = builder.add_access(copyin_block, container_);
4✔
496
    auto& access_out = builder.add_access(copyin_block, shared_container_name);
4✔
497

498
    auto& tasklet = builder.add_tasklet(copyin_block, data_flow::TaskletCode::assign, "out_", {"in_"});
4✔
499

500
    std::vector<symbolic::Expression> copyin_subsets;
4✔
501
    for (auto subset : subsets) {
8✔
502
        auto substituted = symbolic::subs(subset, loop_.indvar(), symbolic::add(target_dim, offset_));
8✔
503
        copyin_subsets.push_back(substituted);
8✔
504
    }
8✔
505

506
    builder.add_computational_memlet(copyin_block, access_in, tasklet, "in_", copyin_subsets, *type);
4✔
507

508
    std::vector<symbolic::Expression> shared_access_subsets = {x_symbol, y_symbol, z_symbol, target_dim};
4✔
509

510
    if (symbolic::eq(target_dim, x_symbol)) {
4✔
511
        shared_access_subsets.erase(shared_access_subsets.begin());
1✔
512
    } else if (symbolic::eq(target_dim, y_symbol)) {
3✔
513
        shared_access_subsets.erase(shared_access_subsets.begin() + 1);
3✔
514
    } else if (symbolic::eq(target_dim, z_symbol)) {
3✔
515
        shared_access_subsets.erase(shared_access_subsets.begin() + 2);
×
516
    }
×
517

518
    builder.add_computational_memlet(copyin_block, tasklet, "out_", access_out, shared_access_subsets);
4✔
519

520
    // 4. Add barrier before loop
521

522
    auto& sync_block2 = builder.add_block(seq);
4✔
523

524
    builder.add_library_node<data_flow::BarrierLocalNode>(sync_block2, {});
4✔
525

526
    // 5. replace container in loop
527
    loop_.replace(symbolic::symbol(container_), symbolic::symbol(shared_container_name));
4✔
528

529
    // 6. replace subset expressions in loop
530
    std::vector<symbolic::Expression> read_shared_access_subsets;
4✔
531
    symbolic::Expression substituted_dimension;
4✔
532
    for (auto& subset : shared_access_subsets) {
12✔
533
        auto substituted = symbolic::subs(subset, target_dim, symbolic::sub(loop_.indvar(), offset_));
12✔
534
        read_shared_access_subsets.push_back(substituted);
12✔
535
    }
12✔
536

537
    auto access_node = static_cast<data_flow::AccessNode*>(read->element());
4✔
538
    for (auto& oedge : access_node->get_parent().out_edges(*access_node)) {
4✔
539
        oedge.set_subset(read_shared_access_subsets);
4✔
540
        oedge.set_base_type(*final_type);
4✔
541
    }
4✔
542

543
    // End of transformation
544

545
    analysis_manager.invalidate_all();
4✔
546

547
    passes::SequenceFusion sf_pass;
4✔
548
    passes::DeadCFGElimination dce_pass;
4✔
549
    passes::TrivialArrayElimination tae_pass;
4✔
550
    bool applies = false;
4✔
551
    do {
8✔
552
        applies = false;
8✔
553
        applies |= dce_pass.run(builder, analysis_manager);
8✔
554
        applies |= sf_pass.run(builder, analysis_manager);
8✔
555
        applies |= tae_pass.run(builder, analysis_manager);
8✔
556
    } while (applies);
8✔
557
};
4✔
558

559
// Serializes this transformation into `j`: the transformation type, the
// target loop under "subgraph", the offset and container under "parameters",
// plus the flat legacy fields "loop_element_id", "offset" and "container".
void KernelLocalStorage::to_json(nlohmann::json& j) const {
    j["transformation_type"] = this->name();

    // Classify the concrete loop kind; checked in the order for, while, map.
    std::string loop_type = "unknown";
    if (dynamic_cast<structured_control_flow::For*>(&loop_) != nullptr) {
        loop_type = "for";
    } else if (dynamic_cast<structured_control_flow::While*>(&loop_) != nullptr) {
        loop_type = "while";
    } else if (dynamic_cast<structured_control_flow::Map*>(&loop_) != nullptr) {
        loop_type = "map";
    }

    j["subgraph"] = {{"0", {{"element_id", this->loop_.element_id()}, {"type", loop_type}}}};

    j["parameters"] = {{"offset", serializer::JSONSerializer::expression(offset_)}, {"container", this->container_}};

    // Legacy fields for backward compatibility
    j["loop_element_id"] = this->loop_.element_id();
    j["offset"] = serializer::JSONSerializer::expression(offset_);
    j["container"] = this->container_;
}
1✔
582

583
// Reconstructs a KernelLocalStorage from its JSON description.
//
// The loop id is read from "subgraph"/"0"/"element_id" or, if "subgraph" is
// absent, from the legacy field "loop_element_id". Offset and container are
// read from "parameters", falling back to the legacy top-level fields.
//
// Throws InvalidTransformationDescriptionException if the element does not
// exist, is not a structured loop, or if offset/container are missing.
KernelLocalStorage KernelLocalStorage::from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& desc) {
    size_t loop_id;
    if (desc.contains("subgraph")) {
        const auto& node_desc = desc.at("subgraph").at("0");
        loop_id = node_desc.at("element_id").get<size_t>();
    } else {
        loop_id = desc.at("loop_element_id").get<size_t>();
    }

    auto element = builder.find_element_by_id(loop_id);
    if (!element) {
        throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(loop_id) + " not found.");
    }
    // The constructor accepts any StructuredLoop and to_json() may emit
    // non-"for" loops, so cast to the base loop type and verify the result
    // instead of dereferencing an unchecked For* cast.
    auto outer_loop = dynamic_cast<structured_control_flow::StructuredLoop*>(element);
    if (!outer_loop) {
        throw InvalidTransformationDescriptionException(
            "Element with ID " + std::to_string(loop_id) + " is not a structured loop."
        );
    }

    nlohmann::json offset_json;
    std::string container;
    if (desc.contains("parameters")) {
        const auto& params = desc.at("parameters");
        if (params.contains("offset")) {
            offset_json = params.at("offset");
        }
        if (params.contains("container")) {
            container = params.at("container").get<std::string>();
        }
    }
    // Legacy fallbacks
    if (offset_json.is_null() && desc.contains("offset")) {
        offset_json = desc.at("offset");
    }
    if (container.empty() && desc.contains("container")) {
        container = desc.at("container").get<std::string>();
    }

    if (offset_json.is_null()) {
        throw InvalidTransformationDescriptionException("KernelLocalStorage: missing 'offset' parameter.");
    }
    if (container.empty()) {
        throw InvalidTransformationDescriptionException("KernelLocalStorage: missing 'container' parameter.");
    }

    auto offset = symbolic::parse(offset_json);

    return KernelLocalStorage(*outer_loop, offset, container);
}
1✔
620

621
} // namespace transformations
622
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc