• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 23249128422

18 Mar 2026 02:12PM UTC coverage: 63.938% (+0.3%) from 63.617%
23249128422

Pull #584

github

web-flow
Merge 0fcde60dc into 64d54d7de
Pull Request #584: adds diamond tiling test

18 of 20 new or added lines in 1 file covered. (90.0%)

1180 existing lines in 28 files now uncovered.

26122 of 40855 relevant lines covered (63.94%)

407.69 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

79.82
/opt/src/transformations/offloading/kernel_local_storage.cpp
1
#include "sdfg/transformations/offloading/kernel_local_storage.h"
2

3
#include <set>
4
#include <string>
5
#include <tuple>
6
#include <vector>
7

8
#include "sdfg/analysis/scope_analysis.h"
9
#include "sdfg/analysis/type_analysis.h"
10
#include "sdfg/builder/structured_sdfg_builder.h"
11
#include "sdfg/data_flow/access_node.h"
12
#include "sdfg/data_flow/library_node.h"
13
#include "sdfg/data_flow/library_nodes/barrier_local_node.h"
14
#include "sdfg/data_flow/tasklet.h"
15
#include "sdfg/exceptions.h"
16
#include "sdfg/passes/dataflow/trivial_array_elimination.h"
17
#include "sdfg/passes/structured_control_flow/dead_cfg_elimination.h"
18
#include "sdfg/passes/structured_control_flow/sequence_fusion.h"
19
#include "sdfg/serializer/json_serializer.h"
20
#include "sdfg/structured_control_flow/control_flow_node.h"
21
#include "sdfg/structured_control_flow/for.h"
22
#include "sdfg/structured_control_flow/if_else.h"
23
#include "sdfg/structured_control_flow/map.h"
24
#include "sdfg/structured_control_flow/sequence.h"
25
#include "sdfg/structured_control_flow/structured_loop.h"
26
#include "sdfg/symbolic/polynomials.h"
27
#include "sdfg/symbolic/symbolic.h"
28
#include "sdfg/targets/gpu/gpu_schedule_type.h"
29
#include "sdfg/transformations/utils.h"
30
#include "sdfg/types/array.h"
31
#include "sdfg/types/structure.h"
32
#include "sdfg/types/type.h"
33
#include "sdfg/types/utils.h"
34
#include "symengine/symengine_rcp.h"
35

36
namespace sdfg {
37
namespace transformations {
38

39
/**
 * @brief Creates a kernel-local-storage transformation description.
 *
 * @param loop      Loop the transformation operates on (stored by reference).
 * @param offset    Symbolic offset added to / subtracted from the thread index
 *                  when substituting the loop induction variable in apply().
 * @param container Name of the container whose accesses are redirected to the
 *                  "__daisy_shared_"-prefixed container.
 */
KernelLocalStorage::KernelLocalStorage(
    structured_control_flow::StructuredLoop& loop, symbolic::Expression offset, const std::string& container
)
    : loop_(loop),
      offset_(offset),
      container_(container) {}
34✔
43

44
/// @brief Returns the identifier of this transformation ("KernelLocalStorage").
std::string KernelLocalStorage::name() const {
    return std::string("KernelLocalStorage");
}
3✔
45

46
bool KernelLocalStorage::reads_container(std::string container, analysis::UsersView& body_users) {
×
47
    if (body_users.reads(container).size() == 1) {
×
48
        return true;
×
49
    }
×
50
    return false;
×
51
}
×
52

53
/**
 * @brief Checks whether any read of the target container is indexed by the loop's induction variable.
 *
 * Only reads that carry exactly one subset are inspected; reads with zero or
 * several subsets are skipped (TODO: Handle multiple subsets).
 *
 * @param body_users Users view that is queried for reads of `container_`.
 * @return true if at least one inspected index expression uses `loop_.indvar()`.
 */
bool KernelLocalStorage::uses_inner_indvar(analysis::UsersView& body_users) {
    bool found = false;
    for (auto& read_user : body_users.reads(this->container_)) {
        auto& user_subsets = read_user->subsets();
        if (user_subsets.size() != 1) {
            // Nothing to inspect (no subset) or not supported yet (multiple subsets).
            continue;
        }
        for (auto index_expr : user_subsets.at(0)) {
            if (symbolic::uses(index_expr, loop_.indvar())) {
                found = true;
            }
        }
    }
    return found;
}
×
68

69
std::tuple<symbolic::Integer, symbolic::Integer, symbolic::Integer> KernelLocalStorage::
70
    dim_size(const std::vector<structured_control_flow::ControlFlowNode*> ancestors) {
41✔
71
    symbolic::Integer x_dim_size = symbolic::one();
41✔
72
    symbolic::Integer y_dim_size = symbolic::one();
41✔
73
    symbolic::Integer z_dim_size = symbolic::one();
41✔
74

75
    for (auto node : ancestors) {
257✔
76
        if (auto ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
257✔
77
            auto schedule_type = ancestor_map->schedule_type();
81✔
78
            if (!gpu::is_gpu_schedule(schedule_type)) {
81✔
79
                continue;
×
80
            }
×
81
            auto dim = gpu::gpu_dimension(schedule_type);
81✔
82
            if (dim == gpu::GPUDimension::X) {
81✔
83
                x_dim_size = gpu::gpu_block_size(schedule_type);
41✔
84
            } else if (dim == gpu::GPUDimension::Y) {
41✔
85
                y_dim_size = gpu::gpu_block_size(schedule_type);
40✔
86
            } else if (dim == gpu::GPUDimension::Z) {
40✔
87
                z_dim_size = gpu::gpu_block_size(schedule_type);
×
88
            } else {
×
89
                throw InvalidSDFGException("Unknown dimension in GPU Schedule type: " + std::to_string((int) dim));
×
90
            }
×
91
        }
81✔
92
    }
257✔
93

94
    return {x_dim_size, y_dim_size, z_dim_size};
41✔
95
};
41✔
96

97
std::tuple<symbolic::Symbol, symbolic::Symbol, symbolic::Symbol> KernelLocalStorage::
98
    dim_indvars(const std::vector<structured_control_flow::ControlFlowNode*> ancestors) {
30✔
99
    symbolic::Symbol x_dim_indvar = SymEngine::null;
30✔
100
    symbolic::Symbol y_dim_indvar = SymEngine::null;
30✔
101
    symbolic::Symbol z_dim_indvar = SymEngine::null;
30✔
102

103
    for (auto node : ancestors) {
186✔
104
        if (auto ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
186✔
105
            auto schedule_type = ancestor_map->schedule_type();
59✔
106
            if (!gpu::is_gpu_schedule(schedule_type)) {
59✔
107
                continue;
×
108
            }
×
109
            auto dim = gpu::gpu_dimension(schedule_type);
59✔
110
            if (dim == gpu::GPUDimension::X) {
59✔
111
                x_dim_indvar = ancestor_map->indvar();
30✔
112
            } else if (dim == gpu::GPUDimension::Y) {
30✔
113
                y_dim_indvar = ancestor_map->indvar();
29✔
114
            } else if (dim == gpu::GPUDimension::Z) {
29✔
115
                z_dim_indvar = ancestor_map->indvar();
×
116
            } else {
×
117
                throw InvalidSDFGException("Unknown dimension in GPU Schedule type: " + std::to_string((int) dim));
×
118
            }
×
119
        }
59✔
120
    }
186✔
121

122
    return {x_dim_indvar, y_dim_indvar, z_dim_indvar};
30✔
123
}
30✔
124

125
std::tuple<bool, bool, bool> KernelLocalStorage::
126
    available_dims(std::vector<symbolic::Expression> subsets, analysis::AnalysisManager& analysis_manager) {
15✔
127
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
15✔
128
    auto ancestors = scope_analysis.ancestor_scopes(&loop_);
15✔
129

130
    symbolic::Integer iteration_count = get_iteration_count(loop_);
15✔
131

132
    auto [x_dim_size, y_dim_size, z_dim_size] = dim_size(ancestors);
15✔
133
    auto [x_dim_indvar, y_dim_indvar, z_dim_indvar] = dim_indvars(ancestors);
15✔
134

135
    bool x_dim_available = (x_dim_indvar != SymEngine::null);
15✔
136
    bool y_dim_available = (y_dim_indvar != SymEngine::null);
15✔
137
    bool z_dim_available = (z_dim_indvar != SymEngine::null);
15✔
138

139
    if (x_dim_available) {
15✔
140
        bool x_used = false;
15✔
141
        for (auto subset : subsets) {
30✔
142
            for (auto atom : symbolic::atoms(subset)) {
31✔
143
                if (symbolic::eq(atom, x_dim_indvar)) {
31✔
144
                    x_used = true;
11✔
145
                }
11✔
146
            }
31✔
147
        }
30✔
148
        if (x_used) {
15✔
149
            x_dim_available = false;
11✔
150
        }
11✔
151
    }
15✔
152
    if (y_dim_available) {
15✔
153
        bool y_used = false;
15✔
154
        for (auto subset : subsets) {
30✔
155
            for (auto atom : symbolic::atoms(subset)) {
31✔
156
                if (symbolic::eq(atom, y_dim_indvar)) {
31✔
157
                    y_used = true;
5✔
158
                }
5✔
159
            }
31✔
160
        }
30✔
161
        if (y_used) {
15✔
162
            y_dim_available = false;
5✔
163
        }
5✔
164
    }
15✔
165
    if (z_dim_available) {
15✔
166
        bool z_used = false;
×
167
        for (auto subset : subsets) {
×
168
            for (auto atom : symbolic::atoms(subset)) {
×
169
                if (symbolic::eq(atom, z_dim_indvar)) {
×
170
                    z_used = true;
×
171
                }
×
172
            }
×
173
        }
×
174
        if (z_used) {
×
175
            z_dim_available = false;
×
176
        }
×
177
    }
×
178

179
    if (x_dim_available) {
15✔
180
        auto cond = symbolic::Ge(x_dim_size, iteration_count);
4✔
181
        if (symbolic::is_true(cond)) {
4✔
182
            x_dim_available = true;
4✔
183
        }
4✔
184
    }
4✔
185
    if (y_dim_available) {
15✔
186
        auto cond = symbolic::Ge(y_dim_size, iteration_count);
10✔
187
        if (symbolic::is_true(cond)) {
10✔
188
            y_dim_available = true;
10✔
189
        }
10✔
190
    }
10✔
191
    if (z_dim_available) {
15✔
192
        auto cond = symbolic::Ge(z_dim_size, iteration_count);
×
193
        if (symbolic::is_true(cond)) {
×
194
            z_dim_available = true;
×
195
        }
×
196
    }
×
197

198
    return {x_dim_available, y_dim_available, z_dim_available};
15✔
199
}
15✔
200

201
bool KernelLocalStorage::
202
    can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
32✔
203
    auto& sdfg = builder.subject();
32✔
204

205
    // Criterion: transformation cannot be applied twice on the same container
206
    std::set<std::string> containers(sdfg.containers().begin(), sdfg.containers().end());
32✔
207
    std::string shared_container_name = "__daisy_shared_" + container_;
32✔
208
    if (containers.find(shared_container_name) != containers.end()) {
32✔
209
        return false;
×
210
    }
×
211

212
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
32✔
213
    auto ancestors = scope_analysis.ancestor_scopes(&loop_);
32✔
214

215
    // Criterion: Must not be a GPU map itself
216
    if (auto loop_map = dynamic_cast<structured_control_flow::Map*>(&loop_)) {
32✔
217
        if (gpu::is_gpu_schedule(loop_map->schedule_type())) {
7✔
218
            return false;
7✔
219
        }
7✔
220
    }
7✔
221

222
    // Criterion: Must be nested in a GPU schedule
223
    bool is_gpu_scope = false;
25✔
224
    for (auto ancestor : ancestors) {
145✔
225
        if (auto ancestor_map = dynamic_cast<structured_control_flow::Map*>(ancestor)) {
145✔
226
            if (gpu::is_gpu_schedule(ancestor_map->schedule_type())) {
47✔
227
                is_gpu_scope = true;
47✔
228
            } else if (ancestor_map->schedule_type().value() == ScheduleType_Sequential::value()) {
47✔
229
                continue;
×
230
            } else {
×
231
                return false;
×
232
            }
×
233
        }
47✔
234
    }
145✔
235
    if (!is_gpu_scope) {
25✔
236
        return false;
1✔
237
    }
1✔
238

239
    auto& inner_body = this->loop_.root();
24✔
240

241
    // Criterion: Container is contiguous (Maybe can be relaxed later)
242
    auto& type_analysis = analysis_manager.get<analysis::TypeAnalysis>();
24✔
243
    auto type = type_analysis.get_outer_type(container_);
24✔
244
    auto& peeled_type = types::peel_to_innermost_element(*type);
24✔
245
    if (peeled_type.type_id() == types::TypeID::Pointer) {
24✔
246
        return false;
1✔
247
    }
1✔
248

249

250
    // Criterion: Iteration count is known and an Integer
251
    symbolic::Integer iteration_count = get_iteration_count(loop_);
23✔
252
    if (iteration_count == SymEngine::null) {
23✔
253
        return false;
1✔
254
    }
1✔
255

256
    // Criterion: All block dimensions are known and an Integer
257
    auto [x_dim_size, y_dim_size, z_dim_size] = dim_size(ancestors);
22✔
258
    if (x_dim_size == SymEngine::null || y_dim_size == SymEngine::null || z_dim_size == SymEngine::null) {
22✔
259
        return false;
×
260
    }
×
261

262
    // Criteria related to memory accesses
263
    auto& users = analysis_manager.get<analysis::Users>();
22✔
264
    analysis::UsersView inner_body_users(users, inner_body);
22✔
265

266
    // Criterion: Container is read-only
267
    if (!inner_body_users.writes(this->container_).empty() || !inner_body_users.views(this->container_).empty() ||
22✔
268
        !inner_body_users.moves(this->container_).empty()) {
22✔
269
        return false;
3✔
270
    }
3✔
271
    if (inner_body_users.reads(this->container_).empty()) {
19✔
272
        return false;
1✔
273
    }
1✔
274

275
    // Collect moving symbols
276

277
    // Criterion: Memory accesses do not depend on moving symbols
278
    for (auto& user : inner_body_users.uses(this->container_)) {
27✔
279
        auto& subsets = user->subsets();
27✔
280
        for (auto& subset : subsets) {
27✔
281
            for (auto& expr : subset) {
27✔
282
                for (auto& atom : symbolic::atoms(expr)) {
27✔
283
                    if (SymEngine::is_a<SymEngine::Symbol>(*atom)) {
27✔
284
                        auto symbol = SymEngine::rcp_static_cast<const SymEngine::Symbol>(atom);
27✔
285
                        if (!inner_body_users.moves(symbol->get_name()).empty()) {
27✔
286
                            return false;
×
287
                        }
×
288
                    }
27✔
289
                }
27✔
290
            }
26✔
291
        }
27✔
292
    }
27✔
293

294
    // Criterion: Check if all memory accesses are affine w.r.t the inner loop index
295

296
    // Limitations: single memory access
297
    if (inner_body_users.reads(this->container_).size() != 1) {
18✔
298
        return false;
7✔
299
    }
7✔
300
    auto read = inner_body_users.reads(this->container_).at(0);
11✔
301
    if (read->subsets().size() != 1) {
11✔
302
        return false;
×
303
    }
×
304
    auto subsets = read->subsets().at(0);
11✔
305

306
    // Criterion: more than one dimension is available.
307
    auto [x_dim_indvar, y_dim_indvar, z_dim_indvar] = dim_indvars(ancestors);
11✔
308
    symbolic::SymbolVec indvars;
11✔
309
    if (x_dim_indvar != SymEngine::null) {
11✔
310
        indvars.push_back(x_dim_indvar);
11✔
311
    }
11✔
312
    if (y_dim_indvar != SymEngine::null) {
11✔
313
        indvars.push_back(y_dim_indvar);
10✔
314
    }
10✔
315
    if (z_dim_indvar != SymEngine::null) {
11✔
316
        indvars.push_back(z_dim_indvar);
×
317
    }
×
318

319
    if (indvars.size() <= 1) {
11✔
320
        return false;
1✔
321
    }
1✔
322

323
    indvars.push_back(loop_.indvar());
10✔
324

325
    // Criterion: Memory access is polynomial of
326
    // c_0 * a + c_1 * b + c_2 * c + c_3 * k, where a, b, c are x-threads, y-threads, z-threads
327
    // and k is the inner loop index
328

329
    for (auto subset : subsets) {
20✔
330
        if (symbolic::polynomial(subset, indvars) == SymEngine::null) {
20✔
331
            return false;
×
332
        }
×
333
    }
20✔
334

335
    // Criterion: inner indvar is used in memory access
336
    bool uses_inner_indvar = false;
10✔
337
    for (auto subset : subsets) {
20✔
338
        for (auto atom : symbolic::atoms(subset)) {
21✔
339
            if (symbolic::eq(atom, loop_.indvar())) {
21✔
340
                uses_inner_indvar = true;
8✔
341
            }
8✔
342
        }
21✔
343
    }
20✔
344
    if (!uses_inner_indvar) {
10✔
345
        return false;
2✔
346
    }
2✔
347

348
    // Criterion: Containers in subset expressions are not written to in the loop
349
    for (auto subset : subsets) {
15✔
350
        for (auto atom : symbolic::atoms(subset)) {
16✔
351
            if (SymEngine::is_a<SymEngine::Symbol>(*atom)) {
16✔
352
                auto symbol = SymEngine::rcp_static_cast<const SymEngine::Symbol>(atom);
16✔
353
                if (!inner_body_users.writes(symbol->get_name()).empty()) {
16✔
354
                    return false;
1✔
355
                }
1✔
356
            }
16✔
357
        }
16✔
358
    }
15✔
359

360
    // Criterion: Has a free dimension to map to and that dimension is big enough
361
    auto [x_dim_available, y_dim_available, z_dim_available] = available_dims(subsets, analysis_manager);
7✔
362

363
    if (!x_dim_available && !y_dim_available && !z_dim_available) {
7✔
364
        return false;
1✔
365
    }
1✔
366

367
    return true;
6✔
368
};
7✔
369

370
void KernelLocalStorage::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
4✔
371
    auto& sdfg = builder.subject();
4✔
372

373
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
4✔
374
    auto ancestors = scope_analysis.ancestor_scopes(&loop_);
4✔
375

376
    auto& users = analysis_manager.get<analysis::Users>();
4✔
377

378
    auto& inner_body = this->loop_.root();
4✔
379
    analysis::UsersView inner_body_users(users, inner_body);
4✔
380

381
    // Detect GPU backend from ancestor map schedule types
382
    bool is_rocm = false;
4✔
383
    for (auto node : ancestors) {
26✔
384
        if (auto ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
26✔
385
            if (ancestor_map->schedule_type().value() == "ROCM") {
8✔
UNCOV
386
                is_rocm = true;
×
UNCOV
387
                break;
×
UNCOV
388
            }
×
389
        }
8✔
390
    }
26✔
391

392
    std::string thread_prefix = is_rocm ? "__daisy_hip_thread_idx_" : "__daisy_cuda_thread_idx_";
4✔
393
    std::string x_name = thread_prefix + "x";
4✔
394
    std::string y_name = thread_prefix + "y";
4✔
395
    std::string z_name = thread_prefix + "z";
4✔
396
    symbolic::Symbol x_symbol = symbolic::symbol(x_name);
4✔
397
    symbolic::Symbol y_symbol = symbolic::symbol(y_name);
4✔
398
    symbolic::Symbol z_symbol = symbolic::symbol(z_name);
4✔
399

400
    auto index_type = types::Scalar(types::PrimitiveType::Int32);
4✔
401
    index_type.storage_type(types::StorageType::NV_Symbol());
4✔
402

403
    std::set<std::string> containers(sdfg.containers().begin(), sdfg.containers().end());
4✔
404
    if (containers.find(x_name) == containers.end()) {
4✔
405
        builder.add_container(x_name, index_type);
3✔
406
    }
3✔
407
    if (containers.find(y_name) == containers.end()) {
4✔
408
        builder.add_container(y_name, index_type);
3✔
409
    }
3✔
410
    if (containers.find(z_name) == containers.end()) {
4✔
411
        builder.add_container(z_name, index_type);
3✔
412
    }
3✔
413

414
    /**
415
        1. Add new shared memory container
416
        2. Add barrier before loop
417
        3. add copyin branch before loop
418
        4. Add barrier before loop
419
        5. replace container in loop
420
        6. replace subset expressions in loop
421
    */
422

423
    symbolic::Integer iteration_count = get_iteration_count(loop_);
4✔
424

425
    auto [x_dim_size, y_dim_size, z_dim_size] = dim_size(ancestors);
4✔
426
    auto [x_dim_indvar, y_dim_indvar, z_dim_indvar] = dim_indvars(ancestors);
4✔
427

428
    auto parent = scope_analysis.parent_scope(&loop_);
4✔
429
    auto parent_seq = static_cast<structured_control_flow::Sequence*>(parent);
4✔
430
    auto& seq = builder.add_sequence_before(*parent_seq, loop_, {}, loop_.debug_info());
4✔
431

432
    // 1. Add new shared memory container
433
    auto& type_analysis = analysis_manager.get<analysis::TypeAnalysis>();
4✔
434
    auto type = type_analysis.get_outer_type(container_);
4✔
435
    auto& peeled_type = types::peel_to_innermost_element(*type);
4✔
436
    auto read = inner_body_users.reads(this->container_).at(0);
4✔
437
    auto subsets = read->subsets().at(0);
4✔
438

439
    auto [x_dim_available, y_dim_available, z_dim_available] = available_dims(subsets, analysis_manager);
4✔
440

441
    // get free dim
442
    symbolic::Symbol target_dim;
4✔
443
    auto [dim_x, dim_y, dim_z] = available_dims(subsets, analysis_manager);
4✔
444

445
    if (dim_x) {
4✔
446
        target_dim = x_symbol;
1✔
447
    } else if (dim_y) {
3✔
448
        target_dim = y_symbol;
3✔
449
    } else if (dim_z) {
3✔
UNCOV
450
        target_dim = z_symbol;
×
UNCOV
451
    } else {
×
UNCOV
452
        throw InvalidSDFGException("No available GPU tiling dimension found!");
×
UNCOV
453
    }
×
454

455
    // std::unique_ptr<types::IType> element_type;
456

457
    // if (peeled_type.type_id() == types::TypeID::Structure) {
458
    //     auto struct_type = static_cast<const types::Structure&>(peeled_type);
459
    //     types::Structure new_struct_type(
460
    //         types::StorageType::NV_Shared(), 8, {}, struct_type.name()
461
    //     );
462
    //     element_type = new_struct_type.clone();
463
    // } else if (peeled_type.type_id() == types::TypeID::Scalar) {
464
    //     auto scalar_type = static_cast<const types::Scalar&>(peeled_type);
465
    //     types::Scalar new_scalar_type(
466
    //         types::StorageType::NV_Shared(), 8, {}, scalar_type.primitive_type()
467
    //     );
468
    //     element_type = new_scalar_type.clone();
469
    // } else {
470
    //     throw InvalidSDFGException(
471
    //         "Unsupported peeled type for KernelLocalStorage."
472
    //     );
473
    // }
474

475
    auto generic_storage = is_rocm ? types::StorageType("AMD_Generic") : types::StorageType::NV_Generic();
4✔
476

477
    types::Array tile_array_type(types::StorageType::NV_Shared(), 8, {}, peeled_type, iteration_count);
4✔
478
    types::Array z_array_type(generic_storage, 8, {}, tile_array_type, z_dim_size);
4✔
479
    types::Array* pred_y;
4✔
480
    if (symbolic::eq(target_dim, z_symbol)) {
4✔
UNCOV
481
        pred_y = &tile_array_type;
×
482
    } else {
4✔
483
        pred_y = &z_array_type;
4✔
484
    }
4✔
485
    types::Array y_array_type(generic_storage, 8, {}, *pred_y, y_dim_size);
4✔
486
    types::Array* pred_x;
4✔
487
    if (symbolic::eq(target_dim, y_symbol)) {
4✔
488
        pred_x = &z_array_type;
3✔
489
    } else {
3✔
490
        pred_x = &y_array_type;
1✔
491
    }
1✔
492
    types::Array x_array_type(generic_storage, 8, {}, *pred_x, x_dim_size);
4✔
493
    types::Array* final_type;
4✔
494
    if (symbolic::eq(target_dim, x_symbol)) {
4✔
495
        final_type = &y_array_type;
1✔
496
    } else {
3✔
497
        final_type = &x_array_type;
3✔
498
    }
3✔
499

500
    std::string shared_container_name = "__daisy_shared_" + container_;
4✔
501
    builder.add_container(shared_container_name, *final_type);
4✔
502

503
    // 2. Add barrier before loop
504
    auto& sync_block1 = builder.add_block(seq);
4✔
505

506
    builder.add_library_node<data_flow::BarrierLocalNode>(sync_block1, {});
4✔
507

508
    // 3. add copyin branch before loop
509
    auto& if_else = builder.add_if_else(seq);
4✔
510

511
    auto condition = symbolic::subs(loop_.condition(), loop_.indvar(), symbolic::add(target_dim, offset_));
4✔
512
    auto& branch = builder.add_case(if_else, condition);
4✔
513

514
    auto& copyin_block = builder.add_block(branch);
4✔
515

516
    auto& access_in = builder.add_access(copyin_block, container_);
4✔
517
    auto& access_out = builder.add_access(copyin_block, shared_container_name);
4✔
518

519
    auto& tasklet = builder.add_tasklet(copyin_block, data_flow::TaskletCode::assign, "out_", {"in_"});
4✔
520

521
    std::vector<symbolic::Expression> copyin_subsets;
4✔
522
    for (auto subset : subsets) {
8✔
523
        auto substituted = symbolic::subs(subset, loop_.indvar(), symbolic::add(target_dim, offset_));
8✔
524
        copyin_subsets.push_back(substituted);
8✔
525
    }
8✔
526

527
    builder.add_computational_memlet(copyin_block, access_in, tasklet, "in_", copyin_subsets, *type);
4✔
528

529
    std::vector<symbolic::Expression> shared_access_subsets = {x_symbol, y_symbol, z_symbol, target_dim};
4✔
530

531
    if (symbolic::eq(target_dim, x_symbol)) {
4✔
532
        shared_access_subsets.erase(shared_access_subsets.begin());
1✔
533
    } else if (symbolic::eq(target_dim, y_symbol)) {
3✔
534
        shared_access_subsets.erase(shared_access_subsets.begin() + 1);
3✔
535
    } else if (symbolic::eq(target_dim, z_symbol)) {
3✔
UNCOV
536
        shared_access_subsets.erase(shared_access_subsets.begin() + 2);
×
UNCOV
537
    }
×
538

539
    builder.add_computational_memlet(copyin_block, tasklet, "out_", access_out, shared_access_subsets);
4✔
540

541
    // 4. Add barrier before loop
542

543
    auto& sync_block2 = builder.add_block(seq);
4✔
544

545
    builder.add_library_node<data_flow::BarrierLocalNode>(sync_block2, {});
4✔
546

547
    // 5. replace container in loop
548
    loop_.replace(symbolic::symbol(container_), symbolic::symbol(shared_container_name));
4✔
549

550
    // 6. replace subset expressions in loop
551
    std::vector<symbolic::Expression> read_shared_access_subsets;
4✔
552
    symbolic::Expression substituted_dimension;
4✔
553
    for (auto& subset : shared_access_subsets) {
12✔
554
        auto substituted = symbolic::subs(subset, target_dim, symbolic::sub(loop_.indvar(), offset_));
12✔
555
        read_shared_access_subsets.push_back(substituted);
12✔
556
    }
12✔
557

558
    auto access_node = static_cast<data_flow::AccessNode*>(read->element());
4✔
559
    for (auto& oedge : access_node->get_parent().out_edges(*access_node)) {
4✔
560
        oedge.set_subset(read_shared_access_subsets);
4✔
561
        oedge.set_base_type(*final_type);
4✔
562
    }
4✔
563

564
    // End of transformation
565

566
    analysis_manager.invalidate_all();
4✔
567

568
    passes::SequenceFusion sf_pass;
4✔
569
    passes::DeadCFGElimination dce_pass;
4✔
570
    passes::TrivialArrayElimination tae_pass;
4✔
571
    bool applies = false;
4✔
572
    do {
8✔
573
        applies = false;
8✔
574
        applies |= dce_pass.run(builder, analysis_manager);
8✔
575
        applies |= sf_pass.run(builder, analysis_manager);
8✔
576
        applies |= tae_pass.run(builder, analysis_manager);
8✔
577
    } while (applies);
8✔
578
};
4✔
579

580
/**
 * @brief Serializes this transformation description into `j`.
 *
 * Writes "transformation_type", a "subgraph" entry describing the loop
 * (element id and loop kind), a "parameters" object (offset, container) and,
 * for backward compatibility, the legacy top-level fields "loop_element_id",
 * "offset" and "container".
 *
 * @param j JSON object that receives the fields.
 */
void KernelLocalStorage::to_json(nlohmann::json& j) const {
    j["transformation_type"] = this->name();

    // Classify the loop by its dynamic type; the first matching kind wins.
    const std::string loop_type = [this]() -> std::string {
        if (dynamic_cast<structured_control_flow::For*>(&loop_)) {
            return "for";
        }
        if (dynamic_cast<structured_control_flow::While*>(&loop_)) {
            return "while";
        }
        if (dynamic_cast<structured_control_flow::Map*>(&loop_)) {
            return "map";
        }
        return "unknown";
    }();

    j["subgraph"] = {{"0", {{"element_id", this->loop_.element_id()}, {"type", loop_type}}}};

    j["parameters"] = {{"offset", serializer::JSONSerializer::expression(offset_)}, {"container", this->container_}};

    // Legacy fields for backward compatibility
    j["loop_element_id"] = this->loop_.element_id();
    j["offset"] = serializer::JSONSerializer::expression(offset_);
    j["container"] = this->container_;
}
1✔
603

604
/**
 * @brief Reconstructs a KernelLocalStorage description from JSON.
 *
 * The loop id is taken from "subgraph"/"0"/"element_id" if "subgraph" exists,
 * otherwise from the legacy field "loop_element_id". "offset" and "container"
 * are taken from "parameters" when present there, otherwise from the legacy
 * top-level fields of the same name.
 *
 * The referenced element is now required to be a StructuredLoop. Previously it
 * was cast to `For*` and dereferenced without a null check, so a description
 * produced by to_json() for a Map loop led to a null dereference.
 *
 * @param builder Builder used to resolve the loop element by id.
 * @param desc    JSON description as produced by to_json().
 * @return The reconstructed transformation.
 * @throws InvalidTransformationDescriptionException if the element does not
 *         exist or is not a structured loop.
 */
KernelLocalStorage KernelLocalStorage::from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& desc) {
    size_t loop_id;
    if (desc.contains("subgraph")) {
        const auto& node_desc = desc.at("subgraph").at("0");
        loop_id = node_desc.at("element_id").get<size_t>();
    } else {
        // Legacy layout
        loop_id = desc.at("loop_element_id").get<size_t>();
    }

    auto element = builder.find_element_by_id(loop_id);
    if (!element) {
        throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(loop_id) + " not found.");
    }
    auto loop = dynamic_cast<structured_control_flow::StructuredLoop*>(element);
    if (!loop) {
        throw InvalidTransformationDescriptionException(
            "Element with ID " + std::to_string(loop_id) + " is not a structured loop."
        );
    }

    nlohmann::json offset_json;
    std::string container;
    if (desc.contains("parameters")) {
        const auto& params = desc.at("parameters");
        if (params.contains("offset")) {
            offset_json = params.at("offset");
        }
        if (params.contains("container")) {
            container = params.at("container").get<std::string>();
        }
    }
    // Fall back to the legacy top-level fields for anything not found in "parameters"
    if (offset_json.is_null() && desc.contains("offset")) {
        offset_json = desc.at("offset");
    }
    if (container.empty() && desc.contains("container")) {
        container = desc.at("container").get<std::string>();
    }

    auto offset = symbolic::parse(offset_json);

    return KernelLocalStorage(*loop, offset, container);
}
1✔
641

642
} // namespace transformations
643
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc