• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 23906044628

02 Apr 2026 02:40PM UTC coverage: 64.553% (+0.08%) from 64.474%
23906044628

Pull #632

github

web-flow
Merge b2698daab into 3125b927b
Pull Request #632: Separate can_be_applied and apply for GPUTiling during Loop Scheduling

282 of 348 new or added lines in 16 files covered. (81.03%)

29 existing lines in 10 files now uncovered.

28998 of 44921 relevant lines covered (64.55%)

453.01 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

80.26
/opt/src/transformations/offloading/kernel_local_storage.cpp
1
#include "sdfg/transformations/offloading/kernel_local_storage.h"
2

3
#include <set>
4
#include <string>
5
#include <tuple>
6
#include <vector>
7

8
#include "sdfg/analysis/scope_analysis.h"
9
#include "sdfg/analysis/type_analysis.h"
10
#include "sdfg/builder/structured_sdfg_builder.h"
11
#include "sdfg/data_flow/access_node.h"
12
#include "sdfg/data_flow/library_node.h"
13
#include "sdfg/data_flow/library_nodes/barrier_local_node.h"
14
#include "sdfg/data_flow/tasklet.h"
15
#include "sdfg/exceptions.h"
16
#include "sdfg/passes/dataflow/trivial_array_elimination.h"
17
#include "sdfg/passes/structured_control_flow/dead_cfg_elimination.h"
18
#include "sdfg/passes/structured_control_flow/sequence_fusion.h"
19
#include "sdfg/serializer/json_serializer.h"
20
#include "sdfg/structured_control_flow/control_flow_node.h"
21
#include "sdfg/structured_control_flow/for.h"
22
#include "sdfg/structured_control_flow/if_else.h"
23
#include "sdfg/structured_control_flow/map.h"
24
#include "sdfg/structured_control_flow/sequence.h"
25
#include "sdfg/structured_control_flow/structured_loop.h"
26
#include "sdfg/symbolic/polynomials.h"
27
#include "sdfg/symbolic/symbolic.h"
28
#include "sdfg/targets/gpu/gpu_schedule_type.h"
29
#include "sdfg/transformations/utils.h"
30
#include "sdfg/types/array.h"
31
#include "sdfg/types/structure.h"
32
#include "sdfg/types/type.h"
33
#include "sdfg/types/utils.h"
34
#include "symengine/symengine_rcp.h"
35

36
namespace sdfg {
37
namespace transformations {
38

39
/// Constructs the transformation for staging `container` into GPU shared
/// memory around `loop`.
/// @param loop      inner loop whose reads of `container` are tiled
/// @param offset    offset added to the thread index when copying in
/// @param container name of the (read-only) container to stage
// NOTE: extraneous ';' after the definition removed (-Wextra-semi).
KernelLocalStorage::KernelLocalStorage(
    structured_control_flow::StructuredLoop& loop, symbolic::Expression offset, const std::string& container
)
    : loop_(loop), offset_(offset), container_(container) {}
43

44
/// Returns the transformation identifier used in serialized descriptions.
// NOTE: extraneous ';' after the definition removed (-Wextra-semi).
std::string KernelLocalStorage::name() const { return "KernelLocalStorage"; }
45

46
bool KernelLocalStorage::reads_container(std::string container, analysis::UsersView& body_users) {
×
47
    if (body_users.reads(container).size() == 1) {
×
48
        return true;
×
49
    }
×
50
    return false;
×
51
}
×
52

53
/// True iff any read of the staged container references the inner loop's
/// induction variable in its (single) subset.
bool KernelLocalStorage::uses_inner_indvar(analysis::UsersView& body_users) {
    auto indvar = loop_.indvar();
    for (auto& reader : body_users.reads(this->container_)) {
        auto& reader_subsets = reader->subsets();
        // TODO: Handle multiple subsets — only single-subset accesses are inspected.
        if (reader_subsets.size() != 1) {
            continue;
        }
        for (auto& access_expr : reader_subsets.at(0)) {
            if (symbolic::uses(access_expr, indvar)) {
                return true;
            }
        }
    }
    return false;
}
68

69
std::tuple<symbolic::Integer, symbolic::Integer, symbolic::Integer> KernelLocalStorage::
70
    dim_size(const std::vector<structured_control_flow::ControlFlowNode*> ancestors) {
26✔
71
    symbolic::Integer x_dim_size = symbolic::one();
26✔
72
    symbolic::Integer y_dim_size = symbolic::one();
26✔
73
    symbolic::Integer z_dim_size = symbolic::one();
26✔
74

75
    for (auto node : ancestors) {
168✔
76
        if (auto ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
168✔
77
            auto schedule_type = ancestor_map->schedule_type();
52✔
78
            if (!gpu::is_gpu_schedule(schedule_type)) {
52✔
79
                continue;
×
80
            }
×
81
            auto dim = gpu::gpu_dimension(schedule_type);
52✔
82
            if (dim == gpu::GPUDimension::X) {
52✔
83
                x_dim_size = gpu::gpu_block_size(schedule_type);
26✔
84
            } else if (dim == gpu::GPUDimension::Y) {
26✔
85
                y_dim_size = gpu::gpu_block_size(schedule_type);
26✔
86
            } else if (dim == gpu::GPUDimension::Z) {
26✔
87
                z_dim_size = gpu::gpu_block_size(schedule_type);
×
88
            } else {
×
89
                throw InvalidSDFGException("Unknown dimension in GPU Schedule type: " + std::to_string((int) dim));
×
90
            }
×
91
        }
52✔
92
    }
168✔
93

94
    return {x_dim_size, y_dim_size, z_dim_size};
26✔
95
};
26✔
96

97
std::tuple<symbolic::Symbol, symbolic::Symbol, symbolic::Symbol> KernelLocalStorage::
98
    dim_indvars(const std::vector<structured_control_flow::ControlFlowNode*> ancestors) {
19✔
99
    symbolic::Symbol x_dim_indvar = SymEngine::null;
19✔
100
    symbolic::Symbol y_dim_indvar = SymEngine::null;
19✔
101
    symbolic::Symbol z_dim_indvar = SymEngine::null;
19✔
102

103
    for (auto node : ancestors) {
123✔
104
        if (auto ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
123✔
105
            auto schedule_type = ancestor_map->schedule_type();
38✔
106
            if (!gpu::is_gpu_schedule(schedule_type)) {
38✔
107
                continue;
×
108
            }
×
109
            auto dim = gpu::gpu_dimension(schedule_type);
38✔
110
            if (dim == gpu::GPUDimension::X) {
38✔
111
                x_dim_indvar = ancestor_map->indvar();
19✔
112
            } else if (dim == gpu::GPUDimension::Y) {
19✔
113
                y_dim_indvar = ancestor_map->indvar();
19✔
114
            } else if (dim == gpu::GPUDimension::Z) {
19✔
115
                z_dim_indvar = ancestor_map->indvar();
×
116
            } else {
×
117
                throw InvalidSDFGException("Unknown dimension in GPU Schedule type: " + std::to_string((int) dim));
×
118
            }
×
119
        }
38✔
120
    }
123✔
121

122
    return {x_dim_indvar, y_dim_indvar, z_dim_indvar};
19✔
123
}
19✔
124

125
std::tuple<bool, bool, bool> KernelLocalStorage::
126
    available_dims(std::vector<symbolic::Expression> subsets, analysis::AnalysisManager& analysis_manager) {
15✔
127
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
15✔
128
    auto ancestors = scope_analysis.ancestor_scopes(&loop_);
15✔
129

130
    symbolic::Integer iteration_count = get_iteration_count(loop_);
15✔
131

132
    auto [x_dim_size, y_dim_size, z_dim_size] = dim_size(ancestors);
15✔
133
    auto [x_dim_indvar, y_dim_indvar, z_dim_indvar] = dim_indvars(ancestors);
15✔
134

135
    bool x_dim_available = (x_dim_indvar != SymEngine::null);
15✔
136
    bool y_dim_available = (y_dim_indvar != SymEngine::null);
15✔
137
    bool z_dim_available = (z_dim_indvar != SymEngine::null);
15✔
138

139
    if (x_dim_available) {
15✔
140
        bool x_used = false;
15✔
141
        for (auto subset : subsets) {
30✔
142
            for (auto atom : symbolic::atoms(subset)) {
31✔
143
                if (symbolic::eq(atom, x_dim_indvar)) {
31✔
144
                    x_used = true;
11✔
145
                }
11✔
146
            }
31✔
147
        }
30✔
148
        if (x_used) {
15✔
149
            x_dim_available = false;
11✔
150
        }
11✔
151
    }
15✔
152
    if (y_dim_available) {
15✔
153
        bool y_used = false;
15✔
154
        for (auto subset : subsets) {
30✔
155
            for (auto atom : symbolic::atoms(subset)) {
31✔
156
                if (symbolic::eq(atom, y_dim_indvar)) {
31✔
157
                    y_used = true;
5✔
158
                }
5✔
159
            }
31✔
160
        }
30✔
161
        if (y_used) {
15✔
162
            y_dim_available = false;
5✔
163
        }
5✔
164
    }
15✔
165
    if (z_dim_available) {
15✔
166
        bool z_used = false;
×
167
        for (auto subset : subsets) {
×
168
            for (auto atom : symbolic::atoms(subset)) {
×
169
                if (symbolic::eq(atom, z_dim_indvar)) {
×
170
                    z_used = true;
×
171
                }
×
172
            }
×
173
        }
×
174
        if (z_used) {
×
175
            z_dim_available = false;
×
176
        }
×
177
    }
×
178

179
    if (x_dim_available) {
15✔
180
        auto cond = symbolic::Ge(x_dim_size, iteration_count);
4✔
181
        if (symbolic::is_true(cond)) {
4✔
182
            x_dim_available = true;
4✔
183
        }
4✔
184
    }
4✔
185
    if (y_dim_available) {
15✔
186
        auto cond = symbolic::Ge(y_dim_size, iteration_count);
10✔
187
        if (symbolic::is_true(cond)) {
10✔
188
            y_dim_available = true;
10✔
189
        }
10✔
190
    }
10✔
191
    if (z_dim_available) {
15✔
192
        auto cond = symbolic::Ge(z_dim_size, iteration_count);
×
193
        if (symbolic::is_true(cond)) {
×
194
            z_dim_available = true;
×
195
        }
×
196
    }
×
197

198
    return {x_dim_available, y_dim_available, z_dim_available};
15✔
199
}
15✔
200

201
/// Checks the structural preconditions for staging `container` into shared
/// memory around `loop`. Each criterion below is an early-out; the function
/// returns true only when all of them hold.
/// @param loop             inner loop whose reads are to be tiled
/// @param container        container name to stage
/// @param builder          gives access to the SDFG being transformed
/// @param analysis_manager supplies scope/type/user analyses
bool KernelLocalStorage::is_candidate(
    structured_control_flow::StructuredLoop& loop,
    const std::string& container,
    builder::StructuredSDFGBuilder& builder,
    analysis::AnalysisManager& analysis_manager
) {
    auto& sdfg = builder.subject();

    // Criterion: transformation cannot be applied twice on the same container
    // (the shared-memory copy is named deterministically, so its presence
    // means the transformation already ran).
    std::set<std::string> containers(sdfg.containers().begin(), sdfg.containers().end());
    std::string shared_container_name = "__daisy_shared_" + container;
    if (containers.find(shared_container_name) != containers.end()) {
        return false;
    }

    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
    auto ancestors = scope_analysis.ancestor_scopes(&loop);

    // Criterion: Must not be a GPU map itself
    if (auto loop_map = dynamic_cast<structured_control_flow::Map*>(&loop)) {
        if (gpu::is_gpu_schedule(loop_map->schedule_type())) {
            return false;
        }
    }

    // Criterion: Must be nested in a GPU schedule. Sequential ancestor maps
    // are tolerated; any other non-GPU map schedule disqualifies the loop.
    bool is_gpu_scope = false;
    for (auto ancestor : ancestors) {
        if (auto ancestor_map = dynamic_cast<structured_control_flow::Map*>(ancestor)) {
            if (gpu::is_gpu_schedule(ancestor_map->schedule_type())) {
                is_gpu_scope = true;
            } else if (ancestor_map->schedule_type().value() == ScheduleType_Sequential::value()) {
                continue;
            } else {
                return false;
            }
        }
    }
    if (!is_gpu_scope) {
        return false;
    }

    // Criterion: Container is contiguous (Maybe can be relaxed later).
    // A pointer at the innermost element level implies indirection.
    auto& type_analysis = analysis_manager.get<analysis::TypeAnalysis>();
    auto type = type_analysis.get_outer_type(container);
    auto& peeled_type = types::peel_to_innermost_element(*type);
    if (peeled_type.type_id() == types::TypeID::Pointer) {
        return false;
    }

    auto& inner_body = loop.root();
    auto& users = analysis_manager.get<analysis::Users>();
    analysis::UsersView inner_body_users(users, inner_body);

    // Criterion: Container is read-only within the loop body (no writes,
    // views, or moves) and actually read at least once.
    if (!inner_body_users.writes(container).empty() || !inner_body_users.views(container).empty() ||
        !inner_body_users.moves(container).empty()) {
        return false;
    }
    if (inner_body_users.reads(container).empty()) {
        return false;
    }

    // Criterion: Memory accesses do not depend on moving symbols — any symbol
    // appearing in a subset must not itself be moved inside the body.
    for (auto& user : inner_body_users.uses(container)) {
        auto& subsets = user->subsets();
        for (auto& subset : subsets) {
            for (auto& expr : subset) {
                for (auto& atom : symbolic::atoms(expr)) {
                    if (SymEngine::is_a<SymEngine::Symbol>(*atom)) {
                        auto symbol = SymEngine::rcp_static_cast<const SymEngine::Symbol>(atom);
                        if (!inner_body_users.moves(symbol->get_name()).empty()) {
                            return false;
                        }
                    }
                }
            }
        }
    }

    // Limitations: single memory access with a single subset.
    if (inner_body_users.reads(container).size() != 1) {
        return false;
    }
    auto read = inner_body_users.reads(container).at(0);
    if (read->subsets().size() != 1) {
        return false;
    }
    auto subsets = read->subsets().at(0);

    // Criterion: more than one GPU dimension is available — collect the
    // induction variables of all GPU-scheduled ancestor maps.
    symbolic::SymbolVec indvars;
    for (auto node : ancestors) {
        if (auto ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
            auto schedule_type = ancestor_map->schedule_type();
            if (!gpu::is_gpu_schedule(schedule_type)) {
                continue;
            }
            indvars.push_back(ancestor_map->indvar());
        }
    }
    if (indvars.size() <= 1) {
        return false;
    }

    indvars.push_back(loop.indvar());

    // Criterion: Memory access is polynomial of
    // c_0 * a + c_1 * b + c_2 * c + c_3 * k, where a, b, c are x-threads, y-threads, z-threads
    // and k is the inner loop index
    for (auto subset : subsets) {
        if (symbolic::polynomial(subset, indvars) == SymEngine::null) {
            return false;
        }
    }

    // Criterion: inner indvar is used in memory access (otherwise there is
    // nothing to tile along the loop).
    bool uses_inner_indvar = false;
    for (auto subset : subsets) {
        for (auto atom : symbolic::atoms(subset)) {
            if (symbolic::eq(atom, loop.indvar())) {
                uses_inner_indvar = true;
            }
        }
    }
    if (!uses_inner_indvar) {
        return false;
    }

    // Criterion: Containers in subset expressions are not written to in the
    // loop — otherwise the staged copy would go stale mid-iteration.
    for (auto subset : subsets) {
        for (auto atom : symbolic::atoms(subset)) {
            if (SymEngine::is_a<SymEngine::Symbol>(*atom)) {
                auto symbol = SymEngine::rcp_static_cast<const SymEngine::Symbol>(atom);
                if (!inner_body_users.writes(symbol->get_name()).empty()) {
                    return false;
                }
            }
        }
    }

    return true;
}
344

345
bool KernelLocalStorage::
346
    can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
26✔
347
    if (!is_candidate(loop_, container_, builder, analysis_manager)) {
26✔
348
        return false;
18✔
349
    }
18✔
350

351
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
8✔
352
    auto ancestors = scope_analysis.ancestor_scopes(&loop_);
8✔
353

354
    // Criterion: Iteration count is known and an Integer
355
    symbolic::Integer iteration_count = get_iteration_count(loop_);
8✔
356
    if (iteration_count == SymEngine::null) {
8✔
357
        return false;
1✔
358
    }
1✔
359

360
    // Criterion: All block dimensions are known and an Integer
361
    auto [x_dim_size, y_dim_size, z_dim_size] = dim_size(ancestors);
7✔
362
    if (x_dim_size == SymEngine::null || y_dim_size == SymEngine::null || z_dim_size == SymEngine::null) {
7✔
NEW
363
        return false;
×
NEW
364
    }
×
365

366
    // Criterion: Has a free dimension to map to and that dimension is big enough
367
    auto& users = analysis_manager.get<analysis::Users>();
7✔
368
    analysis::UsersView inner_body_users(users, loop_.root());
7✔
369
    auto read = inner_body_users.reads(container_).at(0);
7✔
370
    auto subsets = read->subsets().at(0);
7✔
371

372
    auto [x_dim_available, y_dim_available, z_dim_available] = available_dims(subsets, analysis_manager);
7✔
373
    if (!x_dim_available && !y_dim_available && !z_dim_available) {
7✔
374
        return false;
1✔
375
    }
1✔
376

377
    return true;
6✔
378
};
7✔
379

380
void KernelLocalStorage::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
4✔
381
    auto& sdfg = builder.subject();
4✔
382

383
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
4✔
384
    auto ancestors = scope_analysis.ancestor_scopes(&loop_);
4✔
385

386
    auto& users = analysis_manager.get<analysis::Users>();
4✔
387

388
    auto& inner_body = this->loop_.root();
4✔
389
    analysis::UsersView inner_body_users(users, inner_body);
4✔
390

391
    // Detect GPU backend from ancestor map schedule types
392
    bool is_rocm = false;
4✔
393
    for (auto node : ancestors) {
26✔
394
        if (auto ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
26✔
395
            if (ancestor_map->schedule_type().value() == "ROCM") {
8✔
396
                is_rocm = true;
×
397
                break;
×
398
            }
×
399
        }
8✔
400
    }
26✔
401

402
    std::string thread_prefix = is_rocm ? "__daisy_hip_thread_idx_" : "__daisy_cuda_thread_idx_";
4✔
403
    std::string x_name = thread_prefix + "x";
4✔
404
    std::string y_name = thread_prefix + "y";
4✔
405
    std::string z_name = thread_prefix + "z";
4✔
406
    symbolic::Symbol x_symbol = symbolic::symbol(x_name);
4✔
407
    symbolic::Symbol y_symbol = symbolic::symbol(y_name);
4✔
408
    symbolic::Symbol z_symbol = symbolic::symbol(z_name);
4✔
409

410
    auto index_type = types::Scalar(types::PrimitiveType::Int32);
4✔
411
    index_type.storage_type(types::StorageType::NV_Symbol());
4✔
412

413
    std::set<std::string> containers(sdfg.containers().begin(), sdfg.containers().end());
4✔
414
    if (containers.find(x_name) == containers.end()) {
4✔
415
        builder.add_container(x_name, index_type);
3✔
416
    }
3✔
417
    if (containers.find(y_name) == containers.end()) {
4✔
418
        builder.add_container(y_name, index_type);
3✔
419
    }
3✔
420
    if (containers.find(z_name) == containers.end()) {
4✔
421
        builder.add_container(z_name, index_type);
3✔
422
    }
3✔
423

424
    /**
425
        1. Add new shared memory container
426
        2. Add barrier before loop
427
        3. add copyin branch before loop
428
        4. Add barrier before loop
429
        5. replace container in loop
430
        6. replace subset expressions in loop
431
    */
432

433
    symbolic::Integer iteration_count = get_iteration_count(loop_);
4✔
434

435
    auto [x_dim_size, y_dim_size, z_dim_size] = dim_size(ancestors);
4✔
436
    auto [x_dim_indvar, y_dim_indvar, z_dim_indvar] = dim_indvars(ancestors);
4✔
437

438
    auto parent = scope_analysis.parent_scope(&loop_);
4✔
439
    auto parent_seq = static_cast<structured_control_flow::Sequence*>(parent);
4✔
440
    auto& seq = builder.add_sequence_before(*parent_seq, loop_, {}, loop_.debug_info());
4✔
441

442
    // 1. Add new shared memory container
443
    auto& type_analysis = analysis_manager.get<analysis::TypeAnalysis>();
4✔
444
    auto type = type_analysis.get_outer_type(container_);
4✔
445
    auto& peeled_type = types::peel_to_innermost_element(*type);
4✔
446
    auto read = inner_body_users.reads(this->container_).at(0);
4✔
447
    auto subsets = read->subsets().at(0);
4✔
448

449
    auto [x_dim_available, y_dim_available, z_dim_available] = available_dims(subsets, analysis_manager);
4✔
450

451
    // get free dim
452
    symbolic::Symbol target_dim;
4✔
453
    auto [dim_x, dim_y, dim_z] = available_dims(subsets, analysis_manager);
4✔
454

455
    if (dim_x) {
4✔
456
        target_dim = x_symbol;
1✔
457
    } else if (dim_y) {
3✔
458
        target_dim = y_symbol;
3✔
459
    } else if (dim_z) {
3✔
460
        target_dim = z_symbol;
×
461
    } else {
×
462
        throw InvalidSDFGException("No available GPU tiling dimension found!");
×
463
    }
×
464

465
    // std::unique_ptr<types::IType> element_type;
466

467
    // if (peeled_type.type_id() == types::TypeID::Structure) {
468
    //     auto struct_type = static_cast<const types::Structure&>(peeled_type);
469
    //     types::Structure new_struct_type(
470
    //         types::StorageType::NV_Shared(), 8, {}, struct_type.name()
471
    //     );
472
    //     element_type = new_struct_type.clone();
473
    // } else if (peeled_type.type_id() == types::TypeID::Scalar) {
474
    //     auto scalar_type = static_cast<const types::Scalar&>(peeled_type);
475
    //     types::Scalar new_scalar_type(
476
    //         types::StorageType::NV_Shared(), 8, {}, scalar_type.primitive_type()
477
    //     );
478
    //     element_type = new_scalar_type.clone();
479
    // } else {
480
    //     throw InvalidSDFGException(
481
    //         "Unsupported peeled type for KernelLocalStorage."
482
    //     );
483
    // }
484

485
    auto generic_storage = is_rocm ? types::StorageType("AMD_Generic") : types::StorageType::NV_Generic();
4✔
486

487
    types::Array tile_array_type(types::StorageType::NV_Shared(), 8, {}, peeled_type, iteration_count);
4✔
488
    types::Array z_array_type(generic_storage, 8, {}, tile_array_type, z_dim_size);
4✔
489
    types::Array* pred_y;
4✔
490
    if (symbolic::eq(target_dim, z_symbol)) {
4✔
491
        pred_y = &tile_array_type;
×
492
    } else {
4✔
493
        pred_y = &z_array_type;
4✔
494
    }
4✔
495
    types::Array y_array_type(generic_storage, 8, {}, *pred_y, y_dim_size);
4✔
496
    types::Array* pred_x;
4✔
497
    if (symbolic::eq(target_dim, y_symbol)) {
4✔
498
        pred_x = &z_array_type;
3✔
499
    } else {
3✔
500
        pred_x = &y_array_type;
1✔
501
    }
1✔
502
    types::Array x_array_type(generic_storage, 8, {}, *pred_x, x_dim_size);
4✔
503
    types::Array* final_type;
4✔
504
    if (symbolic::eq(target_dim, x_symbol)) {
4✔
505
        final_type = &y_array_type;
1✔
506
    } else {
3✔
507
        final_type = &x_array_type;
3✔
508
    }
3✔
509

510
    std::string shared_container_name = "__daisy_shared_" + container_;
4✔
511
    builder.add_container(shared_container_name, *final_type);
4✔
512

513
    // 2. Add barrier before loop
514
    auto& sync_block1 = builder.add_block(seq);
4✔
515

516
    builder.add_library_node<data_flow::BarrierLocalNode>(sync_block1, {});
4✔
517

518
    // 3. add copyin branch before loop
519
    auto& if_else = builder.add_if_else(seq);
4✔
520

521
    auto condition = symbolic::subs(loop_.condition(), loop_.indvar(), symbolic::add(target_dim, offset_));
4✔
522
    auto& branch = builder.add_case(if_else, condition);
4✔
523

524
    auto& copyin_block = builder.add_block(branch);
4✔
525

526
    auto& access_in = builder.add_access(copyin_block, container_);
4✔
527
    auto& access_out = builder.add_access(copyin_block, shared_container_name);
4✔
528

529
    auto& tasklet = builder.add_tasklet(copyin_block, data_flow::TaskletCode::assign, "out_", {"in_"});
4✔
530

531
    std::vector<symbolic::Expression> copyin_subsets;
4✔
532
    for (auto subset : subsets) {
8✔
533
        auto substituted = symbolic::subs(subset, loop_.indvar(), symbolic::add(target_dim, offset_));
8✔
534
        copyin_subsets.push_back(substituted);
8✔
535
    }
8✔
536

537
    builder.add_computational_memlet(copyin_block, access_in, tasklet, "in_", copyin_subsets, *type);
4✔
538

539
    std::vector<symbolic::Expression> shared_access_subsets = {x_symbol, y_symbol, z_symbol, target_dim};
4✔
540

541
    if (symbolic::eq(target_dim, x_symbol)) {
4✔
542
        shared_access_subsets.erase(shared_access_subsets.begin());
1✔
543
    } else if (symbolic::eq(target_dim, y_symbol)) {
3✔
544
        shared_access_subsets.erase(shared_access_subsets.begin() + 1);
3✔
545
    } else if (symbolic::eq(target_dim, z_symbol)) {
3✔
546
        shared_access_subsets.erase(shared_access_subsets.begin() + 2);
×
547
    }
×
548

549
    builder.add_computational_memlet(copyin_block, tasklet, "out_", access_out, shared_access_subsets);
4✔
550

551
    // 4. Add barrier before loop
552

553
    auto& sync_block2 = builder.add_block(seq);
4✔
554

555
    builder.add_library_node<data_flow::BarrierLocalNode>(sync_block2, {});
4✔
556

557
    // 5. replace container in loop
558
    loop_.replace(symbolic::symbol(container_), symbolic::symbol(shared_container_name));
4✔
559

560
    // 6. replace subset expressions in loop
561
    std::vector<symbolic::Expression> read_shared_access_subsets;
4✔
562
    symbolic::Expression substituted_dimension;
4✔
563
    for (auto& subset : shared_access_subsets) {
12✔
564
        auto substituted = symbolic::subs(subset, target_dim, symbolic::sub(loop_.indvar(), offset_));
12✔
565
        read_shared_access_subsets.push_back(substituted);
12✔
566
    }
12✔
567

568
    auto access_node = static_cast<data_flow::AccessNode*>(read->element());
4✔
569
    for (auto& oedge : access_node->get_parent().out_edges(*access_node)) {
4✔
570
        oedge.set_subset(read_shared_access_subsets);
4✔
571
        oedge.set_base_type(*final_type);
4✔
572
    }
4✔
573

574
    // End of transformation
575

576
    passes::SequenceFusion sf_pass;
4✔
577
    passes::DeadCFGElimination dce_pass;
4✔
578
    passes::TrivialArrayElimination tae_pass;
4✔
579
    bool applies = false;
4✔
580
    do {
8✔
581
        applies = false;
8✔
582
        applies |= dce_pass.run(builder, analysis_manager);
8✔
583
        applies |= sf_pass.run(builder, analysis_manager);
8✔
584
        applies |= tae_pass.run(builder, analysis_manager);
8✔
585
    } while (applies);
8✔
586
};
4✔
587

588
/// Serializes the transformation into `j`, including legacy top-level fields
/// for older consumers.
void KernelLocalStorage::to_json(nlohmann::json& j) const {
    j["transformation_type"] = this->name();

    // Record the concrete loop flavour for the subgraph entry.
    std::string loop_type = "unknown";
    if (dynamic_cast<structured_control_flow::For*>(&loop_)) {
        loop_type = "for";
    } else if (dynamic_cast<structured_control_flow::While*>(&loop_)) {
        loop_type = "while";
    } else if (dynamic_cast<structured_control_flow::Map*>(&loop_)) {
        loop_type = "map";
    }

    j["subgraph"] = {{"0", {{"element_id", this->loop_.element_id()}, {"type", loop_type}}}};

    j["parameters"] = {{"offset", serializer::JSONSerializer::expression(offset_)}, {"container", this->container_}};

    // Legacy fields for backward compatibility
    j["loop_element_id"] = this->loop_.element_id();
    j["offset"] = serializer::JSONSerializer::expression(offset_);
    j["container"] = this->container_;
}
611

612
/// Reconstructs a KernelLocalStorage from a JSON description, accepting both
/// the current "subgraph"/"parameters" layout and the legacy flat fields.
/// @throws InvalidTransformationDescriptionException if the loop element is
///         missing or is not a structured loop.
KernelLocalStorage KernelLocalStorage::from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& desc) {
    size_t loop_id;
    if (desc.contains("subgraph")) {
        const auto& node_desc = desc.at("subgraph").at("0");
        loop_id = node_desc.at("element_id").get<size_t>();
    } else {
        loop_id = desc.at("loop_element_id").get<size_t>();
    }

    auto element = builder.find_element_by_id(loop_id);
    if (!element) {
        throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(loop_id) + " not found.");
    }
    // BUGFIX: the element was previously cast with dynamic_cast<For*> and
    // dereferenced without a null check, so a serialized map loop (which
    // to_json explicitly supports) or any non-For element caused undefined
    // behavior. Cast to the common StructuredLoop base and validate instead.
    auto outer_loop = dynamic_cast<structured_control_flow::StructuredLoop*>(element);
    if (!outer_loop) {
        throw InvalidTransformationDescriptionException(
            "Element with ID " + std::to_string(loop_id) + " is not a structured loop."
        );
    }

    // Parameters: prefer the nested "parameters" object, fall back to legacy
    // top-level fields.
    nlohmann::json offset_json;
    std::string container;
    if (desc.contains("parameters")) {
        const auto& params = desc.at("parameters");
        if (params.contains("offset")) {
            offset_json = params.at("offset");
        }
        if (params.contains("container")) {
            container = params.at("container").get<std::string>();
        }
    }
    if (offset_json.is_null() && desc.contains("offset")) {
        offset_json = desc.at("offset");
    }
    if (container.empty() && desc.contains("container")) {
        container = desc.at("container").get<std::string>();
    }

    auto offset = symbolic::parse(offset_json);

    return KernelLocalStorage(*outer_loop, offset, container);
}
649

650
} // namespace transformations
651
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc