• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 28106147644

24 Jun 2026 02:32PM UTC coverage: 61.922% (+0.1%) from 61.779%
28106147644

Pull #806

github

web-flow
Merge 2be414d54 into 57cc1db99
Pull Request #806: Map Collapse for Multiple targets in a nested sequence

165 of 185 new or added lines in 2 files covered. (89.19%)

419 existing lines in 30 files now uncovered.

37705 of 60891 relevant lines covered (61.92%)

1004.4 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

81.07
/opt/src/transformations/offloading/kernel_local_storage.cpp
1
#include "sdfg/transformations/offloading/kernel_local_storage.h"
2

3
#include <set>
4
#include <string>
5
#include <tuple>
6
#include <vector>
7

8
#include "sdfg/analysis/type_analysis.h"
9
#include "sdfg/builder/structured_sdfg_builder.h"
10
#include "sdfg/data_flow/access_node.h"
11
#include "sdfg/data_flow/library_node.h"
12
#include "sdfg/data_flow/library_nodes/barrier_local_node.h"
13
#include "sdfg/data_flow/tasklet.h"
14
#include "sdfg/exceptions.h"
15
#include "sdfg/passes/dataflow/trivial_array_elimination.h"
16
#include "sdfg/passes/structured_control_flow/dead_cfg_elimination.h"
17
#include "sdfg/passes/structured_control_flow/sequence_fusion.h"
18
#include "sdfg/structured_control_flow/control_flow_node.h"
19
#include "sdfg/structured_control_flow/for.h"
20
#include "sdfg/structured_control_flow/if_else.h"
21
#include "sdfg/structured_control_flow/map.h"
22
#include "sdfg/structured_control_flow/sequence.h"
23
#include "sdfg/structured_control_flow/structured_loop.h"
24
#include "sdfg/symbolic/polynomials.h"
25
#include "sdfg/symbolic/symbolic.h"
26
#include "sdfg/targets/gpu/gpu_schedule_type.h"
27
#include "sdfg/types/array.h"
28
#include "sdfg/types/structure.h"
29
#include "sdfg/types/type.h"
30
#include "sdfg/types/utils.h"
31
#include "symengine/symengine_rcp.h"
32

33
namespace sdfg {
34
namespace transformations {
35

36
// Creates the transformation for `loop`. The container to be staged is the one referenced by
// `access_node` (its name is taken from `access_node.data()`); `offset` is stored unchanged and
// later combined with the thread index when the loop induction variable is substituted.
KernelLocalStorage::KernelLocalStorage(
    structured_control_flow::StructuredLoop& loop, symbolic::Expression offset, const data_flow::AccessNode& access_node
)
    : loop_(loop),
      offset_(offset),
      access_node_(access_node),
      container_(access_node.data()) {}
23✔
40

41
// Name of this transformation.
std::string KernelLocalStorage::name() const {
    return "KernelLocalStorage";
}
6✔
42

UNCOV
43
bool KernelLocalStorage::reads_container(std::string container, analysis::UsersView& body_users) {
×
44
    if (body_users.reads(container).size() == 1) {
×
45
        return true;
×
46
    }
×
47
    return false;
×
48
}
×
49

UNCOV
50
// Returns true if any read of the target container inside `body_users` has exactly one subset
// and at least one index expression of that subset uses the induction variable of `loop_`.
// Reads with zero subsets or with more than one subset are ignored.
bool KernelLocalStorage::uses_inner_indvar(analysis::UsersView& body_users) {
    bool indvar_seen = false;
    for (const auto& reader : body_users.reads(this->container_)) {
        const auto& all_subsets = reader->subsets();
        // TODO: Handle multiple subsets
        if (all_subsets.size() != 1) {
            continue;
        }
        for (const auto& index_expr : all_subsets.at(0)) {
            if (symbolic::uses(index_expr, loop_.indvar())) {
                indvar_seen = true;
            }
        }
    }
    return indvar_seen;
}
×
65

66
std::tuple<symbolic::Integer, symbolic::Integer, symbolic::Integer> KernelLocalStorage::
67
    dim_size(const std::vector<structured_control_flow::ControlFlowNode*> ancestors) {
26✔
68
    symbolic::Integer x_dim_size = symbolic::one();
26✔
69
    symbolic::Integer y_dim_size = symbolic::one();
26✔
70
    symbolic::Integer z_dim_size = symbolic::one();
26✔
71

72
    for (auto node : ancestors) {
168✔
73
        if (auto ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
168✔
74
            auto schedule_type = ancestor_map->schedule_type();
52✔
75
            if (!gpu::is_gpu_schedule(schedule_type)) {
52✔
UNCOV
76
                continue;
×
77
            }
×
78
            auto dim = gpu::gpu_dimension(schedule_type);
52✔
79
            if (dim == gpu::GPUDimension::X) {
52✔
80
                x_dim_size = gpu::gpu_block_size(schedule_type);
26✔
81
            } else if (dim == gpu::GPUDimension::Y) {
26✔
82
                y_dim_size = gpu::gpu_block_size(schedule_type);
26✔
83
            } else if (dim == gpu::GPUDimension::Z) {
26✔
UNCOV
84
                z_dim_size = gpu::gpu_block_size(schedule_type);
×
85
            } else {
×
86
                throw InvalidSDFGException("Unknown dimension in GPU Schedule type: " + std::to_string((int) dim));
×
87
            }
×
88
        }
52✔
89
    }
168✔
90

91
    return {x_dim_size, y_dim_size, z_dim_size};
26✔
92
};
26✔
93

94
std::tuple<symbolic::Symbol, symbolic::Symbol, symbolic::Symbol> KernelLocalStorage::
95
    dim_indvars(const std::vector<structured_control_flow::ControlFlowNode*> ancestors) {
19✔
96
    symbolic::Symbol x_dim_indvar = SymEngine::null;
19✔
97
    symbolic::Symbol y_dim_indvar = SymEngine::null;
19✔
98
    symbolic::Symbol z_dim_indvar = SymEngine::null;
19✔
99

100
    for (auto node : ancestors) {
123✔
101
        if (auto ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
123✔
102
            auto schedule_type = ancestor_map->schedule_type();
38✔
103
            if (!gpu::is_gpu_schedule(schedule_type)) {
38✔
UNCOV
104
                continue;
×
105
            }
×
106
            auto dim = gpu::gpu_dimension(schedule_type);
38✔
107
            if (dim == gpu::GPUDimension::X) {
38✔
108
                x_dim_indvar = ancestor_map->indvar();
19✔
109
            } else if (dim == gpu::GPUDimension::Y) {
19✔
110
                y_dim_indvar = ancestor_map->indvar();
19✔
111
            } else if (dim == gpu::GPUDimension::Z) {
19✔
UNCOV
112
                z_dim_indvar = ancestor_map->indvar();
×
113
            } else {
×
114
                throw InvalidSDFGException("Unknown dimension in GPU Schedule type: " + std::to_string((int) dim));
×
115
            }
×
116
        }
38✔
117
    }
123✔
118

119
    return {x_dim_indvar, y_dim_indvar, z_dim_indvar};
19✔
120
}
19✔
121

122
std::tuple<bool, bool, bool> KernelLocalStorage::
123
    available_dims(std::vector<symbolic::Expression> subsets, analysis::AnalysisManager& analysis_manager) {
15✔
124
    auto ancestors = ControlFlowNode::parent_chain(loop_);
15✔
125

126
    auto num_iterations = loop_.num_iterations();
15✔
127
    if (num_iterations.is_null() || !SymEngine::is_a<SymEngine::Integer>(*num_iterations)) {
15✔
128
        num_iterations = loop_.num_iterations_approx();
11✔
129
    }
11✔
130
    symbolic::Integer iteration_count = SymEngine::rcp_static_cast<const SymEngine::Integer>(num_iterations);
15✔
131

132
    auto [x_dim_size, y_dim_size, z_dim_size] = dim_size(ancestors);
15✔
133
    auto [x_dim_indvar, y_dim_indvar, z_dim_indvar] = dim_indvars(ancestors);
15✔
134

135
    bool x_dim_available = (x_dim_indvar != SymEngine::null);
15✔
136
    bool y_dim_available = (y_dim_indvar != SymEngine::null);
15✔
137
    bool z_dim_available = (z_dim_indvar != SymEngine::null);
15✔
138

139
    if (x_dim_available) {
15✔
140
        bool x_used = false;
15✔
141
        for (auto subset : subsets) {
30✔
142
            for (auto atom : symbolic::atoms(subset)) {
31✔
143
                if (symbolic::eq(atom, x_dim_indvar)) {
31✔
144
                    x_used = true;
11✔
145
                }
11✔
146
            }
31✔
147
        }
30✔
148
        if (x_used) {
15✔
149
            x_dim_available = false;
11✔
150
        }
11✔
151
    }
15✔
152
    if (y_dim_available) {
15✔
153
        bool y_used = false;
15✔
154
        for (auto subset : subsets) {
30✔
155
            for (auto atom : symbolic::atoms(subset)) {
31✔
156
                if (symbolic::eq(atom, y_dim_indvar)) {
31✔
157
                    y_used = true;
5✔
158
                }
5✔
159
            }
31✔
160
        }
30✔
161
        if (y_used) {
15✔
162
            y_dim_available = false;
5✔
163
        }
5✔
164
    }
15✔
165
    if (z_dim_available) {
15✔
UNCOV
166
        bool z_used = false;
×
167
        for (auto subset : subsets) {
×
168
            for (auto atom : symbolic::atoms(subset)) {
×
169
                if (symbolic::eq(atom, z_dim_indvar)) {
×
170
                    z_used = true;
×
171
                }
×
172
            }
×
173
        }
×
174
        if (z_used) {
×
175
            z_dim_available = false;
×
176
        }
×
177
    }
×
178

179
    if (x_dim_available) {
15✔
180
        auto cond = symbolic::Ge(x_dim_size, iteration_count);
4✔
181
        if (symbolic::is_true(cond)) {
4✔
182
            x_dim_available = true;
4✔
183
        }
4✔
184
    }
4✔
185
    if (y_dim_available) {
15✔
186
        auto cond = symbolic::Ge(y_dim_size, iteration_count);
10✔
187
        if (symbolic::is_true(cond)) {
10✔
188
            y_dim_available = true;
10✔
189
        }
10✔
190
    }
10✔
191
    if (z_dim_available) {
15✔
UNCOV
192
        auto cond = symbolic::Ge(z_dim_size, iteration_count);
×
193
        if (symbolic::is_true(cond)) {
×
194
            z_dim_available = true;
×
195
        }
×
196
    }
×
197

198
    return {x_dim_available, y_dim_available, z_dim_available};
15✔
199
}
15✔
200

201
bool KernelLocalStorage::is_candidate(
202
    structured_control_flow::StructuredLoop& loop,
203
    const std::string& container,
204
    builder::StructuredSDFGBuilder& builder,
205
    analysis::AnalysisManager& analysis_manager
206
) {
20✔
207
    auto& sdfg = builder.subject();
20✔
208

209
    // Criterion: transformation cannot be applied twice on the same container
210
    std::set<std::string> containers(sdfg.containers().begin(), sdfg.containers().end());
20✔
211
    std::string shared_container_name = "__daisy_shared_" + container;
20✔
212
    if (containers.find(shared_container_name) != containers.end()) {
20✔
UNCOV
213
        return false;
×
214
    }
×
215

216
    auto ancestors = ControlFlowNode::parent_chain(loop);
20✔
217

218
    // Criterion: Must not be a GPU map itself
219
    if (auto loop_map = dynamic_cast<structured_control_flow::Map*>(&loop)) {
20✔
220
        if (gpu::is_gpu_schedule(loop_map->schedule_type())) {
3✔
221
            return false;
3✔
222
        }
3✔
223
    }
3✔
224

225
    // Criterion: Must be nested in a GPU schedule
226
    bool is_gpu_scope = false;
17✔
227
    for (auto ancestor : ancestors) {
91✔
228
        if (auto ancestor_map = dynamic_cast<structured_control_flow::Map*>(ancestor)) {
91✔
229
            if (gpu::is_gpu_schedule(ancestor_map->schedule_type())) {
31✔
230
                is_gpu_scope = true;
31✔
231
            } else if (ancestor_map->schedule_type().value() == ScheduleType_Sequential::value()) {
31✔
UNCOV
232
                continue;
×
233
            } else {
×
234
                return false;
×
235
            }
×
236
        }
31✔
237
    }
91✔
238
    if (!is_gpu_scope) {
17✔
239
        return false;
1✔
240
    }
1✔
241

242
    // Criterion: Container is contiguous (Maybe can be relaxed later)
243
    auto& type_analysis = analysis_manager.get<analysis::TypeAnalysis>();
16✔
244
    auto type = type_analysis.get_outer_type(container);
16✔
245
    auto& peeled_type = types::peel_to_innermost_element(*type);
16✔
246
    if (peeled_type.type_id() == types::TypeID::Pointer) {
16✔
247
        return false;
1✔
248
    }
1✔
249

250
    auto& inner_body = loop.root();
15✔
251
    auto& users = analysis_manager.get<analysis::Users>();
15✔
252
    analysis::UsersView inner_body_users(users, inner_body);
15✔
253

254
    // Criterion: Container is read-only
255
    if (!inner_body_users.writes(container).empty() || !inner_body_users.views(container).empty() ||
15✔
256
        !inner_body_users.moves(container).empty()) {
15✔
257
        return false;
2✔
258
    }
2✔
259
    if (inner_body_users.reads(container).empty()) {
13✔
260
        return false;
1✔
261
    }
1✔
262

263
    // Criterion: Memory accesses do not depend on moving symbols
264
    for (auto& user : inner_body_users.uses(container)) {
13✔
265
        auto& subsets = user->subsets();
13✔
266
        for (auto& subset : subsets) {
13✔
267
            for (auto& expr : subset) {
26✔
268
                for (auto& atom : symbolic::atoms(expr)) {
27✔
269
                    if (SymEngine::is_a<SymEngine::Symbol>(*atom)) {
27✔
270
                        auto symbol = SymEngine::rcp_static_cast<const SymEngine::Symbol>(atom);
27✔
271
                        if (!inner_body_users.moves(symbol->get_name()).empty()) {
27✔
UNCOV
272
                            return false;
×
273
                        }
×
274
                    }
27✔
275
                }
27✔
276
            }
26✔
277
        }
13✔
278
    }
13✔
279

280
    // Limitations: single memory access
281
    if (inner_body_users.reads(container).size() != 1) {
12✔
282
        return false;
1✔
283
    }
1✔
284
    auto read = inner_body_users.reads(container).at(0);
11✔
285
    if (read->subsets().size() != 1) {
11✔
UNCOV
286
        return false;
×
287
    }
×
288
    auto subsets = read->subsets().at(0);
11✔
289

290
    // Criterion: more than one GPU dimension is available
291
    symbolic::SymbolVec indvars;
11✔
292
    for (auto node : ancestors) {
63✔
293
        if (auto ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
63✔
294
            auto schedule_type = ancestor_map->schedule_type();
21✔
295
            if (!gpu::is_gpu_schedule(schedule_type)) {
21✔
UNCOV
296
                continue;
×
297
            }
×
298
            indvars.push_back(ancestor_map->indvar());
21✔
299
        }
21✔
300
    }
63✔
301
    if (indvars.size() <= 1) {
11✔
302
        return false;
1✔
303
    }
1✔
304

305
    indvars.push_back(loop.indvar());
10✔
306

307
    // Criterion: Memory access is polynomial of
308
    // c_0 * a + c_1 * b + c_2 * c + c_3 * k, where a, b, c are x-threads, y-threads, z-threads
309
    // and k is the inner loop index
310
    for (auto subset : subsets) {
20✔
311
        if (symbolic::polynomial(subset, indvars) == SymEngine::null) {
20✔
UNCOV
312
            return false;
×
313
        }
×
314
    }
20✔
315

316
    // Criterion: inner indvar is used in memory access
317
    bool uses_inner_indvar = false;
10✔
318
    for (auto subset : subsets) {
20✔
319
        for (auto atom : symbolic::atoms(subset)) {
21✔
320
            if (symbolic::eq(atom, loop.indvar())) {
21✔
321
                uses_inner_indvar = true;
8✔
322
            }
8✔
323
        }
21✔
324
    }
20✔
325
    if (!uses_inner_indvar) {
10✔
326
        return false;
2✔
327
    }
2✔
328

329
    // Criterion: Containers in subset expressions are not written to in the loop
330
    for (auto subset : subsets) {
15✔
331
        for (auto atom : symbolic::atoms(subset)) {
16✔
332
            if (SymEngine::is_a<SymEngine::Symbol>(*atom)) {
16✔
333
                auto symbol = SymEngine::rcp_static_cast<const SymEngine::Symbol>(atom);
16✔
334
                if (!inner_body_users.writes(symbol->get_name()).empty()) {
16✔
335
                    return false;
1✔
336
                }
1✔
337
            }
16✔
338
        }
16✔
339
    }
15✔
340

341
    return true;
7✔
342
}
8✔
343

344
bool KernelLocalStorage::
345
    can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
19✔
346
    // Criterion 1: Loop has integer iteration count
347
    auto num_iterations = loop_.num_iterations();
19✔
348
    if (num_iterations.is_null() || !SymEngine::is_a<SymEngine::Integer>(*num_iterations)) {
19✔
349
        num_iterations = loop_.num_iterations_approx();
7✔
350
        if (num_iterations.is_null() || !SymEngine::is_a<SymEngine::Integer>(*num_iterations)) {
7✔
351
            return false;
1✔
352
        }
1✔
353
    }
7✔
354

355
    // Check if valid candidate
356
    if (!is_candidate(loop_, container_, builder, analysis_manager)) {
18✔
357
        return false;
11✔
358
    }
11✔
359

360
    // Criterion: All block dimensions are known and an Integer
361
    auto ancestors = ControlFlowNode::parent_chain(loop_);
7✔
362
    auto [x_dim_size, y_dim_size, z_dim_size] = dim_size(ancestors);
7✔
363
    if (x_dim_size == SymEngine::null || y_dim_size == SymEngine::null || z_dim_size == SymEngine::null) {
7✔
UNCOV
364
        return false;
×
365
    }
×
366

367
    // Criterion: Has a free dimension to map to and that dimension is big enough
368
    auto& users = analysis_manager.get<analysis::Users>();
7✔
369
    analysis::UsersView inner_body_users(users, loop_.root());
7✔
370
    auto read = inner_body_users.reads(container_).at(0);
7✔
371
    auto subsets = read->subsets().at(0);
7✔
372

373
    auto [x_dim_available, y_dim_available, z_dim_available] = available_dims(subsets, analysis_manager);
7✔
374
    if (!x_dim_available && !y_dim_available && !z_dim_available) {
7✔
375
        return false;
1✔
376
    }
1✔
377

378
    return true;
6✔
379
};
7✔
380

381
void KernelLocalStorage::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
4✔
382
    auto& sdfg = builder.subject();
4✔
383

384
    auto ancestors = ControlFlowNode::parent_chain(loop_);
4✔
385

386
    auto& users = analysis_manager.get<analysis::Users>();
4✔
387

388
    auto& inner_body = this->loop_.root();
4✔
389
    analysis::UsersView inner_body_users(users, inner_body);
4✔
390

391
    // Detect GPU backend from ancestor map schedule types
392
    bool is_rocm = false;
4✔
393
    for (auto node : ancestors) {
26✔
394
        if (auto ancestor_map = dynamic_cast<structured_control_flow::Map*>(node)) {
26✔
395
            if (ancestor_map->schedule_type().value() == "ROCM") {
8✔
UNCOV
396
                is_rocm = true;
×
397
                break;
×
398
            }
×
399
        }
8✔
400
    }
26✔
401

402
    std::string thread_prefix = is_rocm ? "__daisy_hip_thread_idx_" : "__daisy_cuda_thread_idx_";
4✔
403
    std::string x_name = thread_prefix + "x";
4✔
404
    std::string y_name = thread_prefix + "y";
4✔
405
    std::string z_name = thread_prefix + "z";
4✔
406
    symbolic::Symbol x_symbol = symbolic::symbol(x_name);
4✔
407
    symbolic::Symbol y_symbol = symbolic::symbol(y_name);
4✔
408
    symbolic::Symbol z_symbol = symbolic::symbol(z_name);
4✔
409

410
    auto index_type = types::Scalar(types::PrimitiveType::Int32);
4✔
411
    index_type.storage_type(types::StorageType::NV_Symbol());
4✔
412

413
    std::set<std::string> containers(sdfg.containers().begin(), sdfg.containers().end());
4✔
414
    if (containers.find(x_name) == containers.end()) {
4✔
415
        builder.add_container(x_name, index_type);
3✔
416
    }
3✔
417
    if (containers.find(y_name) == containers.end()) {
4✔
418
        builder.add_container(y_name, index_type);
3✔
419
    }
3✔
420
    if (containers.find(z_name) == containers.end()) {
4✔
421
        builder.add_container(z_name, index_type);
3✔
422
    }
3✔
423

424
    /**
425
        1. Add new shared memory container
426
        2. Add barrier before loop
427
        3. add copyin branch before loop
428
        4. Add barrier before loop
429
        5. replace container in loop
430
        6. replace subset expressions in loop
431
    */
432

433
    auto num_iterations = loop_.num_iterations();
4✔
434
    if (num_iterations.is_null() || !SymEngine::is_a<SymEngine::Integer>(*num_iterations)) {
4✔
435
        num_iterations = loop_.num_iterations_approx();
3✔
436
    }
3✔
437
    symbolic::Integer iteration_count = SymEngine::rcp_static_cast<const SymEngine::Integer>(num_iterations);
4✔
438

439
    auto [x_dim_size, y_dim_size, z_dim_size] = dim_size(ancestors);
4✔
440
    auto [x_dim_indvar, y_dim_indvar, z_dim_indvar] = dim_indvars(ancestors);
4✔
441

442
    auto parent = loop_.get_parent();
4✔
443
    auto parent_seq = static_cast<structured_control_flow::Sequence*>(parent);
4✔
444
    auto& seq = builder.add_sequence_before(*parent_seq, loop_, {}, loop_.debug_info());
4✔
445

446
    // 1. Add new shared memory container
447
    auto& type_analysis = analysis_manager.get<analysis::TypeAnalysis>();
4✔
448
    auto type = type_analysis.get_outer_type(container_);
4✔
449
    auto& peeled_type = types::peel_to_innermost_element(*type);
4✔
450
    auto read = inner_body_users.reads(this->container_).at(0);
4✔
451
    auto subsets = read->subsets().at(0);
4✔
452

453
    auto [x_dim_available, y_dim_available, z_dim_available] = available_dims(subsets, analysis_manager);
4✔
454

455
    // get free dim
456
    symbolic::Symbol target_dim;
4✔
457
    auto [dim_x, dim_y, dim_z] = available_dims(subsets, analysis_manager);
4✔
458

459
    if (dim_x) {
4✔
460
        target_dim = x_symbol;
1✔
461
    } else if (dim_y) {
3✔
462
        target_dim = y_symbol;
3✔
463
    } else if (dim_z) {
3✔
UNCOV
464
        target_dim = z_symbol;
×
465
    } else {
×
466
        throw InvalidSDFGException("No available GPU tiling dimension found!");
×
467
    }
×
468

469
    // std::unique_ptr<types::IType> element_type;
470

471
    // if (peeled_type.type_id() == types::TypeID::Structure) {
472
    //     auto struct_type = static_cast<const types::Structure&>(peeled_type);
473
    //     types::Structure new_struct_type(
474
    //         types::StorageType::NV_Shared(), 8, {}, struct_type.name()
475
    //     );
476
    //     element_type = new_struct_type.clone();
477
    // } else if (peeled_type.type_id() == types::TypeID::Scalar) {
478
    //     auto scalar_type = static_cast<const types::Scalar&>(peeled_type);
479
    //     types::Scalar new_scalar_type(
480
    //         types::StorageType::NV_Shared(), 8, {}, scalar_type.primitive_type()
481
    //     );
482
    //     element_type = new_scalar_type.clone();
483
    // } else {
484
    //     throw InvalidSDFGException(
485
    //         "Unsupported peeled type for KernelLocalStorage."
486
    //     );
487
    // }
488

489
    auto generic_storage = is_rocm ? types::StorageType("AMD_Generic") : types::StorageType::NV_Generic();
4✔
490

491
    types::Array tile_array_type(types::StorageType::NV_Shared(), 8, {}, peeled_type, iteration_count);
4✔
492
    types::Array z_array_type(generic_storage, 8, {}, tile_array_type, z_dim_size);
4✔
493
    types::Array* pred_y;
4✔
494
    if (symbolic::eq(target_dim, z_symbol)) {
4✔
UNCOV
495
        pred_y = &tile_array_type;
×
496
    } else {
4✔
497
        pred_y = &z_array_type;
4✔
498
    }
4✔
499
    types::Array y_array_type(generic_storage, 8, {}, *pred_y, y_dim_size);
4✔
500
    types::Array* pred_x;
4✔
501
    if (symbolic::eq(target_dim, y_symbol)) {
4✔
502
        pred_x = &z_array_type;
3✔
503
    } else {
3✔
504
        pred_x = &y_array_type;
1✔
505
    }
1✔
506
    types::Array x_array_type(generic_storage, 8, {}, *pred_x, x_dim_size);
4✔
507
    types::Array* final_type;
4✔
508
    if (symbolic::eq(target_dim, x_symbol)) {
4✔
509
        final_type = &y_array_type;
1✔
510
    } else {
3✔
511
        final_type = &x_array_type;
3✔
512
    }
3✔
513

514
    std::string shared_container_name = "__daisy_shared_" + container_;
4✔
515
    builder.add_container(shared_container_name, *final_type);
4✔
516

517
    // 2. Add barrier before loop
518
    auto& sync_block1 = builder.add_block(seq);
4✔
519

520
    builder.add_library_node<data_flow::BarrierLocalNode>(sync_block1, {});
4✔
521

522
    // 3. add copyin branch before loop
523
    auto& if_else = builder.add_if_else(seq);
4✔
524

525
    auto condition = symbolic::subs(loop_.condition(), loop_.indvar(), symbolic::add(target_dim, offset_));
4✔
526
    auto& branch = builder.add_case(if_else, condition);
4✔
527

528
    auto& copyin_block = builder.add_block(branch);
4✔
529

530
    auto& access_in = builder.add_access(copyin_block, container_);
4✔
531
    auto& access_out = builder.add_access(copyin_block, shared_container_name);
4✔
532

533
    auto& tasklet = builder.add_tasklet(copyin_block, data_flow::TaskletCode::assign, "out_", {"in_"});
4✔
534

535
    std::vector<symbolic::Expression> copyin_subsets;
4✔
536
    for (auto subset : subsets) {
8✔
537
        auto substituted = symbolic::subs(subset, loop_.indvar(), symbolic::add(target_dim, offset_));
8✔
538
        copyin_subsets.push_back(substituted);
8✔
539
    }
8✔
540

541
    builder.add_computational_memlet(copyin_block, access_in, tasklet, "in_", copyin_subsets, *type);
4✔
542

543
    std::vector<symbolic::Expression> shared_access_subsets = {x_symbol, y_symbol, z_symbol, target_dim};
4✔
544

545
    if (symbolic::eq(target_dim, x_symbol)) {
4✔
546
        shared_access_subsets.erase(shared_access_subsets.begin());
1✔
547
    } else if (symbolic::eq(target_dim, y_symbol)) {
3✔
548
        shared_access_subsets.erase(shared_access_subsets.begin() + 1);
3✔
549
    } else if (symbolic::eq(target_dim, z_symbol)) {
3✔
UNCOV
550
        shared_access_subsets.erase(shared_access_subsets.begin() + 2);
×
551
    }
×
552

553
    builder.add_computational_memlet(copyin_block, tasklet, "out_", access_out, shared_access_subsets);
4✔
554

555
    // 4. Add barrier before loop
556

557
    auto& sync_block2 = builder.add_block(seq);
4✔
558

559
    builder.add_library_node<data_flow::BarrierLocalNode>(sync_block2, {});
4✔
560

561
    // 5. replace container in loop
562
    loop_.replace(symbolic::symbol(container_), symbolic::symbol(shared_container_name));
4✔
563

564
    // 6. replace subset expressions in loop
565
    std::vector<symbolic::Expression> read_shared_access_subsets;
4✔
566
    symbolic::Expression substituted_dimension;
4✔
567
    for (auto& subset : shared_access_subsets) {
12✔
568
        auto substituted = symbolic::subs(subset, target_dim, symbolic::sub(loop_.indvar(), offset_));
12✔
569
        read_shared_access_subsets.push_back(substituted);
12✔
570
    }
12✔
571

572
    auto access_node = static_cast<data_flow::AccessNode*>(read->element());
4✔
573
    for (auto& oedge : access_node->get_parent().out_edges(*access_node)) {
4✔
574
        oedge.set_subset(read_shared_access_subsets);
4✔
575
        oedge.set_base_type(*final_type);
4✔
576
    }
4✔
577

578
    // End of transformation
579

580
    passes::SequenceFusion sf_pass;
4✔
581
    passes::DeadCFGElimination dce_pass;
4✔
582
    passes::TrivialArrayElimination tae_pass;
4✔
583
    bool applies = false;
4✔
584
    do {
8✔
585
        applies = false;
8✔
586
        applies |= dce_pass.run(builder, analysis_manager);
8✔
587
        applies |= sf_pass.run(builder, analysis_manager);
8✔
588
        applies |= tae_pass.run(builder, analysis_manager);
8✔
589
    } while (applies);
8✔
590
};
4✔
591

592
// Serializes this transformation into `j`:
//   "transformation_type": name()
//   "parameters": { "offset": <serialized offset_> }
//   "subgraph": { "0": <serialized loop_>, "1": { "element_id", "type": "access_node" } }
void KernelLocalStorage::to_json(nlohmann::json& j) const {
    j["transformation_type"] = this->name();

    nlohmann::json parameters = nlohmann::json::object();
    parameters["offset"] = serializer::JSONSerializer::expression(offset_);
    j["parameters"] = parameters;

    // Entry "0": the loop, written by the serializer
    serializer::JSONSerializer ser_flat(false);
    nlohmann::json loop_entry = nlohmann::json::object();
    ser_flat.serialize_node(loop_entry, loop_);

    // Entry "1": the access node, referenced by element id only
    nlohmann::json access_entry = nlohmann::json::object();
    access_entry["element_id"] = access_node_.element_id();
    access_entry["type"] = "access_node";

    nlohmann::json subgraph = nlohmann::json::object();
    subgraph["0"] = loop_entry;
    subgraph["1"] = access_entry;
    j["subgraph"] = subgraph;
}
2✔
606

607
// Reconstructs a KernelLocalStorage from the description `desc`.
// Reads the loop id from desc["subgraph"]["0"]["element_id"], the access node id from
// desc["subgraph"]["1"]["element_id"] and the offset from desc["parameters"]["offset"].
// Throws InvalidTransformationDescriptionException if the loop element does not exist, is not
// a structured loop, or if the second element cannot be resolved to an access node.
KernelLocalStorage KernelLocalStorage::from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& desc) {
    const auto& subgraph = desc.at("subgraph");

    // Resolve the loop
    const size_t loop_id = subgraph.at("0").at("element_id").get<size_t>();
    auto* loop_element = builder.find_element_by_id(loop_id);
    if (loop_element == nullptr) {
        throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(loop_id) + " not found.");
    }
    auto* target_loop = dynamic_cast<structured_control_flow::StructuredLoop*>(loop_element);
    if (target_loop == nullptr) {
        throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(loop_id) + " is not a loop.");
    }

    // Resolve the access node
    const size_t access_id = subgraph.at("1").at("element_id").get<size_t>();
    auto* access_node = dynamic_cast<data_flow::AccessNode*>(builder.find_element_by_id(access_id));
    if (access_node == nullptr) {
        throw InvalidTransformationDescriptionException(
            "Access node with ID " + std::to_string(access_id) + " not found."
        );
    }

    // Parse the offset expression
    nlohmann::json offset_json = desc.at("parameters").at("offset");
    auto offset = symbolic::parse(offset_json);

    return KernelLocalStorage(*target_loop, offset, *access_node);
}
2✔
636

637
} // namespace transformations
638
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc