• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / sdfglib / 15075964231

16 May 2025 07:28PM UTC coverage: 63.623% (+0.1%) from 63.496%
15075964231

push

github

web-flow
Merge pull request #16 from daisytuner/segfaults

Enable Wall, Werror and Wpedantic

98 of 120 new or added lines in 39 files covered. (81.67%)

4 existing lines in 4 files now uncovered.

8633 of 13569 relevant lines covered (63.62%)

483.97 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

80.93
/src/transformations/kernel_local_storage.cpp
1
#include "sdfg/transformations/kernel_local_storage.h"
2

3
#include <tuple>
4
#include <utility>
5

6
#include "sdfg/builder/structured_sdfg_builder.h"
7
#include "sdfg/data_flow/library_node.h"
8
#include "sdfg/passes/structured_control_flow/dead_cfg_elimination.h"
9
#include "sdfg/passes/structured_control_flow/sequence_fusion.h"
10
#include "sdfg/structured_control_flow/if_else.h"
11
#include "sdfg/structured_control_flow/kernel.h"
12
#include "sdfg/structured_control_flow/sequence.h"
13
#include "sdfg/symbolic/symbolic.h"
14
#include "sdfg/transformations/utils.h"
15
#include "sdfg/types/array.h"
16
#include "sdfg/types/pointer.h"
17
#include "sdfg/types/scalar.h"
18
#include "sdfg/types/type.h"
19
#include "symengine/integer.h"
20
#include "symengine/symbol.h"
21
#include "symengine/symengine_rcp.h"
22

23
namespace sdfg {
24
namespace transformations {
25

26
// Constructs the transformation from the sequence, the two loops and the name of the
// container it operates on. Nothing is analysed or modified here; the arguments are only
// stored in the corresponding members.
//
// parent:     stored in parent_
// outer_loop: stored in outer_loop_
// inner_loop: stored in inner_loop_
// container:  name of the container, taken by value and moved into container_
KernelLocalStorage::KernelLocalStorage(structured_control_flow::Sequence& parent,
                                       structured_control_flow::For& outer_loop,
                                       structured_control_flow::For& inner_loop,
                                       std::string container)
    : parent_(parent),
      outer_loop_(outer_loop),
      inner_loop_(inner_loop),
      // `container` is a by-value sink parameter, so move it instead of copying it again.
      container_(std::move(container)) {}
1✔
31

32
// Returns the fixed identifier string of this transformation.
std::string KernelLocalStorage::name() {
    return std::string("KernelLocalStorage");
}
×
33

34
bool KernelLocalStorage::reads_container(std::string container, const Sequence& sequence,
×
35
                                         analysis::UsersView& body_users) {
NEW
36
    if (body_users.reads(container).size() == 1) {
×
NEW
37
        return true;
×
38
    }
NEW
39
    return false;
×
40
}
×
41

42
// Returns true if at least one read of container_ recorded in `body_users` has exactly one
// subset with exactly one dimension whose expression uses the inner loop's induction
// variable. Reads with any other subset shape are skipped. The `kernel` and `body`
// arguments are not consulted.
bool KernelLocalStorage::uses_inner_indvar(const structured_control_flow::Kernel* kernel,
                                           const structured_control_flow::Sequence& body,
                                           analysis::UsersView& body_users) {
    bool found = false;
    for (auto& read : body_users.reads(this->container_)) {
        auto& subsets = read->subsets();
        // TODO: Handle multiple subsets
        if (subsets.size() != 1) {
            continue;
        }
        // TODO: Handle multiple dimensions
        if (subsets.at(0).size() != 1) {
            continue;
        }
        if (symbolic::uses(subsets.at(0).at(0), inner_loop_.indvar())) {
            found = true;
        }
    }
    return found;
}
×
56

57
std::tuple<symbolic::Integer, symbolic::Integer, symbolic::Integer> KernelLocalStorage::dim_size(
1✔
58
    const structured_control_flow::Kernel* kernel, symbolic::Assumptions& assumptions) {
59
    symbolic::Integer x_dim_size = SymEngine::null;
1✔
60
    symbolic::Integer y_dim_size = SymEngine::null;
1✔
61
    symbolic::Integer z_dim_size = SymEngine::null;
1✔
62

63
    auto x_ub = assumptions[kernel->blockDim_x()].upper_bound();
1✔
64
    x_dim_size = SymEngine::rcp_static_cast<const SymEngine::Integer>(x_ub);
1✔
65

66
    auto y_ub = assumptions[kernel->blockDim_y()].upper_bound();
1✔
67
    y_dim_size = SymEngine::rcp_static_cast<const SymEngine::Integer>(y_ub);
1✔
68

69
    auto z_ub = assumptions[kernel->blockDim_z()].upper_bound();
1✔
70
    z_dim_size = SymEngine::rcp_static_cast<const SymEngine::Integer>(z_ub);
1✔
71

72
    return std::make_tuple(x_dim_size, y_dim_size, z_dim_size);
1✔
73
};
1✔
74

75
// Checks, without modifying the schedule, whether the transformation is applicable.
// Returns true only if all of the following hold:
//   - the root sequence has exactly one element and that element is a Kernel,
//   - container_ is a pointer to a scalar type,
//   - get_iteration_count() yields a non-null value for the inner loop,
//   - for each block dimension the assumed lower and upper bound are equal and an Integer,
//   - inside the inner loop body container_ is read but never written, viewed or moved,
//   - no symbol appearing in an access to container_ is moved inside the inner loop body,
//   - there is exactly one read, with exactly one subset of exactly one dimension.
bool KernelLocalStorage::can_be_applied(Schedule& schedule) {
    auto& analysis_manager = schedule.analysis_manager();
    auto& subject = schedule.builder().subject();
    auto& top_level = subject.root();
    auto& loop_body = this->inner_loop_.root();

    // Criterion: Check if parent is a kernel
    if (top_level.size() != 1) {
        return false;
    }
    const auto* kernel =
        dynamic_cast<const structured_control_flow::Kernel*>(&top_level.at(0).first);
    if (kernel == nullptr) {
        return false;
    }

    // Criterion: Container is pointer to scalar type
    const auto* pointer_type =
        dynamic_cast<const types::Pointer*>(&subject.type(this->container_));
    if (pointer_type == nullptr) {
        return false;
    }
    const bool points_to_scalar =
        dynamic_cast<const types::Scalar*>(&pointer_type->pointee_type()) != nullptr;
    if (!points_to_scalar) {
        return false;
    }

    // Criterion: Iteration count is known and an Integer
    auto& assumptions_analysis = analysis_manager.get<analysis::AssumptionsAnalysis>();
    auto assumptions = assumptions_analysis.get(loop_body);
    symbolic::Integer trip_count = get_iteration_count(inner_loop_);
    if (trip_count == SymEngine::null) {
        return false;
    }

    // Criterion: All block dimensions are known and an Integer
    // A dimension qualifies when its lower and upper bound coincide and the bound is an Integer.
    const auto is_fixed_integer = [&assumptions](const auto& block_dim) {
        auto upper = assumptions[block_dim].upper_bound();
        auto lower = assumptions[block_dim].lower_bound();
        if (!symbolic::eq(upper, lower)) {
            return false;
        }
        return SymEngine::is_a<SymEngine::Integer>(*upper);
    };
    if (!is_fixed_integer(kernel->blockDim_x())) {
        return false;
    }
    if (!is_fixed_integer(kernel->blockDim_y())) {
        return false;
    }
    if (!is_fixed_integer(kernel->blockDim_z())) {
        return false;
    }

    // Criteria related to memory accesses
    auto& users = analysis_manager.get<analysis::Users>();
    analysis::UsersView body_users(users, loop_body);

    // Criterion: Container is read-only
    const bool never_modified = body_users.writes(this->container_).empty() &&
                                body_users.views(this->container_).empty() &&
                                body_users.moves(this->container_).empty();
    if (!never_modified) {
        return false;
    }
    if (body_users.reads(this->container_).empty()) {
        return false;
    }

    // Criterion: Memory accesses do not depend on moving symbols
    for (auto& user : body_users.uses(this->container_)) {
        for (auto& subset : user->subsets()) {
            for (auto& expr : subset) {
                for (auto& atom : symbolic::atoms(expr)) {
                    if (!SymEngine::is_a<SymEngine::Symbol>(*atom)) {
                        continue;
                    }
                    auto symbol = SymEngine::rcp_static_cast<const SymEngine::Symbol>(atom);
                    if (!body_users.moves(symbol->get_name()).empty()) {
                        return false;
                    }
                }
            }
        }
    }

    // Criterion: Check if all memory accesses are affine w.r.t the inner loop index

    // Limitations: single memory access
    if (body_users.reads(this->container_).size() != 1) {
        return false;
    }
    auto single_read = body_users.reads(this->container_).at(0);
    if (single_read->subsets().size() != 1) {
        return false;
    }
    auto index_list = single_read->subsets().at(0);
    if (index_list.size() != 1) {
        return false;
    }

    // Criterion: Memory access is polynomial of
    // c_0 * a + c_1 * b + c_2 * c + c_3 * k, where a, b, c are x-threads, y-threads, z-threads
    // and k is the inner loop index
    auto global_x = symbolic::add(kernel->threadIdx_x(),
                                  symbolic::mul(kernel->blockIdx_x(), kernel->blockDim_x()));
    auto global_y = symbolic::add(kernel->threadIdx_y(),
                                  symbolic::mul(kernel->blockIdx_y(), kernel->blockDim_y()));
    auto global_z = symbolic::add(kernel->threadIdx_z(),
                                  symbolic::mul(kernel->blockIdx_z(), kernel->blockDim_z()));

    // The substituted expression is not inspected yet; it is the input for the polynomial
    // check sketched in the TODO below.
    auto access = index_list.at(0);
    access = symbolic::subs(access, global_x, symbolic::symbol("a"));
    access = symbolic::subs(access, global_y, symbolic::symbol("b"));
    access = symbolic::subs(access, global_z, symbolic::symbol("c"));

    // TODO: Real structuring of polynomial
    /* auto poly = symbolic::polynomial(access);
    if (poly == SymEngine::null) {
        return false;
    } */

    return true;
}
1✔
209

210
void KernelLocalStorage::apply(Schedule& schedule) {
1✔
211
    auto& analysis_manager = schedule.analysis_manager();
1✔
212
    auto& builder = schedule.builder();
1✔
213
    auto& sdfg = builder.subject();
1✔
214
    auto& users = analysis_manager.get<analysis::Users>();
1✔
215

216
    auto& inner_body = this->inner_loop_.root();
1✔
217
    analysis::UsersView inner_body_users(users, inner_body);
1✔
218

219
    auto& assumptions_analysis = analysis_manager.get<analysis::AssumptionsAnalysis>();
1✔
220
    auto assumptions = assumptions_analysis.get(inner_body);
1✔
221

222
    const sdfg::structured_control_flow::Kernel* kernel =
1✔
223
        dynamic_cast<const sdfg::structured_control_flow::Kernel*>(
1✔
224
            &schedule.sdfg().root().at(0).first);
1✔
225

226
    symbolic::Integer iteration_count = get_iteration_count(inner_loop_);
1✔
227

228
    auto [x_dim_size, y_dim_size, z_dim_size] = dim_size(kernel, assumptions);
1✔
229

230
    // calculate shared memory shape
231
    std::tuple<symbolic::Integer, symbolic::Integer, symbolic::Integer, symbolic::Integer>
232
        shared_memory_shape = std::make_tuple(iteration_count, x_dim_size, y_dim_size, z_dim_size);
1✔
233

234
    // Get primitive type of container
235
    const types::Pointer* pointer =
1✔
236
        static_cast<const types::Pointer*>(&sdfg.type(this->container_));
1✔
237
    const types::Scalar* base_type =
1✔
238
        static_cast<const types::Scalar*>(&pointer->pointee_type());  // must be scalar or struct
1✔
239

240
    const types::Scalar type(base_type->primitive_type(), types::DeviceLocation::nvptx, 3);
1✔
241

242
    // Allocate shared memory before the outer loop, starting from z, y, x, iteration_count
243
    types::Array shared_memory(type, std::get<0>(shared_memory_shape), types::DeviceLocation::nvptx,
1✔
244
                               3);
245
    types::Array shared_memory_x(shared_memory, std::get<1>(shared_memory_shape),
1✔
246
                                 types::DeviceLocation::nvptx, 3);
247
    types::Array shared_memory_y(shared_memory_x, std::get<2>(shared_memory_shape),
1✔
248
                                 types::DeviceLocation::nvptx, 3);
249
    types::Array shared_memory_z(shared_memory_y, std::get<3>(shared_memory_shape),
1✔
250
                                 types::DeviceLocation::nvptx, 3);
251

252
    builder.add_container("__daisy_share_" + this->container_, shared_memory_z);
1✔
253

254
    bool has_tid_x = false;
1✔
255
    bool has_tid_y = false;
1✔
256
    bool has_tid_z = false;
1✔
257
    for (auto container : sdfg.containers()) {
8✔
258
        if (container == kernel->threadIdx_x()->get_name()) {
7✔
259
            has_tid_x = true;
×
260
        }
×
261
        if (container == kernel->threadIdx_y()->get_name()) {
7✔
262
            has_tid_y = true;
×
263
        }
×
264
        if (container == kernel->threadIdx_z()->get_name()) {
7✔
265
            has_tid_z = true;
×
266
        }
×
267
    }
7✔
268
    if (!has_tid_x) {
1✔
269
        builder.add_container(kernel->threadIdx_x()->get_name(),
2✔
270
                              types::Scalar(types::PrimitiveType::Int32));
1✔
271
    }
1✔
272
    if (!has_tid_y) {
1✔
273
        builder.add_container(kernel->threadIdx_y()->get_name(),
2✔
274
                              types::Scalar(types::PrimitiveType::Int32));
1✔
275
    }
1✔
276
    if (!has_tid_z) {
1✔
277
        builder.add_container(kernel->threadIdx_z()->get_name(),
2✔
278
                              types::Scalar(types::PrimitiveType::Int32));
1✔
279
    }
1✔
280

281
    // Deconstrunct array accesses into dimensions
282
    // Read from global memory to shared memory. Ensure the data access bounds are correct
283
    auto& outer_body = this->outer_loop_.root();
1✔
284

285
    builder.add_container("__daisy_shared_indvar_" + this->container_,
2✔
286
                          types::Scalar(types::Scalar(types::PrimitiveType::Int32)));
1✔
287

288
    symbolic::Symbol indvar = symbolic::symbol("__daisy_shared_indvar_" + this->container_);
1✔
289
    symbolic::Expression init_expr =
290
        symbolic::subs(inner_loop_.init(), inner_loop_.indvar(), indvar);
1✔
291
    symbolic::Condition condition_expr =
292
        symbolic::subs(inner_loop_.condition(), inner_loop_.indvar(), indvar);
1✔
293
    symbolic::Expression update_expr =
294
        symbolic::subs(inner_loop_.update(), inner_loop_.indvar(), indvar);
1✔
295
    auto& copyin_for = builder
2✔
296
                           .add_for_before(outer_body, this->inner_loop_, indvar, condition_expr,
1✔
297
                                           init_expr, update_expr)
298
                           .first;
1✔
299

300
    auto& copyin_block = builder.add_block(copyin_for.root());
1✔
301

302
    auto& access_node_in = builder.add_access(copyin_block, this->container_);
1✔
303
    auto& access_node_out = builder.add_access(copyin_block, "__daisy_share_" + this->container_);
1✔
304
    auto& tasklet_copy_in = builder.add_tasklet(copyin_block, data_flow::TaskletCode::assign,
2✔
305
                                                {"_out", *base_type}, {{"_in", *base_type}});
1✔
306

307
    symbolic::Expression read_expr =
308
        inner_body_users.reads(this->container_).at(0)->subsets().at(0).at(0);
1✔
309
    read_expr = symbolic::subs(read_expr, inner_loop_.indvar(), indvar);
1✔
310
    builder.add_memlet(copyin_block, access_node_in, "void", tasklet_copy_in, "_in", {read_expr});
1✔
311

312
    // Set the access indices
313

314
    std::tuple<symbolic::Expression, symbolic::Expression, symbolic::Expression,
315
               symbolic::Expression>
316
        shared_access_scheme_write =
317
            std::make_tuple(kernel->threadIdx_z(), kernel->threadIdx_y(), kernel->threadIdx_x(),
2✔
318
                            symbolic::sub(indvar, outer_loop_.indvar()));
1✔
319
    builder.add_memlet(
2✔
320
        copyin_block, tasklet_copy_in, "_out", access_node_out, "void",
1✔
321
        {std::get<0>(shared_access_scheme_write), std::get<1>(shared_access_scheme_write),
1✔
322
         std::get<2>(shared_access_scheme_write), std::get<3>(shared_access_scheme_write)});
1✔
323

324
    // Replace global memory accesses with shared memory accesses
325
    builder.add_container("__daisy_share_wrapper_" + this->container_, *base_type);
1✔
326
    inner_body.replace(symbolic::symbol(this->container_),
2✔
327
                       symbolic::symbol("__daisy_share_wrapper_" + this->container_));
1✔
328

329
    auto& read_block =
1✔
330
        builder.add_block_before(inner_loop_.root(), inner_loop_.root().at(0).first).first;
1✔
331
    auto& read_node_in = builder.add_access(read_block, "__daisy_share_" + this->container_);
1✔
332
    auto& read_node_out =
1✔
333
        builder.add_access(read_block, "__daisy_share_wrapper_" + this->container_);
1✔
334

335
    auto& tasklet_read = builder.add_tasklet(read_block, data_flow::TaskletCode::assign,
2✔
336
                                             {"_out", *base_type}, {{"_in", *base_type}});
1✔
337

338
    std::tuple<symbolic::Expression, symbolic::Expression, symbolic::Expression,
339
               symbolic::Expression>
340
        shared_access_scheme_read =
341
            std::make_tuple(kernel->threadIdx_z(), kernel->threadIdx_y(), kernel->threadIdx_x(),
2✔
342
                            symbolic::sub(inner_loop_.indvar(), outer_loop_.indvar()));
1✔
343

344
    builder.add_memlet(
2✔
345
        read_block, read_node_in, "void", tasklet_read, "_in",
1✔
346
        {std::get<0>(shared_access_scheme_read), std::get<1>(shared_access_scheme_read),
1✔
347
         std::get<2>(shared_access_scheme_read), std::get<3>(shared_access_scheme_read)});
1✔
348

349
    builder.add_memlet(read_block, tasklet_read, "_out", read_node_out, "void", {});
1✔
350

351
    auto& sync_block = builder.add_block_before(outer_body, this->inner_loop_).first;
1✔
352
    builder.add_library_node(sync_block, data_flow::LibraryNodeType::LocalBarrier, {}, {}, true);
1✔
353

354
    // End of transformation
355

356
    analysis_manager.invalidate_all();
1✔
357

358
    passes::SequenceFusion sf_pass;
1✔
359
    passes::DeadCFGElimination dce_pass;
1✔
360
    bool applies = false;
1✔
361
    do {
1✔
362
        applies = false;
1✔
363
        applies |= dce_pass.run(schedule.builder(), analysis_manager);
1✔
364
        applies |= sf_pass.run(schedule.builder(), analysis_manager);
1✔
365
    } while (applies);
1✔
366
};
1✔
367

368
}  // namespace transformations
369
}  // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc