• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / sdfglib / 15494289007

06 Jun 2025 03:36PM UTC coverage: 57.304% (-0.4%) from 57.704%
15494289007

push

github

web-flow
Merge pull request #60 from daisytuner/kernels

removes kernel node in favor of function types

78 of 99 new or added lines in 11 files covered. (78.79%)

91 existing lines in 14 files now uncovered.

7583 of 13233 relevant lines covered (57.3%)

116.04 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

81.58
/src/transformations/kernel_local_storage.cpp
1
#include "sdfg/transformations/kernel_local_storage.h"
2

3
#include <tuple>
4
#include <utility>
5

6
#include "sdfg/builder/structured_sdfg_builder.h"
7
#include "sdfg/data_flow/library_node.h"
8
#include "sdfg/passes/structured_control_flow/dead_cfg_elimination.h"
9
#include "sdfg/passes/structured_control_flow/sequence_fusion.h"
10
#include "sdfg/structured_control_flow/if_else.h"
11
#include "sdfg/structured_control_flow/sequence.h"
12
#include "sdfg/symbolic/symbolic.h"
13
#include "sdfg/transformations/utils.h"
14
#include "sdfg/types/array.h"
15
#include "sdfg/types/pointer.h"
16
#include "sdfg/types/scalar.h"
17
#include "sdfg/types/type.h"
18
#include "symengine/integer.h"
19
#include "symengine/symbol.h"
20
#include "symengine/symengine_rcp.h"
21

22
namespace sdfg {
23
namespace transformations {
24

25
KernelLocalStorage::KernelLocalStorage(structured_control_flow::Sequence& parent,
1✔
26
                                       structured_control_flow::For& outer_loop,
27
                                       structured_control_flow::For& inner_loop,
28
                                       std::string container)
29
    : parent_(parent), outer_loop_(outer_loop), inner_loop_(inner_loop), container_(container) {};
1✔
30

31
std::string KernelLocalStorage::name() { return "KernelLocalStorage"; };
×
32

NEW
33
bool KernelLocalStorage::reads_container(std::string container, analysis::UsersView& body_users) {
×
UNCOV
34
    if (body_users.reads(container).size() == 1) {
×
35
        return true;
×
36
    }
37
    return false;
×
38
}
×
39

NEW
40
bool KernelLocalStorage::uses_inner_indvar(analysis::UsersView& body_users) {
×
UNCOV
41
    bool result = false;
×
42
    for (auto& user : body_users.reads(this->container_)) {
×
43
        auto& subsets = user->subsets();
×
44
        if (subsets.size() == 1) {            // TODO: Handle multiple subsets
×
45
            if (subsets.at(0).size() == 1) {  // TODO: Handle multiple dimensions
×
46
                result |= symbolic::uses(subsets.at(0).at(0), inner_loop_.indvar());
×
47
            }
×
48
        }
×
49
    }
×
50
    return result;
×
51
};
×
52

53
std::tuple<symbolic::Integer, symbolic::Integer, symbolic::Integer> KernelLocalStorage::dim_size(
1✔
54
    symbolic::Assumptions& assumptions) {
55
    symbolic::Integer x_dim_size = SymEngine::null;
1✔
56
    symbolic::Integer y_dim_size = SymEngine::null;
1✔
57
    symbolic::Integer z_dim_size = SymEngine::null;
1✔
58

59
    auto x_ub = assumptions[symbolic::blockDim_x()].upper_bound();
1✔
60
    x_dim_size = SymEngine::rcp_static_cast<const SymEngine::Integer>(x_ub);
1✔
61

62
    auto y_ub = assumptions[symbolic::blockDim_y()].upper_bound();
1✔
63
    y_dim_size = SymEngine::rcp_static_cast<const SymEngine::Integer>(y_ub);
1✔
64

65
    auto z_ub = assumptions[symbolic::blockDim_z()].upper_bound();
1✔
66
    z_dim_size = SymEngine::rcp_static_cast<const SymEngine::Integer>(z_ub);
1✔
67

68
    return std::make_tuple(x_dim_size, y_dim_size, z_dim_size);
1✔
69
};
1✔
70

71
bool KernelLocalStorage::can_be_applied(Schedule& schedule) {
1✔
72
    auto& analysis_manager = schedule.analysis_manager();
1✔
73
    auto& builder = schedule.builder();
1✔
74

75
    auto& sdfg = builder.subject();
1✔
76
    if (sdfg.type() != FunctionType::NV_GLOBAL) {
1✔
UNCOV
77
        return false;
×
78
    }
79

80
    auto& inner_body = this->inner_loop_.root();
1✔
81

82
    // Criterion: Container is pointer to scalar type
83
    auto& type = sdfg.type(this->container_);
1✔
84
    auto pointer_type = dynamic_cast<const types::Pointer*>(&type);
1✔
85
    if (!pointer_type) {
1✔
86
        return false;
×
87
    }
88
    if (!dynamic_cast<const types::Scalar*>(&pointer_type->pointee_type())) {
1✔
89
        return false;
×
90
    }
91

92
    // Criterion: Iteration count is known and an Integer
93
    auto& assumptions_analysis = analysis_manager.get<analysis::AssumptionsAnalysis>();
1✔
94
    auto assumptions = assumptions_analysis.get(inner_body);
1✔
95
    symbolic::Integer iteration_count = get_iteration_count(inner_loop_);
1✔
96
    if (iteration_count == SymEngine::null) {
1✔
97
        return false;
×
98
    }
99

100
    // Criterion: All block dimensions are known and an Integer
101
    auto x_ub = assumptions[symbolic::blockDim_x()].upper_bound();
1✔
102
    auto x_lb = assumptions[symbolic::blockDim_x()].lower_bound();
1✔
103
    if (!symbolic::eq(x_ub, x_lb)) {
1✔
NEW
104
        std::cout << "x_ub: " << x_ub->__str__() << " x_lb: " << x_lb->__str__() << std::endl;
×
UNCOV
105
        return false;
×
106
    }
107
    if (!SymEngine::is_a<SymEngine::Integer>(*x_ub)) {
1✔
108
        return false;
×
109
    }
110

111
    auto y_ub = assumptions[symbolic::blockDim_y()].upper_bound();
1✔
112
    auto y_lb = assumptions[symbolic::blockDim_y()].lower_bound();
1✔
113
    if (!symbolic::eq(y_ub, y_lb)) {
1✔
114
        return false;
×
115
    }
116
    if (!SymEngine::is_a<SymEngine::Integer>(*y_ub)) {
1✔
117
        return false;
×
118
    }
119

120
    auto z_ub = assumptions[symbolic::blockDim_z()].upper_bound();
1✔
121
    auto z_lb = assumptions[symbolic::blockDim_z()].lower_bound();
1✔
122
    if (!symbolic::eq(z_ub, z_lb)) {
1✔
123
        return false;
×
124
    }
125
    if (!SymEngine::is_a<SymEngine::Integer>(*z_ub)) {
1✔
126
        return false;
×
127
    }
128

129
    // Criteria related to memory accesses
130
    auto& users = analysis_manager.get<analysis::Users>();
1✔
131
    analysis::UsersView inner_body_users(users, inner_body);
1✔
132

133
    // Criterion: Container is read-only
134
    if (!inner_body_users.writes(this->container_).empty() ||
2✔
135
        !inner_body_users.views(this->container_).empty() ||
2✔
136
        !inner_body_users.moves(this->container_).empty()) {
1✔
137
        return false;
×
138
    }
139
    if (inner_body_users.reads(this->container_).empty()) {
1✔
140
        return false;
×
141
    }
142

143
    // Collect moving symbols
144

145
    // Criterion: Memory accesses do not depend on moving symbols
146
    for (auto& user : inner_body_users.uses(this->container_)) {
2✔
147
        auto& subsets = user->subsets();
1✔
148
        for (auto& subset : subsets) {
2✔
149
            for (auto& expr : subset) {
2✔
150
                for (auto& atom : symbolic::atoms(expr)) {
5✔
151
                    if (SymEngine::is_a<SymEngine::Symbol>(*atom)) {
4✔
152
                        auto symbol = SymEngine::rcp_static_cast<const SymEngine::Symbol>(atom);
4✔
153
                        if (!inner_body_users.moves(symbol->get_name()).empty()) {
4✔
154
                            return false;
×
155
                        }
156
                    }
4✔
157
                }
158
            }
159
        }
160
    }
1✔
161

162
    // Criterion: Check if all memory accesses are affine w.r.t the inner loop index
163

164
    // Limitations: single memory access
165
    if (inner_body_users.reads(this->container_).size() != 1) {
1✔
166
        return false;
×
167
    }
168
    auto read = inner_body_users.reads(this->container_).at(0);
1✔
169
    if (read->subsets().size() != 1) {
1✔
170
        return false;
×
171
    }
172
    auto subset = read->subsets().at(0);
1✔
173
    if (subset.size() != 1) {
1✔
174
        return false;
×
175
    }
176

177
    // Criterion: Memory access is polynomial of
178
    // c_0 * a + c_1 * b + c_2 * c + c_3 * k, where a, b, c are x-threads, y-threads, z-threads
179
    // and k is the inner loop index
180
    auto a = symbolic::add(symbolic::threadIdx_x(),
2✔
181
                           symbolic::mul(symbolic::symbol("blockIdx.x"), symbolic::blockDim_x()));
1✔
182
    auto b = symbolic::add(symbolic::threadIdx_y(),
2✔
183
                           symbolic::mul(symbolic::symbol("blockIdx.y"), symbolic::blockDim_y()));
1✔
184
    auto c = symbolic::add(symbolic::threadIdx_z(),
2✔
185
                           symbolic::mul(symbolic::symbol("blockIdx.z"), symbolic::blockDim_z()));
1✔
186

187
    auto access = subset.at(0);
1✔
188
    access = symbolic::subs(access, a, symbolic::symbol("a"));
1✔
189
    access = symbolic::subs(access, b, symbolic::symbol("b"));
1✔
190
    access = symbolic::subs(access, c, symbolic::symbol("c"));
1✔
191

192
    // TODO: Real structuring of polynomial
193
    /* auto poly = symbolic::polynomial(access);
194
    if (poly == SymEngine::null) {
195
        return false;
196
    } */
197

198
    return true;
1✔
199
};
1✔
200

201
void KernelLocalStorage::apply(Schedule& schedule) {
1✔
202
    auto& analysis_manager = schedule.analysis_manager();
1✔
203
    auto& builder = schedule.builder();
1✔
204
    auto& sdfg = builder.subject();
1✔
205
    auto& users = analysis_manager.get<analysis::Users>();
1✔
206

207
    auto& inner_body = this->inner_loop_.root();
1✔
208
    analysis::UsersView inner_body_users(users, inner_body);
1✔
209

210
    auto& assumptions_analysis = analysis_manager.get<analysis::AssumptionsAnalysis>();
1✔
211
    auto assumptions = assumptions_analysis.get(inner_body);
1✔
212

213
    symbolic::Integer iteration_count = get_iteration_count(inner_loop_);
1✔
214

215
    auto [x_dim_size, y_dim_size, z_dim_size] = dim_size(assumptions);
1✔
216

217
    // calculate shared memory shape
218
    std::tuple<symbolic::Integer, symbolic::Integer, symbolic::Integer, symbolic::Integer>
219
        shared_memory_shape = std::make_tuple(iteration_count, x_dim_size, y_dim_size, z_dim_size);
1✔
220

221
    // Get primitive type of container
222
    const types::Pointer* pointer =
1✔
223
        static_cast<const types::Pointer*>(&sdfg.type(this->container_));
1✔
224
    const types::Scalar* base_type =
1✔
225
        static_cast<const types::Scalar*>(&pointer->pointee_type());  // must be scalar or struct
1✔
226

227
    const types::Scalar type(types::StorageType::NV_Shared, base_type->alignment(), "",
2✔
228
                             base_type->primitive_type());
1✔
229

230
    // Allocate shared memory before the outer loop, starting from z, y, x, iteration_count
231
    types::Array shared_memory(types::StorageType::NV_Shared, type.alignment(), "", type,
2✔
232
                               std::get<0>(shared_memory_shape));
1✔
233
    types::Array shared_memory_x(types::StorageType::NV_Shared, type.alignment(), "", shared_memory,
2✔
234
                                 std::get<1>(shared_memory_shape));
1✔
235
    types::Array shared_memory_y(types::StorageType::NV_Shared, type.alignment(), "",
2✔
236
                                 shared_memory_x, std::get<2>(shared_memory_shape));
1✔
237
    types::Array shared_memory_z(types::StorageType::NV_Shared, type.alignment(), "",
2✔
238
                                 shared_memory_y, std::get<3>(shared_memory_shape));
1✔
239

240
    builder.add_container("__daisy_share_" + this->container_, shared_memory_z);
1✔
241

242
    // Deconstrunct array accesses into dimensions
243
    // Read from global memory to shared memory. Ensure the data access bounds are correct
244
    auto& outer_body = this->outer_loop_.root();
1✔
245

246
    builder.add_container(
2✔
247
        "__daisy_shared_indvar_" + this->container_,
1✔
248
        types::Scalar(types::StorageType::NV_Generic, 0, "", types::PrimitiveType::Int32));
1✔
249

250
    symbolic::Symbol indvar = symbolic::symbol("__daisy_shared_indvar_" + this->container_);
1✔
251
    symbolic::Expression init_expr =
252
        symbolic::subs(inner_loop_.init(), inner_loop_.indvar(), indvar);
1✔
253
    symbolic::Condition condition_expr =
254
        symbolic::subs(inner_loop_.condition(), inner_loop_.indvar(), indvar);
1✔
255
    symbolic::Expression update_expr =
256
        symbolic::subs(inner_loop_.update(), inner_loop_.indvar(), indvar);
1✔
257
    auto& copyin_for = builder
2✔
258
                           .add_for_before(outer_body, this->inner_loop_, indvar, condition_expr,
1✔
259
                                           init_expr, update_expr)
260
                           .first;
1✔
261

262
    auto& copyin_block = builder.add_block(copyin_for.root());
1✔
263

264
    auto& access_node_in = builder.add_access(copyin_block, this->container_);
1✔
265
    auto& access_node_out = builder.add_access(copyin_block, "__daisy_share_" + this->container_);
1✔
266
    auto& tasklet_copy_in = builder.add_tasklet(copyin_block, data_flow::TaskletCode::assign,
2✔
267
                                                {"_out", *base_type}, {{"_in", *base_type}});
1✔
268

269
    symbolic::Expression read_expr =
270
        inner_body_users.reads(this->container_).at(0)->subsets().at(0).at(0);
1✔
271
    read_expr = symbolic::subs(read_expr, inner_loop_.indvar(), indvar);
1✔
272
    builder.add_memlet(copyin_block, access_node_in, "void", tasklet_copy_in, "_in", {read_expr});
1✔
273

274
    // Set the access indices
275

276
    std::tuple<symbolic::Expression, symbolic::Expression, symbolic::Expression,
277
               symbolic::Expression>
278
        shared_access_scheme_write =
279
            std::make_tuple(symbolic::threadIdx_z(), symbolic::threadIdx_y(),
2✔
280
                            symbolic::threadIdx_x(), symbolic::sub(indvar, outer_loop_.indvar()));
1✔
281
    builder.add_memlet(
2✔
282
        copyin_block, tasklet_copy_in, "_out", access_node_out, "void",
1✔
283
        {std::get<0>(shared_access_scheme_write), std::get<1>(shared_access_scheme_write),
1✔
284
         std::get<2>(shared_access_scheme_write), std::get<3>(shared_access_scheme_write)});
1✔
285

286
    // Replace global memory accesses with shared memory accesses
287
    builder.add_container("__daisy_share_wrapper_" + this->container_, *base_type);
1✔
288
    inner_body.replace(symbolic::symbol(this->container_),
2✔
289
                       symbolic::symbol("__daisy_share_wrapper_" + this->container_));
1✔
290

291
    auto& read_block =
1✔
292
        builder.add_block_before(inner_loop_.root(), inner_loop_.root().at(0).first).first;
1✔
293
    auto& read_node_in = builder.add_access(read_block, "__daisy_share_" + this->container_);
1✔
294
    auto& read_node_out =
1✔
295
        builder.add_access(read_block, "__daisy_share_wrapper_" + this->container_);
1✔
296

297
    auto& tasklet_read = builder.add_tasklet(read_block, data_flow::TaskletCode::assign,
2✔
298
                                             {"_out", *base_type}, {{"_in", *base_type}});
1✔
299

300
    std::tuple<symbolic::Expression, symbolic::Expression, symbolic::Expression,
301
               symbolic::Expression>
302
        shared_access_scheme_read = std::make_tuple(
1✔
303
            symbolic::threadIdx_z(), symbolic::threadIdx_y(), symbolic::threadIdx_x(),
1✔
304
            symbolic::sub(inner_loop_.indvar(), outer_loop_.indvar()));
1✔
305

306
    builder.add_memlet(
2✔
307
        read_block, read_node_in, "void", tasklet_read, "_in",
1✔
308
        {std::get<0>(shared_access_scheme_read), std::get<1>(shared_access_scheme_read),
1✔
309
         std::get<2>(shared_access_scheme_read), std::get<3>(shared_access_scheme_read)});
1✔
310

311
    builder.add_memlet(read_block, tasklet_read, "_out", read_node_out, "void", {});
1✔
312

313
    auto& sync_block = builder.add_block_before(outer_body, this->inner_loop_).first;
1✔
314
    builder.add_library_node(sync_block, data_flow::LibraryNodeType::LocalBarrier, {}, {}, true);
1✔
315

316
    // End of transformation
317

318
    analysis_manager.invalidate_all();
1✔
319

320
    passes::SequenceFusion sf_pass;
1✔
321
    passes::DeadCFGElimination dce_pass;
1✔
322
    bool applies = false;
1✔
323
    do {
1✔
324
        applies = false;
1✔
325
        applies |= dce_pass.run(schedule.builder(), analysis_manager);
1✔
326
        applies |= sf_pass.run(schedule.builder(), analysis_manager);
1✔
327
    } while (applies);
1✔
328
};
1✔
329

330
}  // namespace transformations
331
}  // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc